2 ############################################################
4 # perltidy - a perl script indenter and formatter
6 # Copyright (c) 2000-2016 by Steve Hancock
7 # Distributed under the GPL license agreement; see file COPYING
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License along
20 # with this program; if not, write to the Free Software Foundation, Inc.,
21 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 # For brief instructions, try 'perltidy -h'.
24 # For more complete documentation, try 'man perltidy'
25 # or visit http://perltidy.sourceforge.net
27 # This script is an example of the default style. It was formatted with:
31 # Code Contributions: See ChangeLog.html for a complete history.
32 # Michael Cartmell supplied code for adaptation to VMS and helped with
34 # Hugh S. Myers supplied sub streamhandle and the supporting code to
35 # create a Perl::Tidy module which can operate on strings, arrays, etc.
36 # Yves Orton supplied coding to help detect Windows versions.
37 # Axel Rose supplied a patch for MacPerl.
38 # Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
39 # Dan Tyrell contributed a patch for binary I/O.
40 # Ueli Hugenschmidt contributed a patch for -fpsc
41 # Sam Kington supplied a patch to identify the initial indentation of
43 # jonathan swartz supplied patches for:
44 # * .../ pattern, which looks upwards from directory
45 # * --notidy, to be used in directories where we want to avoid
46 # accidentally tidying
47 # * prefilter and postfilter
50 # Many others have supplied key ideas, suggestions, and bug reports;
51 # see the CHANGES file.
53 ############################################################
57 # Actually should use a version later than about 5.8.5 to use
59 use 5.004; # need IO::File from 5.004 or later
72 $rOpts_character_encoding
75 @ISA = qw( Exporter );
76 @EXPORT = qw( &perltidy );
83 use File::Temp qw(tempfile);
86 ( $VERSION = q($Id: Tidy.pm,v 1.74 2016/03/02 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
91 # given filename and mode (r or w), create an object which:
92 # has a 'getline' method if mode='r', and
93 # has a 'print' method if mode='w'.
94 # The objects also need a 'close' method.
96 # How the object is made:
98 # if $filename is: Make object using:
99 # ---------------- -----------------
100 # '-' (STDIN if mode = 'r', STDOUT if mode='w')
102 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
103 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
105 # (check for 'print' method for 'w' mode)
106 # (check for 'getline' method for 'r' mode)
107 my $ref = ref( my $filename = shift );
114 if ( $ref eq 'ARRAY' ) {
115 $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
117 elsif ( $ref eq 'SCALAR' ) {
118 $New = sub { Perl::Tidy::IOScalar->new(@_) };
122 # Accept an object with a getline method for reading. Note:
123 # IO::File is built-in and does not respond to the defined
124 # operator. If this causes trouble, the check can be
125 # skipped and we can just let it crash if there is no
127 if ( $mode =~ /[rR]/ ) {
129 # RT#97159; part 1 of 2: updated to use 'can'
130 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
131 if ( $ref->can('getline') ) {
132 $New = sub { $filename };
135 $New = sub { undef };
137 ------------------------------------------------------------------------
138 No 'getline' method is defined for object of class $ref
139 Please check your call to Perl::Tidy::perltidy. Trace follows.
140 ------------------------------------------------------------------------
145 # Accept an object with a print method for writing.
146 # See note above about IO::File
147 if ( $mode =~ /[wW]/ ) {
149 # RT#97159; part 2 of 2: updated to use 'can'
150 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
151 if ( $ref->can('print') ) {
152 $New = sub { $filename };
155 $New = sub { undef };
157 ------------------------------------------------------------------------
158 No 'print' method is defined for object of class $ref
159 Please check your call to Perl::Tidy::perltidy. Trace follows.
160 ------------------------------------------------------------------------
169 if ( $filename eq '-' ) {
170 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
173 $New = sub { IO::File->new(@_) };
176 $fh = $New->( $filename, $mode )
177 or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
179 return $fh, ( $ref or $filename );
182 sub find_input_line_ending {
184 # Peek at a file and return first line ending character.
185 # Quietly return undef in case of any trouble.
186 my ($input_file) = @_;
189 # silently ignore input from object or stdin
190 if ( ref($input_file) || $input_file eq '-' ) {
193 open( INFILE, $input_file ) || return $ending;
197 read( INFILE, $buf, 1024 );
199 if ( $buf && $buf =~ /([\012\015]+)/ ) {
203 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
206 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
209 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
223 # concatenate a path and file basename
224 # returns undef in case of error
226 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
228 # use File::Spec if we can
229 unless ($missing_file_spec) {
230 return File::Spec->catfile(@_);
233 # Perl 5.004 systems may not have File::Spec so we'll make
234 # a simple try. We assume File::Basename is available.
235 # return undef if not successful.
237 my $path = join '/', @_;
238 my $test_file = $path . $name;
239 my ( $test_name, $test_path ) = fileparse($test_file);
240 return $test_file if ( $test_name eq $name );
241 return undef if ( $^O eq 'VMS' );
243 # this should work at least for Windows and Unix:
244 $test_file = $path . '/' . $name;
245 ( $test_name, $test_path ) = fileparse($test_file);
246 return $test_file if ( $test_name eq $name );
250 # Here is a map of the flow of data from the input source to the output
253 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
254 # input groups output
255 # lines tokens lines of lines lines
258 # The names correspond to the package names responsible for the unit processes.
260 # The overall process is controlled by the "main" package.
262 # LineSource is the stream of input lines
264 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
265 # if necessary. A token is any section of the input line which should be
266 # manipulated as a single entity during formatting. For example, a single
267 # ',' character is a token, and so is an entire side comment. It handles
268 # the complexities of Perl syntax, such as distinguishing between '<<' as
269 # a shift operator and as a here-document, or distinguishing between '/'
270 # as a divide symbol and as a pattern delimiter.
272 # Formatter inserts and deletes whitespace between tokens, and breaks
273 # sequences of tokens at appropriate points as output lines. It bases its
274 # decisions on the default rules as modified by any command-line options.
276 # VerticalAligner collects groups of lines together and tries to line up
277 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
279 # FileWriter simply writes lines to the output stream.
281 # The Logger package, not shown, records significant events and warning
282 # messages. It writes a .LOG file, which may be saved with a
283 # '-log' or a '-g' flag.
289 destination => undef,
296 dump_options => undef,
297 dump_options_type => undef,
298 dump_getopt_flags => undef,
299 dump_options_category => undef,
300 dump_options_range => undef,
301 dump_abbreviations => undef,
306 # don't overwrite callers ARGV
308 local *STDERR = *STDERR;
312 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
314 my @good_keys = sort keys %defaults;
315 @bad_keys = sort @bad_keys;
317 ------------------------------------------------------------------------
318 Unknown perltidy parameter : (@bad_keys)
319 perltidy only understands : (@good_keys)
320 ------------------------------------------------------------------------
325 my $get_hash_ref = sub {
327 my $hash_ref = $input_hash{$key};
328 if ( defined($hash_ref) ) {
329 unless ( ref($hash_ref) eq 'HASH' ) {
330 my $what = ref($hash_ref);
332 $what ? "but is ref to $what" : "but is not a reference";
334 ------------------------------------------------------------------------
335 error in call to perltidy:
336 -$key must be reference to HASH $but_is
337 ------------------------------------------------------------------------
344 %input_hash = ( %defaults, %input_hash );
345 my $argv = $input_hash{'argv'};
346 my $destination_stream = $input_hash{'destination'};
347 my $errorfile_stream = $input_hash{'errorfile'};
348 my $logfile_stream = $input_hash{'logfile'};
349 my $perltidyrc_stream = $input_hash{'perltidyrc'};
350 my $source_stream = $input_hash{'source'};
351 my $stderr_stream = $input_hash{'stderr'};
352 my $user_formatter = $input_hash{'formatter'};
353 my $prefilter = $input_hash{'prefilter'};
354 my $postfilter = $input_hash{'postfilter'};
356 if ($stderr_stream) {
357 ( $fh_stderr, my $stderr_file ) =
358 Perl::Tidy::streamhandle( $stderr_stream, 'w' );
361 ------------------------------------------------------------------------
362 Unable to redirect STDERR to $stderr_stream
363 Please check value of -stderr in call to perltidy
364 ------------------------------------------------------------------------
369 $fh_stderr = *STDERR;
372 sub Warn ($) { $fh_stderr->print( $_[0] ); }
375 if ( $_[0] ) { goto ERROR_EXIT }
376 else { goto NORMAL_EXIT }
379 sub Die ($) { Warn $_[0]; Exit(1); }
381 # extract various dump parameters
382 my $dump_options_type = $input_hash{'dump_options_type'};
383 my $dump_options = $get_hash_ref->('dump_options');
384 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
385 my $dump_options_category = $get_hash_ref->('dump_options_category');
386 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
387 my $dump_options_range = $get_hash_ref->('dump_options_range');
389 # validate dump_options_type
390 if ( defined($dump_options) ) {
391 unless ( defined($dump_options_type) ) {
392 $dump_options_type = 'perltidyrc';
394 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
396 ------------------------------------------------------------------------
397 Please check value of -dump_options_type in call to perltidy;
398 saw: '$dump_options_type'
399 expecting: 'perltidyrc' or 'full'
400 ------------------------------------------------------------------------
406 $dump_options_type = "";
409 if ($user_formatter) {
411 # if the user defines a formatter, there is no output stream,
412 # but we need a null stream to keep coding simple
413 $destination_stream = Perl::Tidy::DevNull->new();
416 # see if ARGV is overridden
417 if ( defined($argv) ) {
419 my $rargv = ref $argv;
420 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
424 if ( $rargv eq 'ARRAY' ) {
429 ------------------------------------------------------------------------
430 Please check value of -argv in call to perltidy;
431 it must be a string or ref to ARRAY but is: $rargv
432 ------------------------------------------------------------------------
439 my ( $rargv, $msg ) = parse_args($argv);
442 Error parsing this string passed to to perltidy with 'argv':
450 my $rpending_complaint;
451 $$rpending_complaint = "";
452 my $rpending_logfile_message;
453 $$rpending_logfile_message = "";
455 my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
457 # VMS file names are restricted to a 40.40 format, so we append _tdy
458 # instead of .tdy, etc. (but see also sub check_vms_filename)
461 if ( $^O eq 'VMS' ) {
467 $dot_pattern = '\.'; # must escape for use in regex
470 #---------------------------------------------------------------
471 # get command line options
472 #---------------------------------------------------------------
473 my ( $rOpts, $config_file, $rraw_options, $roption_string,
474 $rexpansion, $roption_category, $roption_range )
475 = process_command_line(
476 $perltidyrc_stream, $is_Windows, $Windows_type,
477 $rpending_complaint, $dump_options_type,
480 my $saw_extrude = ( grep m/^-extrude$/, @$rraw_options ) ? 1 : 0;
482 ( grep m/^-(pbp|perl-best-practices)$/, @$rraw_options ) ? 1 : 0;
484 #---------------------------------------------------------------
485 # Handle requests to dump information
486 #---------------------------------------------------------------
488 # return or exit immediately after all dumps
491 # Getopt parameters and their flags
492 if ( defined($dump_getopt_flags) ) {
494 foreach my $op ( @{$roption_string} ) {
503 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
507 $dump_getopt_flags->{$opt} = $flag;
511 if ( defined($dump_options_category) ) {
513 %{$dump_options_category} = %{$roption_category};
516 if ( defined($dump_options_range) ) {
518 %{$dump_options_range} = %{$roption_range};
521 if ( defined($dump_abbreviations) ) {
523 %{$dump_abbreviations} = %{$rexpansion};
526 if ( defined($dump_options) ) {
528 %{$dump_options} = %{$rOpts};
531 Exit 0 if ($quit_now);
533 # make printable string of options for this run as possible diagnostic
534 my $readable_options = readable_options( $rOpts, $roption_string );
536 # dump from command line
537 if ( $rOpts->{'dump-options'} ) {
538 print STDOUT $readable_options;
542 #---------------------------------------------------------------
543 # check parameters and their interactions
544 #---------------------------------------------------------------
546 check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
548 if ($user_formatter) {
549 $rOpts->{'format'} = 'user';
552 # there must be one entry here for every possible format
553 my %default_file_extension = (
559 $rOpts_character_encoding = $rOpts->{'character-encoding'};
561 # be sure we have a valid output format
562 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
563 my $formats = join ' ',
564 sort map { "'" . $_ . "'" } keys %default_file_extension;
565 my $fmt = $rOpts->{'format'};
566 Die "-format='$fmt' but must be one of: $formats\n";
569 my $output_extension = make_extension( $rOpts->{'output-file-extension'},
570 $default_file_extension{ $rOpts->{'format'} }, $dot );
572 # If the backup extension contains a / character then the backup should
573 # be deleted when the -b option is used. On older versions of
574 # perltidy this will generate an error message due to an illegal
577 # A backup file will still be generated but will be deleted
578 # at the end. If -bext='/' then this extension will be
579 # the default 'bak'. Otherwise it will be whatever characters
580 # remains after all '/' characters are removed. For example:
581 # -bext extension slashes
585 # '/dev/null' devnull 2 (Currently not allowed)
586 my $bext = $rOpts->{'backup-file-extension'};
587 my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
589 # At present only one forward slash is allowed. In the future multiple
590 # slashes may be allowed to allow for other options
591 if ( $delete_backup > 1 ) {
592 Die "-bext=$bext contains more than one '/'\n";
595 my $backup_extension =
596 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
598 my $html_toc_extension =
599 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
601 my $html_src_extension =
602 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
604 # check for -b option;
605 # silently ignore unless beautify mode
606 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
607 && $rOpts->{'format'} eq 'tidy';
609 # Turn off -b with warnings in case of conflicts with other options.
610 # NOTE: Do this silently, without warnings, if there is a source or
611 # destination stream, or standard output is used. This is because the -b
612 # flag may have been in a .perltidyrc file and warnings break
613 # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014.
614 if ($in_place_modify) {
615 if ( $rOpts->{'standard-output'} ) {
616 ## my $msg = "Ignoring -b; you may not use -b and -st together";
617 ## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
619 $in_place_modify = 0;
621 if ($destination_stream) {
622 ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n";
623 $in_place_modify = 0;
625 if ( ref($source_stream) ) {
626 ##Warn "Ignoring -b; you may not specify a source array and -b together\n";
627 $in_place_modify = 0;
629 if ( $rOpts->{'outfile'} ) {
630 ##Warn "Ignoring -b; you may not use -b and -o together\n";
631 $in_place_modify = 0;
633 if ( defined( $rOpts->{'output-path'} ) ) {
634 ##Warn "Ignoring -b; you may not use -b and -opath together\n";
635 $in_place_modify = 0;
639 Perl::Tidy::Formatter::check_options($rOpts);
640 if ( $rOpts->{'format'} eq 'html' ) {
641 Perl::Tidy::HtmlWriter->check_options($rOpts);
644 # make the pattern of file extensions that we shouldn't touch
645 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
646 if ($output_extension) {
647 my $ext = quotemeta($output_extension);
648 $forbidden_file_extensions .= "|$ext";
650 if ( $in_place_modify && $backup_extension ) {
651 my $ext = quotemeta($backup_extension);
652 $forbidden_file_extensions .= "|$ext";
654 $forbidden_file_extensions .= ')$';
656 # Create a diagnostics object if requested;
657 # This is only useful for code development
658 my $diagnostics_object = undef;
659 if ( $rOpts->{'DIAGNOSTICS'} ) {
660 $diagnostics_object = Perl::Tidy::Diagnostics->new();
663 # no filenames should be given if input is from an array
664 if ($source_stream) {
667 "You may not specify any filenames when a source array is given\n";
670 # we'll stuff the source array into ARGV
671 unshift( @ARGV, $source_stream );
673 # No special treatment for source stream which is a filename.
674 # This will enable checks for binary files and other bad stuff.
675 $source_stream = undef unless ref($source_stream);
678 # use stdin by default if no source array and no args
680 unshift( @ARGV, '-' ) unless @ARGV;
683 #---------------------------------------------------------------
685 # main loop to process all files in argument list
686 #---------------------------------------------------------------
687 my $number_of_files = @ARGV;
688 my $formatter = undef;
689 my $tokenizer = undef;
690 while ( my $input_file = shift @ARGV ) {
692 my $input_file_permissions;
694 #---------------------------------------------------------------
695 # prepare this input stream
696 #---------------------------------------------------------------
697 if ($source_stream) {
698 $fileroot = "perltidy";
700 # If the source is from an array or string, then .LOG output
701 # is only possible if a logfile stream is specified. This prevents
702 # unexpected perltidy.LOG files.
703 if ( !defined($logfile_stream) ) {
704 $logfile_stream = Perl::Tidy::DevNull->new();
707 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
708 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
709 $in_place_modify = 0;
712 $fileroot = $input_file;
713 unless ( -e $input_file ) {
715 # file doesn't exist - check for a file glob
716 if ( $input_file =~ /([\?\*\[\{])/ ) {
718 # Windows shell may not remove quotes, so do it
719 my $input_file = $input_file;
720 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
721 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
722 my $pattern = fileglob_to_re($input_file);
724 if ( !$@ && opendir( DIR, './' ) ) {
726 grep { /$pattern/ && !-d $_ } readdir(DIR);
729 unshift @ARGV, @files;
734 Warn "skipping file: '$input_file': no matches found\n";
738 unless ( -f $input_file ) {
739 Warn "skipping file: $input_file: not a regular file\n";
743 # As a safety precaution, skip zero length files.
744 # If for example a source file got clobbered somehow,
745 # the old .tdy or .bak files might still exist so we
746 # shouldn't overwrite them with zero length files.
747 unless ( -s $input_file ) {
748 Warn "skipping file: $input_file: Zero size\n";
752 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
754 "skipping file: $input_file: Non-text (override with -f)\n";
758 # we should have a valid filename now
759 $fileroot = $input_file;
760 $input_file_permissions = ( stat $input_file )[2] & 07777;
762 if ( $^O eq 'VMS' ) {
763 ( $fileroot, $dot ) = check_vms_filename($fileroot);
766 # add option to change path here
767 if ( defined( $rOpts->{'output-path'} ) ) {
769 my ( $base, $old_path ) = fileparse($fileroot);
770 my $new_path = $rOpts->{'output-path'};
771 unless ( -d $new_path ) {
772 unless ( mkdir $new_path, 0777 ) {
773 Die "unable to create directory $new_path: $!\n";
776 my $path = $new_path;
777 $fileroot = catfile( $path, $base );
780 ------------------------------------------------------------------------
781 Problem combining $new_path and $base to make a filename; check -opath
782 ------------------------------------------------------------------------
788 # Skip files with same extension as the output files because
789 # this can lead to a messy situation with files like
790 # script.tdy.tdy.tdy ... or worse problems ... when you
791 # rerun perltidy over and over with wildcard input.
794 && ( $input_file =~ /$forbidden_file_extensions/o
795 || $input_file eq 'DIAGNOSTICS' )
798 Warn "skipping file: $input_file: wrong extension\n";
802 # the 'source_object' supplies a method to read the input file
804 Perl::Tidy::LineSource->new( $input_file, $rOpts,
805 $rpending_logfile_message );
806 next unless ($source_object);
808 # Prefilters and postfilters: The prefilter is a code reference
809 # that will be applied to the source before tidying, and the
810 # postfilter is a code reference to the result before outputting.
813 || ( $rOpts_character_encoding
814 && $rOpts_character_encoding eq 'utf8' )
818 while ( my $line = $source_object->get_line() ) {
822 $buf = $prefilter->($buf) if $prefilter;
824 if ( $rOpts_character_encoding
825 && $rOpts_character_encoding eq 'utf8'
826 && !utf8::is_utf8($buf) )
829 $buf = Encode::decode( 'UTF-8', $buf,
830 Encode::FB_CROAK | Encode::LEAVE_SRC );
834 "skipping file: $input_file: Unable to decode source as UTF-8\n";
839 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
840 $rpending_logfile_message );
843 # register this file name with the Diagnostics package
844 $diagnostics_object->set_input_file($input_file)
845 if $diagnostics_object;
847 #---------------------------------------------------------------
848 # prepare the output stream
849 #---------------------------------------------------------------
850 my $output_file = undef;
851 my $actual_output_extension;
853 if ( $rOpts->{'outfile'} ) {
855 if ( $number_of_files <= 1 ) {
857 if ( $rOpts->{'standard-output'} ) {
858 my $msg = "You may not use -o and -st together";
859 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
862 elsif ($destination_stream) {
864 "You may not specify a destination array and -o together\n";
866 elsif ( defined( $rOpts->{'output-path'} ) ) {
867 Die "You may not specify -o and -opath together\n";
869 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
870 Die "You may not specify -o and -oext together\n";
872 $output_file = $rOpts->{outfile};
874 # make sure user gives a file name after -o
875 if ( $output_file =~ /^-/ ) {
876 Die "You must specify a valid filename after -o\n";
879 # do not overwrite input file with -o
880 if ( defined($input_file_permissions)
881 && ( $output_file eq $input_file ) )
883 Die "Use 'perltidy -b $input_file' to modify in-place\n";
887 Die "You may not use -o with more than one input file\n";
890 elsif ( $rOpts->{'standard-output'} ) {
891 if ($destination_stream) {
893 "You may not specify a destination array and -st together\n";
894 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
899 if ( $number_of_files <= 1 ) {
902 Die "You may not use -st with more than one input file\n";
905 elsif ($destination_stream) {
906 $output_file = $destination_stream;
908 elsif ($source_stream) { # source but no destination goes to stdout
911 elsif ( $input_file eq '-' ) {
915 if ($in_place_modify) {
916 $output_file = IO::File->new_tmpfile()
917 or Die "cannot open temp file for -b option: $!\n";
920 $actual_output_extension = $output_extension;
921 $output_file = $fileroot . $output_extension;
925 # the 'sink_object' knows how to write the output file
926 my $tee_file = $fileroot . $dot . "TEE";
928 my $line_separator = $rOpts->{'output-line-ending'};
929 if ( $rOpts->{'preserve-line-endings'} ) {
930 $line_separator = find_input_line_ending($input_file);
933 # Eventually all I/O may be done with binmode, but for now it is
934 # only done when a user requests a particular line separator
935 # through the -ple or -ole flags
936 my $binmode = defined($line_separator)
937 || defined($rOpts_character_encoding);
938 $line_separator = "\n" unless defined($line_separator);
940 my ( $sink_object, $postfilter_buffer );
943 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
944 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
948 Perl::Tidy::LineSink->new( $output_file, $tee_file,
949 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
952 #---------------------------------------------------------------
953 # initialize the error logger for this file
954 #---------------------------------------------------------------
955 my $warning_file = $fileroot . $dot . "ERR";
956 if ($errorfile_stream) { $warning_file = $errorfile_stream }
957 my $log_file = $fileroot . $dot . "LOG";
958 if ($logfile_stream) { $log_file = $logfile_stream }
961 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
962 $fh_stderr, $saw_extrude );
963 write_logfile_header(
964 $rOpts, $logger_object, $config_file,
965 $rraw_options, $Windows_type, $readable_options,
967 if ($$rpending_logfile_message) {
968 $logger_object->write_logfile_entry($$rpending_logfile_message);
970 if ($$rpending_complaint) {
971 $logger_object->complain($$rpending_complaint);
974 #---------------------------------------------------------------
975 # initialize the debug object, if any
976 #---------------------------------------------------------------
977 my $debugger_object = undef;
978 if ( $rOpts->{DEBUG} ) {
980 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
983 #---------------------------------------------------------------
984 # loop over iterations for one source stream
985 #---------------------------------------------------------------
987 # We will do a convergence test if 3 or more iterations are allowed.
988 # It would be pointless for fewer because we have to make at least
989 # two passes before we can see if we are converged, and the test
990 # would just slow things down.
991 my $max_iterations = $rOpts->{'iterations'};
992 my $convergence_log_message;
994 my $do_convergence_test = $max_iterations > 2;
995 if ($do_convergence_test) {
996 eval "use Digest::MD5 qw(md5_hex)";
997 $do_convergence_test = !$@;
999 # Trying to avoid problems with ancient versions of perl because
1000 # I don't know in which version number utf8::encode was introduced.
1001 eval { my $string = "perltidy"; utf8::encode($string) };
1002 $do_convergence_test = $do_convergence_test && !$@;
1005 # save objects to allow redirecting output during iterations
1006 my $sink_object_final = $sink_object;
1007 my $debugger_object_final = $debugger_object;
1008 my $logger_object_final = $logger_object;
1010 for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
1012 # send output stream to temp buffers until last iteration
1014 if ( $iter < $max_iterations ) {
1016 Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
1017 $line_separator, $rOpts, $rpending_logfile_message,
1021 $sink_object = $sink_object_final;
1024 # Save logger, debugger output only on pass 1 because:
1025 # (1) line number references must be to the starting
1026 # source, not an intermediate result, and
1027 # (2) we need to know if there are errors so we can stop the
1028 # iterations early if necessary.
1030 $debugger_object = undef;
1031 $logger_object = undef;
1034 #------------------------------------------------------------
1035 # create a formatter for this file : html writer or
1037 #------------------------------------------------------------
1039 # we have to delete any old formatter because, for safety,
1040 # the formatter will check to see that there is only one.
1043 if ($user_formatter) {
1044 $formatter = $user_formatter;
1046 elsif ( $rOpts->{'format'} eq 'html' ) {
1048 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1049 $actual_output_extension, $html_toc_extension,
1050 $html_src_extension );
1052 elsif ( $rOpts->{'format'} eq 'tidy' ) {
1053 $formatter = Perl::Tidy::Formatter->new(
1054 logger_object => $logger_object,
1055 diagnostics_object => $diagnostics_object,
1056 sink_object => $sink_object,
1060 Die "I don't know how to do -format=$rOpts->{'format'}\n";
1063 unless ($formatter) {
1064 Die "Unable to continue with $rOpts->{'format'} formatting\n";
1067 #---------------------------------------------------------------
1068 # create the tokenizer for this file
1069 #---------------------------------------------------------------
1070 $tokenizer = undef; # must destroy old tokenizer
1071 $tokenizer = Perl::Tidy::Tokenizer->new(
1072 source_object => $source_object,
1073 logger_object => $logger_object,
1074 debugger_object => $debugger_object,
1075 diagnostics_object => $diagnostics_object,
1076 tabsize => $tabsize,
1078 starting_level => $rOpts->{'starting-indentation-level'},
1079 indent_columns => $rOpts->{'indent-columns'},
1080 look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
1081 look_for_autoloader => $rOpts->{'look-for-autoloader'},
1082 look_for_selfloader => $rOpts->{'look-for-selfloader'},
1083 trim_qw => $rOpts->{'trim-qw'},
1084 extended_syntax => $rOpts->{'extended-syntax'},
1086 continuation_indentation =>
1087 $rOpts->{'continuation-indentation'},
1088 outdent_labels => $rOpts->{'outdent-labels'},
1091 #---------------------------------------------------------------
1093 #---------------------------------------------------------------
1094 process_this_file( $tokenizer, $formatter );
1096 #---------------------------------------------------------------
1097 # close the input source and report errors
1098 #---------------------------------------------------------------
1099 $source_object->close_input_file();
1101 # line source for next iteration (if any) comes from the current
1102 # temporary output buffer
1103 if ( $iter < $max_iterations ) {
1105 $sink_object->close_output_file();
1107 Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1108 $rpending_logfile_message );
1110 # stop iterations if errors or converged
1111 my $stop_now = $logger_object->{_warning_count};
1113 $convergence_log_message = <<EOM;
1114 Stopping iterations because of errors.
1117 elsif ($do_convergence_test) {
1119 # Patch for [rt.cpan.org #88020]
1120 # Use utf8::encode since md5_hex() only operates on bytes.
1121 my $digest = md5_hex( utf8::encode($sink_buffer) );
1122 if ( !$saw_md5{$digest} ) {
1123 $saw_md5{$digest} = $iter;
1127 # Deja vu, stop iterating
1129 my $iterm = $iter - 1;
1130 if ( $saw_md5{$digest} != $iterm ) {
1132 # Blinking (oscillating) between two stable
1133 # end states. This has happened in the past
1134 # but at present there are no known instances.
1135 $convergence_log_message = <<EOM;
1136 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1138 $diagnostics_object->write_diagnostics(
1139 $convergence_log_message)
1140 if $diagnostics_object;
1143 $convergence_log_message = <<EOM;
1144 Converged. Output for iteration $iter same as for iter $iterm.
1146 $diagnostics_object->write_diagnostics(
1147 $convergence_log_message)
1148 if $diagnostics_object && $iterm > 2;
1151 } ## end if ($do_convergence_test)
1155 # we are stopping the iterations early;
1156 # copy the output stream to its final destination
1157 $sink_object = $sink_object_final;
1158 while ( my $line = $source_object->get_line() ) {
1159 $sink_object->write_line($line);
1161 $source_object->close_input_file();
1164 } ## end if ( $iter < $max_iterations)
1165 } # end loop over iterations for one source file
1167 # restore objects which have been temporarily undefined
1168 # for second and higher iterations
1169 $debugger_object = $debugger_object_final;
1170 $logger_object = $logger_object_final;
1172 $logger_object->write_logfile_entry($convergence_log_message)
1173 if $convergence_log_message;
1175 #---------------------------------------------------------------
1176 # Perform any postfilter operation
1177 #---------------------------------------------------------------
1179 $sink_object->close_output_file();
1181 Perl::Tidy::LineSink->new( $output_file, $tee_file,
1182 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
1183 my $buf = $postfilter->($postfilter_buffer);
1185 Perl::Tidy::LineSource->new( \$buf, $rOpts,
1186 $rpending_logfile_message );
1187 while ( my $line = $source_object->get_line() ) {
1188 $sink_object->write_line($line);
1190 $source_object->close_input_file();
1193 # Save names of the input and output files for syntax check
1194 my $ifname = $input_file;
1195 my $ofname = $output_file;
1197 #---------------------------------------------------------------
1198 # handle the -b option (backup and modify in-place)
1199 #---------------------------------------------------------------
1200 if ($in_place_modify) {
1201 unless ( -f $input_file ) {
1203 # oh, oh, no real file to backup ..
1204 # shouldn't happen because of numerous preliminary checks
1206 "problem with -b backing up input file '$input_file': not a file\n";
1208 my $backup_name = $input_file . $backup_extension;
1209 if ( -f $backup_name ) {
1210 unlink($backup_name)
1212 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1215 # backup the input file
1216 # we use copy for symlinks, move for regular files
1217 if ( -l $input_file ) {
1218 File::Copy::copy( $input_file, $backup_name )
1219 or Die "File::Copy failed trying to backup source: $!";
1222 rename( $input_file, $backup_name )
1224 "problem renaming $input_file to $backup_name for -b option: $!\n";
1226 $ifname = $backup_name;
1228 # copy the output to the original input file
1229 # NOTE: it would be nice to just close $output_file and use
1230 # File::Copy::copy here, but in this case $output_file is the
1231 # handle of an open nameless temporary file so we would lose
1232 # everything if we closed it.
1233 seek( $output_file, 0, 0 )
1234 or Die "unable to rewind a temporary file for -b option: $!\n";
1235 my $fout = IO::File->new("> $input_file")
1237 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1240 while ( $line = $output_file->getline() ) {
1241 $fout->print($line);
1244 $output_file = $input_file;
1245 $ofname = $input_file;
1248 #---------------------------------------------------------------
1249 # clean up and report errors
1250 #---------------------------------------------------------------
1251 $sink_object->close_output_file() if $sink_object;
1252 $debugger_object->close_debug_file() if $debugger_object;
1254 # set output file permissions
1255 if ( $output_file && -f $output_file && !-l $output_file ) {
1256 if ($input_file_permissions) {
1258 # give output script same permissions as input script, but
1259 # make it user-writable or else we can't run perltidy again.
1260 # Thus we retain whatever executable flags were set.
1261 if ( $rOpts->{'format'} eq 'tidy' ) {
1262 chmod( $input_file_permissions | 0600, $output_file );
1265 # else use default permissions for html and any other format
1269 #---------------------------------------------------------------
1270 # Do syntax check if requested and possible
1271 #---------------------------------------------------------------
1272 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
1274 && $rOpts->{'check-syntax'}
1279 check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1282 #---------------------------------------------------------------
1283 # remove the original file for in-place modify as follows:
1284 # $delete_backup=0 never
1285 # $delete_backup=1 only if no errors
1286 # $delete_backup>1 always : NOT ALLOWED, too risky, see above
1287 #---------------------------------------------------------------
1288 if ( $in_place_modify
1291 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1294 # As an added safety precaution, do not delete the source file
1295 # if its size has dropped from positive to zero, since this
1296 # could indicate a disaster of some kind, including a hardware
1297 # failure. Actually, this could happen if you had a file of
1298 # all comments (or pod) and deleted everything with -dac (-dap)
1300 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1302 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1308 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1312 $logger_object->finish( $infile_syntax_ok, $formatter )
1314 } # end of main loop to process all files
1321 } # end of main program perltidy
1323 sub get_stream_as_named_file {
1325 # Return the name of a file containing a stream of data, creating
1326 # a temporary file if necessary.
1328 # $stream - the name of a file or stream
1330 # $fname = name of file if possible, or undef
1331 # $if_tmpfile = true if temp file, undef if not temp file
1333 # This routine is needed for passing actual files to Perl for
1339 if ( ref($stream) ) {
1340 my ( $fh_stream, $fh_name ) =
1341 Perl::Tidy::streamhandle( $stream, 'r' );
1343 my ( $fout, $tmpnam ) = File::Temp::tempfile();
1348 while ( my $line = $fh_stream->getline() ) {
1349 $fout->print($line);
1353 $fh_stream->close();
1356 elsif ( $stream ne '-' && -f $stream ) {
1360 return ( $fname, $is_tmpfile );
1363 sub fileglob_to_re {
1365 # modified (corrected) from version in find2perl
1367 $x =~ s#([./^\$()])#\\$1#g; # escape special characters
1368 $x =~ s#\*#.*#g; # '*' -> '.*'
1369 $x =~ s#\?#.#g; # '?' -> '.'
1370 "^$x\\z"; # match whole word
1373 sub make_extension {
1375 # Make a file extension, including any leading '.' if necessary
1376 # The '.' may actually be an '_' under VMS
1377 my ( $extension, $default, $dot ) = @_;
1379 # Use the default if none specified
1380 $extension = $default unless ($extension);
1382 # Only extensions with these leading characters get a '.'
1383 # This rule gives the user some freedom
1384 if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1385 $extension = $dot . $extension;
1390 sub write_logfile_header {
1392 $rOpts, $logger_object, $config_file,
1393 $rraw_options, $Windows_type, $readable_options
1395 $logger_object->write_logfile_entry(
1396 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1398 if ($Windows_type) {
1399 $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1401 my $options_string = join( ' ', @$rraw_options );
1404 $logger_object->write_logfile_entry(
1405 "Found Configuration File >>> $config_file \n");
1407 $logger_object->write_logfile_entry(
1408 "Configuration and command line parameters for this run:\n");
1409 $logger_object->write_logfile_entry("$options_string\n");
1411 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1412 $rOpts->{'logfile'} = 1; # force logfile to be saved
1413 $logger_object->write_logfile_entry(
1414 "Final parameter set for this run\n");
1415 $logger_object->write_logfile_entry(
1416 "------------------------------------\n");
1418 $logger_object->write_logfile_entry($readable_options);
1420 $logger_object->write_logfile_entry(
1421 "------------------------------------\n");
1423 $logger_object->write_logfile_entry(
1424 "To find error messages search for 'WARNING' with your editor\n");
1427 sub generate_options {
1429 ######################################################################
1430 # Generate and return references to:
1431 # @option_string - the list of options to be passed to Getopt::Long
1432 # @defaults - the list of default options
1433 # %expansion - a hash showing how all abbreviations are expanded
1434 # %category - a hash giving the general category of each option
1435 # %option_range - a hash giving the valid ranges of certain options
1437 # Note: a few options are not documented in the man page and usage
1438 # message. This is because these are experimental or debug options and
1439 # may or may not be retained in future versions.
1441 # Here are the undocumented flags as far as I know. Any of them
1442 # may disappear at any time. They are mainly for fine-tuning
1445 # fll --> fuzzy-line-length # a trivial parameter which gets
1446 # turned off for the extrude option
1447 # which is mainly for debugging
1448 # scl --> short-concatenation-item-length # helps break at '.'
1449 # recombine # for debugging line breaks
1450 # valign # for debugging vertical alignment
1451 # I --> DIAGNOSTICS # for debugging
1452 ######################################################################
1454 # here is a summary of the Getopt codes:
1455 # <none> does not take an argument
1456 # =s takes a mandatory string
1457 # :s takes an optional string (DO NOT USE - filenames will get eaten up)
1458 # =i takes a mandatory integer
1459 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1460 # ! does not take an argument and may be negated
1461 # i.e., -foo and -nofoo are allowed
1462 # a double dash signals the end of the options list
1464 #---------------------------------------------------------------
1465 # Define the option string passed to GetOptions.
1466 #---------------------------------------------------------------
1468 my @option_string = ();
1470 my %option_category = ();
1471 my %option_range = ();
1472 my $rexpansion = \%expansion;
1474 # names of categories in manual
1475 # leading integers will allow sorting
1476 my @category_name = (
1478 '1. Basic formatting options',
1479 '2. Code indentation control',
1480 '3. Whitespace control',
1481 '4. Comment controls',
1482 '5. Linebreak controls',
1483 '6. Controlling list formatting',
1484 '7. Retaining or ignoring existing line breaks',
1485 '8. Blank line control',
1486 '9. Other controls',
1488 '11. pod2html options',
1489 '12. Controlling HTML properties',
1493 # These options are parsed directly by perltidy:
1496 # However, they are included in the option set so that they will
1497 # be seen in the options dump.
1499 # These long option names have no abbreviations or are treated specially
1500 @option_string = qw(
1510 my $category = 13; # Debugging
1511 foreach (@option_string) {
1512 my $opt = $_; # must avoid changing the actual flag
1514 $option_category{$opt} = $category_name[$category];
1517 $category = 11; # HTML
1518 $option_category{html} = $category_name[$category];
1520 # routine to install and check options
1521 my $add_option = sub {
1522 my ( $long_name, $short_name, $flag ) = @_;
1523 push @option_string, $long_name . $flag;
1524 $option_category{$long_name} = $category_name[$category];
1526 if ( $expansion{$short_name} ) {
1527 my $existing_name = $expansion{$short_name}[0];
1529 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1531 $expansion{$short_name} = [$long_name];
1532 if ( $flag eq '!' ) {
1533 my $nshort_name = 'n' . $short_name;
1534 my $nolong_name = 'no' . $long_name;
1535 if ( $expansion{$nshort_name} ) {
1536 my $existing_name = $expansion{$nshort_name}[0];
1538 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1540 $expansion{$nshort_name} = [$nolong_name];
1545 # Install long option names which have a simple abbreviation.
1546 # Options with code '!' get standard negation ('no' for long names,
1547 # 'n' for abbreviations). Categories follow the manual.
1549 ###########################
1550 $category = 0; # I/O_Control
1551 ###########################
1552 $add_option->( 'backup-and-modify-in-place', 'b', '!' );
1553 $add_option->( 'backup-file-extension', 'bext', '=s' );
1554 $add_option->( 'force-read-binary', 'f', '!' );
1555 $add_option->( 'format', 'fmt', '=s' );
1556 $add_option->( 'iterations', 'it', '=i' );
1557 $add_option->( 'logfile', 'log', '!' );
1558 $add_option->( 'logfile-gap', 'g', ':i' );
1559 $add_option->( 'outfile', 'o', '=s' );
1560 $add_option->( 'output-file-extension', 'oext', '=s' );
1561 $add_option->( 'output-path', 'opath', '=s' );
1562 $add_option->( 'profile', 'pro', '=s' );
1563 $add_option->( 'quiet', 'q', '!' );
1564 $add_option->( 'standard-error-output', 'se', '!' );
1565 $add_option->( 'standard-output', 'st', '!' );
1566 $add_option->( 'warning-output', 'w', '!' );
1567 $add_option->( 'character-encoding', 'enc', '=s' );
1569 # options which are both toggle switches and values moved here
1570 # to hide from tidyview (which does not show category 0 flags):
1571 # -ole moved here from category 1
1572 # -sil moved here from category 2
1573 $add_option->( 'output-line-ending', 'ole', '=s' );
1574 $add_option->( 'starting-indentation-level', 'sil', '=i' );
1576 ########################################
1577 $category = 1; # Basic formatting options
1578 ########################################
1579 $add_option->( 'check-syntax', 'syn', '!' );
1580 $add_option->( 'entab-leading-whitespace', 'et', '=i' );
1581 $add_option->( 'indent-columns', 'i', '=i' );
1582 $add_option->( 'maximum-line-length', 'l', '=i' );
1583 $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
1584 $add_option->( 'whitespace-cycle', 'wc', '=i' );
1585 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1586 $add_option->( 'preserve-line-endings', 'ple', '!' );
1587 $add_option->( 'tabs', 't', '!' );
1588 $add_option->( 'default-tabsize', 'dt', '=i' );
1589 $add_option->( 'extended-syntax', 'xs', '!' );
1591 ########################################
1592 $category = 2; # Code indentation control
1593 ########################################
1594 $add_option->( 'continuation-indentation', 'ci', '=i' );
1595 $add_option->( 'line-up-parentheses', 'lp', '!' );
1596 $add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1597 $add_option->( 'outdent-keywords', 'okw', '!' );
1598 $add_option->( 'outdent-labels', 'ola', '!' );
1599 $add_option->( 'outdent-long-quotes', 'olq', '!' );
1600 $add_option->( 'indent-closing-brace', 'icb', '!' );
1601 $add_option->( 'closing-token-indentation', 'cti', '=i' );
1602 $add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1603 $add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1604 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1605 $add_option->( 'brace-left-and-indent', 'bli', '!' );
1606 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1608 ########################################
1609 $category = 3; # Whitespace control
1610 ########################################
1611 $add_option->( 'add-semicolons', 'asc', '!' );
1612 $add_option->( 'add-whitespace', 'aws', '!' );
1613 $add_option->( 'block-brace-tightness', 'bbt', '=i' );
1614 $add_option->( 'brace-tightness', 'bt', '=i' );
1615 $add_option->( 'delete-old-whitespace', 'dws', '!' );
1616 $add_option->( 'delete-semicolons', 'dsm', '!' );
1617 $add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1618 $add_option->( 'nowant-left-space', 'nwls', '=s' );
1619 $add_option->( 'nowant-right-space', 'nwrs', '=s' );
1620 $add_option->( 'paren-tightness', 'pt', '=i' );
1621 $add_option->( 'space-after-keyword', 'sak', '=s' );
1622 $add_option->( 'space-for-semicolon', 'sfs', '!' );
1623 $add_option->( 'space-function-paren', 'sfp', '!' );
1624 $add_option->( 'space-keyword-paren', 'skp', '!' );
1625 $add_option->( 'space-terminal-semicolon', 'sts', '!' );
1626 $add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1627 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1628 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1629 $add_option->( 'tight-secret-operators', 'tso', '!' );
1630 $add_option->( 'trim-qw', 'tqw', '!' );
1631 $add_option->( 'trim-pod', 'trp', '!' );
1632 $add_option->( 'want-left-space', 'wls', '=s' );
1633 $add_option->( 'want-right-space', 'wrs', '=s' );
1635 ########################################
1636 $category = 4; # Comment controls
1637 ########################################
1638 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1639 $add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1640 $add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1641 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1642 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1643 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1644 $add_option->( 'closing-side-comments', 'csc', '!' );
1645 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
1646 $add_option->( 'format-skipping', 'fs', '!' );
1647 $add_option->( 'format-skipping-begin', 'fsb', '=s' );
1648 $add_option->( 'format-skipping-end', 'fse', '=s' );
1649 $add_option->( 'hanging-side-comments', 'hsc', '!' );
1650 $add_option->( 'indent-block-comments', 'ibc', '!' );
1651 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1652 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
1653 $add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1654 $add_option->( 'outdent-long-comments', 'olc', '!' );
1655 $add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1656 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1657 $add_option->( 'static-block-comments', 'sbc', '!' );
1658 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1659 $add_option->( 'static-side-comments', 'ssc', '!' );
1660 $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' );
1662 ########################################
1663 $category = 5; # Linebreak controls
1664 ########################################
1665 $add_option->( 'add-newlines', 'anl', '!' );
1666 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1667 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1668 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1669 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1670 $add_option->( 'cuddled-else', 'ce', '!' );
1671 $add_option->( 'delete-old-newlines', 'dnl', '!' );
1672 $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
1673 $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1674 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1675 $add_option->( 'opening-paren-right', 'opr', '!' );
1676 $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1677 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
1678 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1679 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1680 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1681 $add_option->( 'stack-closing-block-brace', 'scbb', '!' );
1682 $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1683 $add_option->( 'stack-closing-paren', 'scp', '!' );
1684 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1685 $add_option->( 'stack-opening-block-brace', 'sobb', '!' );
1686 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1687 $add_option->( 'stack-opening-paren', 'sop', '!' );
1688 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1689 $add_option->( 'vertical-tightness', 'vt', '=i' );
1690 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1691 $add_option->( 'want-break-after', 'wba', '=s' );
1692 $add_option->( 'want-break-before', 'wbb', '=s' );
1693 $add_option->( 'break-after-all-operators', 'baao', '!' );
1694 $add_option->( 'break-before-all-operators', 'bbao', '!' );
1695 $add_option->( 'keep-interior-semicolons', 'kis', '!' );
1697 ########################################
1698 $category = 6; # Controlling list formatting
1699 ########################################
1700 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1701 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1702 $add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1704 ########################################
1705 $category = 7; # Retaining or ignoring existing line breaks
1706 ########################################
1707 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1708 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1709 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1710 $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1711 $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1713 ########################################
1714 $category = 8; # Blank line control
1715 ########################################
1716 $add_option->( 'blanks-before-blocks', 'bbb', '!' );
1717 $add_option->( 'blanks-before-comments', 'bbc', '!' );
1718 $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
1719 $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
1720 $add_option->( 'long-block-line-count', 'lbl', '=i' );
1721 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1722 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
1724 ########################################
1725 $category = 9; # Other controls
1726 ########################################
1727 $add_option->( 'delete-block-comments', 'dbc', '!' );
1728 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1729 $add_option->( 'delete-pod', 'dp', '!' );
1730 $add_option->( 'delete-side-comments', 'dsc', '!' );
1731 $add_option->( 'tee-block-comments', 'tbc', '!' );
1732 $add_option->( 'tee-pod', 'tp', '!' );
1733 $add_option->( 'tee-side-comments', 'tsc', '!' );
1734 $add_option->( 'look-for-autoloader', 'lal', '!' );
1735 $add_option->( 'look-for-hash-bang', 'x', '!' );
1736 $add_option->( 'look-for-selfloader', 'lsl', '!' );
1737 $add_option->( 'pass-version-line', 'pvl', '!' );
1739 ########################################
1740 $category = 13; # Debugging
1741 ########################################
1742 $add_option->( 'DEBUG', 'D', '!' );
1743 $add_option->( 'DIAGNOSTICS', 'I', '!' );
1744 $add_option->( 'dump-defaults', 'ddf', '!' );
1745 $add_option->( 'dump-long-names', 'dln', '!' );
1746 $add_option->( 'dump-options', 'dop', '!' );
1747 $add_option->( 'dump-profile', 'dpro', '!' );
1748 $add_option->( 'dump-short-names', 'dsn', '!' );
1749 $add_option->( 'dump-token-types', 'dtt', '!' );
1750 $add_option->( 'dump-want-left-space', 'dwls', '!' );
1751 $add_option->( 'dump-want-right-space', 'dwrs', '!' );
1752 $add_option->( 'fuzzy-line-length', 'fll', '!' );
1753 $add_option->( 'help', 'h', '' );
1754 $add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1755 $add_option->( 'show-options', 'opt', '!' );
1756 $add_option->( 'version', 'v', '' );
1757 $add_option->( 'memoize', 'mem', '!' );
1759 #---------------------------------------------------------------------
1761 # The Perl::Tidy::HtmlWriter will add its own options to the string
1762 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1764 ########################################
1765 # Set categories 10, 11, 12
1766 ########################################
1767 # Based on their known order
1768 $category = 12; # HTML properties
1769 foreach my $opt (@option_string) {
1770 my $long_name = $opt;
1771 $long_name =~ s/(!|=.*|:.*)$//;
1772 unless ( defined( $option_category{$long_name} ) ) {
1773 if ( $long_name =~ /^html-linked/ ) {
1774 $category = 10; # HTML options
1776 elsif ( $long_name =~ /^pod2html/ ) {
1777 $category = 11; # Pod2html
1779 $option_category{$long_name} = $category_name[$category];
1783 #---------------------------------------------------------------
1784 # Assign valid ranges to certain options
1785 #---------------------------------------------------------------
1786 # In the future, these may be used to make preliminary checks
1787 # hash keys are long names
1788 # If key or value is undefined:
1789 # strings may have any value
1790 # integer ranges are >=0
1791 # If value is defined:
1792 # value is [qw(any valid words)] for strings
1793 # value is [min, max] for integers
1794 # if min is undefined, there is no lower limit
1795 # if max is undefined, there is no upper limit
1796 # Parameters not listed here have defaults
1798 'format' => [ 'tidy', 'html', 'user' ],
1799 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
1800 'character-encoding' => [ 'none', 'utf8' ],
1802 'block-brace-tightness' => [ 0, 2 ],
1803 'brace-tightness' => [ 0, 2 ],
1804 'paren-tightness' => [ 0, 2 ],
1805 'square-bracket-tightness' => [ 0, 2 ],
1807 'block-brace-vertical-tightness' => [ 0, 2 ],
1808 'brace-vertical-tightness' => [ 0, 2 ],
1809 'brace-vertical-tightness-closing' => [ 0, 2 ],
1810 'paren-vertical-tightness' => [ 0, 2 ],
1811 'paren-vertical-tightness-closing' => [ 0, 2 ],
1812 'square-bracket-vertical-tightness' => [ 0, 2 ],
1813 'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1814 'vertical-tightness' => [ 0, 2 ],
1815 'vertical-tightness-closing' => [ 0, 2 ],
1817 'closing-brace-indentation' => [ 0, 3 ],
1818 'closing-paren-indentation' => [ 0, 3 ],
1819 'closing-square-bracket-indentation' => [ 0, 3 ],
1820 'closing-token-indentation' => [ 0, 3 ],
1822 'closing-side-comment-else-flag' => [ 0, 2 ],
1823 'comma-arrow-breakpoints' => [ 0, 5 ],
1826 # Note: we could actually allow negative ci if someone really wants it:
1827 # $option_range{'continuation-indentation'} = [ undef, undef ];
1829 #---------------------------------------------------------------
1830 # Assign default values to the above options here, except
1831 # for 'outfile' and 'help'.
1832 # These settings should approximate the perlstyle(1) suggestions.
1833 #---------------------------------------------------------------
1838 blanks-before-blocks
1839 blanks-before-comments
1840 blank-lines-before-subs=1
1841 blank-lines-before-packages=1
1842 block-brace-tightness=0
1843 block-brace-vertical-tightness=0
1845 brace-vertical-tightness-closing=0
1846 brace-vertical-tightness=0
1847 break-at-old-logical-breakpoints
1848 break-at-old-ternary-breakpoints
1849 break-at-old-attribute-breakpoints
1850 break-at-old-keyword-breakpoints
1851 comma-arrow-breakpoints=5
1853 closing-side-comment-interval=6
1854 closing-side-comment-maximum-text=20
1855 closing-side-comment-else-flag=0
1856 closing-side-comments-balanced
1857 closing-paren-indentation=0
1858 closing-brace-indentation=0
1859 closing-square-bracket-indentation=0
1860 continuation-indentation=2
1865 hanging-side-comments
1866 indent-block-comments
1869 keep-old-blank-lines=1
1870 long-block-line-count=8
1873 maximum-consecutive-blank-lines=1
1874 maximum-fields-per-table=0
1875 maximum-line-length=80
1877 minimum-space-to-comment=4
1878 nobrace-left-and-indent
1880 nodelete-old-whitespace
1885 nostatic-side-comments
1888 character-encoding=none
1891 outdent-long-comments
1893 paren-vertical-tightness-closing=0
1894 paren-vertical-tightness=0
1898 short-concatenation-item-length=8
1900 square-bracket-tightness=1
1901 square-bracket-vertical-tightness-closing=0
1902 square-bracket-vertical-tightness=0
1903 static-block-comments
1906 backup-file-extension=bak
1911 html-table-of-contents
1915 push @defaults, "perl-syntax-check-flags=-c -T";
1917 #---------------------------------------------------------------
1918 # Define abbreviations which will be expanded into the above primitives.
1919 # These may be defined recursively.
1920 #---------------------------------------------------------------
1923 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1924 'fnl' => [qw(freeze-newlines)],
1925 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1926 'fws' => [qw(freeze-whitespace)],
1927 'freeze-blank-lines' =>
1928 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1929 'fbl' => [qw(freeze-blank-lines)],
1930 'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1931 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1932 'nooutdent-long-lines' =>
1933 [qw(nooutdent-long-quotes nooutdent-long-comments)],
1934 'noll' => [qw(nooutdent-long-lines)],
1935 'io' => [qw(indent-only)],
1936 'delete-all-comments' =>
1937 [qw(delete-block-comments delete-side-comments delete-pod)],
1938 'nodelete-all-comments' =>
1939 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1940 'dac' => [qw(delete-all-comments)],
1941 'ndac' => [qw(nodelete-all-comments)],
1942 'gnu' => [qw(gnu-style)],
1943 'pbp' => [qw(perl-best-practices)],
1944 'tee-all-comments' =>
1945 [qw(tee-block-comments tee-side-comments tee-pod)],
1946 'notee-all-comments' =>
1947 [qw(notee-block-comments notee-side-comments notee-pod)],
1948 'tac' => [qw(tee-all-comments)],
1949 'ntac' => [qw(notee-all-comments)],
1950 'html' => [qw(format=html)],
1951 'nhtml' => [qw(format=tidy)],
1952 'tidy' => [qw(format=tidy)],
1954 'utf8' => [qw(character-encoding=utf8)],
1955 'UTF8' => [qw(character-encoding=utf8)],
1957 'swallow-optional-blank-lines' => [qw(kbl=0)],
1958 'noswallow-optional-blank-lines' => [qw(kbl=1)],
1959 'sob' => [qw(kbl=0)],
1960 'nsob' => [qw(kbl=1)],
1962 'break-after-comma-arrows' => [qw(cab=0)],
1963 'nobreak-after-comma-arrows' => [qw(cab=1)],
1964 'baa' => [qw(cab=0)],
1965 'nbaa' => [qw(cab=1)],
1967 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
1968 'bbs' => [qw(blbs=1 blbp=1)],
1969 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
1970 'nbbs' => [qw(blbs=0 blbp=0)],
1972 'break-at-old-trinary-breakpoints' => [qw(bot)],
1974 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1975 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1976 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1977 'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1978 'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1980 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1981 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1982 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1983 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1984 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1986 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1987 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1988 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1990 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1991 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1992 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1994 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1995 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1996 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1998 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1999 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
2000 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
2002 'otr' => [qw(opr ohbr osbr)],
2003 'opening-token-right' => [qw(opr ohbr osbr)],
2004 'notr' => [qw(nopr nohbr nosbr)],
2005 'noopening-token-right' => [qw(nopr nohbr nosbr)],
2007 'sot' => [qw(sop sohb sosb)],
2008 'nsot' => [qw(nsop nsohb nsosb)],
2009 'stack-opening-tokens' => [qw(sop sohb sosb)],
2010 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
2012 'sct' => [qw(scp schb scsb)],
2013 'stack-closing-tokens' => => [qw(scp schb scsb)],
2014 'nsct' => [qw(nscp nschb nscsb)],
2015 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
2017 'sac' => [qw(sot sct)],
2018 'nsac' => [qw(nsot nsct)],
2019 'stack-all-containers' => [qw(sot sct)],
2020 'nostack-all-containers' => [qw(nsot nsct)],
2022 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2023 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2024 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2025 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2026 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2027 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2029 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)],
2030 'sobb' => [qw(bbvt=2 bbvtl=*)],
2031 'nostack-opening-block-brace' => [qw(bbvt=0)],
2032 'nsobb' => [qw(bbvt=0)],
2034 'converge' => [qw(it=4)],
2035 'noconverge' => [qw(it=1)],
2036 'conv' => [qw(it=4)],
2037 'nconv' => [qw(it=1)],
2039 # 'mangle' originally deleted pod and comments, but to keep it
2040 # reversible, it no longer does. But if you really want to
2041 # delete them, just use:
2044 # An interesting use for 'mangle' is to do this:
2045 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2046 # which will form as many one-line blocks as possible
2051 keep-old-blank-lines=0
2053 delete-old-whitespace
2056 maximum-consecutive-blank-lines=0
2057 maximum-line-length=100000
2061 noblanks-before-blocks
2062 blank-lines-before-subs=0
2063 blank-lines-before-packages=0
2068 # 'extrude' originally deleted pod and comments, but to keep it
2069 # reversible, it no longer does. But if you really want to
2070 # delete them, just use
2073 # An interesting use for 'extrude' is to do this:
2074 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2075 # which will break up all one-line blocks.
2082 delete-old-whitespace
2085 maximum-consecutive-blank-lines=0
2086 maximum-line-length=1
2089 noblanks-before-blocks
2090 blank-lines-before-subs=0
2091 blank-lines-before-packages=0
2098 # this style tries to follow the GNU Coding Standards (which do
2099 # not really apply to perl but which are followed by some perl
2103 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2107 # Style suggested in Damian Conway's Perl Best Practices
2108 'perl-best-practices' => [
2109 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2110 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2113 # Additional styles can be added here
2116 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2118 # Uncomment next line to dump all expansions for debugging:
2119 # dump_short_names(\%expansion);
2121 \@option_string, \@defaults, \%expansion,
2122 \%option_category, \%option_range
2125 } # end of generate_options
2127 # Memoize process_command_line. Given same @ARGV passed in, return same
2128 # values and same @ARGV back.
2129 # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
2130 # up masontidy (https://metacpan.org/module/masontidy)
2132 my %process_command_line_cache;
2134 sub process_command_line {
2137 $perltidyrc_stream, $is_Windows, $Windows_type,
2138 $rpending_complaint, $dump_options_type
2141 my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
2143 my $cache_key = join( chr(28), @ARGV );
2144 if ( my $result = $process_command_line_cache{$cache_key} ) {
2145 my ( $argv, @retvals ) = @$result;
2150 my @retvals = _process_command_line(@_);
2151 $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
2152 if $retvals[0]->{'memoize'};
2157 return _process_command_line(@_);
2161 # (note the underscore here)
2162 sub _process_command_line {
2165 $perltidyrc_stream, $is_Windows, $Windows_type,
2166 $rpending_complaint, $dump_options_type
2172 $roption_string, $rdefaults, $rexpansion,
2173 $roption_category, $roption_range
2174 ) = generate_options();
2176 #---------------------------------------------------------------
2177 # set the defaults by passing the above list through GetOptions
2178 #---------------------------------------------------------------
2184 # do not load the defaults if we are just dumping perltidyrc
2185 unless ( $dump_options_type eq 'perltidyrc' ) {
2186 for $i (@$rdefaults) { push @ARGV, "--" . $i }
2189 # Patch to save users Getopt::Long configuration
2190 # and set to Getopt::Long defaults. Use eval to avoid
2191 # breaking old versions of Perl without these routines.
2193 eval { $glc = Getopt::Long::Configure() };
2195 eval { Getopt::Long::ConfigDefaults() };
2197 else { $glc = undef }
2199 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2200 Die "Programming Bug: error in setting default options";
2203 # Patch to put the previous Getopt::Long configuration back
2204 eval { Getopt::Long::Configure($glc) } if defined $glc;
2208 my @raw_options = ();
2209 my $config_file = "";
2210 my $saw_ignore_profile = 0;
2211 my $saw_dump_profile = 0;
2214 #---------------------------------------------------------------
2215 # Take a first look at the command-line parameters. Do as many
2216 # immediate dumps as possible, which can avoid confusion if the
2217 # perltidyrc file has an error.
2218 #---------------------------------------------------------------
2219 foreach $i (@ARGV) {
2222 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2223 $saw_ignore_profile = 1;
2226 # note: this must come before -pro and -profile, below:
2227 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2228 $saw_dump_profile = 1;
2230 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2233 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2237 # resolve <dir>/.../<file>, meaning look upwards from directory
2238 if ( defined($config_file) ) {
2239 if ( my ( $start_dir, $search_file ) =
2240 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2242 $start_dir = '.' if !$start_dir;
2243 $start_dir = Cwd::realpath($start_dir);
2244 if ( my $found_file =
2245 find_file_upwards( $start_dir, $search_file ) )
2247 $config_file = $found_file;
2251 unless ( -e $config_file ) {
2252 Warn "cannot find file given with -pro=$config_file: $!\n";
2256 elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2257 Die "usage: -pro=filename or --profile=filename, no spaces\n";
2259 elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2263 elsif ( $i =~ /^-(version|v)$/ ) {
2267 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2268 dump_defaults(@$rdefaults);
2271 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2272 dump_long_names(@$roption_string);
2275 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2276 dump_short_names($rexpansion);
2279 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2280 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2285 if ( $saw_dump_profile && $saw_ignore_profile ) {
2286 Warn "No profile to dump because of -npro\n";
2290 #---------------------------------------------------------------
2291 # read any .perltidyrc configuration file
2292 #---------------------------------------------------------------
2293 unless ($saw_ignore_profile) {
2295 # resolve possible conflict between $perltidyrc_stream passed
2296 # as call parameter to perltidy and -pro=filename on command
2298 if ($perltidyrc_stream) {
2301 Conflict: a perltidyrc configuration file was specified both as this
2302 perltidy call parameter: $perltidyrc_stream
2303 and with this -profile=$config_file.
2304 Using -profile=$config_file.
2308 $config_file = $perltidyrc_stream;
2312 # look for a config file if we don't have one yet
2313 my $rconfig_file_chatter;
2314 $$rconfig_file_chatter = "";
2316 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2317 $rpending_complaint )
2318 unless $config_file;
2320 # open any config file
2323 ( $fh_config, $config_file ) =
2324 Perl::Tidy::streamhandle( $config_file, 'r' );
2325 unless ($fh_config) {
2326 $$rconfig_file_chatter .=
2327 "# $config_file exists but cannot be opened\n";
2331 if ($saw_dump_profile) {
2332 dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2338 my ( $rconfig_list, $death_message ) =
2339 read_config_file( $fh_config, $config_file, $rexpansion );
2340 Die $death_message if ($death_message);
2342 # process any .perltidyrc parameters right now so we can
2344 if (@$rconfig_list) {
2345 local @ARGV = @$rconfig_list;
2347 expand_command_abbreviations( $rexpansion, \@raw_options,
2350 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2352 "Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
2355 # Anything left in this local @ARGV is an error and must be
2356 # invalid bare words from the configuration file. We cannot
2357 # check this earlier because bare words may have been valid
2358 # values for parameters. We had to wait for GetOptions to have
2362 my $str = "\'" . pop(@ARGV) . "\'";
2363 while ( my $param = pop(@ARGV) ) {
2364 if ( length($str) < 70 ) {
2365 $str .= ", '$param'";
2373 There are $count unrecognized values in the configuration file '$config_file':
2375 Use leading dashes for parameters. Use -npro to ignore this file.
2379 # Undo any options which cause premature exit. They are not
2380 # appropriate for a config file, and it could be hard to
2381 # diagnose the cause of the premature exit.
2390 dump-want-left-space
2391 dump-want-right-space
2399 if ( defined( $Opts{$_} ) ) {
2401 Warn "ignoring --$_ in config file: $config_file\n";
2408 #---------------------------------------------------------------
2409 # now process the command line parameters
2410 #---------------------------------------------------------------
2411 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2413 local $SIG{'__WARN__'} = sub { Warn $_[0] };
2414 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2415 Die "Error on command line; for help try 'perltidy -h'\n";
2418 return ( \%Opts, $config_file, \@raw_options, $roption_string,
2419 $rexpansion, $roption_category, $roption_range );
2420 } # end of _process_command_line
2424 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2426 #---------------------------------------------------------------
2427 # check and handle any interactions among the basic options..
2428 #---------------------------------------------------------------
2430 # Since -vt, -vtc, and -cti are abbreviations, but under
2431 # msdos, an unquoted input parameter like vtc=1 will be
2432 # seen as 2 parameters, vtc and 1, so the abbreviations
2433 # won't be seen. Therefore, we will catch them here if
2436 if ( defined $rOpts->{'vertical-tightness'} ) {
2437 my $vt = $rOpts->{'vertical-tightness'};
2438 $rOpts->{'paren-vertical-tightness'} = $vt;
2439 $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2440 $rOpts->{'brace-vertical-tightness'} = $vt;
2443 if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2444 my $vtc = $rOpts->{'vertical-tightness-closing'};
2445 $rOpts->{'paren-vertical-tightness-closing'} = $vtc;
2446 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2447 $rOpts->{'brace-vertical-tightness-closing'} = $vtc;
2450 if ( defined $rOpts->{'closing-token-indentation'} ) {
2451 my $cti = $rOpts->{'closing-token-indentation'};
2452 $rOpts->{'closing-square-bracket-indentation'} = $cti;
2453 $rOpts->{'closing-brace-indentation'} = $cti;
2454 $rOpts->{'closing-paren-indentation'} = $cti;
2457 # In quiet mode, there is no log file and hence no way to report
2458 # results of syntax check, so don't do it.
2459 if ( $rOpts->{'quiet'} ) {
2460 $rOpts->{'check-syntax'} = 0;
2463 # can't check syntax if no output
2464 if ( $rOpts->{'format'} ne 'tidy' ) {
2465 $rOpts->{'check-syntax'} = 0;
2468 # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2469 # wide variety of nasty problems on these systems, because they cannot
2470 # reliably run backticks. Don't even think about changing this!
2471 if ( $rOpts->{'check-syntax'}
2473 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2475 $rOpts->{'check-syntax'} = 0;
2478 # It's really a bad idea to check syntax as root unless you wrote
2479 # the script yourself. FIXME: not sure if this works with VMS
2480 unless ($is_Windows) {
2482 if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2483 $rOpts->{'check-syntax'} = 0;
2484 $$rpending_complaint .=
2485 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2489 # check iteration count and quietly fix if necessary:
2490 # - iterations option only applies to code beautification mode
2491 # - the convergence check should stop most runs on iteration 2, and
2492 # virtually all on iteration 3. But we'll allow up to 6.
2493 if ( $rOpts->{'format'} ne 'tidy' ) {
2494 $rOpts->{'iterations'} = 1;
2496 elsif ( defined( $rOpts->{'iterations'} ) ) {
2497 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2498 elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
2501 $rOpts->{'iterations'} = 1;
2504 # check for reasonable number of blank lines and fix to avoid problems
2505 if ( $rOpts->{'blank-lines-before-subs'} ) {
2506 if ( $rOpts->{'blank-lines-before-subs'} < 0 ) {
2507 $rOpts->{'blank-lines-before-subs'} = 0;
2508 Warn "negative value of -blbs, setting 0\n";
2510 if ( $rOpts->{'blank-lines-before-subs'} > 100 ) {
2511 Warn "unreasonably large value of -blbs, reducing\n";
2512 $rOpts->{'blank-lines-before-subs'} = 100;
2515 if ( $rOpts->{'blank-lines-before-packages'} ) {
2516 if ( $rOpts->{'blank-lines-before-packages'} < 0 ) {
2517 Warn "negative value of -blbp, setting 0\n";
2518 $rOpts->{'blank-lines-before-packages'} = 0;
2520 if ( $rOpts->{'blank-lines-before-packages'} > 100 ) {
2521 Warn "unreasonably large value of -blbp, reducing\n";
2522 $rOpts->{'blank-lines-before-packages'} = 100;
2526 # setting a non-negative logfile gap causes logfile to be saved
2527 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2528 $rOpts->{'logfile'} = 1;
2531 # set short-cut flag when only indentation is to be done.
2532 # Note that the user may or may not have already set the
2534 if ( !$rOpts->{'add-whitespace'}
2535 && !$rOpts->{'delete-old-whitespace'}
2536 && !$rOpts->{'add-newlines'}
2537 && !$rOpts->{'delete-old-newlines'} )
2539 $rOpts->{'indent-only'} = 1;
2542 # -isbc implies -ibc
2543 if ( $rOpts->{'indent-spaced-block-comments'} ) {
2544 $rOpts->{'indent-block-comments'} = 1;
2547 # -bli flag implies -bl
2548 if ( $rOpts->{'brace-left-and-indent'} ) {
2549 $rOpts->{'opening-brace-on-new-line'} = 1;
2552 if ( $rOpts->{'opening-brace-always-on-right'}
2553 && $rOpts->{'opening-brace-on-new-line'} )
2556 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2557 'opening-brace-on-new-line' (-bl). Ignoring -bl.
2559 $rOpts->{'opening-brace-on-new-line'} = 0;
2562 # it simplifies things if -bl is 0 rather than undefined
2563 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2564 $rOpts->{'opening-brace-on-new-line'} = 0;
2567 # -sbl defaults to -bl if not defined
2568 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2569 $rOpts->{'opening-sub-brace-on-new-line'} =
2570 $rOpts->{'opening-brace-on-new-line'};
2573 if ( $rOpts->{'entab-leading-whitespace'} ) {
2574 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2575 Warn "-et=n must use a positive integer; ignoring -et\n";
2576 $rOpts->{'entab-leading-whitespace'} = undef;
2579 # entab leading whitespace has priority over the older 'tabs' option
2580 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2583 # set a default tabsize to be used in guessing the starting indentation
2584 # level if and only if this run does not use tabs and the old code does
2586 if ( $rOpts->{'default-tabsize'} ) {
2587 if ( $rOpts->{'default-tabsize'} < 0 ) {
2588 Warn "negative value of -dt, setting 0\n";
2589 $rOpts->{'default-tabsize'} = 0;
2591 if ( $rOpts->{'default-tabsize'} > 20 ) {
2592 Warn "unreasonably large value of -dt, reducing\n";
2593 $rOpts->{'default-tabsize'} = 20;
2597 $rOpts->{'default-tabsize'} = 8;
2600 # Define $tabsize, the number of spaces per tab for use in
2601 # guessing the indentation of source lines with leading tabs.
2602 # Assume same as for this run if tabs are used , otherwise assume
2603 # a default value, typically 8
2605 $rOpts->{'entab-leading-whitespace'}
2606 ? $rOpts->{'entab-leading-whitespace'}
2607 : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
2608 : $rOpts->{'default-tabsize'};
2612 sub find_file_upwards {
2613 my ( $search_dir, $search_file ) = @_;
2615 $search_dir =~ s{/+$}{};
2616 $search_file =~ s{^/+}{};
2619 my $try_path = "$search_dir/$search_file";
2620 if ( -f $try_path ) {
2623 elsif ( $search_dir eq '/' ) {
2627 $search_dir = dirname($search_dir);
2632 sub expand_command_abbreviations {
2634 # go through @ARGV and expand any abbreviations
2636 my ( $rexpansion, $rraw_options, $config_file ) = @_;
2639 # set a pass limit to prevent an infinite loop;
2640 # 10 should be plenty, but it may be increased to allow deeply
2641 # nested expansions.
2642 my $max_passes = 10;
2645 # keep looping until all expansions have been converted into actual
2647 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2649 my $abbrev_count = 0;
2651 # loop over each item in @ARGV..
2652 foreach $word (@ARGV) {
2654 # convert any leading 'no-' to just 'no'
2655 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2657 # if it is a dash flag (instead of a file name)..
2658 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2663 # save the raw input for debug output in case of circular refs
2664 if ( $pass_count == 0 ) {
2665 push( @$rraw_options, $word );
2668 # recombine abbreviation and flag, if necessary,
2669 # to allow abbreviations with arguments such as '-vt=1'
2670 if ( $rexpansion->{ $abr . $flags } ) {
2671 $abr = $abr . $flags;
2675 # if we see this dash item in the expansion hash..
2676 if ( $rexpansion->{$abr} ) {
2679 # stuff all of the words that it expands to into the
2680 # new arg list for the next pass
2681 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2682 next unless $abbrev; # for safety; shouldn't happen
2683 push( @new_argv, '--' . $abbrev . $flags );
2687 # not in expansion hash, must be actual long name
2689 push( @new_argv, $word );
2693 # not a dash item, so just save it for the next pass
2695 push( @new_argv, $word );
2697 } # end of this pass
2699 # update parameter list @ARGV to the new one
2701 last unless ( $abbrev_count > 0 );
2703 # make sure we are not in an infinite loop
2704 if ( $pass_count == $max_passes ) {
2707 I'm tired. We seem to be in an infinite loop trying to expand aliases.
2708 Here are the raw options;
2711 my $num = @new_argv;
2714 After $max_passes passes here is ARGV
2720 After $max_passes passes ARGV has $num entries
2726 Please check your configuration file $config_file for circular-references.
2727 To deactivate it, use -npro.
2732 Program bug - circular-references in the %expansion hash, probably due to
2733 a recent program change.
2736 } # end of check for circular references
2737 } # end of loop over all passes
2740 # Debug routine -- this will dump the expansion hash
2741 sub dump_short_names {
2742 my $rexpansion = shift;
2744 List of short names. This list shows how all abbreviations are
2745 translated into other abbreviations and, eventually, into long names.
2746 New abbreviations may be defined in a .perltidyrc file.
2747 For a list of all long names, use perltidy --dump-long-names (-dln).
2748 --------------------------------------------------------------------------
2750 foreach my $abbrev ( sort keys %$rexpansion ) {
2751 my @list = @{ $$rexpansion{$abbrev} };
2752 print STDOUT "$abbrev --> @list\n";
2756 sub check_vms_filename {
2758 # given a valid filename (the perltidy input file)
2759 # create a modified filename and separator character
2762 # Contributed by Michael Cartmell
2764 my ( $base, $path ) = fileparse( $_[0] );
2766 # remove explicit ; version
2767 $base =~ s/;-?\d*$//
2769 # remove explicit . version ie two dots in filename NB ^ escapes a dot
2770 or $base =~ s/( # begin capture $1
2771 (?:^|[^^])\. # match a dot not preceded by a caret
2772 (?: # followed by nothing
2774 .*[^^] # anything ending in a non caret
2777 \.-?\d*$ # match . version number
2780 # normalise filename, if there are no unescaped dots then append one
2781 $base .= '.' unless $base =~ /(?:^|[^^])\./;
2783 # if we don't already have an extension then we just append the extension
2784 my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2785 return ( $path . $base, $separator );
2790 # TODO: are these more standard names?
2791 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2793 # Returns a string that determines what MS OS we are on.
2794 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2795 # Returns blank string if not an MS system.
2796 # Original code contributed by: Yves Orton
2797 # We need to know this to decide where to look for config files
2799 my $rpending_complaint = shift;
2801 return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2803 # Systems built from Perl source may not have Win32.pm
2804 # But probably have Win32::GetOSVersion() anyway so the
2805 # following line is not 'required':
2806 # return $os unless eval('require Win32');
2808 # Use the standard API call to determine the version
2809 my ( $undef, $major, $minor, $build, $id );
2810 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2813 # NAME ID MAJOR MINOR
2814 # Windows NT 4 2 4 0
2815 # Windows 2000 2 5 0
2817 # Windows Server 2003 2 5 2
2819 return "win32s" unless $id; # If id==0 then its a win32s box.
2820 $os = { # Magic numbers from MSDN
2821 # documentation of GetOSVersion
2828 0 => "2000", # or NT 4, see below
2835 # If $os is undefined, the above code is out of date. Suggested updates
2837 unless ( defined $os ) {
2839 $$rpending_complaint .= <<EOS;
2840 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2841 We won't be able to look for a system-wide config file.
2845 # Unfortunately the logic used for the various versions isn't so clever..
2846 # so we have to handle an outside case.
2847 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2852 ( $^O !~ /win32|dos/i )
2855 && ( $^O ne 'MacOS' );
2858 sub look_for_Windows {
2860 # determine Windows sub-type and location of
2861 # system-wide configuration files
2862 my $rpending_complaint = shift;
2863 my $is_Windows = ( $^O =~ /win32|dos/i );
2864 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2865 return ( $is_Windows, $Windows_type );
2868 sub find_config_file {
2870 # look for a .perltidyrc configuration file
2871 # For Windows also look for a file named perltidy.ini
2872 my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2873 $rpending_complaint ) = @_;
2875 $$rconfig_file_chatter .= "# Config file search...system reported as:";
2877 $$rconfig_file_chatter .= "Windows $Windows_type\n";
2880 $$rconfig_file_chatter .= " $^O\n";
2883 # sub to check file existence and record all tests
2884 my $exists_config_file = sub {
2885 my $config_file = shift;
2886 return 0 unless $config_file;
2887 $$rconfig_file_chatter .= "# Testing: $config_file\n";
2888 return -f $config_file;
2893 # look in current directory first
2894 $config_file = ".perltidyrc";
2895 return $config_file if $exists_config_file->($config_file);
2897 $config_file = "perltidy.ini";
2898 return $config_file if $exists_config_file->($config_file);
2901 # Default environment vars.
2902 my @envs = qw(PERLTIDY HOME);
2904 # Check the NT/2k/XP locations, first a local machine def, then a
2906 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2908 # Now go through the environment ...
2909 foreach my $var (@envs) {
2910 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2911 if ( defined( $ENV{$var} ) ) {
2912 $$rconfig_file_chatter .= " = $ENV{$var}\n";
2914 # test ENV{ PERLTIDY } as file:
2915 if ( $var eq 'PERLTIDY' ) {
2916 $config_file = "$ENV{$var}";
2917 return $config_file if $exists_config_file->($config_file);
2920 # test ENV as directory:
2921 $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2922 return $config_file if $exists_config_file->($config_file);
2925 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2926 return $config_file if $exists_config_file->($config_file);
2930 $$rconfig_file_chatter .= "\n";
2934 # then look for a system-wide definition
2935 # where to look varies with OS
2938 if ($Windows_type) {
2939 my ( $os, $system, $allusers ) =
2940 Win_Config_Locs( $rpending_complaint, $Windows_type );
2942 # Check All Users directory, if there is one.
2943 # i.e. C:\Documents and Settings\User\perltidy.ini
2946 $config_file = catfile( $allusers, ".perltidyrc" );
2947 return $config_file if $exists_config_file->($config_file);
2949 $config_file = catfile( $allusers, "perltidy.ini" );
2950 return $config_file if $exists_config_file->($config_file);
2953 # Check system directory.
2954 # retain old code in case someone has been able to create
2955 # a file with a leading period.
2956 $config_file = catfile( $system, ".perltidyrc" );
2957 return $config_file if $exists_config_file->($config_file);
2959 $config_file = catfile( $system, "perltidy.ini" );
2960 return $config_file if $exists_config_file->($config_file);
2964 # Place to add customization code for other systems
2965 elsif ( $^O eq 'OS2' ) {
2967 elsif ( $^O eq 'MacOS' ) {
2969 elsif ( $^O eq 'VMS' ) {
2972 # Assume some kind of Unix
2975 $config_file = "/usr/local/etc/perltidyrc";
2976 return $config_file if $exists_config_file->($config_file);
2978 $config_file = "/etc/perltidyrc";
2979 return $config_file if $exists_config_file->($config_file);
2982 # Couldn't find a config file
2986 sub Win_Config_Locs {
2988 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2989 # or undef if its not a win32 OS. In list context returns OS, System
2990 # Directory, and All Users Directory. All Users will be empty on a
2991 # 9x/Me box. Contributed by: Yves Orton.
2993 my $rpending_complaint = shift;
2994 my $os = (@_) ? shift : Win_OS_Type();
3000 if ( $os =~ /9[58]|Me/ ) {
3001 $system = "C:/Windows";
3003 elsif ( $os =~ /NT|XP|200?/ ) {
3004 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
3007 ? "C:/WinNT/profiles/All Users/"
3008 : "C:/Documents and Settings/All Users/";
3012 # This currently would only happen on a win32s computer. I don't have
3013 # one to test, so I am unsure how to proceed. Suggestions welcome!
3014 $$rpending_complaint .=
3015 "I dont know a sensible place to look for config files on an $os system.\n";
3018 return wantarray ? ( $os, $system, $allusers ) : $os;
3021 sub dump_config_file {
3023 my $config_file = shift;
3024 my $rconfig_file_chatter = shift;
3025 print STDOUT "$$rconfig_file_chatter";
3027 print STDOUT "# Dump of file: '$config_file'\n";
3028 while ( my $line = $fh->getline() ) { print STDOUT $line }
3029 eval { $fh->close() };
3032 print STDOUT "# ...no config file found\n";
3036 sub read_config_file {
3038 my ( $fh, $config_file, $rexpansion ) = @_;
3039 my @config_list = ();
3041 # file is bad if non-empty $death_message is returned
3042 my $death_message = "";
3046 my $opening_brace_line;
3047 while ( my $line = $fh->getline() ) {
3050 ( $line, $death_message ) =
3051 strip_comment( $line, $config_file, $line_no );
3052 last if ($death_message);
3054 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
3060 # Look for complete or partial abbreviation definition of the form
3061 # name { body } or name { or name { body
3062 # See rules in perltidy's perldoc page
3063 # Section: Other Controls - Creating a new abbreviation
3064 if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
3065 my $oldname = $name;
3066 ( $name, $body ) = ( $2, $3 );
3068 # Cannot start new abbreviation unless old abbreviation is complete
3069 last if ($opening_brace_line);
3071 $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
3073 # handle a new alias definition
3074 if ( ${$rexpansion}{$name} ) {
3076 my @names = sort keys %$rexpansion;
3078 "Here is a list of all installed aliases\n(@names)\n"
3079 . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
3082 ${$rexpansion}{$name} = [];
3085 # leading opening braces not allowed
3086 elsif ( $line =~ /^{/ ) {
3087 $opening_brace_line = undef;
3089 "Unexpected '{' at line $line_no in config file '$config_file'\n";
3093 # Look for abbreviation closing: body } or }
3094 elsif ( $line =~ /^(.*)?\}$/ ) {
3096 if ($opening_brace_line) {
3097 $opening_brace_line = undef;
3101 "Unexpected '}' at line $line_no in config file '$config_file'\n";
3106 # Now store any parameters
3109 my ( $rbody_parts, $msg ) = parse_args($body);
3111 $death_message = <<EOM;
3112 Error reading file '$config_file' at line number $line_no.
3114 Please fix this line or use -npro to avoid reading this file
3121 # remove leading dashes if this is an alias
3122 foreach (@$rbody_parts) { s/^\-+//; }
3123 push @{ ${$rexpansion}{$name} }, @$rbody_parts;
3126 push( @config_list, @$rbody_parts );
3131 if ($opening_brace_line) {
3133 "Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
3135 eval { $fh->close() };
3136 return ( \@config_list, $death_message );
3141 # Strip any comment from a command line
3142 my ( $instr, $config_file, $line_no ) = @_;
3145 # check for full-line comment
3146 if ( $instr =~ /^\s*#/ ) {
3147 return ( "", $msg );
3150 # nothing to do if no comments
3151 if ( $instr !~ /#/ ) {
3152 return ( $instr, $msg );
3155 # handle case of no quotes
3156 elsif ( $instr !~ /['"]/ ) {
3158 # We now require a space before the # of a side comment
3159 # this allows something like:
3161 # Otherwise, it would have to be quoted:
3163 $instr =~ s/\s+\#.*$//;
3164 return ( $instr, $msg );
3167 # handle comments and quotes
3169 my $quote_char = "";
3172 # looking for ending quote character
3174 if ( $instr =~ /\G($quote_char)/gc ) {
3178 elsif ( $instr =~ /\G(.)/gc ) {
3182 # error..we reached the end without seeing the ending quote char
3185 Error reading file $config_file at line number $line_no.
3186 Did not see ending quote character <$quote_char> in this text:
3188 Please fix this line or use -npro to avoid reading this file
3194 # accumulating characters and looking for start of a quoted string
3196 if ( $instr =~ /\G([\"\'])/gc ) {
3201 # Note: not yet enforcing the space-before-hash rule for side
3202 # comments if the parameter is quoted.
3203 elsif ( $instr =~ /\G#/gc ) {
3206 elsif ( $instr =~ /\G(.)/gc ) {
3214 return ( $outstr, $msg );
3219 # Parse a command string containing multiple string with possible
3220 # quotes, into individual commands. It might look like this, for example:
3222 # -wba=" + - " -some-thing -wbb='. && ||'
3224 # There is no need, at present, to handle escaped quote characters.
3225 # (They are not perltidy tokens, so needn't be in strings).
3228 my @body_parts = ();
3229 my $quote_char = "";
3234 # looking for ending quote character
3236 if ( $body =~ /\G($quote_char)/gc ) {
3239 elsif ( $body =~ /\G(.)/gc ) {
3243 # error..we reached the end without seeing the ending quote char
3245 if ( length($part) ) { push @body_parts, $part; }
3247 Did not see ending quote character <$quote_char> in this text:
3254 # accumulating characters and looking for start of a quoted string
3256 if ( $body =~ /\G([\"\'])/gc ) {
3259 elsif ( $body =~ /\G(\s+)/gc ) {
3260 if ( length($part) ) { push @body_parts, $part; }
3263 elsif ( $body =~ /\G(.)/gc ) {
3267 if ( length($part) ) { push @body_parts, $part; }
3272 return ( \@body_parts, $msg );
3275 sub dump_long_names {
3277 my @names = sort @_;
3279 # Command line long names (passed to GetOptions)
3280 #---------------------------------------------------------------
3281 # here is a summary of the Getopt codes:
3282 # <none> does not take an argument
3283 # =s takes a mandatory string
3284 # :s takes an optional string
3285 # =i takes a mandatory integer
3286 # :i takes an optional integer
3287 # ! does not take an argument and may be negated
3288 # i.e., -foo and -nofoo are allowed
3289 # a double dash signals the end of the options list
3291 #---------------------------------------------------------------
3294 foreach (@names) { print STDOUT "$_\n" }
3298 my @defaults = sort @_;
3299 print STDOUT "Default command line options:\n";
3300 foreach (@_) { print STDOUT "$_\n" }
3303 sub readable_options {
3305 # return options for this run as a string which could be
3306 # put in a perltidyrc file
3307 my ( $rOpts, $roption_string ) = @_;
3309 my $rGetopt_flags = \%Getopt_flags;
3310 my $readable_options = "# Final parameter set for this run.\n";
3311 $readable_options .=
3312 "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
3313 foreach my $opt ( @{$roption_string} ) {
3315 if ( $opt =~ /(.*)(!|=.*)$/ ) {
3319 if ( defined( $rOpts->{$opt} ) ) {
3320 $rGetopt_flags->{$opt} = $flag;
3323 foreach my $key ( sort keys %{$rOpts} ) {
3324 my $flag = $rGetopt_flags->{$key};
3325 my $value = $rOpts->{$key};
3329 if ( $flag =~ /^=/ ) {
3330 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
3331 $suffix = "=" . $value;
3333 elsif ( $flag =~ /^!/ ) {
3334 $prefix .= "no" unless ($value);
3339 $readable_options .=
3340 "# ERROR in dump_options: unrecognized flag $flag for $key\n";
3343 $readable_options .= $prefix . $key . $suffix . "\n";
3345 return $readable_options;
3349 print STDOUT <<"EOM";
3350 This is perltidy, v$VERSION
3352 Copyright 2000-2016, Steve Hancock
3354 Perltidy is free software and may be copied under the terms of the GNU
3355 General Public License, which is included in the distribution files.
3357 Complete documentation for perltidy can be found using 'man perltidy'
3358 or on the internet at http://perltidy.sourceforge.net.
3365 This is perltidy version $VERSION, a perl script indenter. Usage:
3367 perltidy [ options ] file1 file2 file3 ...
3368 (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3369 perltidy [ options ] file1 -o outfile
3370 perltidy [ options ] file1 -st >outfile
3371 perltidy [ options ] <infile >outfile
3373 Options have short and long forms. Short forms are shown; see
3374 man pages for long forms. Note: '=s' indicates a required string,
3375 and '=n' indicates a required integer.
3379 -o=file name of the output file (only if single input file)
3380 -oext=s change output extension from 'tdy' to s
3381 -opath=path change path to be 'path' for output files
3382 -b backup original to .bak and modify file in-place
3383 -bext=s change default backup extension from 'bak' to s
3384 -q deactivate error messages (for running under editor)
3385 -w include non-critical warning messages in the .ERR error output
3386 -syn run perl -c to check syntax (default under unix systems)
3387 -log save .LOG file, which has useful diagnostics
3388 -f force perltidy to read a binary file
3389 -g like -log but writes more detailed .LOG file, for debugging scripts
3390 -opt write the set of options actually used to a .LOG file
3391 -npro ignore .perltidyrc configuration command file
3392 -pro=file read configuration commands from file instead of .perltidyrc
3393 -st send output to standard output, STDOUT
3394 -se send all error output to standard error output, STDERR
3395 -v display version number to standard output and quit
3398 -i=n use n columns per indentation level (default n=4)
3399 -t tabs: use one tab character per indentation level, not recommeded
3400 -nt no tabs: use n spaces per indentation level (default)
3401 -et=n entab leading whitespace n spaces per tab; not recommended
3402 -io "indent only": just do indentation, no other formatting.
3403 -sil=n set starting indentation level to n; use if auto detection fails
3404 -ole=s specify output line ending (s=dos or win, mac, unix)
3405 -ple keep output line endings same as input (input must be filename)
3408 -fws freeze whitespace; this disables all whitespace changes
3409 and disables the following switches:
3410 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
3411 -bbt same as -bt but for code block braces; same as -bt if not given
3412 -bbvt block braces vertically tight; use with -bl or -bli
3413 -bbvtl=s make -bbvt to apply to selected list of block types
3414 -pt=n paren tightness (n=0, 1 or 2)
3415 -sbt=n square bracket tightness (n=0, 1, or 2)
3416 -bvt=n brace vertical tightness,
3417 n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3418 -pvt=n paren vertical tightness (see -bvt for n)
3419 -sbvt=n square bracket vertical tightness (see -bvt for n)
3420 -bvtc=n closing brace vertical tightness:
3421 n=(0=open, 1=sometimes close, 2=always close)
3422 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3423 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3424 -ci=n sets continuation indentation=n, default is n=2 spaces
3425 -lp line up parentheses, brackets, and non-BLOCK braces
3426 -sfs add space before semicolon in for( ; ; )
3427 -aws allow perltidy to add whitespace (default)
3428 -dws delete all old non-essential whitespace
3429 -icb indent closing brace of a code block
3430 -cti=n closing indentation of paren, square bracket, or non-block brace:
3431 n=0 none, =1 align with opening, =2 one full indentation level
3432 -icp equivalent to -cti=2
3433 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
3434 -wrs=s want space right of tokens in string;
3435 -sts put space before terminal semicolon of a statement
3436 -sak=s put space between keywords given in s and '(';
3437 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3440 -fnl freeze newlines; this disables all line break changes
3441 and disables the following switches:
3442 -anl add newlines; ok to introduce new line breaks
3443 -bbs add blank line before subs and packages
3444 -bbc add blank line before block comments
3445 -bbb add blank line between major blocks
3446 -kbl=n keep old blank lines? 0=no, 1=some, 2=all
3447 -mbl=n maximum consecutive blank lines to output (default=1)
3448 -ce cuddled else; use this style: '} else {'
3449 -dnl delete old newlines (default)
3450 -l=n maximum line length; default n=80
3451 -bl opening brace on new line
3452 -sbl opening sub brace on new line. value of -bl is used if not given.
3453 -bli opening brace on new line and indented
3454 -bar opening brace always on right, even for long clauses
3455 -vt=n vertical tightness (requires -lp); n controls break after opening
3456 token: 0=never 1=no break if next line balanced 2=no break
3457 -vtc=n vertical tightness of closing container; n controls if closing
3458 token starts new line: 0=always 1=not unless list 1=never
3459 -wba=s want break after tokens in string; i.e. wba=': .'
3460 -wbb=s want break before tokens in string
3462 Following Old Breakpoints
3463 -kis keep interior semicolons. Allows multiple statements per line.
3464 -boc break at old comma breaks: turns off all automatic list formatting
3465 -bol break at old logical breakpoints: or, and, ||, && (default)
3466 -bok break at old list keyword breakpoints such as map, sort (default)
3467 -bot break at old conditional (ternary ?:) operator breakpoints (default)
3468 -boa break at old attribute breakpoints
3469 -cab=n break at commas after a comma-arrow (=>):
3470 n=0 break at all commas after =>
3471 n=1 stable: break unless this breaks an existing one-line container
3472 n=2 break only if a one-line container cannot be formed
3473 n=3 do not treat commas after => specially at all
3476 -ibc indent block comments (default)
3477 -isbc indent spaced block comments; may indent unless no leading space
3478 -msc=n minimum desired spaces to side comment, default 4
3479 -fpsc=n fix position for side comments; default 0;
3480 -csc add or update closing side comments after closing BLOCK brace
3481 -dcsc delete closing side comments created by a -csc command
3482 -cscp=s change closing side comment prefix to be other than '## end'
3483 -cscl=s change closing side comment to apply to selected list of blocks
3484 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3485 -csct=n maximum number of columns of appended text, default n=20
3486 -cscw causes warning if old side comment is overwritten with -csc
3488 -sbc use 'static block comments' identified by leading '##' (default)
3489 -sbcp=s change static block comment identifier to be other than '##'
3490 -osbc outdent static block comments
3492 -ssc use 'static side comments' identified by leading '##' (default)
3493 -sscp=s change static side comment identifier to be other than '##'
3495 Delete selected text
3496 -dac delete all comments AND pod
3497 -dbc delete block comments
3498 -dsc delete side comments
3501 Send selected text to a '.TEE' file
3502 -tac tee all comments AND pod
3503 -tbc tee block comments
3504 -tsc tee side comments
3508 -olq outdent long quoted strings (default)
3509 -olc outdent a long block comment line
3510 -ola outdent statement labels
3511 -okw outdent control keywords (redo, next, last, goto, return)
3512 -okwl=s specify alternative keywords for -okw command
3515 -mft=n maximum fields per table; default n=40
3516 -x do not format lines before hash-bang line (i.e., for VMS)
3517 -asc allows perltidy to add a ';' when missing (default)
3518 -dsm allows perltidy to delete an unnecessary ';' (default)
3520 Combinations of other parameters
3521 -gnu attempt to follow GNU Coding Standards as applied to perl
3522 -mangle remove as many newlines as possible (but keep comments and pods)
3523 -extrude insert as many newlines as possible
3525 Dump and die, debugging
3526 -dop dump options used in this run to standard output and quit
3527 -ddf dump default options to standard output and quit
3528 -dsn dump all option short names to standard output and quit
3529 -dln dump option long names to standard output and quit
3530 -dpro dump whatever configuration file is in effect to standard output
3531 -dtt dump all token types to standard output and quit
3534 -html write an html file (see 'man perl2web' for many options)
3535 Note: when -html is used, no indentation or formatting are done.
3536 Hint: try perltidy -html -css=mystyle.css filename.pl
3537 and edit mystyle.css to change the appearance of filename.html.
3538 -nnn gives line numbers
3539 -pre only writes out <pre>..</pre> code section
3540 -toc places a table of contents to subs at the top (default)
3541 -pod passes pod text through pod2html (default)
3542 -frm write html as a frame (3 files)
3543 -text=s extra extension for table of contents if -frm, default='toc'
3544 -sext=s extra extension for file content if -frm, default='src'
3546 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3547 negates the long forms. For example, -nasc means don't add missing
3550 If you are unable to see this entire text, try "perltidy -h | more"
3551 For more detailed information, and additional options, try "man perltidy",
3552 or go to the perltidy home page at http://perltidy.sourceforge.net
3557 sub process_this_file {
3559 my ( $truth, $beauty ) = @_;
3561 # loop to process each line of this file
3562 while ( my $line_of_tokens = $truth->get_line() ) {
3563 $beauty->write_line($line_of_tokens);
3567 eval { $beauty->finish_formatting() };
3568 $truth->report_tokenization_errors();
3573 # Use 'perl -c' to make sure that we did not create bad syntax
3574 # This is a very good independent check for programming errors
3576 # Given names of the input and output files, ($istream, $ostream),
3577 # we do the following:
3578 # - check syntax of the input file
3579 # - if bad, all done (could be an incomplete code snippet)
3580 # - if infile syntax ok, then check syntax of the output file;
3581 # - if outfile syntax bad, issue warning; this implies a code bug!
3582 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3584 my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
3585 my $infile_syntax_ok = 0;
3586 my $line_of_dashes = '-' x 42 . "\n";
3588 my $flags = $rOpts->{'perl-syntax-check-flags'};
3590 # be sure we invoke perl with -c
3591 # note: perl will accept repeated flags like '-c -c'. It is safest
3592 # to append another -c than try to find an interior bundled c, as
3593 # in -Tc, because such a 'c' might be in a quoted string, for example.
3594 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3596 # be sure we invoke perl with -x if requested
3597 # same comments about repeated parameters applies
3598 if ( $rOpts->{'look-for-hash-bang'} ) {
3599 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3602 # this shouldn't happen unless a temporary file couldn't be made
3603 if ( $istream eq '-' ) {
3604 $logger_object->write_logfile_entry(
3605 "Cannot run perl -c on STDIN and STDOUT\n");
3606 return $infile_syntax_ok;
3609 $logger_object->write_logfile_entry(
3610 "checking input file syntax with perl $flags\n");
3612 # Not all operating systems/shells support redirection of the standard
3614 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3616 my ( $istream_filename, $perl_output ) =
3617 do_syntax_check( $istream, $flags, $error_redirection );
3618 $logger_object->write_logfile_entry(
3619 "Input stream passed to Perl as file $istream_filename\n");
3620 $logger_object->write_logfile_entry($line_of_dashes);
3621 $logger_object->write_logfile_entry("$perl_output\n");
3623 if ( $perl_output =~ /syntax\s*OK/ ) {
3624 $infile_syntax_ok = 1;
3625 $logger_object->write_logfile_entry($line_of_dashes);
3626 $logger_object->write_logfile_entry(
3627 "checking output file syntax with perl $flags ...\n");
3628 my ( $ostream_filename, $perl_output ) =
3629 do_syntax_check( $ostream, $flags, $error_redirection );
3630 $logger_object->write_logfile_entry(
3631 "Output stream passed to Perl as file $ostream_filename\n");
3632 $logger_object->write_logfile_entry($line_of_dashes);
3633 $logger_object->write_logfile_entry("$perl_output\n");
3635 unless ( $perl_output =~ /syntax\s*OK/ ) {
3636 $logger_object->write_logfile_entry($line_of_dashes);
3637 $logger_object->warning(
3638 "The output file has a syntax error when tested with perl $flags $ostream !\n"
3640 $logger_object->warning(
3641 "This implies an error in perltidy; the file $ostream is bad\n"
3643 $logger_object->report_definite_bug();
3645 # the perl version number will be helpful for diagnosing the problem
3646 $logger_object->write_logfile_entry(
3647 qx/perl -v $error_redirection/ . "\n" );
3652 # Only warn of perl -c syntax errors. Other messages,
3653 # such as missing modules, are too common. They can be
3654 # seen by running with perltidy -w
3655 $logger_object->complain("A syntax check using perl $flags\n");
3656 $logger_object->complain(
3657 "for the output in file $istream_filename gives:\n");
3658 $logger_object->complain($line_of_dashes);
3659 $logger_object->complain("$perl_output\n");
3660 $logger_object->complain($line_of_dashes);
3661 $infile_syntax_ok = -1;
3662 $logger_object->write_logfile_entry($line_of_dashes);
3663 $logger_object->write_logfile_entry(
3664 "The output file will not be checked because of input file problems\n"
3667 return $infile_syntax_ok;
3670 sub do_syntax_check {
3671 my ( $stream, $flags, $error_redirection ) = @_;
3673 # We need a named input file for executing perl
3674 my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
3676 # TODO: Need to add name of file to log somewhere
3677 # otherwise Perl output is hard to read
3678 if ( !$stream_filename ) { return $stream_filename, "" }
3680 # We have to quote the filename in case it has unusual characters
3681 # or spaces. Example: this filename #CM11.pm# gives trouble.
3682 my $quoted_stream_filename = '"' . $stream_filename . '"';
3684 # Under VMS something like -T will become -t (and an error) so we
3685 # will put quotes around the flags. Double quotes seem to work on
3686 # Unix/Windows/VMS, but this may not work on all systems. (Single
3687 # quotes do not work under Windows). It could become necessary to
3688 # put double quotes around each flag, such as: -"c" -"T"
3689 # We may eventually need some system-dependent coding here.
3690 $flags = '"' . $flags . '"';
3692 # now wish for luck...
3693 my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
3695 unlink $stream_filename if ($is_tmpfile);
3696 return $stream_filename, $msg;
3699 #####################################################################
3701 # This is a stripped down version of IO::Scalar
3702 # Given a reference to a scalar, it supplies either:
3703 # a getline method which reads lines (mode='r'), or
3704 # a print method which reads lines (mode='w')
3706 #####################################################################
3707 package Perl::Tidy::IOScalar;
3711 my ( $package, $rscalar, $mode ) = @_;
3712 my $ref = ref $rscalar;
3713 if ( $ref ne 'SCALAR' ) {
3715 ------------------------------------------------------------------------
3716 expecting ref to SCALAR but got ref to ($ref); trace follows:
3717 ------------------------------------------------------------------------
3721 if ( $mode eq 'w' ) {
3723 return bless [ $rscalar, $mode ], $package;
3725 elsif ( $mode eq 'r' ) {
3727 # Convert a scalar to an array.
3728 # This avoids looking for "\n" on each call to getline
3730 # NOTES: The -1 count is needed to avoid loss of trailing blank lines
3731 # (which might be important in a DATA section).
3733 if ( $rscalar && ${$rscalar} ) {
3734 @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
3736 # remove possible extra blank line introduced with split
3737 if ( @array && $array[-1] eq "\n" ) { pop @array }
3740 return bless [ \@array, $mode, $i_next ], $package;
3744 ------------------------------------------------------------------------
3745 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3746 ------------------------------------------------------------------------
3753 my $mode = $self->[1];
3754 if ( $mode ne 'r' ) {
3756 ------------------------------------------------------------------------
3757 getline call requires mode = 'r' but mode = ($mode); trace follows:
3758 ------------------------------------------------------------------------
3761 my $i = $self->[2]++;
3762 return $self->[0]->[$i];
3767 my $mode = $self->[1];
3768 if ( $mode ne 'w' ) {
3770 ------------------------------------------------------------------------
3771 print call requires mode = 'w' but mode = ($mode); trace follows:
3772 ------------------------------------------------------------------------
3775 ${ $self->[0] } .= $_[0];
3777 sub close { return }
3779 #####################################################################
3781 # This is a stripped down version of IO::ScalarArray
3782 # Given a reference to an array, it supplies either:
3783 # a getline method which reads lines (mode='r'), or
3784 # a print method which reads lines (mode='w')
3786 # NOTE: this routine assumes that there aren't any embedded
3787 # newlines within any of the array elements. There are no checks
3790 #####################################################################
3791 package Perl::Tidy::IOScalarArray;
3795 my ( $package, $rarray, $mode ) = @_;
3796 my $ref = ref $rarray;
3797 if ( $ref ne 'ARRAY' ) {
3799 ------------------------------------------------------------------------
3800 expecting ref to ARRAY but got ref to ($ref); trace follows:
3801 ------------------------------------------------------------------------
3805 if ( $mode eq 'w' ) {
3807 return bless [ $rarray, $mode ], $package;
3809 elsif ( $mode eq 'r' ) {
3811 return bless [ $rarray, $mode, $i_next ], $package;
3815 ------------------------------------------------------------------------
3816 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3817 ------------------------------------------------------------------------
3824 my $mode = $self->[1];
3825 if ( $mode ne 'r' ) {
3827 ------------------------------------------------------------------------
3828 getline requires mode = 'r' but mode = ($mode); trace follows:
3829 ------------------------------------------------------------------------
3832 my $i = $self->[2]++;
3833 return $self->[0]->[$i];
3838 my $mode = $self->[1];
3839 if ( $mode ne 'w' ) {
3841 ------------------------------------------------------------------------
3842 print requires mode = 'w' but mode = ($mode); trace follows:
3843 ------------------------------------------------------------------------
3846 push @{ $self->[0] }, $_[0];
3848 sub close { return }
3850 #####################################################################
3852 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3853 # which returns the next line to be parsed
3855 #####################################################################
3857 package Perl::Tidy::LineSource;
3861 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3863 my $input_line_ending;
3864 if ( $rOpts->{'preserve-line-endings'} ) {
3865 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3868 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3869 return undef unless $fh;
3871 # in order to check output syntax when standard output is used,
3872 # or when it is an object, we have to make a copy of the file
3873 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3876 # Turning off syntax check when input output is used.
3877 # The reason is that temporary files cause problems on
3879 $rOpts->{'check-syntax'} = 0;
3881 $$rpending_logfile_message .= <<EOM;
3882 Note: --syntax check will be skipped because standard input is used
3889 _filename => $input_file,
3890 _input_line_ending => $input_line_ending,
3891 _rinput_buffer => [],
3896 sub close_input_file {
3899 # Only close physical files, not STDIN and other objects
3900 my $filename = $self->{_filename};
3901 if ( $filename ne '-' && !ref $filename ) {
3902 eval { $self->{_fh}->close() };
3909 my $fh = $self->{_fh};
3910 my $rinput_buffer = $self->{_rinput_buffer};
3912 if ( scalar(@$rinput_buffer) ) {
3913 $line = shift @$rinput_buffer;
3916 $line = $fh->getline();
3918 # patch to read raw mac files under unix, dos
3919 # see if the first line has embedded \r's
3920 if ( $line && !$self->{_started} ) {
3921 if ( $line =~ /[\015][^\015\012]/ ) {
3923 # found one -- break the line up and store in a buffer
3924 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3925 my $count = @$rinput_buffer;
3926 $line = shift @$rinput_buffer;
3928 $self->{_started}++;
3934 #####################################################################
3936 # the Perl::Tidy::LineSink class supplies a write_line method for
3937 # actual file writing
3939 #####################################################################
3941 package Perl::Tidy::LineSink;
3945 my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3946 $rpending_logfile_message, $binmode )
3951 my $output_file_open = 0;
3953 if ( $rOpts->{'format'} eq 'tidy' ) {
3954 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3955 unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
3956 $output_file_open = 1;
3958 if ( ref($fh) eq 'IO::File' ) {
3959 if ( $rOpts->{'character-encoding'}
3960 && $rOpts->{'character-encoding'} eq 'utf8' )
3962 binmode $fh, ":encoding(UTF-8)";
3964 else { binmode $fh }
3966 if ( $output_file eq '-' ) { binmode STDOUT }
3970 # in order to check output syntax when standard output is used,
3971 # or when it is an object, we have to make a copy of the file
3972 if ( $output_file eq '-' || ref $output_file ) {
3973 if ( $rOpts->{'check-syntax'} ) {
3975 # Turning off syntax check when standard output is used.
3976 # The reason is that temporary files cause problems on
3978 $rOpts->{'check-syntax'} = 0;
3979 $$rpending_logfile_message .= <<EOM;
3980 Note: --syntax check will be skipped because standard output is used
3989 _output_file => $output_file,
3990 _output_file_open => $output_file_open,
3992 _tee_file => $tee_file,
3993 _tee_file_opened => 0,
3994 _line_separator => $line_separator,
3995 _binmode => $binmode,
4002 my $fh = $self->{_fh};
4004 my $output_file_open = $self->{_output_file_open};
4006 $_[0] .= $self->{_line_separator};
4008 $fh->print( $_[0] ) if ( $self->{_output_file_open} );
4010 if ( $self->{_tee_flag} ) {
4011 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
4012 my $fh_tee = $self->{_fh_tee};
4013 print $fh_tee $_[0];
4019 $self->{_tee_flag} = 1;
4024 $self->{_tee_flag} = 0;
4027 sub really_open_tee_file {
4029 my $tee_file = $self->{_tee_file};
4031 $fh_tee = IO::File->new(">$tee_file")
4032 or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
4033 binmode $fh_tee if $self->{_binmode};
4034 $self->{_tee_file_opened} = 1;
4035 $self->{_fh_tee} = $fh_tee;
4038 sub close_output_file {
4041 # Only close physical files, not STDOUT and other objects
4042 my $output_file = $self->{_output_file};
4043 if ( $output_file ne '-' && !ref $output_file ) {
4044 eval { $self->{_fh}->close() } if $self->{_output_file_open};
4046 $self->close_tee_file();
4049 sub close_tee_file {
4052 # Only close physical files, not STDOUT and other objects
4053 if ( $self->{_tee_file_opened} ) {
4054 my $tee_file = $self->{_tee_file};
4055 if ( $tee_file ne '-' && !ref $tee_file ) {
4056 eval { $self->{_fh_tee}->close() };
4057 $self->{_tee_file_opened} = 0;
4062 #####################################################################
4064 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
4065 # useful for program development.
4067 # Only one such file is created regardless of the number of input
4068 # files processed. This allows the results of processing many files
4069 # to be summarized in a single file.
4071 #####################################################################
4073 package Perl::Tidy::Diagnostics;
4079 _write_diagnostics_count => 0,
4080 _last_diagnostic_file => "",
4086 sub set_input_file {
4088 $self->{_input_file} = $_[0];
4091 # This is a diagnostic routine which is useful for program development.
4092 # Output from debug messages go to a file named DIAGNOSTICS, where
4093 # they are labeled by file and line. This allows many files to be
4094 # scanned at once for some particular condition of interest.
4095 sub write_diagnostics {
4098 unless ( $self->{_write_diagnostics_count} ) {
4099 open DIAGNOSTICS, ">DIAGNOSTICS"
4100 or death("couldn't open DIAGNOSTICS: $!\n");
4103 my $last_diagnostic_file = $self->{_last_diagnostic_file};
4104 my $input_file = $self->{_input_file};
4105 if ( $last_diagnostic_file ne $input_file ) {
4106 print DIAGNOSTICS "\nFILE:$input_file\n";
4108 $self->{_last_diagnostic_file} = $input_file;
4109 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
4110 print DIAGNOSTICS "$input_line_number:\t@_";
4111 $self->{_write_diagnostics_count}++;
4114 #####################################################################
4116 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
4118 #####################################################################
4120 package Perl::Tidy::Logger;
4125 my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_;
4127 my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
4129 # remove any old error output file if we might write a new one
4130 unless ( $fh_warnings || ref($warning_file) ) {
4131 if ( -e $warning_file ) { unlink($warning_file) }
4135 defined( $rOpts->{'logfile-gap'} )
4136 ? $rOpts->{'logfile-gap'}
4138 if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
4141 _log_file => $log_file,
4142 _logfile_gap => $logfile_gap,
4144 _fh_warnings => $fh_warnings,
4145 _last_input_line_written => 0,
4146 _at_end_of_file => 0,
4148 _block_log_output => 0,
4149 _line_of_tokens => undef,
4150 _output_line_number => undef,
4151 _wrote_line_information_string => 0,
4152 _wrote_column_headings => 0,
4153 _warning_file => $warning_file,
4154 _warning_count => 0,
4155 _complaint_count => 0,
4156 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure
4157 _saw_brace_error => 0,
4158 _saw_extrude => $saw_extrude,
4159 _output_array => [],
4163 sub get_warning_count {
4165 return $self->{_warning_count};
4168 sub get_use_prefix {
4170 return $self->{_use_prefix};
4173 sub block_log_output {
4175 $self->{_block_log_output} = 1;
4178 sub unblock_log_output {
4180 $self->{_block_log_output} = 0;
4183 sub interrupt_logfile {
4185 $self->{_use_prefix} = 0;
4186 $self->warning("\n");
4187 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
4190 sub resume_logfile {
4192 $self->write_logfile_entry( '#' x 60 . "\n" );
4193 $self->{_use_prefix} = 1;
4196 sub we_are_at_the_last_line {
4198 unless ( $self->{_wrote_line_information_string} ) {
4199 $self->write_logfile_entry("Last line\n\n");
4201 $self->{_at_end_of_file} = 1;
4204 # record some stuff in case we go down in flames
4207 my ( $line_of_tokens, $output_line_number ) = @_;
4208 my $input_line = $line_of_tokens->{_line_text};
4209 my $input_line_number = $line_of_tokens->{_line_number};
4211 # save line information in case we have to write a logfile message
4212 $self->{_line_of_tokens} = $line_of_tokens;
4213 $self->{_output_line_number} = $output_line_number;
4214 $self->{_wrote_line_information_string} = 0;
4216 my $last_input_line_written = $self->{_last_input_line_written};
4217 my $rOpts = $self->{_rOpts};
4220 ( $input_line_number - $last_input_line_written ) >=
4221 $self->{_logfile_gap}
4223 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
4226 my $rlevels = $line_of_tokens->{_rlevels};
4227 my $structural_indentation_level = $$rlevels[0];
4228 $self->{_last_input_line_written} = $input_line_number;
4229 ( my $out_str = $input_line ) =~ s/^\s*//;
4232 $out_str = ( '.' x $structural_indentation_level ) . $out_str;
4234 if ( length($out_str) > 35 ) {
4235 $out_str = substr( $out_str, 0, 35 ) . " ....";
4237 $self->logfile_output( "", "$out_str\n" );
4241 sub write_logfile_entry {
4244 # add leading >>> to avoid confusing error messages and code
4245 $self->logfile_output( ">>>", "@_" );
4248 sub write_column_headings {
4251 $self->{_wrote_column_headings} = 1;
4252 my $routput_array = $self->{_output_array};
4253 push @{$routput_array}, <<EOM;
4254 The nesting depths in the table below are at the start of the lines.
4255 The indicated output line numbers are not always exact.
4256 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4258 in:out indent c b nesting code + messages; (messages begin with >>>)
4259 lines levels i k (code begins with one '.' per indent level)
4260 ------ ----- - - -------- -------------------------------------------
4264 sub make_line_information_string {
4266 # make columns of information when a logfile message needs to go out
4268 my $line_of_tokens = $self->{_line_of_tokens};
4269 my $input_line_number = $line_of_tokens->{_line_number};
4270 my $line_information_string = "";
4271 if ($input_line_number) {
4273 my $output_line_number = $self->{_output_line_number};
4274 my $brace_depth = $line_of_tokens->{_curly_brace_depth};
4275 my $paren_depth = $line_of_tokens->{_paren_depth};
4276 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
4277 my $guessed_indentation_level =
4278 $line_of_tokens->{_guessed_indentation_level};
4279 my $rlevels = $line_of_tokens->{_rlevels};
4280 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
4281 my $rci_levels = $line_of_tokens->{_rci_levels};
4282 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
4284 my $structural_indentation_level = $$rlevels[0];
4286 $self->write_column_headings() unless $self->{_wrote_column_headings};
4288 # keep logfile columns aligned for scripts up to 999 lines;
4289 # for longer scripts it doesn't really matter
4290 my $extra_space = "";
4292 ( $input_line_number < 10 ) ? " "
4293 : ( $input_line_number < 100 ) ? " "
4296 ( $output_line_number < 10 ) ? " "
4297 : ( $output_line_number < 100 ) ? " "
4300 # there are 2 possible nesting strings:
4301 # the original which looks like this: (0 [1 {2
4302 # the new one, which looks like this: {{[
4303 # the new one is easier to read, and shows the order, but
4304 # could be arbitrarily long, so we use it unless it is too long
4305 my $nesting_string =
4306 "($paren_depth [$square_bracket_depth {$brace_depth";
4307 my $nesting_string_new = $$rnesting_tokens[0];
4309 my $ci_level = $$rci_levels[0];
4310 if ( $ci_level > 9 ) { $ci_level = '*' }
4311 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
4313 if ( length($nesting_string_new) <= 8 ) {
4315 $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
4317 $line_information_string =
4318 "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
4320 return $line_information_string;
4323 sub logfile_output {
4325 my ( $prompt, $msg ) = @_;
4326 return if ( $self->{_block_log_output} );
4328 my $routput_array = $self->{_output_array};
4329 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
4330 push @{$routput_array}, "$msg";
4333 my $line_information_string = $self->make_line_information_string();
4334 $self->{_wrote_line_information_string} = 1;
4336 if ($line_information_string) {
4337 push @{$routput_array}, "$line_information_string $prompt$msg";
4340 push @{$routput_array}, "$msg";
4345 sub get_saw_brace_error {
4347 return $self->{_saw_brace_error};
4350 sub increment_brace_error {
4352 $self->{_saw_brace_error}++;
4357 use constant BRACE_WARNING_LIMIT => 10;
4358 my $saw_brace_error = $self->{_saw_brace_error};
4360 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
4364 $self->{_saw_brace_error} = $saw_brace_error;
4366 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
4367 $self->warning("No further warnings of this type will be given\n");
4373 # handle non-critical warning messages based on input flag
4375 my $rOpts = $self->{_rOpts};
4377 # these appear in .ERR output only if -w flag is used
4378 if ( $rOpts->{'warning-output'} ) {
4382 # otherwise, they go to the .LOG file
4384 $self->{_complaint_count}++;
4385 $self->write_logfile_entry(@_);
4391 # report errors to .ERR file (or stdout)
4393 use constant WARNING_LIMIT => 50;
4395 my $rOpts = $self->{_rOpts};
4396 unless ( $rOpts->{'quiet'} ) {
4398 my $warning_count = $self->{_warning_count};
4399 my $fh_warnings = $self->{_fh_warnings};
4400 if ( !$fh_warnings ) {
4401 my $warning_file = $self->{_warning_file};
4402 ( $fh_warnings, my $filename ) =
4403 Perl::Tidy::streamhandle( $warning_file, 'w' );
4404 $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
4405 Perl::Tidy::Warn "## Please see file $filename\n"
4406 unless ref($warning_file);
4407 $self->{_fh_warnings} = $fh_warnings;
4408 $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
4411 if ( $warning_count < WARNING_LIMIT ) {
4412 if ( $self->get_use_prefix() > 0 ) {
4413 my $input_line_number =
4414 Perl::Tidy::Tokenizer::get_input_line_number();
4415 if ( !defined($input_line_number) ) { $input_line_number = -1 }
4416 $fh_warnings->print("$input_line_number:\t@_");
4417 $self->write_logfile_entry("WARNING: @_");
4420 $fh_warnings->print(@_);
4421 $self->write_logfile_entry(@_);
4425 $self->{_warning_count} = $warning_count;
4427 if ( $warning_count == WARNING_LIMIT ) {
4428 $fh_warnings->print("No further warnings will be given\n");
4433 # programming bug codes:
4435 # 0 = maybe, not sure.
4437 sub report_possible_bug {
4439 my $saw_code_bug = $self->{_saw_code_bug};
4440 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4443 sub report_definite_bug {
4445 $self->{_saw_code_bug} = 1;
4448 sub ask_user_for_bug_report {
4451 my ( $infile_syntax_ok, $formatter ) = @_;
4452 my $saw_code_bug = $self->{_saw_code_bug};
4453 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4454 $self->warning(<<EOM);
4456 You may have encountered a code bug in perltidy. If you think so, and
4457 the problem is not listed in the BUGS file at
4458 http://perltidy.sourceforge.net, please report it so that it can be
4459 corrected. Include the smallest possible script which has the problem,
4460 along with the .LOG file. See the manual pages for contact information.
4465 elsif ( $saw_code_bug == 1 ) {
4466 if ( $self->{_saw_extrude} ) {
4467 $self->warning(<<EOM);
4469 You may have encountered a bug in perltidy. However, since you are using the
4470 -extrude option, the problem may be with perl or one of its modules, which have
4471 occasional problems with this type of file. If you believe that the
4472 problem is with perltidy, and the problem is not listed in the BUGS file at
4473 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4474 Include the smallest possible script which has the problem, along with the .LOG
4475 file. See the manual pages for contact information.
4480 $self->warning(<<EOM);
4482 Oops, you seem to have encountered a bug in perltidy. Please check the
4483 BUGS file at http://perltidy.sourceforge.net. If the problem is not
4484 listed there, please report it so that it can be corrected. Include the
4485 smallest possible script which produces this message, along with the
4486 .LOG file if appropriate. See the manual pages for contact information.
4487 Your efforts are appreciated.
4490 my $added_semicolon_count = 0;
4492 $added_semicolon_count =
4493 $formatter->get_added_semicolon_count();
4495 if ( $added_semicolon_count > 0 ) {
4496 $self->warning(<<EOM);
4498 The log file shows that perltidy added $added_semicolon_count semicolons.
4499 Please rerun with -nasc to see if that is the cause of the syntax error. Even
4500 if that is the problem, please report it so that it can be fixed.
4510 # called after all formatting to summarize errors
4512 my ( $infile_syntax_ok, $formatter ) = @_;
4514 my $rOpts = $self->{_rOpts};
4515 my $warning_count = $self->{_warning_count};
4516 my $saw_code_bug = $self->{_saw_code_bug};
4519 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4520 || $saw_code_bug == 1
4521 || $rOpts->{'logfile'};
4522 my $log_file = $self->{_log_file};
4523 if ($warning_count) {
4524 if ($save_logfile) {
4525 $self->block_log_output(); # avoid echoing this to the logfile
4527 "The logfile $log_file may contain useful information\n");
4528 $self->unblock_log_output();
4531 if ( $self->{_complaint_count} > 0 ) {
4533 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4537 if ( $self->{_saw_brace_error}
4538 && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
4540 $self->warning("To save a full .LOG file rerun with -g\n");
4543 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4545 if ($save_logfile) {
4546 my $log_file = $self->{_log_file};
4547 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4549 my $routput_array = $self->{_output_array};
4550 foreach ( @{$routput_array} ) { $fh->print($_) }
4551 if ( $log_file ne '-' && !ref $log_file ) {
4552 eval { $fh->close() };
4558 #####################################################################
4560 # The Perl::Tidy::DevNull class supplies a dummy print method
4562 #####################################################################
4564 package Perl::Tidy::DevNull;
4565 sub new { return bless {}, $_[0] }
4566 sub print { return }
4567 sub close { return }
4569 #####################################################################
4571 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4573 #####################################################################
4575 package Perl::Tidy::HtmlWriter;
4585 %short_to_long_names
4589 $missing_html_entities
4592 # replace unsafe characters with HTML entity representation if HTML::Entities
4594 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4598 my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4599 $html_src_extension )
4602 my $html_file_opened = 0;
4604 ( $html_fh, my $html_filename ) =
4605 Perl::Tidy::streamhandle( $html_file, 'w' );
4607 Perl::Tidy::Warn("can't open $html_file: $!\n");
4610 $html_file_opened = 1;
4612 if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4613 $input_file = "NONAME";
4616 # write the table of contents to a string
4618 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4621 my @pre_string_stack;
4622 if ( $rOpts->{'html-pre-only'} ) {
4624 # pre section goes directly to the output stream
4625 $html_pre_fh = $html_fh;
4626 $html_pre_fh->print( <<"PRE_END");
4632 # pre section go out to a temporary string
4634 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4635 push @pre_string_stack, \$pre_string;
4638 # pod text gets diverted if the 'pod2html' is used
4641 if ( $rOpts->{'pod2html'} ) {
4642 if ( $rOpts->{'html-pre-only'} ) {
4643 undef $rOpts->{'pod2html'};
4646 eval "use Pod::Html";
4649 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4650 undef $rOpts->{'pod2html'};
4653 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4660 if ( $rOpts->{'frames'} ) {
4661 unless ($extension) {
4663 "cannot use frames without a specified output extension; ignoring -frm\n";
4664 undef $rOpts->{'frames'};
4667 $toc_filename = $input_file . $html_toc_extension . $extension;
4668 $src_filename = $input_file . $html_src_extension . $extension;
4672 # ----------------------------------------------------------
4673 # Output is now directed as follows:
4674 # html_toc_fh <-- table of contents items
4675 # html_pre_fh <-- the <pre> section of formatted code, except:
4676 # html_pod_fh <-- pod goes here with the pod2html option
4677 # ----------------------------------------------------------
4679 my $title = $rOpts->{'title'};
4681 ( $title, my $path ) = fileparse($input_file);
4683 my $toc_item_count = 0;
4684 my $in_toc_package = "";
4687 _input_file => $input_file, # name of input file
4688 _title => $title, # title, unescaped
4689 _html_file => $html_file, # name of .html output file
4690 _toc_filename => $toc_filename, # for frames option
4691 _src_filename => $src_filename, # for frames option
4692 _html_file_opened => $html_file_opened, # a flag
4693 _html_fh => $html_fh, # the output stream
4694 _html_pre_fh => $html_pre_fh, # pre section goes here
4695 _rpre_string_stack => \@pre_string_stack, # stack of pre sections
4696 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4697 _rpod_string => \$pod_string, # string holding pod
4698 _pod_cut_count => 0, # how many =cut's?
4699 _html_toc_fh => $html_toc_fh, # fh for table of contents
4700 _rtoc_string => \$toc_string, # string holding toc
4701 _rtoc_item_count => \$toc_item_count, # how many toc items
4702 _rin_toc_package => \$in_toc_package, # package name
4703 _rtoc_name_count => {}, # hash to track unique names
4704 _rpackage_stack => [], # stack to check for package
4706 _rlast_level => \$last_level, # brace indentation level
4712 # Add an item to the html table of contents.
4713 # This is called even if no table of contents is written,
4714 # because we still want to put the anchors in the <pre> text.
4715 # We are given an anchor name and its type; types are:
4716 # 'package', 'sub', '__END__', '__DATA__', 'EOF'
4717 # There must be an 'EOF' call at the end to wrap things up.
4719 my ( $name, $type ) = @_;
4720 my $html_toc_fh = $self->{_html_toc_fh};
4721 my $html_pre_fh = $self->{_html_pre_fh};
4722 my $rtoc_name_count = $self->{_rtoc_name_count};
4723 my $rtoc_item_count = $self->{_rtoc_item_count};
4724 my $rlast_level = $self->{_rlast_level};
4725 my $rin_toc_package = $self->{_rin_toc_package};
4726 my $rpackage_stack = $self->{_rpackage_stack};
4728 # packages contain sublists of subs, so to avoid errors all package
4729 # items are written and finished with the following routines
4730 my $end_package_list = sub {
4731 if ($$rin_toc_package) {
4732 $html_toc_fh->print("</ul>\n</li>\n");
4733 $$rin_toc_package = "";
4737 my $start_package_list = sub {
4738 my ( $unique_name, $package ) = @_;
4739 if ($$rin_toc_package) { $end_package_list->() }
4740 $html_toc_fh->print(<<EOM);
4741 <li><a href=\"#$unique_name\">package $package</a>
4744 $$rin_toc_package = $package;
4747 # start the table of contents on the first item
4748 unless ($$rtoc_item_count) {
4750 # but just quit if we hit EOF without any other entries
4751 # in this case, there will be no toc
4752 return if ( $type eq 'EOF' );
4753 $html_toc_fh->print( <<"TOC_END");
4754 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4758 $$rtoc_item_count++;
4760 # make a unique anchor name for this location:
4761 # - packages get a 'package-' prefix
4762 # - subs use their names
4763 my $unique_name = $name;
4764 if ( $type eq 'package' ) { $unique_name = "package-$name" }
4766 # append '-1', '-2', etc if necessary to make unique; this will
4767 # be unique because subs and packages cannot have a '-'
4768 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4769 $unique_name .= "-$count";
4772 # - all names get terminal '-' if pod2html is used, to avoid
4773 # conflicts with anchor names created by pod2html
4774 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4776 # start/stop lists of subs
4777 if ( $type eq 'sub' ) {
4778 my $package = $rpackage_stack->[$$rlast_level];
4779 unless ($package) { $package = 'main' }
4781 # if we're already in a package/sub list, be sure its the right
4782 # package or else close it
4783 if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4784 $end_package_list->();
4787 # start a package/sub list if necessary
4788 unless ($$rin_toc_package) {
4789 $start_package_list->( $unique_name, $package );
4793 # now write an entry in the toc for this item
4794 if ( $type eq 'package' ) {
4795 $start_package_list->( $unique_name, $name );
4797 elsif ( $type eq 'sub' ) {
4798 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4801 $end_package_list->();
4802 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4805 # write the anchor in the <pre> section
4806 $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4808 # end the table of contents, if any, on the end of file
4809 if ( $type eq 'EOF' ) {
4810 $html_toc_fh->print( <<"TOC_END");
4812 <!-- END CODE INDEX -->
4819 # This is the official list of tokens which may be identified by the
4820 # user. Long names are used as getopt keys. Short names are
4821 # convenient short abbreviations for specifying input. Short names
4822 # somewhat resemble token type characters, but are often different
4823 # because they may only be alphanumeric, to allow command line
4824 # input. Also, note that because of case insensitivity of html,
4825 # this table must be in a single case only (I've chosen to use all
4827 # When adding NEW_TOKENS: update this hash table
4828 # short names => long names
4829 %short_to_long_names = (
4839 'pu' => 'punctuation',
4840 'i' => 'identifier',
4842 'h' => 'here-doc-target',
4843 'hh' => 'here-doc-text',
4845 'sc' => 'semicolon',
4846 'm' => 'subroutine',
4850 # Now we have to map actual token types into one of the above short
4851 # names; any token types not mapped will get 'punctuation'
4854 # The values of this hash table correspond to the keys of the
4855 # previous hash table.
4856 # The keys of this hash table are token types and can be seen
4857 # by running with --dump-token-types (-dtt).
4859 # When adding NEW_TOKENS: update this hash table
4860 # $type => $short_name
4861 %token_short_names = (
4886 # These token types will all be called identifiers for now
4887 # FIXME: could separate user defined modules as separate type
4888 my @identifier = qw" i t U C Y Z G :: CORE::";
4889 @token_short_names{@identifier} = ('i') x scalar(@identifier);
4891 # These token types will be called 'structure'
4892 my @structure = qw" { } ";
4893 @token_short_names{@structure} = ('s') x scalar(@structure);
4895 # OLD NOTES: save for reference
4896 # Any of these could be added later if it would be useful.
4897 # For now, they will by default become punctuation
4898 # my @list = qw" L R [ ] ";
4899 # @token_long_names{@list} = ('non-structure') x scalar(@list);
4902 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4904 # @token_long_names{@list} = ('math') x scalar(@list);
4906 # my @list = qw" & &= ~ ~= ^ ^= | |= ";
4907 # @token_long_names{@list} = ('bit') x scalar(@list);
4909 # my @list = qw" == != < > <= <=> ";
4910 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4912 # my @list = qw" && || ! &&= ||= //= ";
4913 # @token_long_names{@list} = ('logical') x scalar(@list);
4915 # my @list = qw" . .= =~ !~ x x= ";
4916 # @token_long_names{@list} = ('string-operators') x scalar(@list);
4919 # my @list = qw" .. -> <> ... \ ? ";
4920 # @token_long_names{@list} = ('misc-operators') x scalar(@list);
4924 sub make_getopt_long_names {
4926 my ($rgetopt_names) = @_;
4927 while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4928 push @$rgetopt_names, "html-color-$name=s";
4929 push @$rgetopt_names, "html-italic-$name!";
4930 push @$rgetopt_names, "html-bold-$name!";
4932 push @$rgetopt_names, "html-color-background=s";
4933 push @$rgetopt_names, "html-linked-style-sheet=s";
4934 push @$rgetopt_names, "nohtml-style-sheets";
4935 push @$rgetopt_names, "html-pre-only";
4936 push @$rgetopt_names, "html-line-numbers";
4937 push @$rgetopt_names, "html-entities!";
4938 push @$rgetopt_names, "stylesheet";
4939 push @$rgetopt_names, "html-table-of-contents!";
4940 push @$rgetopt_names, "pod2html!";
4941 push @$rgetopt_names, "frames!";
4942 push @$rgetopt_names, "html-toc-extension=s";
4943 push @$rgetopt_names, "html-src-extension=s";
4945 # Pod::Html parameters:
4946 push @$rgetopt_names, "backlink=s";
4947 push @$rgetopt_names, "cachedir=s";
4948 push @$rgetopt_names, "htmlroot=s";
4949 push @$rgetopt_names, "libpods=s";
4950 push @$rgetopt_names, "podpath=s";
4951 push @$rgetopt_names, "podroot=s";
4952 push @$rgetopt_names, "title=s";
4954 # Pod::Html parameters with leading 'pod' which will be removed
4955 # before the call to Pod::Html
4956 push @$rgetopt_names, "podquiet!";
4957 push @$rgetopt_names, "podverbose!";
4958 push @$rgetopt_names, "podrecurse!";
4959 push @$rgetopt_names, "podflush";
4960 push @$rgetopt_names, "podheader!";
4961 push @$rgetopt_names, "podindex!";
4964 sub make_abbreviated_names {
4966 # We're appending things like this to the expansion list:
4967 # 'hcc' => [qw(html-color-comment)],
4968 # 'hck' => [qw(html-color-keyword)],
4971 my ($rexpansion) = @_;
4973 # abbreviations for color/bold/italic properties
4974 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4975 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4976 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4977 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4978 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4979 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4982 # abbreviations for all other html options
4983 ${$rexpansion}{"hcbg"} = ["html-color-background"];
4984 ${$rexpansion}{"pre"} = ["html-pre-only"];
4985 ${$rexpansion}{"toc"} = ["html-table-of-contents"];
4986 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
4987 ${$rexpansion}{"nnn"} = ["html-line-numbers"];
4988 ${$rexpansion}{"hent"} = ["html-entities"];
4989 ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4990 ${$rexpansion}{"css"} = ["html-linked-style-sheet"];
4991 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
4992 ${$rexpansion}{"ss"} = ["stylesheet"];
4993 ${$rexpansion}{"pod"} = ["pod2html"];
4994 ${$rexpansion}{"npod"} = ["nopod2html"];
4995 ${$rexpansion}{"frm"} = ["frames"];
4996 ${$rexpansion}{"nfrm"} = ["noframes"];
4997 ${$rexpansion}{"text"} = ["html-toc-extension"];
4998 ${$rexpansion}{"sext"} = ["html-src-extension"];
5003 # This will be called once after options have been parsed
5007 # X11 color names for default settings that seemed to look ok
5008 # (these color names are only used for programming clarity; the hex
5009 # numbers are actually written)
5010 use constant ForestGreen => "#228B22";
5011 use constant SaddleBrown => "#8B4513";
5012 use constant magenta4 => "#8B008B";
5013 use constant IndianRed3 => "#CD5555";
5014 use constant DeepSkyBlue4 => "#00688B";
5015 use constant MediumOrchid3 => "#B452CD";
5016 use constant black => "#000000";
5017 use constant white => "#FFFFFF";
5018 use constant red => "#FF0000";
5020 # set default color, bold, italic properties
5021 # anything not listed here will be given the default (punctuation) color --
5022 # these types currently not listed and get default: ws pu s sc cm co p
5023 # When adding NEW_TOKENS: add an entry here if you don't want defaults
5025 # set_default_properties( $short_name, default_color, bold?, italic? );
5026 set_default_properties( 'c', ForestGreen, 0, 0 );
5027 set_default_properties( 'pd', ForestGreen, 0, 1 );
5028 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
5029 set_default_properties( 'q', IndianRed3, 0, 0 );
5030 set_default_properties( 'hh', IndianRed3, 0, 1 );
5031 set_default_properties( 'h', IndianRed3, 1, 0 );
5032 set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
5033 set_default_properties( 'w', black, 0, 0 );
5034 set_default_properties( 'n', MediumOrchid3, 0, 0 );
5035 set_default_properties( 'v', MediumOrchid3, 0, 0 );
5036 set_default_properties( 'j', IndianRed3, 1, 0 );
5037 set_default_properties( 'm', red, 1, 0 );
5039 set_default_color( 'html-color-background', white );
5040 set_default_color( 'html-color-punctuation', black );
5042 # setup property lookup tables for tokens based on their short names
5043 # every token type has a short name, and will use these tables
5044 # to do the html markup
5045 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5046 $html_color{$short_name} = $rOpts->{"html-color-$long_name"};
5047 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
5048 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
5051 # write style sheet to STDOUT and die if requested
5052 if ( defined( $rOpts->{'stylesheet'} ) ) {
5053 write_style_sheet_file('-');
5057 # make sure user gives a file name after -css
5058 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
5059 $css_linkname = $rOpts->{'html-linked-style-sheet'};
5060 if ( $css_linkname =~ /^-/ ) {
5061 Perl::Tidy::Die "You must specify a valid filename after -css\n";
5065 # check for conflict
5066 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
5067 $rOpts->{'nohtml-style-sheets'} = 0;
5068 warning("You can't specify both -css and -nss; -nss ignored\n");
5071 # write a style sheet file if necessary
5072 if ($css_linkname) {
5074 # if the selected filename exists, don't write, because user may
5075 # have done some work by hand to create it; use backup name instead
5076 # Also, this will avoid a potential disaster in which the user
5077 # forgets to specify the style sheet, like this:
5078 # perltidy -html -css myfile1.pl myfile2.pl
5079 # This would cause myfile1.pl to parsed as the style sheet by GetOpts
5080 my $css_filename = $css_linkname;
5081 unless ( -e $css_filename ) {
5082 write_style_sheet_file($css_filename);
5085 $missing_html_entities = 1 unless $rOpts->{'html-entities'};
5088 sub write_style_sheet_file {
5090 my $css_filename = shift;
5092 unless ( $fh = IO::File->new("> $css_filename") ) {
5093 Perl::Tidy::Die "can't open $css_filename: $!\n";
5095 write_style_sheet_data($fh);
5096 eval { $fh->close };
5099 sub write_style_sheet_data {
5101 # write the style sheet data to an open file handle
5104 my $bg_color = $rOpts->{'html-color-background'};
5105 my $text_color = $rOpts->{'html-color-punctuation'};
5107 # pre-bgcolor is new, and may not be defined
5108 my $pre_bg_color = $rOpts->{'html-pre-color-background'};
5109 $pre_bg_color = $bg_color unless $pre_bg_color;
5111 $fh->print(<<"EOM");
5112 /* default style sheet generated by perltidy */
5113 body {background: $bg_color; color: $text_color}
5114 pre { color: $text_color;
5115 background: $pre_bg_color;
5116 font-family: courier;
5121 foreach my $short_name ( sort keys %short_to_long_names ) {
5122 my $long_name = $short_to_long_names{$short_name};
5124 my $abbrev = '.' . $short_name;
5125 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
5126 my $color = $html_color{$short_name};
5127 if ( !defined($color) ) { $color = $text_color }
5128 $fh->print("$abbrev \{ color: $color;");
5130 if ( $html_bold{$short_name} ) {
5131 $fh->print(" font-weight:bold;");
5134 if ( $html_italic{$short_name} ) {
5135 $fh->print(" font-style:italic;");
5137 $fh->print("} /* $long_name */\n");
5141 sub set_default_color {
5143 # make sure that options hash $rOpts->{$key} contains a valid color
5144 my ( $key, $color ) = @_;
5145 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5146 $rOpts->{$key} = check_RGB($color);
5151 # if color is a 6 digit hex RGB value, prepend a #, otherwise
5152 # assume that it is a valid ascii color name
5154 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5158 sub set_default_properties {
5159 my ( $short_name, $color, $bold, $italic ) = @_;
5161 set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5163 $key = "html-bold-$short_to_long_names{$short_name}";
5164 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5165 $key = "html-italic-$short_to_long_names{$short_name}";
5166 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5171 # Use Pod::Html to process the pod and make the page
5172 # then merge the perltidy code sections into it.
5173 # return 1 if success, 0 otherwise
5175 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
5176 my $input_file = $self->{_input_file};
5177 my $title = $self->{_title};
5178 my $success_flag = 0;
5180 # don't try to use pod2html if no pod
5181 unless ($pod_string) {
5182 return $success_flag;
5185 # Pod::Html requires a real temporary filename
5186 my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
5189 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5190 return $success_flag;
5193 #------------------------------------------------------------------
5194 # Warning: a temporary file is open; we have to clean up if
5195 # things go bad. From here on all returns should be by going to
5196 # RETURN so that the temporary file gets unlinked.
5197 #------------------------------------------------------------------
5199 # write the pod text to the temporary file
5200 $fh_tmp->print($pod_string);
5203 # Hand off the pod to pod2html.
5204 # Note that we can use the same temporary filename for input and output
5205 # because of the way pod2html works.
5209 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5212 # Flags with string args:
5213 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5214 # "podpath=s", "podroot=s"
5215 # Note: -css=s is handled by perltidy itself
5216 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
5217 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5220 # Toggle switches; these have extra leading 'pod'
5221 # "header!", "index!", "recurse!", "quiet!", "verbose!"
5222 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5223 my $kwd = $kw; # allows us to strip 'pod'
5224 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5225 elsif ( defined( $rOpts->{$kw} ) ) {
5227 push @args, "--no$kwd";
5233 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5235 # Must clean up if pod2html dies (it can);
5236 # Be careful not to overwrite callers __DIE__ routine
5237 local $SIG{__DIE__} = sub {
5238 unlink $tmpfile if -e $tmpfile;
5239 Perl::Tidy::Die $_[0];
5244 $fh_tmp = IO::File->new( $tmpfile, 'r' );
5247 # this error shouldn't happen ... we just used this filename
5249 "unable to open temporary file $tmpfile; cannot use pod2html\n";
5253 my $html_fh = $self->{_html_fh};
5259 # This routine will write the html selectively and store the toc
5260 my $html_print = sub {
5262 $html_fh->print($_) unless ($no_print);
5263 if ($in_toc) { push @toc, $_ }
5267 # loop over lines of html output from pod2html and merge in
5268 # the necessary perltidy html sections
5269 my ( $saw_body, $saw_index, $saw_body_end );
5270 while ( my $line = $fh_tmp->getline() ) {
5272 if ( $line =~ /^\s*<html>\s*$/i ) {
5273 my $date = localtime;
5274 $html_print->("<!-- Generated by perltidy on $date -->\n");
5275 $html_print->($line);
5278 # Copy the perltidy css, if any, after <body> tag
5279 elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5281 $html_print->($css_string) if $css_string;
5282 $html_print->($line);
5284 # add a top anchor and heading
5285 $html_print->("<a name=\"-top-\"></a>\n");
5286 $title = escape_html($title);
5287 $html_print->("<h1>$title</h1>\n");
5290 # check for start of index, old pod2html
5291 # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
5292 # <!-- INDEX BEGIN -->
5296 # <!-- INDEX END -->
5298 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5301 # when frames are used, an extra table of contents in the
5302 # contents panel is confusing, so don't print it
5303 $no_print = $rOpts->{'frames'}
5304 || !$rOpts->{'html-table-of-contents'};
5305 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5306 $html_print->($line);
5309 # check for start of index, new pod2html
5310 # After Pod::Html VERSION 1.15_02 it is delimited as:
5314 elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
5318 # when frames are used, an extra table of contents in the
5319 # contents panel is confusing, so don't print it
5320 $no_print = $rOpts->{'frames'}
5321 || !$rOpts->{'html-table-of-contents'};
5322 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5323 $html_print->($line);
5326 # Check for end of index, old pod2html
5327 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5329 $html_print->($line);
5331 # Copy the perltidy toc, if any, after the Pod::Html toc
5333 $html_print->("<hr />\n") if $rOpts->{'frames'};
5334 $html_print->("<h2>Code Index:</h2>\n");
5335 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5336 $html_print->(@toc);
5342 # must track <ul> depth level for new pod2html
5343 elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
5345 $html_print->($line);
5348 # Check for end of index, for new pod2html
5349 elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
5351 $html_print->($line);
5353 # Copy the perltidy toc, if any, after the Pod::Html toc
5354 if ( $ul_level <= 0 ) {
5357 $html_print->("<hr />\n") if $rOpts->{'frames'};
5358 $html_print->("<h2>Code Index:</h2>\n");
5359 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5360 $html_print->(@toc);
5368 # Copy one perltidy section after each marker
5369 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5371 $html_print->($1) if $1;
5373 # Intermingle code and pod sections if we saw multiple =cut's.
5374 if ( $self->{_pod_cut_count} > 1 ) {
5375 my $rpre_string = shift(@$rpre_string_stack);
5376 if ($$rpre_string) {
5377 $html_print->('<pre>');
5378 $html_print->($$rpre_string);
5379 $html_print->('</pre>');
5383 # shouldn't happen: we stored a string before writing
5386 "Problem merging html stream with pod2html; order may be wrong\n";
5388 $html_print->($line);
5391 # If didn't see multiple =cut lines, we'll put the pod out first
5392 # and then the code, because it's less confusing.
5395 # since we are not intermixing code and pod, we don't need
5396 # or want any <hr> lines which separated pod and code
5397 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5401 # Copy any remaining code section before the </body> tag
5402 elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5404 if (@$rpre_string_stack) {
5405 unless ( $self->{_pod_cut_count} > 1 ) {
5406 $html_print->('<hr />');
5408 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
5409 $html_print->('<pre>');
5410 $html_print->($$rpre_string);
5411 $html_print->('</pre>');
5414 $html_print->($line);
5417 $html_print->($line);
5422 unless ($saw_body) {
5423 Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
5426 unless ($saw_body_end) {
5427 Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
5430 unless ($saw_index) {
5431 Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
5436 eval { $html_fh->close() };
5438 # note that we have to unlink tmpfile before making frames
5439 # because the tmpfile may be one of the names used for frames
5440 unlink $tmpfile if -e $tmpfile;
5441 if ( $success_flag && $rOpts->{'frames'} ) {
5442 $self->make_frame( \@toc );
5444 return $success_flag;
5449 # Make a frame with table of contents in the left panel
5450 # and the text in the right panel.
5452 # $html_filename contains the no-frames html output
5453 # $rtoc is a reference to an array with the table of contents
5456 my $input_file = $self->{_input_file};
5457 my $html_filename = $self->{_html_file};
5458 my $toc_filename = $self->{_toc_filename};
5459 my $src_filename = $self->{_src_filename};
5460 my $title = $self->{_title};
5461 $title = escape_html($title);
5463 # FUTURE input parameter:
5464 my $top_basename = "";
5466 # We need to produce 3 html files:
5467 # 1. - the table of contents
5468 # 2. - the contents (source code) itself
5469 # 3. - the frame which contains them
5471 # get basenames for relative links
5472 my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5473 my ( $src_basename, $src_path ) = fileparse($src_filename);
5475 # 1. Make the table of contents panel, with appropriate changes
5476 # to the anchor names
5477 my $src_frame_name = 'SRC';
5479 write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5482 # 2. The current .html filename is renamed to be the contents panel
5483 rename( $html_filename, $src_filename )
5484 or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
5486 # 3. Then use the original html filename for the frame
5488 $title, $html_filename, $top_basename,
5489 $toc_basename, $src_basename, $src_frame_name
5493 sub write_toc_html {
5495 # write a separate html table of contents file for frames
5496 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5497 my $fh = IO::File->new( $toc_filename, 'w' )
5498 or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
5502 <title>$title</title>
5505 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5509 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5510 $fh->print( join "", @$rtoc );
5519 sub write_frame_html {
5521 # write an html file to be the table of contents frame
5523 $title, $frame_filename, $top_basename,
5524 $toc_basename, $src_basename, $src_frame_name
5527 my $fh = IO::File->new( $frame_filename, 'w' )
5528 or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
5531 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5532 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5533 <?xml version="1.0" encoding="iso-8859-1" ?>
5534 <html xmlns="http://www.w3.org/1999/xhtml">
5536 <title>$title</title>
5540 # two left panels, one right, if master index file
5541 if ($top_basename) {
5543 <frameset cols="20%,80%">
5544 <frameset rows="30%,70%">
5545 <frame src = "$top_basename" />
5546 <frame src = "$toc_basename" />
5551 # one left panels, one right, if no master index file
5554 <frameset cols="20%,*">
5555 <frame src = "$toc_basename" />
5559 <frame src = "$src_basename" name = "$src_frame_name" />
5562 <p>If you see this message, you are using a non-frame-capable web client.</p>
5563 <p>This document contains:</p>
5565 <li><a href="$toc_basename">A table of contents</a></li>
5566 <li><a href="$src_basename">The source code</a></li>
5575 sub change_anchor_names {
5577 # add a filename and target to anchors
5578 # also return the first anchor
5579 my ( $rlines, $filename, $target ) = @_;
5581 foreach my $line (@$rlines) {
5583 # We're looking for lines like this:
5584 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5585 # ---- - -------- -----------------
5587 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5591 my $href = "$filename#$name";
5592 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5593 unless ($first_anchor) { $first_anchor = $href }
5596 return $first_anchor;
5599 sub close_html_file {
5601 return unless $self->{_html_file_opened};
5603 my $html_fh = $self->{_html_fh};
5604 my $rtoc_string = $self->{_rtoc_string};
5606 # There are 3 basic paths to html output...
5608 # ---------------------------------
5609 # Path 1: finish up if in -pre mode
5610 # ---------------------------------
5611 if ( $rOpts->{'html-pre-only'} ) {
5612 $html_fh->print( <<"PRE_END");
5615 eval { $html_fh->close() };
5620 $self->add_toc_item( 'EOF', 'EOF' );
5622 my $rpre_string_stack = $self->{_rpre_string_stack};
5624 # Patch to darken the <pre> background color in case of pod2html and
5625 # interleaved code/documentation. Otherwise, the distinction
5626 # between code and documentation is blurred.
5627 if ( $rOpts->{pod2html}
5628 && $self->{_pod_cut_count} >= 1
5629 && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5631 $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5634 # put the css or its link into a string, if used
5636 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5638 # use css linked to another file
5639 if ( $rOpts->{'html-linked-style-sheet'} ) {
5641 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5645 # use css embedded in this file
5646 elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5647 $fh_css->print( <<'ENDCSS');
5648 <style type="text/css">
5651 write_style_sheet_data($fh_css);
5652 $fh_css->print( <<"ENDCSS");
5658 # -----------------------------------------------------------
5659 # path 2: use pod2html if requested
5660 # If we fail for some reason, continue on to path 3
5661 # -----------------------------------------------------------
5662 if ( $rOpts->{'pod2html'} ) {
5663 my $rpod_string = $self->{_rpod_string};
5664 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5665 $rpre_string_stack )
5669 # --------------------------------------------------
5670 # path 3: write code in html, with pod only in italics
5671 # --------------------------------------------------
5672 my $input_file = $self->{_input_file};
5673 my $title = escape_html($input_file);
5674 my $date = localtime;
5675 $html_fh->print( <<"HTML_START");
5676 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5677 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5678 <!-- Generated by perltidy on $date -->
5679 <html xmlns="http://www.w3.org/1999/xhtml">
5681 <title>$title</title>
5684 # output the css, if used
5686 $html_fh->print($css_string);
5687 $html_fh->print( <<"ENDCSS");
5694 $html_fh->print( <<"HTML_START");
5696 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5700 $html_fh->print("<a name=\"-top-\"></a>\n");
5701 $html_fh->print( <<"EOM");
5705 # copy the table of contents
5707 && !$rOpts->{'frames'}
5708 && $rOpts->{'html-table-of-contents'} )
5710 $html_fh->print($$rtoc_string);
5713 # copy the pre section(s)
5714 my $fname_comment = $input_file;
5715 $fname_comment =~ s/--+/-/g; # protect HTML comment tags
5716 $html_fh->print( <<"END_PRE");
5718 <!-- contents of filename: $fname_comment -->
5722 foreach my $rpre_string (@$rpre_string_stack) {
5723 $html_fh->print($$rpre_string);
5726 # and finish the html page
5727 $html_fh->print( <<"HTML_END");
5732 eval { $html_fh->close() }; # could be object without close method
5734 if ( $rOpts->{'frames'} ) {
5735 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5736 $self->make_frame( \@toc );
5742 my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5743 my ( @colored_tokens, $j, $string, $type, $token, $level );
5744 my $rlast_level = $self->{_rlast_level};
5745 my $rpackage_stack = $self->{_rpackage_stack};
5747 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5748 $type = $$rtoken_type[$j];
5749 $token = $$rtokens[$j];
5750 $level = $$rlevels[$j];
5751 $level = 0 if ( $level < 0 );
5753 #-------------------------------------------------------
5754 # Update the package stack. The package stack is needed to keep
5755 # the toc correct because some packages may be declared within
5756 # blocks and go out of scope when we leave the block.
5757 #-------------------------------------------------------
5758 if ( $level > $$rlast_level ) {
5759 unless ( $rpackage_stack->[ $level - 1 ] ) {
5760 $rpackage_stack->[ $level - 1 ] = 'main';
5762 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5764 elsif ( $level < $$rlast_level ) {
5765 my $package = $rpackage_stack->[$level];
5766 unless ($package) { $package = 'main' }
5768 # if we change packages due to a nesting change, we
5769 # have to make an entry in the toc
5770 if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5771 $self->add_toc_item( $package, 'package' );
5774 $$rlast_level = $level;
5776 #-------------------------------------------------------
5777 # Intercept a sub name here; split it
5778 # into keyword 'sub' and sub name; and add an
5780 #-------------------------------------------------------
5781 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5782 $token = $self->markup_html_element( $1, 'k' );
5783 push @colored_tokens, $token;
5787 # but don't include sub declarations in the toc;
5788 # these wlll have leading token types 'i;'
5789 my $signature = join "", @$rtoken_type;
5790 unless ( $signature =~ /^i;/ ) {
5791 my $subname = $token;
5792 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5793 $self->add_toc_item( $subname, 'sub' );
5797 #-------------------------------------------------------
5798 # Intercept a package name here; split it
5799 # into keyword 'package' and name; add to the toc,
5800 # and update the package stack
5801 #-------------------------------------------------------
5802 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5803 $token = $self->markup_html_element( $1, 'k' );
5804 push @colored_tokens, $token;
5807 $self->add_toc_item( "$token", 'package' );
5808 $rpackage_stack->[$level] = $token;
5811 $token = $self->markup_html_element( $token, $type );
5812 push @colored_tokens, $token;
5814 return ( \@colored_tokens );
5817 sub markup_html_element {
5819 my ( $token, $type ) = @_;
5821 return $token if ( $type eq 'b' ); # skip a blank token
5822 return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5823 $token = escape_html($token);
5825 # get the short abbreviation for this token type
5826 my $short_name = $token_short_names{$type};
5827 if ( !defined($short_name) ) {
5828 $short_name = "pu"; # punctuation is default
5831 # handle style sheets..
5832 if ( !$rOpts->{'nohtml-style-sheets'} ) {
5833 if ( $short_name ne 'pu' ) {
5834 $token = qq(<span class="$short_name">) . $token . "</span>";
5838 # handle no style sheets..
5840 my $color = $html_color{$short_name};
5842 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5843 $token = qq(<font color="$color">) . $token . "</font>";
5845 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5846 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
5854 if ($missing_html_entities) {
5855 $token =~ s/\&/&/g;
5856 $token =~ s/\</</g;
5857 $token =~ s/\>/>/g;
5858 $token =~ s/\"/"/g;
5861 HTML::Entities::encode_entities($token);
5866 sub finish_formatting {
5868 # called after last line
5870 $self->close_html_file();
5877 return unless $self->{_html_file_opened};
5878 my $html_pre_fh = $self->{_html_pre_fh};
5879 my ($line_of_tokens) = @_;
5880 my $line_type = $line_of_tokens->{_line_type};
5881 my $input_line = $line_of_tokens->{_line_text};
5882 my $line_number = $line_of_tokens->{_line_number};
5885 # markup line of code..
5887 if ( $line_type eq 'CODE' ) {
5888 my $rtoken_type = $line_of_tokens->{_rtoken_type};
5889 my $rtokens = $line_of_tokens->{_rtokens};
5890 my $rlevels = $line_of_tokens->{_rlevels};
5892 if ( $input_line =~ /(^\s*)/ ) {
5898 my ($rcolored_tokens) =
5899 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5900 $html_line .= join '', @$rcolored_tokens;
5903 # markup line of non-code..
5906 if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5907 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5908 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5909 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5910 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5911 elsif ( $line_type eq 'END_START' ) {
5912 $line_character = 'k';
5913 $self->add_toc_item( '__END__', '__END__' );
5915 elsif ( $line_type eq 'DATA_START' ) {
5916 $line_character = 'k';
5917 $self->add_toc_item( '__DATA__', '__DATA__' );
5919 elsif ( $line_type =~ /^POD/ ) {
5920 $line_character = 'P';
5921 if ( $rOpts->{'pod2html'} ) {
5922 my $html_pod_fh = $self->{_html_pod_fh};
5923 if ( $line_type eq 'POD_START' ) {
5925 my $rpre_string_stack = $self->{_rpre_string_stack};
5926 my $rpre_string = $rpre_string_stack->[-1];
5928 # if we have written any non-blank lines to the
5929 # current pre section, start writing to a new output
5931 if ( $$rpre_string =~ /\S/ ) {
5934 Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5935 $self->{_html_pre_fh} = $html_pre_fh;
5936 push @$rpre_string_stack, \$pre_string;
5938 # leave a marker in the pod stream so we know
5939 # where to put the pre section we just
5941 my $for_html = '=for html'; # don't confuse pod utils
5942 $html_pod_fh->print(<<EOM);
5945 <!-- pERLTIDY sECTION -->
5950 # otherwise, just clear the current string and start
5954 $html_pod_fh->print("\n");
5957 $html_pod_fh->print( $input_line . "\n" );
5958 if ( $line_type eq 'POD_END' ) {
5959 $self->{_pod_cut_count}++;
5960 $html_pod_fh->print("\n");
5965 else { $line_character = 'Q' }
5966 $html_line = $self->markup_html_element( $input_line, $line_character );
5969 # add the line number if requested
5970 if ( $rOpts->{'html-line-numbers'} ) {
5972 ( $line_number < 10 ) ? " "
5973 : ( $line_number < 100 ) ? " "
5974 : ( $line_number < 1000 ) ? " "
5976 $html_line = $extra_space . $line_number . " " . $html_line;
5980 $html_pre_fh->print("$html_line\n");
5983 #####################################################################
5985 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5986 # line breaks to the token stream
5988 # WARNING: This is not a real class for speed reasons. Only one
5989 # Formatter may be used.
5991 #####################################################################
5993 package Perl::Tidy::Formatter;
5997 # Caution: these debug flags produce a lot of output
5998 # They should all be 0 except when debugging small scripts
5999 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
6000 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
6001 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
6002 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
6003 use constant FORMATTER_DEBUG_FLAG_CI => 0;
6004 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
6005 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
6006 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
6007 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
6008 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
6009 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
6010 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
6011 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
6012 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
6014 my $debug_warning = sub {
6015 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
6018 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
6019 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
6020 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
6021 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
6022 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
6023 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
6024 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
6025 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
6026 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
6027 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
6028 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
6029 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
6030 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
6031 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
6038 $max_gnu_stack_index
6039 $gnu_position_predictor
6040 $line_start_index_to_go
6041 $last_indentation_written
6042 $last_unadjusted_indentation
6044 $last_output_short_opening_token
6046 $saw_VERSION_in_this_file
6051 $gnu_sequence_number
6052 $last_output_indentation
6058 @type_sequence_to_go
6059 @container_environment_to_go
6060 @bond_strength_to_go
6061 @forced_breakpoint_to_go
6062 @token_lengths_to_go
6063 @summed_lengths_to_go
6065 @leading_spaces_to_go
6066 @reduced_spaces_to_go
6067 @matching_token_to_go
6069 @nesting_blocks_to_go
6071 @nesting_depth_to_go
6073 @old_breakpoint_to_go
6079 %saved_opening_indentation
6082 $comma_count_in_batch
6083 $old_line_count_in_batch
6084 $last_nonblank_index_to_go
6085 $last_nonblank_type_to_go
6086 $last_nonblank_token_to_go
6087 $last_last_nonblank_index_to_go
6088 $last_last_nonblank_type_to_go
6089 $last_last_nonblank_token_to_go
6090 @nonblank_lines_at_depth
6093 @whitespace_level_stack
6094 $whitespace_last_level
6096 $in_format_skipping_section
6097 $format_skipping_pattern_begin
6098 $format_skipping_pattern_end
6100 $forced_breakpoint_count
6101 $forced_breakpoint_undo_count
6102 @forced_breakpoint_undo_stack
6103 %postponed_breakpoint
6107 $first_embedded_tab_at
6108 $last_embedded_tab_at
6109 $deleted_semicolon_count
6110 $first_deleted_semicolon_at
6111 $last_deleted_semicolon_at
6112 $added_semicolon_count
6113 $first_added_semicolon_at
6114 $last_added_semicolon_at
6115 $first_tabbing_disagreement
6116 $last_tabbing_disagreement
6117 $in_tabbing_disagreement
6118 $tabbing_disagreement_count
6122 $last_line_leading_type
6123 $last_line_leading_level
6124 $last_last_line_leading_level
6127 %block_opening_line_number
6128 $csc_new_statement_ok
6131 $accumulating_text_for_block
6133 $rleading_block_if_elsif_text
6134 $leading_block_text_level
6135 $leading_block_text_length_exceeded
6136 $leading_block_text_line_length
6137 $leading_block_text_line_number
6138 $closing_side_comment_prefix_pattern
6139 $closing_side_comment_list_pattern
6141 $last_nonblank_token
6143 $last_last_nonblank_token
6144 $last_last_nonblank_type
6145 $last_nonblank_block_type
6148 %is_if_brace_follower
6149 %space_after_keyword
6152 %is_last_next_redo_return
6153 %is_other_brace_follower
6154 %is_else_brace_follower
6155 %is_anon_sub_brace_follower
6156 %is_anon_sub_1_brace_follower
6158 %is_sort_map_grep_eval
6159 %is_sort_map_grep_eval_do
6160 %is_block_without_semicolon
6165 %is_if_unless_and_or_last_next_redo_return
6166 %ok_to_add_semicolon_for_block_type
6172 $is_static_block_comment
6173 $index_start_one_line_block
6174 $semicolons_before_block_self_destruct
6175 $index_max_forced_break
6178 $vertical_aligner_object
6183 $last_line_had_side_comment
6186 $static_block_comment_pattern
6187 $static_side_comment_pattern
6188 %opening_vertical_tightness
6189 %closing_vertical_tightness
6190 %closing_token_indentation
6191 $some_closing_token_indentation
6193 %opening_token_right
6194 %stack_opening_token
6195 %stack_closing_token
6197 $block_brace_vertical_tightness_pattern
6200 $rOpts_add_whitespace
6201 $rOpts_block_brace_tightness
6202 $rOpts_block_brace_vertical_tightness
6203 $rOpts_brace_left_and_indent
6204 $rOpts_comma_arrow_breakpoints
6205 $rOpts_break_at_old_keyword_breakpoints
6206 $rOpts_break_at_old_comma_breakpoints
6207 $rOpts_break_at_old_logical_breakpoints
6208 $rOpts_break_at_old_ternary_breakpoints
6209 $rOpts_break_at_old_attribute_breakpoints
6210 $rOpts_closing_side_comment_else_flag
6211 $rOpts_closing_side_comment_maximum_text
6212 $rOpts_continuation_indentation
6214 $rOpts_delete_old_whitespace
6215 $rOpts_fuzzy_line_length
6216 $rOpts_indent_columns
6217 $rOpts_line_up_parentheses
6218 $rOpts_maximum_fields_per_table
6219 $rOpts_maximum_line_length
6220 $rOpts_variable_maximum_line_length
6221 $rOpts_short_concatenation_item_length
6222 $rOpts_keep_old_blank_lines
6223 $rOpts_ignore_old_breakpoints
6224 $rOpts_format_skipping
6225 $rOpts_space_function_paren
6226 $rOpts_space_keyword_paren
6227 $rOpts_keep_interior_semicolons
6228 $rOpts_ignore_side_comment_lengths
6229 $rOpts_stack_closing_block_brace
6230 $rOpts_whitespace_cycle
6231 $rOpts_tight_secret_operators
6235 %is_keyword_returning_list
6239 %right_bond_strength
6256 # default list of block types for which -bli would apply
6257 $bli_list_string = 'if else elsif unless while for foreach do : sub';
6260 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6261 <= >= == =~ !~ != ++ -- /= x=
6263 @is_digraph{@_} = (1) x scalar(@_);
6265 @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
6266 @is_trigraph{@_} = (1) x scalar(@_);
6269 = **= += *= &= <<= &&=
6270 -= /= |= >>= ||= //=
6274 @is_assignment{@_} = (1) x scalar(@_);
6284 @is_keyword_returning_list{@_} = (1) x scalar(@_);
6286 @_ = qw(is if unless and or err last next redo return);
6287 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
6289 @_ = qw(last next redo return);
6290 @is_last_next_redo_return{@_} = (1) x scalar(@_);
6292 @_ = qw(sort map grep);
6293 @is_sort_map_grep{@_} = (1) x scalar(@_);
6295 @_ = qw(sort map grep eval);
6296 @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
6298 @_ = qw(sort map grep eval do);
6299 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
6302 @is_if_unless{@_} = (1) x scalar(@_);
6304 @_ = qw(and or err);
6305 @is_and_or{@_} = (1) x scalar(@_);
6307 # Identify certain operators which often occur in chains.
6308 # Note: the minus (-) causes a side effect of padding of the first line in
6309 # something like this (by sub set_logical_padding):
6310 # Checkbutton => 'Transmission checked',
6311 # -variable => \$TRANS
6312 # This usually improves appearance so it seems ok.
6313 @_ = qw(&& || and or : ? . + - * /);
6314 @is_chain_operator{@_} = (1) x scalar(@_);
6316 # We can remove semicolons after blocks preceded by these keywords
6318 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6319 unless while until for foreach given when default);
6320 @is_block_without_semicolon{@_} = (1) x scalar(@_);
6322 # We will allow semicolons to be added within these block types
6323 # as well as sub and package blocks.
6325 # 1. Note that these keywords are omitted:
6326 # switch case given when default sort map grep
6327 # 2. It is also ok to add for sub and package blocks and a labeled block
6328 # 3. But not okay for other perltidy types including:
6330 # 4. Test files: blktype.t, blktype1.t, semicolon.t
6332 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6333 unless do while until eval for foreach );
6334 @ok_to_add_semicolon_for_block_type{@_} = (1) x scalar(@_);
6336 # 'L' is token for opening { at hash key
6338 @is_opening_type{@_} = (1) x scalar(@_);
6340 # 'R' is token for closing } at hash key
6342 @is_closing_type{@_} = (1) x scalar(@_);
6345 @is_opening_token{@_} = (1) x scalar(@_);
6348 @is_closing_token{@_} = (1) x scalar(@_);
6352 use constant WS_YES => 1;
6353 use constant WS_OPTIONAL => 0;
6354 use constant WS_NO => -1;
6356 # Token bond strengths.
6357 use constant NO_BREAK => 10000;
6358 use constant VERY_STRONG => 100;
6359 use constant STRONG => 2.1;
6360 use constant NOMINAL => 1.1;
6361 use constant WEAK => 0.8;
6362 use constant VERY_WEAK => 0.55;
6364 # values for testing indexes in output array
6365 use constant UNDEFINED_INDEX => -1;
6367 # Maximum number of little messages; probably need not be changed.
6368 use constant MAX_NAG_MESSAGES => 6;
6370 # increment between sequence numbers for each type
6371 # For example, ?: pairs might have numbers 7,11,15,...
6372 use constant TYPE_SEQUENCE_INCREMENT => 4;
6376 # methods to count instances
6378 sub get_count { $_count; }
6379 sub _increment_count { ++$_count }
6380 sub _decrement_count { --$_count }
6385 # trim leading and trailing whitespace from a string
6394 $max = ( $max < $_ ) ? $_ : $max;
6402 $min = ( $min > $_ ) ? $_ : $min;
6409 # given a string containing words separated by whitespace,
6410 # return the list of words
6415 return split( /\s+/, $str );
6418 # interface to Perl::Tidy::Logger routines
6420 if ($logger_object) {
6421 $logger_object->warning(@_);
6426 if ($logger_object) {
6427 $logger_object->complain(@_);
6431 sub write_logfile_entry {
6432 if ($logger_object) {
6433 $logger_object->write_logfile_entry(@_);
6438 if ($logger_object) {
6439 $logger_object->black_box(@_);
6443 sub report_definite_bug {
6444 if ($logger_object) {
6445 $logger_object->report_definite_bug();
6449 sub get_saw_brace_error {
6450 if ($logger_object) {
6451 $logger_object->get_saw_brace_error();
6455 sub we_are_at_the_last_line {
6456 if ($logger_object) {
6457 $logger_object->we_are_at_the_last_line();
6461 # interface to Perl::Tidy::Diagnostics routine
6462 sub write_diagnostics {
6464 if ($diagnostics_object) {
6465 $diagnostics_object->write_diagnostics(@_);
6469 sub get_added_semicolon_count {
6471 return $added_semicolon_count;
6475 $_[0]->_decrement_count();
6482 # we are given an object with a write_line() method to take lines
6484 sink_object => undef,
6485 diagnostics_object => undef,
6486 logger_object => undef,
6488 my %args = ( %defaults, @_ );
6490 $logger_object = $args{logger_object};
6491 $diagnostics_object = $args{diagnostics_object};
6493 # we create another object with a get_line() and peek_ahead() method
6494 my $sink_object = $args{sink_object};
6495 $file_writer_object =
6496 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6498 # initialize the leading whitespace stack to negative levels
6499 # so that we can never run off the end of the stack
6500 $gnu_position_predictor = 0; # where the current token is predicted to be
6501 $max_gnu_stack_index = 0;
6502 $max_gnu_item_index = -1;
6503 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6504 @gnu_item_list = ();
6505 $last_output_indentation = 0;
6506 $last_indentation_written = 0;
6507 $last_unadjusted_indentation = 0;
6508 $last_leading_token = "";
6509 $last_output_short_opening_token = 0;
6511 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6512 $saw_END_or_DATA_ = 0;
6514 @block_type_to_go = ();
6515 @type_sequence_to_go = ();
6516 @container_environment_to_go = ();
6517 @bond_strength_to_go = ();
6518 @forced_breakpoint_to_go = ();
6519 @summed_lengths_to_go = (); # line length to start of ith token
6520 @token_lengths_to_go = ();
6522 @matching_token_to_go = ();
6523 @mate_index_to_go = ();
6524 @nesting_blocks_to_go = ();
6525 @ci_levels_to_go = ();
6526 @nesting_depth_to_go = (0);
6527 @nobreak_to_go = ();
6528 @old_breakpoint_to_go = ();
6531 @leading_spaces_to_go = ();
6532 @reduced_spaces_to_go = ();
6536 @whitespace_level_stack = ();
6537 $whitespace_last_level = -1;
6540 @has_broken_sublist = ();
6541 @want_comma_break = ();
6544 $first_tabbing_disagreement = 0;
6545 $last_tabbing_disagreement = 0;
6546 $tabbing_disagreement_count = 0;
6547 $in_tabbing_disagreement = 0;
6548 $input_line_tabbing = undef;
6550 $last_line_type = "";
6551 $last_last_line_leading_level = 0;
6552 $last_line_leading_level = 0;
6553 $last_line_leading_type = '#';
6555 $last_nonblank_token = ';';
6556 $last_nonblank_type = ';';
6557 $last_last_nonblank_token = ';';
6558 $last_last_nonblank_type = ';';
6559 $last_nonblank_block_type = "";
6560 $last_output_level = 0;
6561 $looking_for_else = 0;
6562 $embedded_tab_count = 0;
6563 $first_embedded_tab_at = 0;
6564 $last_embedded_tab_at = 0;
6565 $deleted_semicolon_count = 0;
6566 $first_deleted_semicolon_at = 0;
6567 $last_deleted_semicolon_at = 0;
6568 $added_semicolon_count = 0;
6569 $first_added_semicolon_at = 0;
6570 $last_added_semicolon_at = 0;
6571 $last_line_had_side_comment = 0;
6572 $is_static_block_comment = 0;
6573 %postponed_breakpoint = ();
6575 # variables for adding side comments
6576 %block_leading_text = ();
6577 %block_opening_line_number = ();
6578 $csc_new_statement_ok = 1;
6579 %csc_block_label = ();
6581 %saved_opening_indentation = ();
6582 $in_format_skipping_section = 0;
6584 reset_block_text_accumulator();
6586 prepare_for_new_input_lines();
6588 $vertical_aligner_object =
6589 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6590 $logger_object, $diagnostics_object );
6592 if ( $rOpts->{'entab-leading-whitespace'} ) {
6593 write_logfile_entry(
6594 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6597 elsif ( $rOpts->{'tabs'} ) {
6598 write_logfile_entry("Indentation will be with a tab character\n");
6601 write_logfile_entry(
6602 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6605 # This was the start of a formatter referent, but object-oriented
6606 # coding has turned out to be too slow here.
6607 $formatter_self = {};
6609 bless $formatter_self, $class;
6611 # Safety check..this is not a class yet
6612 if ( _increment_count() > 1 ) {
6614 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6616 return $formatter_self;
6619 sub prepare_for_new_input_lines {
6621 $gnu_sequence_number++; # increment output batch counter
6622 %last_gnu_equals = ();
6623 %gnu_comma_count = ();
6624 %gnu_arrow_count = ();
6625 $line_start_index_to_go = 0;
6626 $max_gnu_item_index = UNDEFINED_INDEX;
6627 $index_max_forced_break = UNDEFINED_INDEX;
6628 $max_index_to_go = UNDEFINED_INDEX;
6629 $last_nonblank_index_to_go = UNDEFINED_INDEX;
6630 $last_nonblank_type_to_go = '';
6631 $last_nonblank_token_to_go = '';
6632 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6633 $last_last_nonblank_type_to_go = '';
6634 $last_last_nonblank_token_to_go = '';
6635 $forced_breakpoint_count = 0;
6636 $forced_breakpoint_undo_count = 0;
6637 $rbrace_follower = undef;
6638 $summed_lengths_to_go[0] = 0;
6639 $old_line_count_in_batch = 1;
6640 $comma_count_in_batch = 0;
6641 $starting_in_quote = 0;
6643 destroy_one_line_block();
6649 my ($line_of_tokens) = @_;
6651 my $line_type = $line_of_tokens->{_line_type};
6652 my $input_line = $line_of_tokens->{_line_text};
6654 if ( $rOpts->{notidy} ) {
6655 write_unindented_line($input_line);
6656 $last_line_type = $line_type;
6660 # _line_type codes are:
6661 # SYSTEM - system-specific code before hash-bang line
6662 # CODE - line of perl code (including comments)
6663 # POD_START - line starting pod, such as '=head'
6664 # POD - pod documentation text
6665 # POD_END - last line of pod section, '=cut'
6666 # HERE - text of here-document
6667 # HERE_END - last line of here-doc (target word)
6668 # FORMAT - format section
6669 # FORMAT_END - last line of format section, '.'
6670 # DATA_START - __DATA__ line
6671 # DATA - unidentified text following __DATA__
6672 # END_START - __END__ line
6673 # END - unidentified text following __END__
6674 # ERROR - we are in big trouble, probably not a perl script
6676 # put a blank line after an =cut which comes before __END__ and __DATA__
6677 # (required by podchecker)
6678 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6679 $file_writer_object->reset_consecutive_blank_lines();
6680 if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6683 # handle line of code..
6684 if ( $line_type eq 'CODE' ) {
6686 # let logger see all non-blank lines of code
6687 if ( $input_line !~ /^\s*$/ ) {
6688 my $output_line_number =
6689 $vertical_aligner_object->get_output_line_number();
6690 black_box( $line_of_tokens, $output_line_number );
6692 print_line_of_tokens($line_of_tokens);
6695 # handle line of non-code..
6701 if ( $line_type =~ /^POD/ ) {
6703 # Pod docs should have a preceding blank line. But stay
6704 # out of __END__ and __DATA__ sections, because
6705 # the user may be using this section for any purpose whatsoever
6706 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6707 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
6708 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
6710 && $line_type eq 'POD_START'
6711 && !$saw_END_or_DATA_ )
6717 # leave the blank counters in a predictable state
6718 # after __END__ or __DATA__
6719 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6720 $file_writer_object->reset_consecutive_blank_lines();
6721 $saw_END_or_DATA_ = 1;
6724 # write unindented non-code line
6725 if ( !$skip_line ) {
6726 if ($tee_line) { $file_writer_object->tee_on() }
6727 write_unindented_line($input_line);
6728 if ($tee_line) { $file_writer_object->tee_off() }
6731 $last_line_type = $line_type;
6734 sub create_one_line_block {
6735 $index_start_one_line_block = $_[0];
6736 $semicolons_before_block_self_destruct = $_[1];
6739 sub destroy_one_line_block {
6740 $index_start_one_line_block = UNDEFINED_INDEX;
6741 $semicolons_before_block_self_destruct = 0;
6744 sub leading_spaces_to_go {
6746 # return the number of indentation spaces for a token in the output stream;
6747 # these were previously stored by 'set_leading_whitespace'.
6750 if ( $ii < 0 ) { $ii = 0 }
6751 return get_SPACES( $leading_spaces_to_go[$ii] );
6757 # return the number of leading spaces associated with an indentation
6758 # variable $indentation is either a constant number of spaces or an object
6759 # with a get_SPACES method.
6760 my $indentation = shift;
6761 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6764 sub get_RECOVERABLE_SPACES {
6766 # return the number of spaces (+ means shift right, - means shift left)
6767 # that we would like to shift a group of lines with the same indentation
6768 # to get them to line up with their opening parens
6769 my $indentation = shift;
6770 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6773 sub get_AVAILABLE_SPACES_to_go {
6775 my $item = $leading_spaces_to_go[ $_[0] ];
6777 # return the number of available leading spaces associated with an
6778 # indentation variable. $indentation is either a constant number of
6779 # spaces or an object with a get_AVAILABLE_SPACES method.
6780 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6783 sub new_lp_indentation_item {
6785 # this is an interface to the IndentationItem class
6786 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6788 # A negative level implies not to store the item in the item_list
6790 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6792 my $item = Perl::Tidy::IndentationItem->new(
6794 $ci_level, $available_spaces,
6795 $index, $gnu_sequence_number,
6796 $align_paren, $max_gnu_stack_index,
6797 $line_start_index_to_go,
6800 if ( $level >= 0 ) {
6801 $gnu_item_list[$max_gnu_item_index] = $item;
6807 sub set_leading_whitespace {
6809 # This routine defines leading whitespace
6810 # given: the level and continuation_level of a token,
6811 # define: space count of leading string which would apply if it
6812 # were the first token of a new line.
6814 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
6816 # Adjust levels if necessary to recycle whitespace:
6817 # given $level_abs, the absolute level
6818 # define $level, a possibly reduced level for whitespace
6819 my $level = $level_abs;
6820 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
6821 if ( $level_abs < $whitespace_last_level ) {
6822 pop(@whitespace_level_stack);
6824 if ( !@whitespace_level_stack ) {
6825 push @whitespace_level_stack, $level_abs;
6827 elsif ( $level_abs > $whitespace_last_level ) {
6828 $level = $whitespace_level_stack[-1] +
6829 ( $level_abs - $whitespace_last_level );
6832 # 1 Try to break at a block brace
6834 $level > $rOpts_whitespace_cycle
6835 && $last_nonblank_type eq '{'
6836 && $last_nonblank_token eq '{'
6839 # 2 Then either a brace or bracket
6840 || ( $level > $rOpts_whitespace_cycle + 1
6841 && $last_nonblank_token =~ /^[\{\[]$/ )
6843 # 3 Then a paren too
6844 || $level > $rOpts_whitespace_cycle + 2
6849 push @whitespace_level_stack, $level;
6851 $level = $whitespace_level_stack[-1];
6853 $whitespace_last_level = $level_abs;
6855 # modify for -bli, which adds one continuation indentation for
6857 if ( $rOpts_brace_left_and_indent
6858 && $max_index_to_go == 0
6859 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6864 # patch to avoid trouble when input file has negative indentation.
6865 # other logic should catch this error.
6866 if ( $level < 0 ) { $level = 0 }
6868 #-------------------------------------------
6869 # handle the standard indentation scheme
6870 #-------------------------------------------
6871 unless ($rOpts_line_up_parentheses) {
6873 $ci_level * $rOpts_continuation_indentation +
6874 $level * $rOpts_indent_columns;
6876 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6878 if ($in_continued_quote) {
6882 $leading_spaces_to_go[$max_index_to_go] = $space_count;
6883 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6887 #-------------------------------------------------------------
6888 # handle case of -lp indentation..
6889 #-------------------------------------------------------------
6891 # The continued_quote flag means that this is the first token of a
6892 # line, and it is the continuation of some kind of multi-line quote
6893 # or pattern. It requires special treatment because it must have no
6894 # added leading whitespace. So we create a special indentation item
6895 # which is not in the stack.
6896 if ($in_continued_quote) {
6897 my $space_count = 0;
6898 my $available_space = 0;
6899 $level = -1; # flag to prevent storing in item_list
6900 $leading_spaces_to_go[$max_index_to_go] =
6901 $reduced_spaces_to_go[$max_index_to_go] =
6902 new_lp_indentation_item( $space_count, $level, $ci_level,
6903 $available_space, 0 );
6907 # get the top state from the stack
6908 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6909 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6910 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6912 my $type = $types_to_go[$max_index_to_go];
6913 my $token = $tokens_to_go[$max_index_to_go];
6914 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6916 if ( $type eq '{' || $type eq '(' ) {
6918 $gnu_comma_count{ $total_depth + 1 } = 0;
6919 $gnu_arrow_count{ $total_depth + 1 } = 0;
6921 # If we come to an opening token after an '=' token of some type,
6922 # see if it would be helpful to 'break' after the '=' to save space
6923 my $last_equals = $last_gnu_equals{$total_depth};
6924 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6926 # find the position if we break at the '='
6927 my $i_test = $last_equals;
6928 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6931 ##my $too_close = ($i_test==$max_index_to_go-1);
6933 my $test_position = total_line_length( $i_test, $max_index_to_go );
6934 my $mll = maximum_line_length($i_test);
6938 # the equals is not just before an open paren (testing)
6941 # if we are beyond the midpoint
6942 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
6944 # or we are beyond the 1/4 point and there was an old
6945 # break at the equals
6947 $gnu_position_predictor >
6948 $mll - $rOpts_maximum_line_length * 3 / 4
6950 $old_breakpoint_to_go[$last_equals]
6951 || ( $last_equals > 0
6952 && $old_breakpoint_to_go[ $last_equals - 1 ] )
6953 || ( $last_equals > 1
6954 && $types_to_go[ $last_equals - 1 ] eq 'b'
6955 && $old_breakpoint_to_go[ $last_equals - 2 ] )
6961 # then make the switch -- note that we do not set a real
6962 # breakpoint here because we may not really need one; sub
6963 # scan_list will do that if necessary
6964 $line_start_index_to_go = $i_test + 1;
6965 $gnu_position_predictor = $test_position;
6971 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
6973 # Check for decreasing depth ..
6974 # Note that one token may have both decreasing and then increasing
6975 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
6976 # in this example we would first go back to (1,0) then up to (2,0)
6978 if ( $level < $current_level || $ci_level < $current_ci_level ) {
6980 # loop to find the first entry at or completely below this level
6981 my ( $lev, $ci_lev );
6983 if ($max_gnu_stack_index) {
6985 # save index of token which closes this level
6986 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6988 # Undo any extra indentation if we saw no commas
6989 my $available_spaces =
6990 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6992 my $comma_count = 0;
6993 my $arrow_count = 0;
6994 if ( $type eq '}' || $type eq ')' ) {
6995 $comma_count = $gnu_comma_count{$total_depth};
6996 $arrow_count = $gnu_arrow_count{$total_depth};
6997 $comma_count = 0 unless $comma_count;
6998 $arrow_count = 0 unless $arrow_count;
7000 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
7001 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
7003 if ( $available_spaces > 0 ) {
7005 if ( $comma_count <= 0 || $arrow_count > 0 ) {
7007 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
7009 $gnu_stack[$max_gnu_stack_index]
7010 ->get_SEQUENCE_NUMBER();
7012 # Be sure this item was created in this batch. This
7013 # should be true because we delete any available
7014 # space from open items at the end of each batch.
7015 if ( $gnu_sequence_number != $seqno
7016 || $i > $max_gnu_item_index )
7019 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
7021 report_definite_bug();
7025 if ( $arrow_count == 0 ) {
7027 ->permanently_decrease_AVAILABLE_SPACES(
7032 ->tentatively_decrease_AVAILABLE_SPACES(
7039 $j <= $max_gnu_item_index ;
7044 ->decrease_SPACES($available_spaces);
7051 --$max_gnu_stack_index;
7052 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
7053 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
7055 # stop when we reach a level at or below the current level
7056 if ( $lev <= $level && $ci_lev <= $ci_level ) {
7058 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7059 $current_level = $lev;
7060 $current_ci_level = $ci_lev;
7065 # reached bottom of stack .. should never happen because
7066 # only negative levels can get here, and $level was forced
7067 # to be positive above.
7070 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
7072 report_definite_bug();
7078 # handle increasing depth
7079 if ( $level > $current_level || $ci_level > $current_ci_level ) {
7081 # Compute the standard incremental whitespace. This will be
7082 # the minimum incremental whitespace that will be used. This
7083 # choice results in a smooth transition between the gnu-style
7084 # and the standard style.
7085 my $standard_increment =
7086 ( $level - $current_level ) * $rOpts_indent_columns +
7087 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
7089 # Now we have to define how much extra incremental space
7090 # ("$available_space") we want. This extra space will be
7091 # reduced as necessary when long lines are encountered or when
7092 # it becomes clear that we do not have a good list.
7093 my $available_space = 0;
7094 my $align_paren = 0;
7097 # initialization on empty stack..
7098 if ( $max_gnu_stack_index == 0 ) {
7099 $space_count = $level * $rOpts_indent_columns;
7102 # if this is a BLOCK, add the standard increment
7103 elsif ($last_nonblank_block_type) {
7104 $space_count += $standard_increment;
7107 # if last nonblank token was not structural indentation,
7108 # just use standard increment
7109 elsif ( $last_nonblank_type ne '{' ) {
7110 $space_count += $standard_increment;
7113 # otherwise use the space to the first non-blank level change token
7116 $space_count = $gnu_position_predictor;
7118 my $min_gnu_indentation =
7119 $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7121 $available_space = $space_count - $min_gnu_indentation;
7122 if ( $available_space >= $standard_increment ) {
7123 $min_gnu_indentation += $standard_increment;
7125 elsif ( $available_space > 1 ) {
7126 $min_gnu_indentation += $available_space + 1;
7128 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
7129 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
7130 $min_gnu_indentation += 2;
7133 $min_gnu_indentation += 1;
7137 $min_gnu_indentation += $standard_increment;
7139 $available_space = $space_count - $min_gnu_indentation;
7141 if ( $available_space < 0 ) {
7142 $space_count = $min_gnu_indentation;
7143 $available_space = 0;
7148 # update state, but not on a blank token
7149 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
7151 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
7153 ++$max_gnu_stack_index;
7154 $gnu_stack[$max_gnu_stack_index] =
7155 new_lp_indentation_item( $space_count, $level, $ci_level,
7156 $available_space, $align_paren );
7158 # If the opening paren is beyond the half-line length, then
7159 # we will use the minimum (standard) indentation. This will
7160 # help avoid problems associated with running out of space
7161 # near the end of a line. As a result, in deeply nested
7162 # lists, there will be some indentations which are limited
7163 # to this minimum standard indentation. But the most deeply
7164 # nested container will still probably be able to shift its
7165 # parameters to the right for proper alignment, so in most
7166 # cases this will not be noticeable.
7167 if ( $available_space > 0 && $space_count > $halfway ) {
7168 $gnu_stack[$max_gnu_stack_index]
7169 ->tentatively_decrease_AVAILABLE_SPACES($available_space);
7174 # Count commas and look for non-list characters. Once we see a
7175 # non-list character, we give up and don't look for any more commas.
7176 if ( $type eq '=>' ) {
7177 $gnu_arrow_count{$total_depth}++;
7179 # tentatively treating '=>' like '=' for estimating breaks
7180 # TODO: this could use some experimentation
7181 $last_gnu_equals{$total_depth} = $max_index_to_go;
7184 elsif ( $type eq ',' ) {
7185 $gnu_comma_count{$total_depth}++;
7188 elsif ( $is_assignment{$type} ) {
7189 $last_gnu_equals{$total_depth} = $max_index_to_go;
7192 # this token might start a new line
7193 # if this is a non-blank..
7194 if ( $type ne 'b' ) {
7199 # this is the first nonblank token of the line
7200 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
7202 # or previous character was one of these:
7203 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
7205 # or previous character was opening and this does not close it
7206 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
7207 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
7209 # or this token is one of these:
7210 || $type =~ /^([\.]|\|\||\&\&)$/
7212 # or this is a closing structure
7213 || ( $last_nonblank_type_to_go eq '}'
7214 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
7216 # or previous token was keyword 'return'
7217 || ( $last_nonblank_type_to_go eq 'k'
7218 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
7220 # or starting a new line at certain keywords is fine
7222 && $is_if_unless_and_or_last_next_redo_return{$token} )
7224 # or this is after an assignment after a closing structure
7226 $is_assignment{$last_nonblank_type_to_go}
7228 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
7230 # and it is significantly to the right
7231 || $gnu_position_predictor > $halfway
7236 check_for_long_gnu_style_lines();
7237 $line_start_index_to_go = $max_index_to_go;
7239 # back up 1 token if we want to break before that type
7240 # otherwise, we may strand tokens like '?' or ':' on a line
7241 if ( $line_start_index_to_go > 0 ) {
7242 if ( $last_nonblank_type_to_go eq 'k' ) {
7244 if ( $want_break_before{$last_nonblank_token_to_go} ) {
7245 $line_start_index_to_go--;
7248 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
7249 $line_start_index_to_go--;
7255 # remember the predicted position of this token on the output line
7256 if ( $max_index_to_go > $line_start_index_to_go ) {
7257 $gnu_position_predictor =
7258 total_line_length( $line_start_index_to_go, $max_index_to_go );
7261 $gnu_position_predictor =
7262 $space_count + $token_lengths_to_go[$max_index_to_go];
7265 # store the indentation object for this token
7266 # this allows us to manipulate the leading whitespace
7267 # (in case we have to reduce indentation to fit a line) without
7268 # having to change any token values
7269 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
7270 $reduced_spaces_to_go[$max_index_to_go] =
7271 ( $max_gnu_stack_index > 0 && $ci_level )
7272 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
7273 : $gnu_stack[$max_gnu_stack_index];
7277 sub check_for_long_gnu_style_lines {
7279 # look at the current estimated maximum line length, and
7280 # remove some whitespace if it exceeds the desired maximum
7282 # this is only for the '-lp' style
7283 return unless ($rOpts_line_up_parentheses);
7285 # nothing can be done if no stack items defined for this line
7286 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7288 # see if we have exceeded the maximum desired line length
7289 # keep 2 extra free because they are needed in some cases
7290 # (result of trial-and-error testing)
7292 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
7294 return if ( $spaces_needed <= 0 );
7296 # We are over the limit, so try to remove a requested number of
7297 # spaces from leading whitespace. We are only allowed to remove
7298 # from whitespace items created on this batch, since others have
7299 # already been used and cannot be undone.
7300 my @candidates = ();
7303 # loop over all whitespace items created for the current batch
7304 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7305 my $item = $gnu_item_list[$i];
7307 # item must still be open to be a candidate (otherwise it
7308 # cannot influence the current token)
7309 next if ( $item->get_CLOSED() >= 0 );
7311 my $available_spaces = $item->get_AVAILABLE_SPACES();
7313 if ( $available_spaces > 0 ) {
7314 push( @candidates, [ $i, $available_spaces ] );
7318 return unless (@candidates);
7320 # sort by available whitespace so that we can remove whitespace
7321 # from the maximum available first
7322 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
7324 # keep removing whitespace until we are done or have no more
7326 foreach $candidate (@candidates) {
7327 my ( $i, $available_spaces ) = @{$candidate};
7328 my $deleted_spaces =
7329 ( $available_spaces > $spaces_needed )
7331 : $available_spaces;
7333 # remove the incremental space from this item
7334 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
7338 # update the leading whitespace of this item and all items
7339 # that came after it
7340 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
7342 my $old_spaces = $gnu_item_list[$i]->get_SPACES();
7343 if ( $old_spaces >= $deleted_spaces ) {
7344 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
7347 # shouldn't happen except for code bug:
7349 my $level = $gnu_item_list[$i_debug]->get_LEVEL();
7350 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
7351 my $old_level = $gnu_item_list[$i]->get_LEVEL();
7352 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
7354 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
7356 report_definite_bug();
7359 $gnu_position_predictor -= $deleted_spaces;
7360 $spaces_needed -= $deleted_spaces;
7361 last unless ( $spaces_needed > 0 );
7365 sub finish_lp_batch {
7367 # This routine is called once after each output stream batch is
7368 # finished to undo indentation for all incomplete -lp
7369 # indentation levels. It is too risky to leave a level open,
7370 # because then we can't backtrack in case of a long line to follow.
7371 # This means that comments and blank lines will disrupt this
7372 # indentation style. But the vertical aligner may be able to
7373 # get the space back if there are side comments.
7375 # this is only for the 'lp' style
7376 return unless ($rOpts_line_up_parentheses);
7378 # nothing can be done if no stack items defined for this line
7379 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7381 # loop over all whitespace items created for the current batch
7383 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7384 my $item = $gnu_item_list[$i];
7386 # only look for open items
7387 next if ( $item->get_CLOSED() >= 0 );
7389 # Tentatively remove all of the available space
7390 # (The vertical aligner will try to get it back later)
7391 my $available_spaces = $item->get_AVAILABLE_SPACES();
7392 if ( $available_spaces > 0 ) {
7394 # delete incremental space for this item
7396 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
7398 # Reduce the total indentation space of any nodes that follow
7399 # Note that any such nodes must necessarily be dependents
7401 foreach ( $i + 1 .. $max_gnu_item_index ) {
7402 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
7409 sub reduce_lp_indentation {
7411 # reduce the leading whitespace at token $i if possible by $spaces_needed
7412 # (a large value of $spaces_needed will remove all excess space)
7413 # NOTE: to be called from scan_list only for a sequence of tokens
7414 # contained between opening and closing parens/braces/brackets
7416 my ( $i, $spaces_wanted ) = @_;
7417 my $deleted_spaces = 0;
7419 my $item = $leading_spaces_to_go[$i];
7420 my $available_spaces = $item->get_AVAILABLE_SPACES();
7423 $available_spaces > 0
7424 && ( ( $spaces_wanted <= $available_spaces )
7425 || !$item->get_HAVE_CHILD() )
7429 # we'll remove these spaces, but mark them as recoverable
7431 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
7434 return $deleted_spaces;
7437 sub token_sequence_length {
7439 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
7440 # returns 0 if $ibeg > $iend (shouldn't happen)
7441 my ( $ibeg, $iend ) = @_;
7442 return 0 if ( $iend < 0 || $ibeg > $iend );
7443 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
7444 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
7447 sub total_line_length {
7449 # return length of a line of tokens ($ibeg .. $iend)
7450 my ( $ibeg, $iend ) = @_;
7451 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
7454 sub maximum_line_length_for_level {
7456 # return maximum line length for line starting with a given level
7457 my $maximum_line_length = $rOpts_maximum_line_length;
7459 # Modify if -vmll option is selected
7460 if ($rOpts_variable_maximum_line_length) {
7462 if ( $level < 0 ) { $level = 0 }
7463 $maximum_line_length += $level * $rOpts_indent_columns;
7465 return $maximum_line_length;
7468 sub maximum_line_length {
7470 # return maximum line length for line starting with the token at given index
7471 return maximum_line_length_for_level( $levels_to_go[ $_[0] ] );
7475 sub excess_line_length {
7477 # return number of characters by which a line of tokens ($ibeg..$iend)
7478 # exceeds the allowable line length.
7479 my ( $ibeg, $iend ) = @_;
7480 return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg);
7483 sub finish_formatting {
7485 # flush buffer and write any informative messages
7489 $file_writer_object->decrement_output_line_number()
7490 ; # fix up line number since it was incremented
7491 we_are_at_the_last_line();
7492 if ( $added_semicolon_count > 0 ) {
7493 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
7495 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
7496 write_logfile_entry("$added_semicolon_count $what added:\n");
7497 write_logfile_entry(
7498 " $first at input line $first_added_semicolon_at\n");
7500 if ( $added_semicolon_count > 1 ) {
7501 write_logfile_entry(
7502 " Last at input line $last_added_semicolon_at\n");
7504 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
7505 write_logfile_entry("\n");
7508 if ( $deleted_semicolon_count > 0 ) {
7509 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
7511 ( $deleted_semicolon_count > 1 )
7514 write_logfile_entry(
7515 "$deleted_semicolon_count unnecessary $what deleted:\n");
7516 write_logfile_entry(
7517 " $first at input line $first_deleted_semicolon_at\n");
7519 if ( $deleted_semicolon_count > 1 ) {
7520 write_logfile_entry(
7521 " Last at input line $last_deleted_semicolon_at\n");
7523 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
7524 write_logfile_entry("\n");
7527 if ( $embedded_tab_count > 0 ) {
7528 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
7530 ( $embedded_tab_count > 1 )
7531 ? "quotes or patterns"
7532 : "quote or pattern";
7533 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
7534 write_logfile_entry(
7535 "This means the display of this script could vary with device or software\n"
7537 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
7539 if ( $embedded_tab_count > 1 ) {
7540 write_logfile_entry(
7541 " Last at input line $last_embedded_tab_at\n");
7543 write_logfile_entry("\n");
7546 if ($first_tabbing_disagreement) {
7547 write_logfile_entry(
7548 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
7552 if ($in_tabbing_disagreement) {
7553 write_logfile_entry(
7554 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7559 if ($last_tabbing_disagreement) {
7561 write_logfile_entry(
7562 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7566 write_logfile_entry("No indentation disagreement seen\n");
7569 if ($first_tabbing_disagreement) {
7570 write_logfile_entry(
7571 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
7574 write_logfile_entry("\n");
7576 $vertical_aligner_object->report_anything_unusual();
7578 $file_writer_object->report_line_length_errors();
7583 # This routine is called to check the Opts hash after it is defined
7587 make_static_block_comment_pattern();
7588 make_static_side_comment_pattern();
7589 make_closing_side_comment_prefix();
7590 make_closing_side_comment_list_pattern();
7591 $format_skipping_pattern_begin =
7592 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7593 $format_skipping_pattern_end =
7594 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7596 # If closing side comments ARE selected, then we can safely
7597 # delete old closing side comments unless closing side comment
7598 # warnings are requested. This is a good idea because it will
7599 # eliminate any old csc's which fall below the line count threshold.
7600 # We cannot do this if warnings are turned on, though, because we
7601 # might delete some text which has been added. So that must
7602 # be handled when comments are created.
7603 if ( $rOpts->{'closing-side-comments'} ) {
7604 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7605 $rOpts->{'delete-closing-side-comments'} = 1;
7609 # If closing side comments ARE NOT selected, but warnings ARE
7610 # selected and we ARE DELETING csc's, then we will pretend to be
7611 # adding with a huge interval. This will force the comments to be
7612 # generated for comparison with the old comments, but not added.
7613 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7614 if ( $rOpts->{'delete-closing-side-comments'} ) {
7615 $rOpts->{'delete-closing-side-comments'} = 0;
7616 $rOpts->{'closing-side-comments'} = 1;
7617 $rOpts->{'closing-side-comment-interval'} = 100000000;
7622 make_block_brace_vertical_tightness_pattern();
7624 if ( $rOpts->{'line-up-parentheses'} ) {
7626 if ( $rOpts->{'indent-only'}
7627 || !$rOpts->{'add-newlines'}
7628 || !$rOpts->{'delete-old-newlines'} )
7630 Perl::Tidy::Warn <<EOM;
7631 -----------------------------------------------------------------------
7632 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7634 The -lp indentation logic requires that perltidy be able to coordinate
7635 arbitrarily large numbers of line breakpoints. This isn't possible
7636 with these flags. Sometimes an acceptable workaround is to use -wocb=3
7637 -----------------------------------------------------------------------
7639 $rOpts->{'line-up-parentheses'} = 0;
7643 # At present, tabs are not compatible with the line-up-parentheses style
7644 # (it would be possible to entab the total leading whitespace
7645 # just prior to writing the line, if desired).
7646 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7647 Perl::Tidy::Warn <<EOM;
7648 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
7650 $rOpts->{'tabs'} = 0;
7653 # Likewise, tabs are not compatible with outdenting..
7654 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7655 Perl::Tidy::Warn <<EOM;
7656 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7658 $rOpts->{'tabs'} = 0;
7661 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7662 Perl::Tidy::Warn <<EOM;
7663 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
7665 $rOpts->{'tabs'} = 0;
7668 if ( !$rOpts->{'space-for-semicolon'} ) {
7669 $want_left_space{'f'} = -1;
7672 if ( $rOpts->{'space-terminal-semicolon'} ) {
7673 $want_left_space{';'} = 1;
7676 # implement outdenting preferences for keywords
7677 %outdent_keyword = ();
7678 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7679 @_ = qw(next last redo goto return); # defaults
7682 # FUTURE: if not a keyword, assume that it is an identifier
7684 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7685 $outdent_keyword{$_} = 1;
7688 Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
7692 # implement user whitespace preferences
7693 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7694 @want_left_space{@_} = (1) x scalar(@_);
7697 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7698 @want_right_space{@_} = (1) x scalar(@_);
7701 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7702 @want_left_space{@_} = (-1) x scalar(@_);
7705 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7706 @want_right_space{@_} = (-1) x scalar(@_);
7708 if ( $rOpts->{'dump-want-left-space'} ) {
7709 dump_want_left_space(*STDOUT);
7713 if ( $rOpts->{'dump-want-right-space'} ) {
7714 dump_want_right_space(*STDOUT);
7718 # default keywords for which space is introduced before an opening paren
7719 # (at present, including them messes up vertical alignment)
7720 @_ = qw(my local our and or err eq ne if else elsif until
7721 unless while for foreach return switch case given when);
7722 @space_after_keyword{@_} = (1) x scalar(@_);
7724 # first remove any or all of these if desired
7725 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7727 # -nsak='*' selects all the above keywords
7728 if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
7729 @space_after_keyword{@_} = (0) x scalar(@_);
7732 # then allow user to add to these defaults
7733 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7734 @space_after_keyword{@_} = (1) x scalar(@_);
7737 # implement user break preferences
7738 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7739 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7740 . : ? && || and or err xor
7743 my $break_after = sub {
7744 foreach my $tok (@_) {
7745 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
7746 my $lbs = $left_bond_strength{$tok};
7747 my $rbs = $right_bond_strength{$tok};
7748 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7749 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7755 my $break_before = sub {
7756 foreach my $tok (@_) {
7757 my $lbs = $left_bond_strength{$tok};
7758 my $rbs = $right_bond_strength{$tok};
7759 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7760 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7766 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7767 $break_before->(@all_operators)
7768 if ( $rOpts->{'break-before-all-operators'} );
7770 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7771 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7773 # make note if breaks are before certain key types
7774 %want_break_before = ();
7775 foreach my $tok ( @all_operators, ',' ) {
7776 $want_break_before{$tok} =
7777 $left_bond_strength{$tok} < $right_bond_strength{$tok};
7780 # Coordinate ?/: breaks, which must be similar
7781 if ( !$want_break_before{':'} ) {
7782 $want_break_before{'?'} = $want_break_before{':'};
7783 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7784 $left_bond_strength{'?'} = NO_BREAK;
7787 # Define here tokens which may follow the closing brace of a do statement
7788 # on the same line, as in:
7789 # } while ( $something);
7790 @_ = qw(until while unless if ; : );
7792 @is_do_follower{@_} = (1) x scalar(@_);
7794 # These tokens may follow the closing brace of an if or elsif block.
7795 # In other words, for cuddled else we want code to look like:
7796 # } elsif ( $something) {
7798 if ( $rOpts->{'cuddled-else'} ) {
7799 @_ = qw(else elsif);
7800 @is_if_brace_follower{@_} = (1) x scalar(@_);
7803 %is_if_brace_follower = ();
7806 # nothing can follow the closing curly of an else { } block:
7807 %is_else_brace_follower = ();
7809 # what can follow a multi-line anonymous sub definition closing curly:
7810 @_ = qw# ; : => or and && || ~~ !~~ ) #;
7812 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7814 # what can follow a one-line anonymous sub closing curly:
7815 # one-line anonymous subs also have ']' here...
7816 # see tk3.t and PP.pm
7817 @_ = qw# ; : => or and && || ) ] ~~ !~~ #;
7819 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7821 # What can follow a closing curly of a block
7822 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7823 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7824 @_ = qw# ; : => or and && || ) #;
7827 # allow cuddled continue if cuddled else is specified
7828 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7830 @is_other_brace_follower{@_} = (1) x scalar(@_);
7832 $right_bond_strength{'{'} = WEAK;
7833 $left_bond_strength{'{'} = VERY_STRONG;
7835 # make -l=0 equal to -l=infinite
7836 if ( !$rOpts->{'maximum-line-length'} ) {
7837 $rOpts->{'maximum-line-length'} = 1000000;
7840 # make -lbl=0 equal to -lbl=infinite
7841 if ( !$rOpts->{'long-block-line-count'} ) {
7842 $rOpts->{'long-block-line-count'} = 1000000;
7845 my $enc = $rOpts->{'character-encoding'};
7846 if ( $enc && $enc !~ /^(none|utf8)$/i ) {
7847 Perl::Tidy::Die <<EOM;
7848 Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
7852 my $ole = $rOpts->{'output-line-ending'};
7861 # Patch for RT #99514, a memoization issue.
7862 # Normally, the user enters one of 'dos', 'win', etc, and we change the
7863 # value in the options parameter to be the corresponding line ending
7864 # character. But, if we are using memoization, on later passes through
7865 # here the option parameter will already have the desired ending
7866 # character rather than the keyword 'dos', 'win', etc. So
7867 # we must check to see if conversion has already been done and, if so,
7868 # bypass the conversion step.
7869 my %endings_inverted = (
7870 "\015\012" => 'dos',
7871 "\015\012" => 'win',
7876 if ( defined( $endings_inverted{$ole} ) ) {
7878 # we already have valid line ending, nothing more to do
7882 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7883 my $str = join " ", keys %endings;
7884 Perl::Tidy::Die <<EOM;
7885 Unrecognized line ending '$ole'; expecting one of: $str
7888 if ( $rOpts->{'preserve-line-endings'} ) {
7889 Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
7890 $rOpts->{'preserve-line-endings'} = undef;
7895 # hashes used to simplify setting whitespace
7897 '{' => $rOpts->{'brace-tightness'},
7898 '}' => $rOpts->{'brace-tightness'},
7899 '(' => $rOpts->{'paren-tightness'},
7900 ')' => $rOpts->{'paren-tightness'},
7901 '[' => $rOpts->{'square-bracket-tightness'},
7902 ']' => $rOpts->{'square-bracket-tightness'},
7911 # frequently used parameters
7912 $rOpts_add_newlines = $rOpts->{'add-newlines'};
7913 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
7914 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7915 $rOpts_block_brace_vertical_tightness =
7916 $rOpts->{'block-brace-vertical-tightness'};
7917 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
7918 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7919 $rOpts_break_at_old_ternary_breakpoints =
7920 $rOpts->{'break-at-old-ternary-breakpoints'};
7921 $rOpts_break_at_old_attribute_breakpoints =
7922 $rOpts->{'break-at-old-attribute-breakpoints'};
7923 $rOpts_break_at_old_comma_breakpoints =
7924 $rOpts->{'break-at-old-comma-breakpoints'};
7925 $rOpts_break_at_old_keyword_breakpoints =
7926 $rOpts->{'break-at-old-keyword-breakpoints'};
7927 $rOpts_break_at_old_logical_breakpoints =
7928 $rOpts->{'break-at-old-logical-breakpoints'};
7929 $rOpts_closing_side_comment_else_flag =
7930 $rOpts->{'closing-side-comment-else-flag'};
7931 $rOpts_closing_side_comment_maximum_text =
7932 $rOpts->{'closing-side-comment-maximum-text'};
7933 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7934 $rOpts_cuddled_else = $rOpts->{'cuddled-else'};
7935 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
7936 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
7937 $rOpts_indent_columns = $rOpts->{'indent-columns'};
7938 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
7939 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7940 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
7941 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
7943 $rOpts_variable_maximum_line_length =
7944 $rOpts->{'variable-maximum-line-length'};
7945 $rOpts_short_concatenation_item_length =
7946 $rOpts->{'short-concatenation-item-length'};
7948 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
7949 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7950 $rOpts_format_skipping = $rOpts->{'format-skipping'};
7951 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
7952 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
7953 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7954 $rOpts_ignore_side_comment_lengths =
7955 $rOpts->{'ignore-side-comment-lengths'};
7957 # Note that both opening and closing tokens can access the opening
7958 # and closing flags of their container types.
7959 %opening_vertical_tightness = (
7960 '(' => $rOpts->{'paren-vertical-tightness'},
7961 '{' => $rOpts->{'brace-vertical-tightness'},
7962 '[' => $rOpts->{'square-bracket-vertical-tightness'},
7963 ')' => $rOpts->{'paren-vertical-tightness'},
7964 '}' => $rOpts->{'brace-vertical-tightness'},
7965 ']' => $rOpts->{'square-bracket-vertical-tightness'},
7968 %closing_vertical_tightness = (
7969 '(' => $rOpts->{'paren-vertical-tightness-closing'},
7970 '{' => $rOpts->{'brace-vertical-tightness-closing'},
7971 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7972 ')' => $rOpts->{'paren-vertical-tightness-closing'},
7973 '}' => $rOpts->{'brace-vertical-tightness-closing'},
7974 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7977 $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'};
7979 # assume flag for '>' same as ')' for closing qw quotes
7980 %closing_token_indentation = (
7981 ')' => $rOpts->{'closing-paren-indentation'},
7982 '}' => $rOpts->{'closing-brace-indentation'},
7983 ']' => $rOpts->{'closing-square-bracket-indentation'},
7984 '>' => $rOpts->{'closing-paren-indentation'},
7987 # flag indicating if any closing tokens are indented
7988 $some_closing_token_indentation =
7989 $rOpts->{'closing-paren-indentation'}
7990 || $rOpts->{'closing-brace-indentation'}
7991 || $rOpts->{'closing-square-bracket-indentation'}
7992 || $rOpts->{'indent-closing-brace'};
7994 %opening_token_right = (
7995 '(' => $rOpts->{'opening-paren-right'},
7996 '{' => $rOpts->{'opening-hash-brace-right'},
7997 '[' => $rOpts->{'opening-square-bracket-right'},
8000 %stack_opening_token = (
8001 '(' => $rOpts->{'stack-opening-paren'},
8002 '{' => $rOpts->{'stack-opening-hash-brace'},
8003 '[' => $rOpts->{'stack-opening-square-bracket'},
8006 %stack_closing_token = (
8007 ')' => $rOpts->{'stack-closing-paren'},
8008 '}' => $rOpts->{'stack-closing-hash-brace'},
8009 ']' => $rOpts->{'stack-closing-square-bracket'},
8011 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
8014 sub make_static_block_comment_pattern {
8016 # create the pattern used to identify static block comments
8017 $static_block_comment_pattern = '^\s*##';
8019 # allow the user to change it
8020 if ( $rOpts->{'static-block-comment-prefix'} ) {
8021 my $prefix = $rOpts->{'static-block-comment-prefix'};
8022 $prefix =~ s/^\s*//;
8023 my $pattern = $prefix;
8025 # user may give leading caret to force matching left comments only
8026 if ( $prefix !~ /^\^#/ ) {
8027 if ( $prefix !~ /^#/ ) {
8029 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
8031 $pattern = '^\s*' . $prefix;
8033 eval "'##'=~/$pattern/";
8036 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
8038 $static_block_comment_pattern = $pattern;
8042 sub make_format_skipping_pattern {
8043 my ( $opt_name, $default ) = @_;
8044 my $param = $rOpts->{$opt_name};
8045 unless ($param) { $param = $default }
8047 if ( $param !~ /^#/ ) {
8049 "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
8051 my $pattern = '^' . $param . '\s';
8052 eval "'#'=~/$pattern/";
8055 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
8060 sub make_closing_side_comment_list_pattern {
8062 # turn any input list into a regex for recognizing selected block types
8063 $closing_side_comment_list_pattern = '^\w+';
8064 if ( defined( $rOpts->{'closing-side-comment-list'} )
8065 && $rOpts->{'closing-side-comment-list'} )
8067 $closing_side_comment_list_pattern =
8068 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
8072 sub make_bli_pattern {
8074 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
8075 && $rOpts->{'brace-left-and-indent-list'} )
8077 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
8080 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
8083 sub make_block_brace_vertical_tightness_pattern {
8085 # turn any input list into a regex for recognizing selected block types
8086 $block_brace_vertical_tightness_pattern =
8087 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
8088 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
8089 && $rOpts->{'block-brace-vertical-tightness-list'} )
8091 $block_brace_vertical_tightness_pattern =
8092 make_block_pattern( '-bbvtl',
8093 $rOpts->{'block-brace-vertical-tightness-list'} );
8097 sub make_block_pattern {
8099 # given a string of block-type keywords, return a regex to match them
8100 # The only tricky part is that labels are indicated with a single ':'
8101 # and the 'sub' token text may have additional text after it (name of
8106 # input string: "if else elsif unless while for foreach do : sub";
8107 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
8109 my ( $abbrev, $string ) = @_;
8110 my @list = split_words($string);
8114 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
8117 if ( $i eq 'sub' ) {
8119 elsif ( $i eq ';' ) {
8122 elsif ( $i eq '{' ) {
8125 elsif ( $i eq ':' ) {
8126 push @words, '\w+:';
8128 elsif ( $i =~ /^\w/ ) {
8133 "unrecognized block type $i after $abbrev, ignoring\n";
8136 my $pattern = '(' . join( '|', @words ) . ')$';
8137 if ( $seen{'sub'} ) {
8138 $pattern = '(' . $pattern . '|sub)';
8140 $pattern = '^' . $pattern;
8144 sub make_static_side_comment_pattern {
8146 # create the pattern used to identify static side comments
8147 $static_side_comment_pattern = '^##';
8149 # allow the user to change it
8150 if ( $rOpts->{'static-side-comment-prefix'} ) {
8151 my $prefix = $rOpts->{'static-side-comment-prefix'};
8152 $prefix =~ s/^\s*//;
8153 my $pattern = '^' . $prefix;
8154 eval "'##'=~/$pattern/";
8157 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
8159 $static_side_comment_pattern = $pattern;
8163 sub make_closing_side_comment_prefix {
8165 # Be sure we have a valid closing side comment prefix
8166 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
8167 my $csc_prefix_pattern;
8168 if ( !defined($csc_prefix) ) {
8169 $csc_prefix = '## end';
8170 $csc_prefix_pattern = '^##\s+end';
8173 my $test_csc_prefix = $csc_prefix;
8174 if ( $test_csc_prefix !~ /^#/ ) {
8175 $test_csc_prefix = '#' . $test_csc_prefix;
8178 # make a regex to recognize the prefix
8179 my $test_csc_prefix_pattern = $test_csc_prefix;
8181 # escape any special characters
8182 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
8184 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
8186 # allow exact number of intermediate spaces to vary
8187 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
8189 # make sure we have a good pattern
8190 # if we fail this we probably have an error in escaping
8192 eval "'##'=~/$test_csc_prefix_pattern/";
8195 # shouldn't happen..must have screwed up escaping, above
8196 report_definite_bug();
8198 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
8200 # just warn and keep going with defaults
8201 Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
8203 "Using default -cscp instead; please check output\n";
8206 $csc_prefix = $test_csc_prefix;
8207 $csc_prefix_pattern = $test_csc_prefix_pattern;
8210 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
8211 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
8214 sub dump_want_left_space {
8218 These values are the main control of whitespace to the left of a token type;
8219 They may be altered with the -wls parameter.
8220 For a list of token types, use perltidy --dump-token-types (-dtt)
8221 1 means the token wants a space to its left
8222 -1 means the token does not want a space to its left
8223 ------------------------------------------------------------------------
8225 foreach ( sort keys %want_left_space ) {
8226 print $fh "$_\t$want_left_space{$_}\n";
8230 sub dump_want_right_space {
8234 These values are the main control of whitespace to the right of a token type;
8235 They may be altered with the -wrs parameter.
8236 For a list of token types, use perltidy --dump-token-types (-dtt)
8237 1 means the token wants a space to its right
8238 -1 means the token does not want a space to its right
8239 ------------------------------------------------------------------------
8241 foreach ( sort keys %want_right_space ) {
8242 print $fh "$_\t$want_right_space{$_}\n";
8246 { # begin is_essential_whitespace
8248 my %is_sort_grep_map;
8253 @_ = qw(sort grep map);
8254 @is_sort_grep_map{@_} = (1) x scalar(@_);
8256 @_ = qw(for foreach);
8257 @is_for_foreach{@_} = (1) x scalar(@_);
8261 sub is_essential_whitespace {
8263 # Essential whitespace means whitespace which cannot be safely deleted
8264 # without risking the introduction of a syntax error.
8265 # We are given three tokens and their types:
8266 # ($tokenl, $typel) is the token to the left of the space in question
8267 # ($tokenr, $typer) is the token to the right of the space in question
8268 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
8270 # This is a slow routine but is not needed too often except when -mangle
8273 # Note: This routine should almost never need to be changed. It is
8274 # for avoiding syntax problems rather than for formatting.
8275 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
8279 # never combine two bare words or numbers
8280 # examples: and ::ok(1)
8282 # for bla::bla:: abc
8283 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8284 # $input eq"quit" to make $inputeq"quit"
8285 # my $size=-s::SINK if $file; <==OK but we won't do it
8286 # don't join something like: for bla::bla:: abc
8287 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8288 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
8289 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
8291 # do not combine a number with a concatenation dot
8292 # example: pom.caputo:
8293 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
8294 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
8295 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
8297 # do not join a minus with a bare word, because you might form
8298 # a file test operator. Example from Complex.pm:
8299 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
8300 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
8302 # and something like this could become ambiguous without space
8304 # use constant III=>1;
8308 || ( ( $tokenl eq '-' )
8309 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
8311 # '= -' should not become =- or you will get a warning
8313 # || ($tokenr eq '-')
8315 # keep a space between a quote and a bareword to prevent the
8316 # bareword from becoming a quote modifier.
8317 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8319 # keep a space between a token ending in '$' and any word;
8320 # this caused trouble: "die @$ if $@"
8321 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
8322 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8324 # perl is very fussy about spaces before <<
8325 || ( $tokenr =~ /^\<\</ )
8327 # avoid combining tokens to create new meanings. Example:
8328 # $a+ +$b must not become $a++$b
8329 || ( $is_digraph{ $tokenl . $tokenr } )
8330 || ( $is_trigraph{ $tokenl . $tokenr } )
8332 # another example: do not combine these two &'s:
8333 # allow_options & &OPT_EXECCGI
8334 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
8336 # don't combine $$ or $# with any alphanumeric
8337 # (testfile mangle.t with --mangle)
8338 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
8340 # retain any space after possible filehandle
8341 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
8342 || ( $typel eq 'Z' )
8344 # Perl is sensitive to whitespace after the + here:
8345 # $b = xvals $a + 0.1 * yvals $a;
8346 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
8348 # keep paren separate in 'use Foo::Bar ()'
8352 && $tokenll eq 'use' )
8354 # keep any space between filehandle and paren:
8355 # file mangle.t with --mangle:
8356 || ( $typel eq 'Y' && $tokenr eq '(' )
8358 # retain any space after here doc operator ( hereerr.t)
8359 || ( $typel eq 'h' )
8361 # be careful with a space around ++ and --, to avoid ambiguity as to
8362 # which token it applies
8363 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
8364 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
8366 # need space after foreach my; for example, this will fail in
8367 # older versions of Perl:
8368 # foreach my$ft(@filetypes)...
8373 && $is_for_foreach{$tokenll}
8377 # must have space between grep and left paren; "grep(" will fail
8378 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
8380 # don't stick numbers next to left parens, as in:
8381 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
8382 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
8384 # We must be sure that a space between a ? and a quoted string
8385 # remains if the space before the ? remains. [Loca.pm, lockarea]
8387 # $b=join $comma ? ',' : ':', @_; # ok
8388 # $b=join $comma?',' : ':', @_; # ok!
8389 # $b=join $comma ?',' : ':', @_; # error!
8390 # Not really required:
8391 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
8393 # do not remove space between an '&' and a bare word because
8394 # it may turn into a function evaluation, like here
8395 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
8396 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
8397 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8399 # space stacked labels (TODO: check if really necessary)
8400 || ( $typel eq 'J' && $typer eq 'J' )
8402 ; # the value of this long logic sequence is the result we want
8408 my %secret_operators;
8409 my %is_leading_secret_token;
8413 # token lists for perl secret operators as compiled by Philippe Bruhat
8414 # at: https://metacpan.org/module/perlsecret
8415 %secret_operators = (
8416 'Goatse' => [qw#= ( ) =#], #=( )=
8417 'Venus1' => [qw#0 +#], # 0+
8418 'Venus2' => [qw#+ 0#], # +0
8419 'Enterprise' => [qw#) x ! !#], # ()x!!
8420 'Kite1' => [qw#~ ~ <>#], # ~~<>
8421 'Kite2' => [qw#~~ <>#], # ~~<>
8422 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
8425 # The following operators and constants are not included because they
8426 # are normally kept tight by perltidy:
8430 # Make a lookup table indexed by the first token of each operator:
8431 # first token => [list, list, ...]
8432 foreach my $value ( values(%secret_operators) ) {
8433 my $tok = $value->[0];
8434 push @{ $is_leading_secret_token{$tok} }, $value;
8438 sub secret_operator_whitespace {
8440 my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_;
8442 # Loop over all tokens in this line
8443 my ( $j, $token, $type );
8444 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8446 $token = $$rtokens[$j];
8447 $type = $$rtoken_type[$j];
8449 # Skip unless this token might start a secret operator
8450 next if ( $type eq 'b' );
8451 next unless ( $is_leading_secret_token{$token} );
8453 # Loop over all secret operators with this leading token
8454 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
8456 foreach my $tok ( @{$rpattern} ) {
8460 if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' );
8461 if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) {
8469 # set flags to prevent spaces within this operator
8470 for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) {
8471 $rwhite_space_flag->[$jj] = WS_NO;
8476 } ## End Loop over all operators
8477 } ## End loop over all tokens
8481 sub set_white_space_flag {
8483 # This routine examines each pair of nonblank tokens and
8484 # sets values for array @white_space_flag.
8486 # $white_space_flag[$j] is a flag indicating whether a white space
8487 # BEFORE token $j is needed, with the following values:
8489 # WS_NO = -1 do not want a space before token $j
8490 # WS_OPTIONAL= 0 optional space or $j is a whitespace
8491 # WS_YES = 1 want a space before token $j
8494 # The values for the first token will be defined based
8495 # upon the contents of the "to_go" output array.
8497 # Note: retain debug print statements because they are usually
8498 # required after adding new token types.
8502 # initialize these global hashes, which control the use of
8503 # whitespace around tokens:
8508 # %space_after_keyword
8510 # Many token types are identical to the tokens themselves.
8511 # See the tokenizer for a complete list. Here are some special types:
8513 # f = semicolon in for statement
8516 # Note that :: is excluded since it should be contained in an identifier
8517 # Note that '->' is excluded because it never gets space
8518 # parentheses and brackets are excluded since they are handled specially
8519 # curly braces are included but may be overridden by logic, such as
8522 # NEW_TOKENS: create a whitespace rule here. This can be as
8523 # simple as adding your new letter to @spaces_both_sides, for
8527 @is_opening_type{@_} = (1) x scalar(@_);
8530 @is_closing_type{@_} = (1) x scalar(@_);
8532 my @spaces_both_sides = qw"
8533 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
8534 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
8535 &&= ||= //= <=> A k f w F n C Y U G v
8538 my @spaces_left_side = qw"
8539 t ! ~ m p { \ h pp mm Z j
8541 push( @spaces_left_side, '#' ); # avoids warning message
8543 my @spaces_right_side = qw"
8544 ; } ) ] R J ++ -- **=
8546 push( @spaces_right_side, ',' ); # avoids warning message
8548 # Note that we are in a BEGIN block here. Later in processing
8549 # the values of %want_left_space and %want_right_space
8550 # may be overridden by any user settings specified by the
8551 # -wls and -wrs parameters. However the binary_whitespace_rules
8552 # are hardwired and have priority.
8553 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
8554 @want_right_space{@spaces_both_sides} =
8555 (1) x scalar(@spaces_both_sides);
8556 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
8557 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
8558 @want_left_space{@spaces_right_side} =
8559 (-1) x scalar(@spaces_right_side);
8560 @want_right_space{@spaces_right_side} =
8561 (1) x scalar(@spaces_right_side);
8562 $want_left_space{'->'} = WS_NO;
8563 $want_right_space{'->'} = WS_NO;
8564 $want_left_space{'**'} = WS_NO;
8565 $want_right_space{'**'} = WS_NO;
8566 $want_right_space{'CORE::'} = WS_NO;
8568 # These binary_ws_rules are hardwired and have priority over the above
8569 # settings. It would be nice to allow adjustment by the user,
8570 # but it would be complicated to specify.
8572 # hash type information must stay tightly bound
8574 $binary_ws_rules{'i'}{'L'} = WS_NO;
8575 $binary_ws_rules{'i'}{'{'} = WS_YES;
8576 $binary_ws_rules{'k'}{'{'} = WS_YES;
8577 $binary_ws_rules{'U'}{'{'} = WS_YES;
8578 $binary_ws_rules{'i'}{'['} = WS_NO;
8579 $binary_ws_rules{'R'}{'L'} = WS_NO;
8580 $binary_ws_rules{'R'}{'{'} = WS_NO;
8581 $binary_ws_rules{'t'}{'L'} = WS_NO;
8582 $binary_ws_rules{'t'}{'{'} = WS_NO;
8583 $binary_ws_rules{'}'}{'L'} = WS_NO;
8584 $binary_ws_rules{'}'}{'{'} = WS_NO;
8585 $binary_ws_rules{'$'}{'L'} = WS_NO;
8586 $binary_ws_rules{'$'}{'{'} = WS_NO;
8587 $binary_ws_rules{'@'}{'L'} = WS_NO;
8588 $binary_ws_rules{'@'}{'{'} = WS_NO;
8589 $binary_ws_rules{'='}{'L'} = WS_YES;
8590 $binary_ws_rules{'J'}{'J'} = WS_YES;
8592 # the following includes ') {'
8593 # as in : if ( xxx ) { yyy }
8594 $binary_ws_rules{']'}{'L'} = WS_NO;
8595 $binary_ws_rules{']'}{'{'} = WS_NO;
8596 $binary_ws_rules{')'}{'{'} = WS_YES;
8597 $binary_ws_rules{')'}{'['} = WS_NO;
8598 $binary_ws_rules{']'}{'['} = WS_NO;
8599 $binary_ws_rules{']'}{'{'} = WS_NO;
8600 $binary_ws_rules{'}'}{'['} = WS_NO;
8601 $binary_ws_rules{'R'}{'['} = WS_NO;
8603 $binary_ws_rules{']'}{'++'} = WS_NO;
8604 $binary_ws_rules{']'}{'--'} = WS_NO;
8605 $binary_ws_rules{')'}{'++'} = WS_NO;
8606 $binary_ws_rules{')'}{'--'} = WS_NO;
8608 $binary_ws_rules{'R'}{'++'} = WS_NO;
8609 $binary_ws_rules{'R'}{'--'} = WS_NO;
8611 $binary_ws_rules{'i'}{'Q'} = WS_YES;
8612 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
8614 # FIXME: we could to split 'i' into variables and functions
8615 # and have no space for functions but space for variables. For now,
8616 # I have a special patch in the special rules below
8617 $binary_ws_rules{'i'}{'('} = WS_NO;
8619 $binary_ws_rules{'w'}{'('} = WS_NO;
8620 $binary_ws_rules{'w'}{'{'} = WS_YES;
8621 } ## end BEGIN block
8623 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
8624 my ( $last_token, $last_type, $last_block_type, $token, $type,
8626 my (@white_space_flag);
8627 my $j_tight_closing_paren = -1;
8629 if ( $max_index_to_go >= 0 ) {
8630 $token = $tokens_to_go[$max_index_to_go];
8631 $type = $types_to_go[$max_index_to_go];
8632 $block_type = $block_type_to_go[$max_index_to_go];
8634 #---------------------------------------------------------------
8635 # Patch due to splitting of tokens with leading ->
8636 #---------------------------------------------------------------
8638 # This routine is dealing with the raw tokens from the tokenizer,
8639 # but to get started it needs the previous token, which will
8640 # have been stored in the '_to_go' arrays.
8642 # This patch avoids requiring two iterations to
8643 # converge for cases such as the following, where a paren
8644 # comes in on a line following a variable with leading arrow:
8645 # $self->{main}->add_content_defer_opening
8646 # ($name, $wmkf, $self->{attrs}, $self);
8647 # In this case when we see the opening paren on line 2 we need
8648 # to know if the last token on the previous line had an arrow,
8649 # but it has already been split off so we have to add it back
8650 # in to avoid getting an unwanted space before the paren.
8651 if ( $type =~ /^[wi]$/ ) {
8652 my $im = $iprev_to_go[$max_index_to_go];
8653 my $tm = ( $im >= 0 ) ? $types_to_go[$im] : "";
8654 if ( $tm eq '->' ) { $token = $tm . $token }
8657 #---------------------------------------------------------------
8658 # End patch due to splitting of tokens with leading ->
8659 #---------------------------------------------------------------
8669 # main loop over all tokens to define the whitespace flags
8670 for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8672 if ( $$rtoken_type[$j] eq 'b' ) {
8673 $white_space_flag[$j] = WS_OPTIONAL;
8677 # set a default value, to be changed as needed
8679 $last_token = $token;
8681 $last_block_type = $block_type;
8682 $token = $$rtokens[$j];
8683 $type = $$rtoken_type[$j];
8684 $block_type = $$rblock_type[$j];
8686 #---------------------------------------------------------------
8687 # Whitespace Rules Section 1:
8688 # Handle space on the inside of opening braces.
8689 #---------------------------------------------------------------
8692 if ( $is_opening_type{$last_type} ) {
8694 $j_tight_closing_paren = -1;
8696 # let's keep empty matched braces together: () {} []
8698 if ( $token eq $matching_token{$last_token} ) {
8708 # we're considering the right of an opening brace
8709 # tightness = 0 means always pad inside with space
8710 # tightness = 1 means pad inside if "complex"
8711 # tightness = 2 means never pad inside with space
8714 if ( $last_type eq '{'
8715 && $last_token eq '{'
8716 && $last_block_type )
8718 $tightness = $rOpts_block_brace_tightness;
8720 else { $tightness = $tightness{$last_token} }
8722 #=============================================================
8723 # Patch for test problem fabrice_bug.pl
8724 # We must always avoid spaces around a bare word beginning
8726 # my $before = ${^PREMATCH};
8727 # Because all of the following cause an error in perl:
8728 # my $before = ${ ^PREMATCH };
8729 # my $before = ${ ^PREMATCH};
8730 # my $before = ${^PREMATCH };
8731 # So if brace tightness flag is -bt=0 we must temporarily reset
8732 # to bt=1. Note that here we must set tightness=1 and not 2 so
8733 # that the closing space
8734 # is also avoided (via the $j_tight_closing_paren flag in coding)
8735 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8737 #=============================================================
8739 if ( $tightness <= 0 ) {
8742 elsif ( $tightness > 1 ) {
8747 # Patch to count '-foo' as single token so that
8748 # each of $a{-foo} and $a{foo} and $a{'foo'} do
8749 # not get spaces with default formatting.
8753 && $last_token eq '{'
8754 && $$rtoken_type[ $j + 1 ] eq 'w' );
8756 # $j_next is where a closing token should be if
8757 # the container has a single token
8759 ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8762 my $tok_next = $$rtokens[$j_next];
8763 my $type_next = $$rtoken_type[$j_next];
8765 # for tightness = 1, if there is just one token
8766 # within the matching pair, we will keep it tight
8768 $tok_next eq $matching_token{$last_token}
8770 # but watch out for this: [ [ ] (misc.t)
8771 && $last_token ne $token
8775 # remember where to put the space for the closing paren
8776 $j_tight_closing_paren = $j_next;
8784 } # end setting space flag inside opening tokens
8786 if FORMATTER_DEBUG_FLAG_WHITE;
8788 #---------------------------------------------------------------
8789 # Whitespace Rules Section 2:
8790 # Handle space on inside of closing brace pairs.
8791 #---------------------------------------------------------------
8794 if ( $is_closing_type{$type} ) {
8796 if ( $j == $j_tight_closing_paren ) {
8798 $j_tight_closing_paren = -1;
8803 if ( !defined($ws) ) {
8806 if ( $type eq '}' && $token eq '}' && $block_type ) {
8807 $tightness = $rOpts_block_brace_tightness;
8809 else { $tightness = $tightness{$token} }
8811 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8814 } # end setting space flag inside closing tokens
8817 if FORMATTER_DEBUG_FLAG_WHITE;
8819 #---------------------------------------------------------------
8820 # Whitespace Rules Section 3:
8821 # Use the binary rule table.
8822 #---------------------------------------------------------------
8823 if ( !defined($ws) ) {
8824 $ws = $binary_ws_rules{$last_type}{$type};
8827 if FORMATTER_DEBUG_FLAG_WHITE;
8829 #---------------------------------------------------------------
8830 # Whitespace Rules Section 4:
8831 # Handle some special cases.
8832 #---------------------------------------------------------------
8833 if ( $token eq '(' ) {
8835 # This will have to be tweaked as tokenization changes.
8836 # We usually want a space at '} (', for example:
8837 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8840 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8841 # At present, the above & block is marked as type L/R so this case
8842 # won't go through here.
8843 if ( $last_type eq '}' ) { $ws = WS_YES }
8845 # NOTE: some older versions of Perl had occasional problems if
8846 # spaces are introduced between keywords or functions and opening
8847 # parens. So the default is not to do this except is certain
8848 # cases. The current Perl seems to tolerate spaces.
8850 # Space between keyword and '('
8851 elsif ( $last_type eq 'k' ) {
8853 unless ( $rOpts_space_keyword_paren
8854 || $space_after_keyword{$last_token} );
8857 # Space between function and '('
8858 # -----------------------------------------------------
8859 # 'w' and 'i' checks for something like:
8860 # myfun( &myfun( ->myfun(
8861 # -----------------------------------------------------
8862 elsif (( $last_type =~ /^[wUG]$/ )
8863 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8865 $ws = WS_NO unless ($rOpts_space_function_paren);
8868 # space between something like $i and ( in
8869 # for $i ( 0 .. 20 ) {
8870 # FIXME: eventually, type 'i' needs to be split into multiple
8871 # token types so this can be a hardwired rule.
8872 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8876 # allow constant function followed by '()' to retain no space
8877 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8882 # patch for SWITCH/CASE: make space at ']{' optional
8883 # since the '{' might begin a case or when block
8884 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8888 # keep space between 'sub' and '{' for anonymous sub definition
8889 if ( $type eq '{' ) {
8890 if ( $last_token eq 'sub' ) {
8894 # this is needed to avoid no space in '){'
8895 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8897 # avoid any space before the brace or bracket in something like
8898 # @opts{'a','b',...}
8899 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8904 elsif ( $type eq 'i' ) {
8906 # never a space before ->
8907 if ( $token =~ /^\-\>/ ) {
8912 # retain any space between '-' and bare word
8913 elsif ( $type eq 'w' || $type eq 'C' ) {
8914 $ws = WS_OPTIONAL if $last_type eq '-';
8916 # never a space before ->
8917 if ( $token =~ /^\-\>/ ) {
8922 # retain any space between '-' and bare word
8923 # example: avoid space between 'USER' and '-' here:
8924 # $myhash{USER-NAME}='steve';
8925 elsif ( $type eq 'm' || $type eq '-' ) {
8926 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8929 # always space before side comment
8930 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8932 # always preserver whatever space was used after a possible
8933 # filehandle (except _) or here doc operator
8936 && ( ( $last_type eq 'Z' && $last_token ne '_' )
8937 || $last_type eq 'h' )
8944 if FORMATTER_DEBUG_FLAG_WHITE;
8946 #---------------------------------------------------------------
8947 # Whitespace Rules Section 5:
8948 # Apply default rules not covered above.
8949 #---------------------------------------------------------------
8951 # If we fall through to here, look at the pre-defined hash tables for
8952 # the two tokens, and:
8953 # if (they are equal) use the common value
8954 # if (either is zero or undef) use the other
8955 # if (either is -1) use it
8969 if ( !defined($ws) ) {
8970 my $wl = $want_left_space{$type};
8971 my $wr = $want_right_space{$last_type};
8972 if ( !defined($wl) ) { $wl = 0 }
8973 if ( !defined($wr) ) { $wr = 0 }
8974 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8977 if ( !defined($ws) ) {
8980 "WS flag is undefined for tokens $last_token $token\n");
8983 # Treat newline as a whitespace. Otherwise, we might combine
8984 # 'Send' and '-recipients' here according to the above rules:
8985 # my $msg = new Fax::Send
8986 # -recipients => $to,
8988 if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8993 && ( $last_type !~ /^[Zh]$/ ) )
8996 # If this happens, we have a non-fatal but undesirable
8997 # hole in the above rules which should be patched.
8999 "WS flag is zero for tokens $last_token $token\n");
9001 $white_space_flag[$j] = $ws;
9003 FORMATTER_DEBUG_FLAG_WHITE && do {
9004 my $str = substr( $last_token, 0, 15 );
9005 $str .= ' ' x ( 16 - length($str) );
9006 if ( !defined($ws_1) ) { $ws_1 = "*" }
9007 if ( !defined($ws_2) ) { $ws_2 = "*" }
9008 if ( !defined($ws_3) ) { $ws_3 = "*" }
9009 if ( !defined($ws_4) ) { $ws_4 = "*" }
9011 "WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
9015 if ($rOpts_tight_secret_operators) {
9016 secret_operator_whitespace( $jmax, $rtokens, $rtoken_type,
9017 \@white_space_flag );
9020 return \@white_space_flag;
9021 } ## end sub set_white_space_flag
9023 { # begin print_line_of_tokens
9030 my $rcontainer_type;
9031 my $rcontainer_environment;
9034 my $rnesting_tokens;
9036 my $rnesting_blocks;
9039 my $guessed_indentation_level;
9041 # These local token variables are stored by store_token_to_go:
9044 my $container_environment;
9046 my $in_continued_quote;
9049 my $no_internal_newlines;
9055 # routine to pull the jth token from the line of tokens
9058 $token = $$rtokens[$j];
9059 $type = $$rtoken_type[$j];
9060 $block_type = $$rblock_type[$j];
9061 $container_type = $$rcontainer_type[$j];
9062 $container_environment = $$rcontainer_environment[$j];
9063 $type_sequence = $$rtype_sequence[$j];
9064 $level = $$rlevels[$j];
9065 $slevel = $$rslevels[$j];
9066 $nesting_blocks = $$rnesting_blocks[$j];
9067 $ci_level = $$rci_levels[$j];
9073 sub save_current_token {
9076 $block_type, $ci_level,
9077 $container_environment, $container_type,
9078 $in_continued_quote, $level,
9079 $nesting_blocks, $no_internal_newlines,
9081 $type, $type_sequence,
9085 sub restore_current_token {
9087 $block_type, $ci_level,
9088 $container_environment, $container_type,
9089 $in_continued_quote, $level,
9090 $nesting_blocks, $no_internal_newlines,
9092 $type, $type_sequence,
9099 # Returns the length of a token, given:
9100 # $token=text of the token
9102 # $not_first_token = should be TRUE if this is not the first token of
9103 # the line. It might the index of this token in an array. It is
9104 # used to test for a side comment vs a block comment.
9105 # Note: Eventually this should be the only routine determining the
9106 # length of a token in this package.
9107 my ( $token, $type, $not_first_token ) = @_;
9108 my $token_length = length($token);
9110 # We mark lengths of side comments as just 1 if we are
9111 # ignoring their lengths when setting line breaks.
9113 if ( $rOpts_ignore_side_comment_lengths
9116 return $token_length;
9121 # return length of ith token in @{$rtokens}
9123 return token_length( $$rtokens[$i], $$rtoken_type[$i], $i );
9126 # Routine to place the current token into the output stream.
9127 # Called once per output token.
9128 sub store_token_to_go {
9130 my $flag = $no_internal_newlines;
9131 if ( $_[0] ) { $flag = 1 }
9133 $tokens_to_go[ ++$max_index_to_go ] = $token;
9134 $types_to_go[$max_index_to_go] = $type;
9135 $nobreak_to_go[$max_index_to_go] = $flag;
9136 $old_breakpoint_to_go[$max_index_to_go] = 0;
9137 $forced_breakpoint_to_go[$max_index_to_go] = 0;
9138 $block_type_to_go[$max_index_to_go] = $block_type;
9139 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
9140 $container_environment_to_go[$max_index_to_go] = $container_environment;
9141 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
9142 $ci_levels_to_go[$max_index_to_go] = $ci_level;
9143 $mate_index_to_go[$max_index_to_go] = -1;
9144 $matching_token_to_go[$max_index_to_go] = '';
9145 $bond_strength_to_go[$max_index_to_go] = 0;
9147 # Note: negative levels are currently retained as a diagnostic so that
9148 # the 'final indentation level' is correctly reported for bad scripts.
9149 # But this means that every use of $level as an index must be checked.
9150 # If this becomes too much of a problem, we might give up and just clip
9152 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
9153 $levels_to_go[$max_index_to_go] = $level;
9154 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
9156 # link the non-blank tokens
9157 my $iprev = $max_index_to_go - 1;
9158 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
9159 $iprev_to_go[$max_index_to_go] = $iprev;
9160 $inext_to_go[$iprev] = $max_index_to_go
9161 if ( $iprev >= 0 && $type ne 'b' );
9162 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
9164 $token_lengths_to_go[$max_index_to_go] =
9165 token_length( $token, $type, $max_index_to_go );
9167 # We keep a running sum of token lengths from the start of this batch:
9168 # summed_lengths_to_go[$i] = total length to just before token $i
9169 # summed_lengths_to_go[$i+1] = total length to just after token $i
9170 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
9171 $summed_lengths_to_go[$max_index_to_go] +
9172 $token_lengths_to_go[$max_index_to_go];
9174 # Define the indentation that this token would have if it started
9175 # a new line. We have to do this now because we need to know this
9176 # when considering one-line blocks.
9177 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
9179 # remember previous nonblank tokens seen
9180 if ( $type ne 'b' ) {
9181 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
9182 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
9183 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
9184 $last_nonblank_index_to_go = $max_index_to_go;
9185 $last_nonblank_type_to_go = $type;
9186 $last_nonblank_token_to_go = $token;
9187 if ( $type eq ',' ) {
9188 $comma_count_in_batch++;
9192 FORMATTER_DEBUG_FLAG_STORE && do {
9193 my ( $a, $b, $c ) = caller();
9195 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
9199 sub insert_new_token_to_go {
9201 # insert a new token into the output stream. use same level as
9202 # previous token; assumes a character at max_index_to_go.
9203 save_current_token();
9204 ( $token, $type, $slevel, $no_internal_newlines ) = @_;
9206 if ( $max_index_to_go == UNDEFINED_INDEX ) {
9207 warning("code bug: bad call to insert_new_token_to_go\n");
9209 $level = $levels_to_go[$max_index_to_go];
9211 # FIXME: it seems to be necessary to use the next, rather than
9212 # previous, value of this variable when creating a new blank (align.t)
9213 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
9214 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
9215 $ci_level = $ci_levels_to_go[$max_index_to_go];
9216 $container_environment = $container_environment_to_go[$max_index_to_go];
9217 $in_continued_quote = 0;
9219 $type_sequence = "";
9220 store_token_to_go();
9221 restore_current_token();
9225 sub print_line_of_tokens {
9227 my $line_of_tokens = shift;
9229 # This routine is called once per input line to process all of
9230 # the tokens on that line. This is the first stage of
9233 # Full-line comments and blank lines may be processed immediately.
9235 # For normal lines of code, the tokens are stored one-by-one,
9236 # via calls to 'sub store_token_to_go', until a known line break
9237 # point is reached. Then, the batch of collected tokens is
9238 # passed along to 'sub output_line_to_go' for further
9239 # processing. This routine decides if there should be
9240 # whitespace between each pair of non-white tokens, so later
9241 # routines only need to decide on any additional line breaks.
9242 # Any whitespace is initially a single space character. Later,
9243 # the vertical aligner may expand that to be multiple space
9244 # characters if necessary for alignment.
9246 # extract input line number for error messages
9247 $input_line_number = $line_of_tokens->{_line_number};
9249 $rtoken_type = $line_of_tokens->{_rtoken_type};
9250 $rtokens = $line_of_tokens->{_rtokens};
9251 $rlevels = $line_of_tokens->{_rlevels};
9252 $rslevels = $line_of_tokens->{_rslevels};
9253 $rblock_type = $line_of_tokens->{_rblock_type};
9254 $rcontainer_type = $line_of_tokens->{_rcontainer_type};
9255 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
9256 $rtype_sequence = $line_of_tokens->{_rtype_sequence};
9257 $input_line = $line_of_tokens->{_line_text};
9258 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
9259 $rci_levels = $line_of_tokens->{_rci_levels};
9260 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
9262 $in_continued_quote = $starting_in_quote =
9263 $line_of_tokens->{_starting_in_quote};
9264 $in_quote = $line_of_tokens->{_ending_in_quote};
9265 $ending_in_quote = $in_quote;
9266 $guessed_indentation_level =
9267 $line_of_tokens->{_guessed_indentation_level};
9272 my $next_nonblank_token;
9273 my $next_nonblank_token_type;
9274 my $rwhite_space_flag;
9276 $jmax = @$rtokens - 1;
9278 $container_type = "";
9279 $container_environment = "";
9280 $type_sequence = "";
9281 $no_internal_newlines = 1 - $rOpts_add_newlines;
9282 $is_static_block_comment = 0;
9284 # Handle a continued quote..
9285 if ($in_continued_quote) {
9287 # A line which is entirely a quote or pattern must go out
9288 # verbatim. Note: the \n is contained in $input_line.
9290 if ( ( $input_line =~ "\t" ) ) {
9291 note_embedded_tab();
9293 write_unindented_line("$input_line");
9294 $last_line_had_side_comment = 0;
9299 # Write line verbatim if we are in a formatting skip section
9300 if ($in_format_skipping_section) {
9301 write_unindented_line("$input_line");
9302 $last_line_had_side_comment = 0;
9304 # Note: extra space appended to comment simplifies pattern matching
9306 && $$rtoken_type[0] eq '#'
9307 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
9309 $in_format_skipping_section = 0;
9310 write_logfile_entry("Exiting formatting skip section\n");
9311 $file_writer_object->reset_consecutive_blank_lines();
9316 # See if we are entering a formatting skip section
9317 if ( $rOpts_format_skipping
9319 && $$rtoken_type[0] eq '#'
9320 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
9323 $in_format_skipping_section = 1;
9324 write_logfile_entry("Entering formatting skip section\n");
9325 write_unindented_line("$input_line");
9326 $last_line_had_side_comment = 0;
9330 # delete trailing blank tokens
9331 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
9333 # Handle a blank line..
9336 # If keep-old-blank-lines is zero, we delete all
9337 # old blank lines and let the blank line rules generate any
9339 if ($rOpts_keep_old_blank_lines) {
9341 $file_writer_object->write_blank_code_line(
9342 $rOpts_keep_old_blank_lines == 2 );
9343 $last_line_leading_type = 'b';
9345 $last_line_had_side_comment = 0;
9349 # see if this is a static block comment (starts with ## by default)
9350 my $is_static_block_comment_without_leading_space = 0;
9352 && $$rtoken_type[0] eq '#'
9353 && $rOpts->{'static-block-comments'}
9354 && $input_line =~ /$static_block_comment_pattern/o )
9356 $is_static_block_comment = 1;
9357 $is_static_block_comment_without_leading_space =
9358 substr( $input_line, 0, 1 ) eq '#';
9361 # Check for comments which are line directives
9362 # Treat exactly as static block comments without leading space
9363 # reference: perlsyn, near end, section Plain Old Comments (Not!)
9364 # example: '# line 42 "new_filename.plx"'
9367 && $$rtoken_type[0] eq '#'
9368 && $input_line =~ /^\# \s*
9370 (?:\s("?)([^"]+)\2)? \s*
9374 $is_static_block_comment = 1;
9375 $is_static_block_comment_without_leading_space = 1;
9378 # create a hanging side comment if appropriate
9379 my $is_hanging_side_comment;
9382 && $$rtoken_type[0] eq '#' # only token is a comment
9383 && $last_line_had_side_comment # last line had side comment
9384 && $input_line =~ /^\s/ # there is some leading space
9385 && !$is_static_block_comment # do not make static comment hanging
9386 && $rOpts->{'hanging-side-comments'} # user is allowing
9387 # hanging side comments
9392 # We will insert an empty qw string at the start of the token list
9393 # to force this comment to be a side comment. The vertical aligner
9394 # should then line it up with the previous side comment.
9395 $is_hanging_side_comment = 1;
9396 unshift @$rtoken_type, 'q';
9397 unshift @$rtokens, '';
9398 unshift @$rlevels, $$rlevels[0];
9399 unshift @$rslevels, $$rslevels[0];
9400 unshift @$rblock_type, '';
9401 unshift @$rcontainer_type, '';
9402 unshift @$rcontainer_environment, '';
9403 unshift @$rtype_sequence, '';
9404 unshift @$rnesting_tokens, $$rnesting_tokens[0];
9405 unshift @$rci_levels, $$rci_levels[0];
9406 unshift @$rnesting_blocks, $$rnesting_blocks[0];
9410 # remember if this line has a side comment
9411 $last_line_had_side_comment =
9412 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
9414 # Handle a block (full-line) comment..
9415 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
9417 if ( $rOpts->{'delete-block-comments'} ) { return }
9419 if ( $rOpts->{'tee-block-comments'} ) {
9420 $file_writer_object->tee_on();
9423 destroy_one_line_block();
9424 output_line_to_go();
9426 # output a blank line before block comments
9428 # unless we follow a blank or comment line
9429 $last_line_leading_type !~ /^[#b]$/
9432 && $rOpts->{'blanks-before-comments'}
9434 # not if this is an empty comment line
9435 && $$rtokens[0] ne '#'
9437 # not after a short line ending in an opening token
9438 # because we already have space above this comment.
9439 # Note that the first comment in this if block, after
9440 # the 'if (', does not get a blank line because of this.
9441 && !$last_output_short_opening_token
9443 # never before static block comments
9444 && !$is_static_block_comment
9447 flush(); # switching to new output stream
9448 $file_writer_object->write_blank_code_line();
9449 $last_line_leading_type = 'b';
9452 # TRIM COMMENTS -- This could be turned off as a option
9453 $$rtokens[0] =~ s/\s*$//; # trim right end
9456 $rOpts->{'indent-block-comments'}
9457 && ( !$rOpts->{'indent-spaced-block-comments'}
9458 || $input_line =~ /^\s+/ )
9459 && !$is_static_block_comment_without_leading_space
9463 store_token_to_go();
9464 output_line_to_go();
9467 flush(); # switching to new output stream
9468 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
9469 $last_line_leading_type = '#';
9471 if ( $rOpts->{'tee-block-comments'} ) {
9472 $file_writer_object->tee_off();
9477 # compare input/output indentation except for continuation lines
9478 # (because they have an unknown amount of initial blank space)
9479 # and lines which are quotes (because they may have been outdented)
9480 # Note: this test is placed here because we know the continuation flag
9481 # at this point, which allows us to avoid non-meaningful checks.
9482 my $structural_indentation_level = $$rlevels[0];
9483 compare_indentation_levels( $guessed_indentation_level,
9484 $structural_indentation_level )
9485 unless ( $is_hanging_side_comment
9486 || $$rci_levels[0] > 0
9487 || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' );
9489 # Patch needed for MakeMaker. Do not break a statement
9490 # in which $VERSION may be calculated. See MakeMaker.pm;
9491 # this is based on the coding in it.
9492 # The first line of a file that matches this will be eval'd:
9493 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9495 # *VERSION = \'1.01';
9496 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
9497 # We will pass such a line straight through without breaking
9498 # it unless -npvl is used.
9500 # Patch for problem reported in RT #81866, where files
9501 # had been flattened into a single line and couldn't be
9502 # tidied without -npvl. There are two parts to this patch:
9503 # First, it is not done for a really long line (80 tokens for now).
9504 # Second, we will only allow up to one semicolon
9505 # before the VERSION. We need to allow at least one semicolon
9506 # for statements like this:
9507 # require Exporter; our $VERSION = $Exporter::VERSION;
9508 # where both statements must be on a single line for MakeMaker
9510 my $is_VERSION_statement = 0;
9511 if ( !$saw_VERSION_in_this_file
9514 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
9516 $saw_VERSION_in_this_file = 1;
9517 $is_VERSION_statement = 1;
9518 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
9519 $no_internal_newlines = 1;
9522 # take care of indentation-only
9523 # NOTE: In previous versions we sent all qw lines out immediately here.
9524 # No longer doing this: also write a line which is entirely a 'qw' list
9525 # to allow stacking of opening and closing tokens. Note that interior
9526 # qw lines will still go out at the end of this routine.
9527 if ( $rOpts->{'indent-only'} ) {
9529 my $line = $input_line;
9531 # delete side comments if requested with -io, but
9532 # we will not allow deleting of closing side comments with -io
9533 # because the coding would be more complex
9534 if ( $rOpts->{'delete-side-comments'}
9535 && $rtoken_type->[$jmax] eq '#' )
9537 $line = join "", @{$rtokens}[ 0 .. $jmax - 1 ];
9545 $container_type = "";
9546 $container_environment = "";
9547 $type_sequence = "";
9548 store_token_to_go();
9549 output_line_to_go();
9553 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
9554 push( @$rtoken_type, 'b', 'b' );
9555 ($rwhite_space_flag) =
9556 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
9558 # if the buffer hasn't been flushed, add a leading space if
9559 # necessary to keep essential whitespace. This is really only
9560 # necessary if we are squeezing out all ws.
9561 if ( $max_index_to_go >= 0 ) {
9563 $old_line_count_in_batch++;
9566 is_essential_whitespace(
9567 $last_last_nonblank_token,
9568 $last_last_nonblank_type,
9569 $tokens_to_go[$max_index_to_go],
9570 $types_to_go[$max_index_to_go],
9576 my $slevel = $$rslevels[0];
9577 insert_new_token_to_go( ' ', 'b', $slevel,
9578 $no_internal_newlines );
9582 # If we just saw the end of an elsif block, write nag message
9583 # if we do not see another elseif or an else.
9584 if ($looking_for_else) {
9586 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
9587 write_logfile_entry("(No else block)\n");
9589 $looking_for_else = 0;
9592 # This is a good place to kill incomplete one-line blocks
9595 ( $semicolons_before_block_self_destruct == 0 )
9596 && ( $max_index_to_go >= 0 )
9597 && ( $types_to_go[$max_index_to_go] eq ';' )
9598 && ( $$rtokens[0] ne '}' )
9601 # Patch for RT #98902. Honor request to break at old commas.
9602 || ( $rOpts_break_at_old_comma_breakpoints
9603 && $max_index_to_go >= 0
9604 && $types_to_go[$max_index_to_go] eq ',' )
9607 $forced_breakpoint_to_go[$max_index_to_go] = 1
9608 if ($rOpts_break_at_old_comma_breakpoints);
9609 destroy_one_line_block();
9610 output_line_to_go();
9613 # loop to process the tokens one-by-one
9617 foreach $j ( 0 .. $jmax ) {
9619 # pull out the local values for this token
9622 if ( $type eq '#' ) {
9624 # trim trailing whitespace
9625 # (there is no option at present to prevent this)
9629 $rOpts->{'delete-side-comments'}
9631 # delete closing side comments if necessary
9632 || ( $rOpts->{'delete-closing-side-comments'}
9633 && $token =~ /$closing_side_comment_prefix_pattern/o
9634 && $last_nonblank_block_type =~
9635 /$closing_side_comment_list_pattern/o )
9638 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9639 unstore_token_to_go();
9645 # If we are continuing after seeing a right curly brace, flush
9646 # buffer unless we see what we are looking for, as in
9648 if ( $rbrace_follower && $type ne 'b' ) {
9650 unless ( $rbrace_follower->{$token} ) {
9651 output_line_to_go();
9653 $rbrace_follower = undef;
9656 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
9657 $next_nonblank_token = $$rtokens[$j_next];
9658 $next_nonblank_token_type = $$rtoken_type[$j_next];
9660 #--------------------------------------------------------
9661 # Start of section to patch token text
9662 #--------------------------------------------------------
9664 # Modify certain tokens here for whitespace
9665 # The following is not yet done, but could be:
9667 if ( $type =~ /^[wit]$/ ) {
9670 # change '$ var' to '$var' etc
9671 # '-> new' to '->new'
9672 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
9676 # Split identifiers with leading arrows, inserting blanks if
9677 # necessary. It is easier and safer here than in the
9678 # tokenizer. For example '->new' becomes two tokens, '->' and
9679 # 'new' with a possible blank between.
9681 # Note: there is a related patch in sub set_white_space_flag
9682 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
9683 my $token_save = $1;
9684 my $type_save = $type;
9686 # store a blank to left of arrow if necessary
9687 if ( $max_index_to_go >= 0
9688 && $types_to_go[$max_index_to_go] ne 'b'
9689 && $want_left_space{'->'} == WS_YES )
9691 insert_new_token_to_go( ' ', 'b', $slevel,
9692 $no_internal_newlines );
9695 # then store the arrow
9698 store_token_to_go();
9700 # then reset the current token to be the remainder,
9701 # and reset the whitespace flag according to the arrow
9702 $$rwhite_space_flag[$j] = $want_right_space{'->'};
9703 $token = $token_save;
9707 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
9709 # trim identifiers of trailing blanks which can occur
9710 # under some unusual circumstances, such as if the
9711 # identifier 'witch' has trailing blanks on input here:
9715 # () # prototype may be on new line ...
9717 if ( $type eq 'i' ) { $token =~ s/\s+$//g }
9720 # change 'LABEL :' to 'LABEL:'
9721 elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
9723 # patch to add space to something like "x10"
9724 # This avoids having to split this token in the pre-tokenizer
9725 elsif ( $type eq 'n' ) {
9726 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
9729 elsif ( $type eq 'Q' ) {
9730 note_embedded_tab() if ( $token =~ "\t" );
9732 # make note of something like '$var = s/xxx/yyy/;'
9733 # in case it should have been '$var =~ s/xxx/yyy/;'
9735 $token =~ /^(s|tr|y|m|\/)/
9736 && $last_nonblank_token =~ /^(=|==|!=)$/
9738 # preceded by simple scalar
9739 && $last_last_nonblank_type eq 'i'
9740 && $last_last_nonblank_token =~ /^\$/
9742 # followed by some kind of termination
9743 # (but give complaint if we can's see far enough ahead)
9744 && $next_nonblank_token =~ /^[; \)\}]$/
9746 # scalar is not declared
9748 $types_to_go[0] eq 'k'
9749 && $tokens_to_go[0] =~ /^(my|our|local)$/
9753 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
9755 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
9760 # trim blanks from right of qw quotes
9761 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
9762 elsif ( $type eq 'q' ) {
9764 note_embedded_tab() if ( $token =~ "\t" );
9767 #--------------------------------------------------------
9768 # End of section to patch token text
9769 #--------------------------------------------------------
9771 # insert any needed whitespace
9772 if ( ( $type ne 'b' )
9773 && ( $max_index_to_go >= 0 )
9774 && ( $types_to_go[$max_index_to_go] ne 'b' )
9775 && $rOpts_add_whitespace )
9777 my $ws = $$rwhite_space_flag[$j];
9780 insert_new_token_to_go( ' ', 'b', $slevel,
9781 $no_internal_newlines );
9785 # Do not allow breaks which would promote a side comment to a
9786 # block comment. In order to allow a break before an opening
9787 # or closing BLOCK, followed by a side comment, those sections
9788 # of code will handle this flag separately.
9789 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
9790 my $is_opening_BLOCK =
9794 && $block_type ne 't' );
9795 my $is_closing_BLOCK =
9799 && $block_type ne 't' );
9801 if ( $side_comment_follows
9802 && !$is_opening_BLOCK
9803 && !$is_closing_BLOCK )
9805 $no_internal_newlines = 1;
9808 # We're only going to handle breaking for code BLOCKS at this
9809 # (top) level. Other indentation breaks will be handled by
9810 # sub scan_list, which is better suited to dealing with them.
9811 if ($is_opening_BLOCK) {
9813 # Tentatively output this token. This is required before
9814 # calling starting_one_line_block. We may have to unstore
9815 # it, though, if we have to break before it.
9816 store_token_to_go($side_comment_follows);
9818 # Look ahead to see if we might form a one-line block
9820 starting_one_line_block( $j, $jmax, $level, $slevel,
9821 $ci_level, $rtokens, $rtoken_type, $rblock_type );
9822 clear_breakpoint_undo_stack();
9824 # to simplify the logic below, set a flag to indicate if
9825 # this opening brace is far from the keyword which introduces it
9826 my $keyword_on_same_line = 1;
9827 if ( ( $max_index_to_go >= 0 )
9828 && ( $last_nonblank_type eq ')' ) )
9830 if ( $block_type =~ /^(if|else|elsif)$/
9831 && ( $tokens_to_go[0] eq '}' )
9832 && $rOpts_cuddled_else )
9834 $keyword_on_same_line = 1;
9836 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
9838 $keyword_on_same_line = 0;
9842 # decide if user requested break before '{'
9845 # use -bl flag if not a sub block of any type
9846 $block_type !~ /^sub/
9847 ? $rOpts->{'opening-brace-on-new-line'}
9849 # use -sbl flag for a named sub block
9850 : $block_type !~ /^sub\W*$/
9851 ? $rOpts->{'opening-sub-brace-on-new-line'}
9853 # use -asbl flag for an anonymous sub block
9854 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9856 # Break before an opening '{' ...
9862 # and we were unable to start looking for a block,
9863 && $index_start_one_line_block == UNDEFINED_INDEX
9865 # or if it will not be on same line as its keyword, so that
9866 # it will be outdented (eval.t, overload.t), and the user
9867 # has not insisted on keeping it on the right
9868 || ( !$keyword_on_same_line
9869 && !$rOpts->{'opening-brace-always-on-right'} )
9874 # but only if allowed
9875 unless ($no_internal_newlines) {
9877 # since we already stored this token, we must unstore it
9878 unstore_token_to_go();
9880 # then output the line
9881 output_line_to_go();
9883 # and now store this token at the start of a new line
9884 store_token_to_go($side_comment_follows);
9888 # Now update for side comment
9889 if ($side_comment_follows) { $no_internal_newlines = 1 }
9891 # now output this line
9892 unless ($no_internal_newlines) {
9893 output_line_to_go();
9897 elsif ($is_closing_BLOCK) {
9899 # If there is a pending one-line block ..
9900 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9902 # we have to terminate it if..
9905 # it is too long (final length may be different from
9906 # initial estimate). note: must allow 1 space for this token
9907 excess_line_length( $index_start_one_line_block,
9908 $max_index_to_go ) >= 0
9910 # or if it has too many semicolons
9911 || ( $semicolons_before_block_self_destruct == 0
9912 && $last_nonblank_type ne ';' )
9915 destroy_one_line_block();
9919 # put a break before this closing curly brace if appropriate
9920 unless ( $no_internal_newlines
9921 || $index_start_one_line_block != UNDEFINED_INDEX )
9924 # add missing semicolon if ...
9925 # there are some tokens
9927 ( $max_index_to_go > 0 )
9929 # and we don't have one
9930 && ( $last_nonblank_type ne ';' )
9932 # and we are allowed to do so.
9933 && $rOpts->{'add-semicolons'}
9935 # and we are allowed to for this block type
9936 && ( $ok_to_add_semicolon_for_block_type{$block_type}
9937 || $block_type =~ /^(sub|package)/
9938 || $block_type =~ /^\w+\:$/ )
9943 save_current_token();
9946 $level = $levels_to_go[$max_index_to_go];
9947 $slevel = $nesting_depth_to_go[$max_index_to_go];
9949 $nesting_blocks_to_go[$max_index_to_go];
9950 $ci_level = $ci_levels_to_go[$max_index_to_go];
9952 $container_type = "";
9953 $container_environment = "";
9954 $type_sequence = "";
9956 # Note - we remove any blank AFTER extracting its
9957 # parameters such as level, etc, above
9958 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9959 unstore_token_to_go();
9961 store_token_to_go();
9963 note_added_semicolon();
9964 restore_current_token();
9967 # then write out everything before this closing curly brace
9968 output_line_to_go();
9972 # Now update for side comment
9973 if ($side_comment_follows) { $no_internal_newlines = 1 }
9975 # store the closing curly brace
9976 store_token_to_go();
9978 # ok, we just stored a closing curly brace. Often, but
9979 # not always, we want to end the line immediately.
9980 # So now we have to check for special cases.
9982 # if this '}' successfully ends a one-line block..
9983 my $is_one_line_block = 0;
9985 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9987 # Remember the type of token just before the
9988 # opening brace. It would be more general to use
9989 # a stack, but this will work for one-line blocks.
9990 $is_one_line_block =
9991 $types_to_go[$index_start_one_line_block];
9993 # we have to actually make it by removing tentative
9994 # breaks that were set within it
9995 undo_forced_breakpoint_stack(0);
9996 set_nobreaks( $index_start_one_line_block,
9997 $max_index_to_go - 1 );
9999 # then re-initialize for the next one-line block
10000 destroy_one_line_block();
10002 # then decide if we want to break after the '}' ..
10003 # We will keep going to allow certain brace followers as in:
10004 # do { $ifclosed = 1; last } unless $losing;
10006 # But make a line break if the curly ends a
10007 # significant block:
10010 $is_block_without_semicolon{$block_type}
10012 # Follow users break point for
10013 # one line block types U & G, such as a 'try' block
10014 || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
10017 # if needless semicolon follows we handle it later
10018 && $next_nonblank_token ne ';'
10021 output_line_to_go() unless ($no_internal_newlines);
10025 # set string indicating what we need to look for brace follower
10027 if ( $block_type eq 'do' ) {
10028 $rbrace_follower = \%is_do_follower;
10030 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
10031 $rbrace_follower = \%is_if_brace_follower;
10033 elsif ( $block_type eq 'else' ) {
10034 $rbrace_follower = \%is_else_brace_follower;
10037 # added eval for borris.t
10038 elsif ($is_sort_map_grep_eval{$block_type}
10039 || $is_one_line_block eq 'G' )
10041 $rbrace_follower = undef;
10046 elsif ( $block_type =~ /^sub\W*$/ ) {
10048 if ($is_one_line_block) {
10049 $rbrace_follower = \%is_anon_sub_1_brace_follower;
10052 $rbrace_follower = \%is_anon_sub_brace_follower;
10056 # None of the above: specify what can follow a closing
10057 # brace of a block which is not an
10058 # if/elsif/else/do/sort/map/grep/eval
10060 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
10062 $rbrace_follower = \%is_other_brace_follower;
10065 # See if an elsif block is followed by another elsif or else;
10067 if ( $block_type eq 'elsif' ) {
10069 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
10070 $looking_for_else = 1; # ok, check on next line
10074 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
10075 write_logfile_entry("No else block :(\n");
10080 # keep going after certain block types (map,sort,grep,eval)
10081 # added eval for borris.t
10087 # if no more tokens, postpone decision until re-entring
10088 elsif ( ( $next_nonblank_token_type eq 'b' )
10089 && $rOpts_add_newlines )
10091 unless ($rbrace_follower) {
10092 output_line_to_go() unless ($no_internal_newlines);
10096 elsif ($rbrace_follower) {
10098 unless ( $rbrace_follower->{$next_nonblank_token} ) {
10099 output_line_to_go() unless ($no_internal_newlines);
10101 $rbrace_follower = undef;
10105 output_line_to_go() unless ($no_internal_newlines);
10108 } # end treatment of closing block token
10111 elsif ( $type eq ';' ) {
10113 # kill one-line blocks with too many semicolons
10114 $semicolons_before_block_self_destruct--;
10116 ( $semicolons_before_block_self_destruct < 0 )
10117 || ( $semicolons_before_block_self_destruct == 0
10118 && $next_nonblank_token_type !~ /^[b\}]$/ )
10121 destroy_one_line_block();
10124 # Remove unnecessary semicolons, but not after bare
10125 # blocks, where it could be unsafe if the brace is
10129 $last_nonblank_token eq '}'
10131 $is_block_without_semicolon{
10132 $last_nonblank_block_type}
10133 || $last_nonblank_block_type =~ /^sub\s+\w/
10134 || $last_nonblank_block_type =~ /^\w+:$/ )
10136 || $last_nonblank_type eq ';'
10141 $rOpts->{'delete-semicolons'}
10143 # don't delete ; before a # because it would promote it
10144 # to a block comment
10145 && ( $next_nonblank_token_type ne '#' )
10148 note_deleted_semicolon();
10149 output_line_to_go()
10150 unless ( $no_internal_newlines
10151 || $index_start_one_line_block != UNDEFINED_INDEX );
10155 write_logfile_entry("Extra ';'\n");
10158 store_token_to_go();
10160 output_line_to_go()
10161 unless ( $no_internal_newlines
10162 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
10163 || ( $next_nonblank_token eq '}' ) );
10167 # handle here_doc target string
10168 elsif ( $type eq 'h' ) {
10169 $no_internal_newlines =
10170 1; # no newlines after seeing here-target
10171 destroy_one_line_block();
10172 store_token_to_go();
10175 # handle all other token types
10178 # if this is a blank...
10179 if ( $type eq 'b' ) {
10181 # make it just one character
10182 $token = ' ' if $rOpts_add_whitespace;
10184 # delete it if unwanted by whitespace rules
10185 # or we are deleting all whitespace
10186 my $ws = $$rwhite_space_flag[ $j + 1 ];
10187 if ( ( defined($ws) && $ws == -1 )
10188 || $rOpts_delete_old_whitespace )
10191 # unless it might make a syntax error
10193 unless is_essential_whitespace(
10194 $last_last_nonblank_token,
10195 $last_last_nonblank_type,
10196 $tokens_to_go[$max_index_to_go],
10197 $types_to_go[$max_index_to_go],
10198 $$rtokens[ $j + 1 ],
10199 $$rtoken_type[ $j + 1 ]
10203 store_token_to_go();
10206 # remember two previous nonblank OUTPUT tokens
10207 if ( $type ne '#' && $type ne 'b' ) {
10208 $last_last_nonblank_token = $last_nonblank_token;
10209 $last_last_nonblank_type = $last_nonblank_type;
10210 $last_nonblank_token = $token;
10211 $last_nonblank_type = $type;
10212 $last_nonblank_block_type = $block_type;
10215 # unset the continued-quote flag since it only applies to the
10216 # first token, and we want to resume normal formatting if
10217 # there are additional tokens on the line
10218 $in_continued_quote = 0;
10220 } # end of loop over all tokens in this 'line_of_tokens'
10222 # we have to flush ..
10225 # if there is a side comment
10226 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
10228 # if this line ends in a quote
10229 # NOTE: This is critically important for insuring that quoted lines
10230 # do not get processed by things like -sot and -sct
10233 # if this is a VERSION statement
10234 || $is_VERSION_statement
10236 # to keep a label at the end of a line
10239 # if we are instructed to keep all old line breaks
10240 || !$rOpts->{'delete-old-newlines'}
10243 destroy_one_line_block();
10244 output_line_to_go();
10247 # mark old line breakpoints in current output stream
10248 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
10249 $old_breakpoint_to_go[$max_index_to_go] = 1;
10251 } ## end sub print_line_of_tokens
10252 } ## end block print_line_of_tokens
10254 # sub output_line_to_go sends one logical line of tokens on down the
10255 # pipeline to the VerticalAligner package, breaking the line into continuation
10256 # lines as necessary. The line of tokens is ready to go in the "to_go"
10258 sub output_line_to_go {
10260 # debug stuff; this routine can be called from many points
10261 FORMATTER_DEBUG_FLAG_OUTPUT && do {
10262 my ( $a, $b, $c ) = caller;
10264 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
10266 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
10267 write_diagnostics("$output_str\n");
10270 # just set a tentative breakpoint if we might be in a one-line block
10271 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10272 set_forced_breakpoint($max_index_to_go);
10276 my $cscw_block_comment;
10277 $cscw_block_comment = add_closing_side_comment()
10278 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
10280 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
10282 # tell the -lp option we are outputting a batch so it can close
10283 # any unfinished items in its stack
10286 # If this line ends in a code block brace, set breaks at any
10287 # previous closing code block braces to breakup a chain of code
10288 # blocks on one line. This is very rare but can happen for
10289 # user-defined subs. For example we might be looking at this:
10290 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
10291 my $saw_good_break = 0; # flag to force breaks even if short line
10294 # looking for opening or closing block brace
10295 $block_type_to_go[$max_index_to_go]
10297 # but not one of these which are never duplicated on a line:
10298 # until|while|for|if|elsif|else
10299 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
10302 my $lev = $nesting_depth_to_go[$max_index_to_go];
10304 # Walk backwards from the end and
10305 # set break at any closing block braces at the same level.
10306 # But quit if we are not in a chain of blocks.
10307 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
10308 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
10309 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
10311 if ( $block_type_to_go[$i] ) {
10312 if ( $tokens_to_go[$i] eq '}' ) {
10313 set_forced_breakpoint($i);
10314 $saw_good_break = 1;
10318 # quit if we see anything besides words, function, blanks
10320 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
10325 my $imax = $max_index_to_go;
10327 # trim any blank tokens
10328 if ( $max_index_to_go >= 0 ) {
10329 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10330 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10333 # anything left to write?
10334 if ( $imin <= $imax ) {
10336 # add a blank line before certain key types but not after a comment
10337 if ( $last_line_leading_type !~ /^[#]/ ) {
10338 my $want_blank = 0;
10339 my $leading_token = $tokens_to_go[$imin];
10340 my $leading_type = $types_to_go[$imin];
10342 # blank lines before subs except declarations and one-liners
10343 # MCONVERSION LOCATION - for sub tokenization change
10344 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
10345 $want_blank = $rOpts->{'blank-lines-before-subs'}
10347 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10348 $imax ) !~ /^[\;\}]$/
10352 # break before all package declarations
10353 # MCONVERSION LOCATION - for tokenizaton change
10354 elsif ($leading_token =~ /^(package\s)/
10355 && $leading_type eq 'i' )
10357 $want_blank = $rOpts->{'blank-lines-before-packages'};
10360 # break before certain key blocks except one-liners
10361 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
10362 $want_blank = $rOpts->{'blank-lines-before-subs'}
10364 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10369 # Break before certain block types if we haven't had a
10370 # break at this level for a while. This is the
10371 # difficult decision..
10372 elsif ($leading_type eq 'k'
10373 && $last_line_leading_type ne 'b'
10374 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
10376 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
10377 if ( !defined($lc) ) { $lc = 0 }
10380 $rOpts->{'blanks-before-blocks'}
10381 && $lc >= $rOpts->{'long-block-line-count'}
10382 && $file_writer_object->get_consecutive_nonblank_lines() >=
10383 $rOpts->{'long-block-line-count'}
10385 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10392 # future: send blank line down normal path to VerticalAligner
10393 Perl::Tidy::VerticalAligner::flush();
10394 $file_writer_object->require_blank_code_lines($want_blank);
10398 # update blank line variables and count number of consecutive
10399 # non-blank, non-comment lines at this level
10400 $last_last_line_leading_level = $last_line_leading_level;
10401 $last_line_leading_level = $levels_to_go[$imin];
10402 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10403 $last_line_leading_type = $types_to_go[$imin];
10404 if ( $last_line_leading_level == $last_last_line_leading_level
10405 && $last_line_leading_type ne 'b'
10406 && $last_line_leading_type ne '#'
10407 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10409 $nonblank_lines_at_depth[$last_line_leading_level]++;
10412 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10415 FORMATTER_DEBUG_FLAG_FLUSH && do {
10416 my ( $package, $file, $line ) = caller;
10418 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10421 # add a couple of extra terminal blank tokens
10424 # set all forced breakpoints for good list formatting
10425 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10429 || $old_line_count_in_batch > 1
10431 # must always call scan_list() with unbalanced batches because it
10432 # is maintaining some stacks
10433 || is_unbalanced_batch()
10435 # call scan_list if we might want to break at commas
10437 $comma_count_in_batch
10438 && ( $rOpts_maximum_fields_per_table > 0
10439 || $rOpts_comma_arrow_breakpoints == 0 )
10442 # call scan_list if user may want to break open some one-line
10444 || ( $comma_arrow_count_contained
10445 && $rOpts_comma_arrow_breakpoints != 3 )
10448 ## This caused problems in one version of perl for unknown reasons:
10449 ## $saw_good_break ||= scan_list();
10450 my $sgb = scan_list();
10451 $saw_good_break ||= $sgb;
10454 # let $ri_first and $ri_last be references to lists of
10455 # first and last tokens of line fragments to output..
10456 my ( $ri_first, $ri_last );
10458 # write a single line if..
10461 # we aren't allowed to add any newlines
10462 !$rOpts_add_newlines
10464 # or, we don't already have an interior breakpoint
10465 # and we didn't see a good breakpoint
10467 !$forced_breakpoint_count
10468 && !$saw_good_break
10470 # and this line is 'short'
10475 @$ri_first = ($imin);
10476 @$ri_last = ($imax);
10479 # otherwise use multiple lines
10482 ( $ri_first, $ri_last, my $colon_count ) =
10483 set_continuation_breaks($saw_good_break);
10485 break_all_chain_tokens( $ri_first, $ri_last );
10487 break_equals( $ri_first, $ri_last );
10489 # now we do a correction step to clean this up a bit
10490 # (The only time we would not do this is for debugging)
10491 if ( $rOpts->{'recombine'} ) {
10492 ( $ri_first, $ri_last ) =
10493 recombine_breakpoints( $ri_first, $ri_last );
10496 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
10499 # do corrector step if -lp option is used
10500 my $do_not_pad = 0;
10501 if ($rOpts_line_up_parentheses) {
10502 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10504 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10506 prepare_for_new_input_lines();
10508 # output any new -cscw block comment
10509 if ($cscw_block_comment) {
10511 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10515 sub note_added_semicolon {
10516 $last_added_semicolon_at = $input_line_number;
10517 if ( $added_semicolon_count == 0 ) {
10518 $first_added_semicolon_at = $last_added_semicolon_at;
10520 $added_semicolon_count++;
10521 write_logfile_entry("Added ';' here\n");
10524 sub note_deleted_semicolon {
10525 $last_deleted_semicolon_at = $input_line_number;
10526 if ( $deleted_semicolon_count == 0 ) {
10527 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
10529 $deleted_semicolon_count++;
10530 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
10533 sub note_embedded_tab {
10534 $embedded_tab_count++;
10535 $last_embedded_tab_at = $input_line_number;
10536 if ( !$first_embedded_tab_at ) {
10537 $first_embedded_tab_at = $last_embedded_tab_at;
10540 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
10541 write_logfile_entry("Embedded tabs in quote or pattern\n");
10545 sub starting_one_line_block {
10547 # after seeing an opening curly brace, look for the closing brace
10548 # and see if the entire block will fit on a line. This routine is
10549 # not always right because it uses the old whitespace, so a check
10550 # is made later (at the closing brace) to make sure we really
10551 # have a one-line block. We have to do this preliminary check,
10552 # though, because otherwise we would always break at a semicolon
10553 # within a one-line block if the block contains multiple statements.
10555 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
10559 # kill any current block - we can only go 1 deep
10560 destroy_one_line_block();
10563 # 1=distance from start of block to opening brace exceeds line length
10568 # shouldn't happen: there must have been a prior call to
10569 # store_token_to_go to put the opening brace in the output stream
10570 if ( $max_index_to_go < 0 ) {
10571 warning("program bug: store_token_to_go called incorrectly\n");
10572 report_definite_bug();
10576 # cannot use one-line blocks with cuddled else/elsif lines
10577 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
10582 my $block_type = $$rblock_type[$j];
10584 # find the starting keyword for this block (such as 'if', 'else', ...)
10586 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
10587 $i_start = $max_index_to_go;
10590 # the previous nonblank token should start these block types
10591 elsif (( $last_last_nonblank_token_to_go eq $block_type )
10592 || ( $block_type =~ /^sub/ )
10593 || $block_type =~ /\(\)/ )
10595 $i_start = $last_last_nonblank_index_to_go;
10597 # Patch for signatures and extended syntax ...
10598 # if the previous token was a closing paren we should walk back up to
10599 # find the keyword (sub). Otherwise, we might form a one line block,
10600 # which stays intact, and cause the parenthesized expression to break
10601 # open. That looks bad.
10602 if ( $tokens_to_go[$i_start] eq ')' ) {
10604 # walk back to find the first token with this level
10605 # it should be the opening paren...
10606 my $lev_want = $levels_to_go[$i_start];
10607 for ( $i_start-- ; $i_start >= 0 ; $i_start-- ) {
10608 if ( $i_start <= 0 ) { return 0 }
10609 my $lev = $levels_to_go[$i_start];
10610 if ( $lev <= $lev_want ) {
10612 # if not an opening paren then probably a syntax error
10613 if ( $tokens_to_go[$i_start] ne '(' ) { return 0 }
10615 # now step back to the opening keyword (sub)
10617 if ( $i_start > 0 && $types_to_go[$i_start] eq 'b' ) {
10625 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
10627 # For something like "if (xxx) {", the keyword "if" will be
10628 # just after the most recent break. This will be 0 unless
10629 # we have just killed a one-line block and are starting another.
10631 # Note: cannot use inext_index_to_go[] here because that array
10632 # is still being constructed.
10633 $i_start = $index_max_forced_break + 1;
10634 if ( $types_to_go[$i_start] eq 'b' ) {
10638 # Patch to avoid breaking short blocks defined with extended_syntax:
10639 # Strip off any trailing () which was added in the parser to mark
10640 # the opening keyword. For example, in the following
10641 # create( TypeFoo $e) {$bubba}
10642 # the blocktype would be marked as create()
10643 my $stripped_block_type = $block_type;
10644 $stripped_block_type =~ s/\(\)$//;
10646 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
10651 # patch for SWITCH/CASE to retain one-line case/when blocks
10652 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
10654 # Note: cannot use inext_index_to_go[] here because that array
10655 # is still being constructed.
10656 $i_start = $index_max_forced_break + 1;
10657 if ( $types_to_go[$i_start] eq 'b' ) {
10660 unless ( $tokens_to_go[$i_start] eq $block_type ) {
10669 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
10673 # see if length is too long to even start
10674 if ( $pos > maximum_line_length($i_start) ) {
10678 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
10680 # old whitespace could be arbitrarily large, so don't use it
10681 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
10682 else { $pos += rtoken_length($i) }
10684 # Return false result if we exceed the maximum line length,
10685 if ( $pos > maximum_line_length($i_start) ) {
10689 # or encounter another opening brace before finding the closing brace.
10690 elsif ($$rtokens[$i] eq '{'
10691 && $$rtoken_type[$i] eq '{'
10692 && $$rblock_type[$i] )
10697 # if we find our closing brace..
10698 elsif ($$rtokens[$i] eq '}'
10699 && $$rtoken_type[$i] eq '}'
10700 && $$rblock_type[$i] )
10703 # be sure any trailing comment also fits on the line
10705 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
10707 # Patch for one-line sort/map/grep/eval blocks with side comments:
10708 # We will ignore the side comment length for sort/map/grep/eval
10709 # because this can lead to statements which change every time
10710 # perltidy is run. Here is an example from Denis Moskowitz which
10711 # oscillates between these two states without this patch:
10714 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10718 ## $_->foo ne 'bar'
10719 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10723 # When the first line is input it gets broken apart by the main
10724 # line break logic in sub print_line_of_tokens.
10725 # When the second line is input it gets recombined by
10726 # print_line_of_tokens and passed to the output routines. The
10727 # output routines (set_continuation_breaks) do not break it apart
10728 # because the bond strengths are set to the highest possible value
10729 # for grep/map/eval/sort blocks, so the first version gets output.
10730 # It would be possible to fix this by changing bond strengths,
10731 # but they are high to prevent errors in older versions of perl.
10733 if ( $$rtoken_type[$i_nonblank] eq '#'
10734 && !$is_sort_map_grep{$block_type} )
10737 $pos += rtoken_length($i_nonblank);
10739 if ( $i_nonblank > $i + 1 ) {
10741 # source whitespace could be anything, assume
10742 # at least one space before the hash on output
10743 if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
10744 else { $pos += rtoken_length( $i + 1 ) }
10747 if ( $pos >= maximum_line_length($i_start) ) {
10752 # ok, it's a one-line block
10753 create_one_line_block( $i_start, 20 );
10757 # just keep going for other characters
10762 # Allow certain types of new one-line blocks to form by joining
10763 # input lines. These can be safely done, but for other block types,
10764 # we keep old one-line blocks but do not form new ones. It is not
10765 # always a good idea to make as many one-line blocks as possible,
10766 # so other types are not done. The user can always use -mangle.
10767 if ( $is_sort_map_grep_eval{$block_type} ) {
10768 create_one_line_block( $i_start, 1 );
10774 sub unstore_token_to_go {
10776 # remove most recent token from output stream
10777 if ( $max_index_to_go > 0 ) {
10778 $max_index_to_go--;
10781 $max_index_to_go = UNDEFINED_INDEX;
10786 sub want_blank_line {
10788 $file_writer_object->want_blank_line() unless $in_format_skipping_section;
10791 sub write_unindented_line {
10793 $file_writer_object->write_line( $_[0] );
10798 # Undo continuation indentation in certain sequences
10799 # For example, we can undo continuation indentation in sort/map/grep chains
10800 # my $dat1 = pack( "n*",
10801 # map { $_, $lookup->{$_} }
10802 # sort { $a <=> $b }
10803 # grep { $lookup->{$_} ne $default } keys %$lookup );
10804 # To align the map/sort/grep keywords like this:
10805 # my $dat1 = pack( "n*",
10806 # map { $_, $lookup->{$_} }
10807 # sort { $a <=> $b }
10808 # grep { $lookup->{$_} ne $default } keys %$lookup );
10809 my ( $ri_first, $ri_last ) = @_;
10810 my ( $line_1, $line_2, $lev_last );
10811 my $this_line_is_semicolon_terminated;
10812 my $max_line = @$ri_first - 1;
10814 # looking at each line of this batch..
10815 # We are looking at leading tokens and looking for a sequence
10816 # all at the same level and higher level than enclosing lines.
10817 foreach my $line ( 0 .. $max_line ) {
10819 my $ibeg = $$ri_first[$line];
10820 my $lev = $levels_to_go[$ibeg];
10823 # if we have started a chain..
10826 # see if it continues..
10827 if ( $lev == $lev_last ) {
10828 if ( $types_to_go[$ibeg] eq 'k'
10829 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10832 # chain continues...
10833 # check for chain ending at end of a statement
10834 if ( $line == $max_line ) {
10836 # see of this line ends a statement
10837 my $iend = $$ri_last[$line];
10838 $this_line_is_semicolon_terminated =
10839 $types_to_go[$iend] eq ';'
10841 # with possible side comment
10842 || ( $types_to_go[$iend] eq '#'
10843 && $iend - $ibeg >= 2
10844 && $types_to_go[ $iend - 2 ] eq ';'
10845 && $types_to_go[ $iend - 1 ] eq 'b' );
10847 $line_2 = $line if ($this_line_is_semicolon_terminated);
10855 elsif ( $lev < $lev_last ) {
10857 # chain ends with previous line
10858 $line_2 = $line - 1;
10860 elsif ( $lev > $lev_last ) {
10866 # undo the continuation indentation if a chain ends
10867 if ( defined($line_2) && defined($line_1) ) {
10868 my $continuation_line_count = $line_2 - $line_1 + 1;
10869 @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10870 (0) x ($continuation_line_count);
10871 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10872 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
10877 # not in a chain yet..
10880 # look for start of a new sort/map/grep chain
10881 if ( $lev > $lev_last ) {
10882 if ( $types_to_go[$ibeg] eq 'k'
10883 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10896 # If there is a single, long parameter within parens, like this:
10898 # $self->command( "/msg "
10899 # . $infoline->chan
10900 # . " You said $1, but did you know that it's square was "
10901 # . $1 * $1 . " ?" );
10903 # we can remove the continuation indentation of the 2nd and higher lines
10904 # to achieve this effect, which is more pleasing:
10906 # $self->command("/msg "
10907 # . $infoline->chan
10908 # . " You said $1, but did you know that it's square was "
10909 # . $1 * $1 . " ?");
10911 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
10912 my $max_line = @$ri_first - 1;
10914 # must be multiple lines
10915 return unless $max_line > $line_open;
10917 my $lev_start = $levels_to_go[$i_start];
10918 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
10920 # see if all additional lines in this container have continuation
10923 my $line_1 = 1 + $line_open;
10924 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
10925 my $ibeg = $$ri_first[$n];
10926 my $iend = $$ri_last[$n];
10927 if ( $ibeg eq $closing_index ) { $n--; last }
10928 return if ( $lev_start != $levels_to_go[$ibeg] );
10929 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10930 last if ( $closing_index <= $iend );
10933 # we can reduce the indentation of all continuation lines
10934 my $continuation_line_count = $n - $line_open;
10935 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10936 (0) x ($continuation_line_count);
10937 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10938 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10943 # insert $pad_spaces before token number $ipad
10944 my ( $ipad, $pad_spaces ) = @_;
10945 if ( $pad_spaces > 0 ) {
10946 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
10948 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
10949 $tokens_to_go[$ipad] = "";
10957 $token_lengths_to_go[$ipad] += $pad_spaces;
10958 for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) {
10959 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
10968 @_ = qw( + - * / );
10969 @is_math_op{@_} = (1) x scalar(@_);
10972 sub set_logical_padding {
10974 # Look at a batch of lines and see if extra padding can improve the
10975 # alignment when there are certain leading operators. Here is an
10976 # example, in which some extra space is introduced before
10977 # '( $year' to make it line up with the subsequent lines:
10979 # if ( ( $Year < 1601 )
10980 # || ( $Year > 2899 )
10981 # || ( $EndYear < 1601 )
10982 # || ( $EndYear > 2899 ) )
10984 # &Error_OutOfRange;
10987 my ( $ri_first, $ri_last ) = @_;
10988 my $max_line = @$ri_first - 1;
10990 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
10992 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10994 # looking at each line of this batch..
10995 foreach $line ( 0 .. $max_line - 1 ) {
10997 # see if the next line begins with a logical operator
10998 $ibeg = $$ri_first[$line];
10999 $iend = $$ri_last[$line];
11000 $ibeg_next = $$ri_first[ $line + 1 ];
11001 $tok_next = $tokens_to_go[$ibeg_next];
11002 $type_next = $types_to_go[$ibeg_next];
11004 $has_leading_op_next = ( $tok_next =~ /^\w/ )
11005 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
11006 : $is_chain_operator{$type_next}; # and, or
11008 next unless ($has_leading_op_next);
11010 # next line must not be at lesser depth
11012 if ( $nesting_depth_to_go[$ibeg] >
11013 $nesting_depth_to_go[$ibeg_next] );
11015 # identify the token in this line to be padded on the left
11018 # handle lines at same depth...
11019 if ( $nesting_depth_to_go[$ibeg] ==
11020 $nesting_depth_to_go[$ibeg_next] )
11023 # if this is not first line of the batch ...
11026 # and we have leading operator..
11027 next if $has_leading_op;
11029 # Introduce padding if..
11030 # 1. the previous line is at lesser depth, or
11031 # 2. the previous line ends in an assignment
11032 # 3. the previous line ends in a 'return'
11033 # 4. the previous line ends in a comma
11034 # Example 1: previous line at lesser depth
11035 # if ( ( $Year < 1601 ) # <- we are here but
11036 # || ( $Year > 2899 ) # list has not yet
11037 # || ( $EndYear < 1601 ) # collapsed vertically
11038 # || ( $EndYear > 2899 ) )
11041 # Example 2: previous line ending in assignment:
11043 # $year % 4 ? 0 # <- We are here
11044 # : $year % 100 ? 1
11045 # : $year % 400 ? 0
11048 # Example 3: previous line ending in comma:
11055 # be sure levels agree (do not indent after an indented 'if')
11057 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
11059 # allow padding on first line after a comma but only if:
11060 # (1) this is line 2 and
11061 # (2) there are at more than three lines and
11062 # (3) lines 3 and 4 have the same leading operator
11063 # These rules try to prevent padding within a long
11064 # comma-separated list.
11066 if ( $types_to_go[$iendm] eq ','
11070 my $ibeg_next_next = $$ri_first[ $line + 2 ];
11071 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
11072 $ok_comma = $tok_next_next eq $tok_next;
11077 $is_assignment{ $types_to_go[$iendm] }
11079 || ( $nesting_depth_to_go[$ibegm] <
11080 $nesting_depth_to_go[$ibeg] )
11081 || ( $types_to_go[$iendm] eq 'k'
11082 && $tokens_to_go[$iendm] eq 'return' )
11085 # we will add padding before the first token
11089 # for first line of the batch..
11092 # WARNING: Never indent if first line is starting in a
11093 # continued quote, which would change the quote.
11094 next if $starting_in_quote;
11096 # if this is text after closing '}'
11097 # then look for an interior token to pad
11098 if ( $types_to_go[$ibeg] eq '}' ) {
11102 # otherwise, we might pad if it looks really good
11105 # we might pad token $ibeg, so be sure that it
11106 # is at the same depth as the next line.
11108 if ( $nesting_depth_to_go[$ibeg] !=
11109 $nesting_depth_to_go[$ibeg_next] );
11111 # We can pad on line 1 of a statement if at least 3
11112 # lines will be aligned. Otherwise, it
11113 # can look very confusing.
11115 # We have to be careful not to pad if there are too few
11116 # lines. The current rule is:
11117 # (1) in general we require at least 3 consecutive lines
11118 # with the same leading chain operator token,
11119 # (2) but an exception is that we only require two lines
11120 # with leading colons if there are no more lines. For example,
11121 # the first $i in the following snippet would get padding
11122 # by the second rule:
11124 # $i == 1 ? ( "First", "Color" )
11125 # : $i == 2 ? ( "Then", "Rarity" )
11126 # : ( "Then", "Name" );
11128 if ( $max_line > 1 ) {
11129 my $leading_token = $tokens_to_go[$ibeg_next];
11132 # never indent line 1 of a '.' series because
11133 # previous line is most likely at same level.
11134 # TODO: we should also look at the leasing_spaces
11135 # of the last output line and skip if it is same
11137 next if ( $leading_token eq '.' );
11140 foreach my $l ( 2 .. 3 ) {
11141 last if ( $line + $l > $max_line );
11142 my $ibeg_next_next = $$ri_first[ $line + $l ];
11143 if ( $tokens_to_go[$ibeg_next_next] ne
11146 $tokens_differ = 1;
11151 next if ($tokens_differ);
11152 next if ( $count < 3 && $leading_token ne ':' );
11162 # find interior token to pad if necessary
11163 if ( !defined($ipad) ) {
11165 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
11167 # find any unclosed container
11169 unless ( $type_sequence_to_go[$i]
11170 && $mate_index_to_go[$i] > $iend );
11172 # find next nonblank token to pad
11173 $ipad = $inext_to_go[$i];
11174 last if ( $ipad > $iend );
11179 # We cannot pad a leading token at the lowest level because
11180 # it could cause a bug in which the starting indentation
11181 # level is guessed incorrectly each time the code is run
11182 # though perltidy, thus causing the code to march off to
11183 # the right. For example, the following snippet would have
11186 ## ov_method mycan( $package, '(""' ), $package
11187 ## or ov_method mycan( $package, '(0+' ), $package
11188 ## or ov_method mycan( $package, '(bool' ), $package
11189 ## or ov_method mycan( $package, '(nomethod' ), $package;
11191 # If this snippet is within a block this won't happen
11192 # unless the user just processes the snippet alone within
11193 # an editor. In that case either the user will see and
11194 # fix the problem or it will be corrected next time the
11195 # entire file is processed with perltidy.
11196 next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
11198 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
11199 ## IT DID MORE HARM THAN GOOD
11201 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
11204 ##? # do not put leading padding for just 2 lines of math
11205 ##? if ( $ipad == $ibeg
11207 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
11208 ##? && $is_math_op{$type_next}
11209 ##? && $line + 2 <= $max_line )
11211 ##? my $ibeg_next_next = $$ri_first[ $line + 2 ];
11212 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
11213 ##? next if !$is_math_op{$type_next_next};
11216 # next line must not be at greater depth
11217 my $iend_next = $$ri_last[ $line + 1 ];
11219 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
11220 $nesting_depth_to_go[$ipad] );
11222 # lines must be somewhat similar to be padded..
11223 my $inext_next = $inext_to_go[$ibeg_next];
11224 my $type = $types_to_go[$ipad];
11225 my $type_next = $types_to_go[ $ipad + 1 ];
11227 # see if there are multiple continuation lines
11228 my $logical_continuation_lines = 1;
11229 if ( $line + 2 <= $max_line ) {
11230 my $leading_token = $tokens_to_go[$ibeg_next];
11231 my $ibeg_next_next = $$ri_first[ $line + 2 ];
11232 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
11233 && $nesting_depth_to_go[$ibeg_next] eq
11234 $nesting_depth_to_go[$ibeg_next_next] )
11236 $logical_continuation_lines++;
11240 # see if leading types match
11241 my $types_match = $types_to_go[$inext_next] eq $type;
11242 my $matches_without_bang;
11244 # if first line has leading ! then compare the following token
11245 if ( !$types_match && $type eq '!' ) {
11246 $types_match = $matches_without_bang =
11247 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
11252 # either we have multiple continuation lines to follow
11253 # and we are not padding the first token
11254 ( $logical_continuation_lines > 1 && $ipad > 0 )
11262 # and keywords must match if keyword
11265 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
11271 #----------------------begin special checks--------------
11274 # A check is needed before we can make the pad.
11275 # If we are in a list with some long items, we want each
11276 # item to stand out. So in the following example, the
11277 # first line beginning with '$casefold->' would look good
11278 # padded to align with the next line, but then it
11279 # would be indented more than the last line, so we
11283 # $casefold->{code} eq '0041'
11284 # && $casefold->{status} eq 'C'
11285 # && $casefold->{mapping} eq '0061',
11290 # It would be faster, and almost as good, to use a comma
11291 # count, and not pad if comma_count > 1 and the previous
11292 # line did not end with a comma.
11296 my $ibg = $$ri_first[ $line + 1 ];
11297 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
11299 # just use simplified formula for leading spaces to avoid
11300 # needless sub calls
11301 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
11303 # look at each line beyond the next ..
11305 foreach $l ( $line + 2 .. $max_line ) {
11306 my $ibg = $$ri_first[$l];
11308 # quit looking at the end of this container
11310 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
11311 || ( $nesting_depth_to_go[$ibg] < $depth );
11313 # cannot do the pad if a later line would be
11315 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
11321 # don't pad if we end in a broken list
11322 if ( $l == $max_line ) {
11323 my $i2 = $$ri_last[$l];
11324 if ( $types_to_go[$i2] eq '#' ) {
11325 my $i1 = $$ri_first[$l];
11328 terminal_type( \@types_to_go, \@block_type_to_go,
11335 # a minus may introduce a quoted variable, and we will
11336 # add the pad only if this line begins with a bare word,
11337 # such as for the word 'Button' here:
11339 # Button => "Print letter \"~$_\"",
11340 # -command => [ sub { print "$_[0]\n" }, $_ ],
11341 # -accelerator => "Meta+$_"
11344 # On the other hand, if 'Button' is quoted, it looks best
11347 # 'Button' => "Print letter \"~$_\"",
11348 # -command => [ sub { print "$_[0]\n" }, $_ ],
11349 # -accelerator => "Meta+$_"
11351 if ( $types_to_go[$ibeg_next] eq 'm' ) {
11352 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
11355 next unless $ok_to_pad;
11357 #----------------------end special check---------------
11359 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
11360 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
11361 $pad_spaces = $length_2 - $length_1;
11363 # If the first line has a leading ! and the second does
11364 # not, then remove one space to try to align the next
11365 # leading characters, which are often the same. For example:
11367 # || $ts == $self->Holder
11368 # || $self->Holder->Type eq "Arena" )
11370 # This usually helps readability, but if there are subsequent
11371 # ! operators things will still get messed up. For example:
11373 # if ( !exists $Net::DNS::typesbyname{$qtype}
11374 # && exists $Net::DNS::classesbyname{$qtype}
11375 # && !exists $Net::DNS::classesbyname{$qclass}
11376 # && exists $Net::DNS::typesbyname{$qclass} )
11377 # We can't fix that.
11378 if ($matches_without_bang) { $pad_spaces-- }
11380 # make sure this won't change if -lp is used
11381 my $indentation_1 = $leading_spaces_to_go[$ibeg];
11382 if ( ref($indentation_1) ) {
11383 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
11384 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
11385 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 )
11392 # we might be able to handle a pad of -1 by removing a blank
11394 if ( $pad_spaces < 0 ) {
11396 if ( $pad_spaces == -1 ) {
11397 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
11399 pad_token( $ipad - 1, $pad_spaces );
11405 # now apply any padding for alignment
11406 if ( $ipad >= 0 && $pad_spaces ) {
11408 my $length_t = total_line_length( $ibeg, $iend );
11409 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
11411 pad_token( $ipad, $pad_spaces );
11419 $has_leading_op = $has_leading_op_next;
11420 } # end of loop over lines
11425 sub correct_lp_indentation {
11427 # When the -lp option is used, we need to make a last pass through
11428 # each line to correct the indentation positions in case they differ
11429 # from the predictions. This is necessary because perltidy uses a
11430 # predictor/corrector method for aligning with opening parens. The
11431 # predictor is usually good, but sometimes stumbles. The corrector
11432 # tries to patch things up once the actual opening paren locations
11434 my ( $ri_first, $ri_last ) = @_;
11435 my $do_not_pad = 0;
11437 # Note on flag '$do_not_pad':
11438 # We want to avoid a situation like this, where the aligner inserts
11439 # whitespace before the '=' to align it with a previous '=', because
11440 # otherwise the parens might become mis-aligned in a situation like
11441 # this, where the '=' has become aligned with the previous line,
11442 # pushing the opening '(' forward beyond where we want it.
11444 # $mkFloor::currentRoom = '';
11445 # $mkFloor::c_entry = $c->Entry(
11447 # -relief => 'sunken',
11451 # We leave it to the aligner to decide how to do this.
11453 # first remove continuation indentation if appropriate
11454 my $max_line = @$ri_first - 1;
11456 # looking at each line of this batch..
11457 my ( $ibeg, $iend );
11459 foreach $line ( 0 .. $max_line ) {
11460 $ibeg = $$ri_first[$line];
11461 $iend = $$ri_last[$line];
11463 # looking at each token in this output line..
11465 foreach $i ( $ibeg .. $iend ) {
11467 # How many space characters to place before this token
11468 # for special alignment. Actual padding is done in the
11471 # looking for next unvisited indentation item
11472 my $indentation = $leading_spaces_to_go[$i];
11473 if ( !$indentation->get_MARKED() ) {
11474 $indentation->set_MARKED(1);
11476 # looking for indentation item for which we are aligning
11477 # with parens, braces, and brackets
11478 next unless ( $indentation->get_ALIGN_PAREN() );
11480 # skip closed container on this line
11481 if ( $i > $ibeg ) {
11482 my $im = max( $ibeg, $iprev_to_go[$i] );
11483 if ( $type_sequence_to_go[$im]
11484 && $mate_index_to_go[$im] <= $iend )
11490 if ( $line == 1 && $i == $ibeg ) {
11494 # Ok, let's see what the error is and try to fix it
11496 my $predicted_pos = $indentation->get_SPACES();
11497 if ( $i > $ibeg ) {
11499 # token is mid-line - use length to previous token
11500 $actual_pos = total_line_length( $ibeg, $i - 1 );
11502 # for mid-line token, we must check to see if all
11503 # additional lines have continuation indentation,
11504 # and remove it if so. Otherwise, we do not get
11506 my $closing_index = $indentation->get_CLOSED();
11507 if ( $closing_index > $iend ) {
11508 my $ibeg_next = $$ri_first[ $line + 1 ];
11509 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
11510 undo_lp_ci( $line, $i, $closing_index, $ri_first,
11515 elsif ( $line > 0 ) {
11517 # handle case where token starts a new line;
11518 # use length of previous line
11519 my $ibegm = $$ri_first[ $line - 1 ];
11520 my $iendm = $$ri_last[ $line - 1 ];
11521 $actual_pos = total_line_length( $ibegm, $iendm );
11525 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
11529 # token is first character of first line of batch
11530 $actual_pos = $predicted_pos;
11533 my $move_right = $actual_pos - $predicted_pos;
11535 # done if no error to correct (gnu2.t)
11536 if ( $move_right == 0 ) {
11537 $indentation->set_RECOVERABLE_SPACES($move_right);
11541 # if we have not seen closure for this indentation in
11542 # this batch, we can only pass on a request to the
11544 my $closing_index = $indentation->get_CLOSED();
11546 if ( $closing_index < 0 ) {
11547 $indentation->set_RECOVERABLE_SPACES($move_right);
11551 # If necessary, look ahead to see if there is really any
11552 # leading whitespace dependent on this whitespace, and
11553 # also find the longest line using this whitespace.
11554 # Since it is always safe to move left if there are no
11555 # dependents, we only need to do this if we may have
11556 # dependent nodes or need to move right.
11558 my $right_margin = 0;
11559 my $have_child = $indentation->get_HAVE_CHILD();
11561 my %saw_indentation;
11562 my $line_count = 1;
11563 $saw_indentation{$indentation} = $indentation;
11565 if ( $have_child || $move_right > 0 ) {
11567 my $max_length = 0;
11568 if ( $i == $ibeg ) {
11569 $max_length = total_line_length( $ibeg, $iend );
11572 # look ahead at the rest of the lines of this batch..
11574 foreach $line_t ( $line + 1 .. $max_line ) {
11575 my $ibeg_t = $$ri_first[$line_t];
11576 my $iend_t = $$ri_last[$line_t];
11577 last if ( $closing_index <= $ibeg_t );
11579 # remember all different indentation objects
11580 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
11581 $saw_indentation{$indentation_t} = $indentation_t;
11584 # remember longest line in the group
11585 my $length_t = total_line_length( $ibeg_t, $iend_t );
11586 if ( $length_t > $max_length ) {
11587 $max_length = $length_t;
11590 $right_margin = maximum_line_length($ibeg) - $max_length;
11591 if ( $right_margin < 0 ) { $right_margin = 0 }
11594 my $first_line_comma_count =
11595 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
11596 my $comma_count = $indentation->get_COMMA_COUNT();
11597 my $arrow_count = $indentation->get_ARROW_COUNT();
11599 # This is a simple approximate test for vertical alignment:
11600 # if we broke just after an opening paren, brace, bracket,
11601 # and there are 2 or more commas in the first line,
11602 # and there are no '=>'s,
11603 # then we are probably vertically aligned. We could set
11604 # an exact flag in sub scan_list, but this is good
11606 my $indentation_count = keys %saw_indentation;
11607 my $is_vertically_aligned =
11609 && $first_line_comma_count > 1
11610 && $indentation_count == 1
11611 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
11613 # Make the move if possible ..
11616 # we can always move left
11619 # but we should only move right if we are sure it will
11620 # not spoil vertical alignment
11621 || ( $comma_count == 0 )
11622 || ( $comma_count > 0 && !$is_vertically_aligned )
11626 ( $move_right <= $right_margin )
11630 foreach ( keys %saw_indentation ) {
11631 $saw_indentation{$_}
11632 ->permanently_decrease_AVAILABLE_SPACES( -$move );
11636 # Otherwise, record what we want and the vertical aligner
11637 # will try to recover it.
11639 $indentation->set_RECOVERABLE_SPACES($move_right);
11644 return $do_not_pad;
11647 # flush is called to output any tokens in the pipeline, so that
11648 # an alternate source of lines can be written in the correct order
11651 destroy_one_line_block();
11652 output_line_to_go();
11653 Perl::Tidy::VerticalAligner::flush();
11656 sub reset_block_text_accumulator {
11658 # save text after 'if' and 'elsif' to append after 'else'
11659 if ($accumulating_text_for_block) {
11661 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
11662 push @{$rleading_block_if_elsif_text}, $leading_block_text;
11665 $accumulating_text_for_block = "";
11666 $leading_block_text = "";
11667 $leading_block_text_level = 0;
11668 $leading_block_text_length_exceeded = 0;
11669 $leading_block_text_line_number = 0;
11670 $leading_block_text_line_length = 0;
11673 sub set_block_text_accumulator {
11675 $accumulating_text_for_block = $tokens_to_go[$i];
11676 if ( $accumulating_text_for_block !~ /^els/ ) {
11677 $rleading_block_if_elsif_text = [];
11679 $leading_block_text = "";
11680 $leading_block_text_level = $levels_to_go[$i];
11681 $leading_block_text_line_number =
11682 $vertical_aligner_object->get_output_line_number();
11683 $leading_block_text_length_exceeded = 0;
11685 # this will contain the column number of the last character
11686 # of the closing side comment
11687 $leading_block_text_line_length =
11688 length($csc_last_label) +
11689 length($accumulating_text_for_block) +
11690 length( $rOpts->{'closing-side-comment-prefix'} ) +
11691 $leading_block_text_level * $rOpts_indent_columns + 3;
11694 sub accumulate_block_text {
11697 # accumulate leading text for -csc, ignoring any side comments
11698 if ( $accumulating_text_for_block
11699 && !$leading_block_text_length_exceeded
11700 && $types_to_go[$i] ne '#' )
11703 my $added_length = $token_lengths_to_go[$i];
11704 $added_length += 1 if $i == 0;
11705 my $new_line_length = $leading_block_text_line_length + $added_length;
11707 # we can add this text if we don't exceed some limits..
11710 # we must not have already exceeded the text length limit
11711 length($leading_block_text) <
11712 $rOpts_closing_side_comment_maximum_text
11715 # the new total line length must be below the line length limit
11716 # or the new length must be below the text length limit
11717 # (ie, we may allow one token to exceed the text length limit)
11720 maximum_line_length_for_level($leading_block_text_level)
11722 || length($leading_block_text) + $added_length <
11723 $rOpts_closing_side_comment_maximum_text
11726 # UNLESS: we are adding a closing paren before the brace we seek.
11727 # This is an attempt to avoid situations where the ... to be
11728 # added are longer than the omitted right paren, as in:
11730 # foreach my $item (@a_rather_long_variable_name_here) {
11732 # } ## end foreach my $item (@a_rather_long_variable_name_here...
11735 $tokens_to_go[$i] eq ')'
11738 $i + 1 <= $max_index_to_go
11739 && $block_type_to_go[ $i + 1 ] eq
11740 $accumulating_text_for_block
11742 || ( $i + 2 <= $max_index_to_go
11743 && $block_type_to_go[ $i + 2 ] eq
11744 $accumulating_text_for_block )
11750 # add an extra space at each newline
11751 if ( $i == 0 ) { $leading_block_text .= ' ' }
11753 # add the token text
11754 $leading_block_text .= $tokens_to_go[$i];
11755 $leading_block_text_line_length = $new_line_length;
11758 # show that text was truncated if necessary
11759 elsif ( $types_to_go[$i] ne 'b' ) {
11760 $leading_block_text_length_exceeded = 1;
11761 ## Please see file perltidy.ERR
11762 $leading_block_text .= '...';
11768 my %is_if_elsif_else_unless_while_until_for_foreach;
11772 # These block types may have text between the keyword and opening
11773 # curly. Note: 'else' does not, but must be included to allow trailing
11774 # if/elsif text to be appended.
11775 # patch for SWITCH/CASE: added 'case' and 'when'
11776 @_ = qw(if elsif else unless while until for foreach case when);
11777 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
11781 sub accumulate_csc_text {
11783 # called once per output buffer when -csc is used. Accumulates
11784 # the text placed after certain closing block braces.
11785 # Defines and returns the following for this buffer:
11787 my $block_leading_text = ""; # the leading text of the last '}'
11788 my $rblock_leading_if_elsif_text;
11789 my $i_block_leading_text =
11790 -1; # index of token owning block_leading_text
11791 my $block_line_count = 100; # how many lines the block spans
11792 my $terminal_type = 'b'; # type of last nonblank token
11793 my $i_terminal = 0; # index of last nonblank token
11794 my $terminal_block_type = "";
11796 # update most recent statement label
11797 $csc_last_label = "" unless ($csc_last_label);
11798 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
11799 my $block_label = $csc_last_label;
11801 # Loop over all tokens of this batch
11802 for my $i ( 0 .. $max_index_to_go ) {
11803 my $type = $types_to_go[$i];
11804 my $block_type = $block_type_to_go[$i];
11805 my $token = $tokens_to_go[$i];
11807 # remember last nonblank token type
11808 if ( $type ne '#' && $type ne 'b' ) {
11809 $terminal_type = $type;
11810 $terminal_block_type = $block_type;
11814 my $type_sequence = $type_sequence_to_go[$i];
11815 if ( $block_type && $type_sequence ) {
11817 if ( $token eq '}' ) {
11819 # restore any leading text saved when we entered this block
11820 if ( defined( $block_leading_text{$type_sequence} ) ) {
11821 ( $block_leading_text, $rblock_leading_if_elsif_text )
11822 = @{ $block_leading_text{$type_sequence} };
11823 $i_block_leading_text = $i;
11824 delete $block_leading_text{$type_sequence};
11825 $rleading_block_if_elsif_text =
11826 $rblock_leading_if_elsif_text;
11829 if ( defined( $csc_block_label{$type_sequence} ) ) {
11830 $block_label = $csc_block_label{$type_sequence};
11831 delete $csc_block_label{$type_sequence};
11834 # if we run into a '}' then we probably started accumulating
11835 # at something like a trailing 'if' clause..no harm done.
11836 if ( $accumulating_text_for_block
11837 && $levels_to_go[$i] <= $leading_block_text_level )
11839 my $lev = $levels_to_go[$i];
11840 reset_block_text_accumulator();
11843 if ( defined( $block_opening_line_number{$type_sequence} ) )
11845 my $output_line_number =
11846 $vertical_aligner_object->get_output_line_number();
11847 $block_line_count =
11848 $output_line_number -
11849 $block_opening_line_number{$type_sequence} + 1;
11850 delete $block_opening_line_number{$type_sequence};
11854 # Error: block opening line undefined for this line..
11855 # This shouldn't be possible, but it is not a
11856 # significant problem.
11860 elsif ( $token eq '{' ) {
11863 $vertical_aligner_object->get_output_line_number();
11864 $block_opening_line_number{$type_sequence} = $line_number;
11866 # set a label for this block, except for
11867 # a bare block which already has the label
11868 # A label can only be used on the next {
11869 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
11870 $csc_block_label{$type_sequence} = $csc_last_label;
11871 $csc_last_label = "";
11873 if ( $accumulating_text_for_block
11874 && $levels_to_go[$i] == $leading_block_text_level )
11877 if ( $accumulating_text_for_block eq $block_type ) {
11879 # save any leading text before we enter this block
11880 $block_leading_text{$type_sequence} = [
11881 $leading_block_text,
11882 $rleading_block_if_elsif_text
11884 $block_opening_line_number{$type_sequence} =
11885 $leading_block_text_line_number;
11886 reset_block_text_accumulator();
11890 # shouldn't happen, but not a serious error.
11891 # We were accumulating -csc text for block type
11892 # $accumulating_text_for_block and unexpectedly
11893 # encountered a '{' for block type $block_type.
11900 && $csc_new_statement_ok
11901 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
11902 && $token =~ /$closing_side_comment_list_pattern/o )
11904 set_block_text_accumulator($i);
11908 # note: ignoring type 'q' because of tricks being played
11909 # with 'q' for hanging side comments
11910 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
11911 $csc_new_statement_ok =
11912 ( $block_type || $type eq 'J' || $type eq ';' );
11915 && $accumulating_text_for_block
11916 && $levels_to_go[$i] == $leading_block_text_level )
11918 reset_block_text_accumulator();
11921 accumulate_block_text($i);
11926 # Treat an 'else' block specially by adding preceding 'if' and
11927 # 'elsif' text. Otherwise, the 'end else' is not helpful,
11928 # especially for cuddled-else formatting.
11929 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
11930 $block_leading_text =
11931 make_else_csc_text( $i_terminal, $terminal_block_type,
11932 $block_leading_text, $rblock_leading_if_elsif_text );
11935 # if this line ends in a label then remember it for the next pass
11936 $csc_last_label = "";
11937 if ( $terminal_type eq 'J' ) {
11938 $csc_last_label = $tokens_to_go[$i_terminal];
11941 return ( $terminal_type, $i_terminal, $i_block_leading_text,
11942 $block_leading_text, $block_line_count, $block_label );
11946 sub make_else_csc_text {
11948 # create additional -csc text for an 'else' and optionally 'elsif',
11949 # depending on the value of switch
11950 # $rOpts_closing_side_comment_else_flag:
11952 # = 0 add 'if' text to trailing else
11953 # = 1 same as 0 plus:
11954 # add 'if' to 'elsif's if can fit in line length
11955 # add last 'elsif' to trailing else if can fit in one line
11956 # = 2 same as 1 but do not check if exceed line length
11958 # $rif_elsif_text = a reference to a list of all previous closing
11959 # side comments created for this if block
11961 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
11962 my $csc_text = $block_leading_text;
11964 if ( $block_type eq 'elsif'
11965 && $rOpts_closing_side_comment_else_flag == 0 )
11970 my $count = @{$rif_elsif_text};
11971 return $csc_text unless ($count);
11973 my $if_text = '[ if' . $rif_elsif_text->[0];
11975 # always show the leading 'if' text on 'else'
11976 if ( $block_type eq 'else' ) {
11977 $csc_text .= $if_text;
11980 # see if that's all
11981 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
11985 my $last_elsif_text = "";
11986 if ( $count > 1 ) {
11987 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
11988 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
11991 # tentatively append one more item
11992 my $saved_text = $csc_text;
11993 if ( $block_type eq 'else' ) {
11994 $csc_text .= $last_elsif_text;
11997 $csc_text .= ' ' . $if_text;
12000 # all done if no length checks requested
12001 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
12005 # undo it if line length exceeded
12007 length($csc_text) +
12008 length($block_type) +
12009 length( $rOpts->{'closing-side-comment-prefix'} ) +
12010 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
12011 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
12012 $csc_text = $saved_text;
12017 { # sub balance_csc_text
12032 sub balance_csc_text {
12034 # Append characters to balance a closing side comment so that editors
12035 # such as vim can correctly jump through code.
12037 # input = ## end foreach my $foo ( sort { $b ...
12038 # output = ## end foreach my $foo ( sort { $b ...})
12040 # NOTE: This routine does not currently filter out structures within
12041 # quoted text because the bounce algorithms in text editors do not
12042 # necessarily do this either (a version of vim was checked and
12043 # did not do this).
12045 # Some complex examples which will cause trouble for some editors:
12046 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
12047 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
12048 # if ( $1 eq '{' ) {
12049 # test file test1/braces.pl has many such examples.
12053 # loop to examine characters one-by-one, RIGHT to LEFT and
12054 # build a balancing ending, LEFT to RIGHT.
12055 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
12057 my $char = substr( $csc, $pos, 1 );
12059 # ignore everything except structural characters
12060 next unless ( $matching_char{$char} );
12062 # pop most recently appended character
12063 my $top = chop($csc);
12065 # push it back plus the mate to the newest character
12066 # unless they balance each other.
12067 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
12070 # return the balanced string
12075 sub add_closing_side_comment {
12077 # add closing side comments after closing block braces if -csc used
12078 my $cscw_block_comment;
12080 #---------------------------------------------------------------
12081 # Step 1: loop through all tokens of this line to accumulate
12082 # the text needed to create the closing side comments. Also see
12083 # how the line ends.
12084 #---------------------------------------------------------------
12086 my ( $terminal_type, $i_terminal, $i_block_leading_text,
12087 $block_leading_text, $block_line_count, $block_label )
12088 = accumulate_csc_text();
12090 #---------------------------------------------------------------
12091 # Step 2: make the closing side comment if this ends a block
12092 #---------------------------------------------------------------
12093 my $have_side_comment = $i_terminal != $max_index_to_go;
12095 # if this line might end in a block closure..
12097 $terminal_type eq '}'
12102 # the block is long enough
12103 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
12105 # or there is an existing comment to check
12106 || ( $have_side_comment
12107 && $rOpts->{'closing-side-comment-warnings'} )
12110 # .. and if this is one of the types of interest
12111 && $block_type_to_go[$i_terminal] =~
12112 /$closing_side_comment_list_pattern/o
12114 # .. but not an anonymous sub
12115 # These are not normally of interest, and their closing braces are
12116 # often followed by commas or semicolons anyway. This also avoids
12117 # possible erratic output due to line numbering inconsistencies
12118 # in the cases where their closing braces terminate a line.
12119 && $block_type_to_go[$i_terminal] ne 'sub'
12121 # ..and the corresponding opening brace must is not in this batch
12122 # (because we do not need to tag one-line blocks, although this
12123 # should also be caught with a positive -csci value)
12124 && $mate_index_to_go[$i_terminal] < 0
12129 # this is the last token (line doesn't have a side comment)
12130 !$have_side_comment
12132 # or the old side comment is a closing side comment
12133 || $tokens_to_go[$max_index_to_go] =~
12134 /$closing_side_comment_prefix_pattern/o
12139 # then make the closing side comment text
12140 if ($block_label) { $block_label .= " " }
12142 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
12144 # append any extra descriptive text collected above
12145 if ( $i_block_leading_text == $i_terminal ) {
12146 $token .= $block_leading_text;
12149 $token = balance_csc_text($token)
12150 if $rOpts->{'closing-side-comments-balanced'};
12152 $token =~ s/\s*$//; # trim any trailing whitespace
12154 # handle case of existing closing side comment
12155 if ($have_side_comment) {
12157 # warn if requested and tokens differ significantly
12158 if ( $rOpts->{'closing-side-comment-warnings'} ) {
12159 my $old_csc = $tokens_to_go[$max_index_to_go];
12160 my $new_csc = $token;
12161 $new_csc =~ s/\s+//g; # trim all whitespace
12162 $old_csc =~ s/\s+//g; # trim all whitespace
12163 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12164 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
12165 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
12166 my $new_trailing_dots = $1;
12167 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
12169 # Patch to handle multiple closing side comments at
12170 # else and elsif's. These have become too complicated
12171 # to check, so if we see an indication of
12172 # '[ if' or '[ # elsif', then assume they were made
12174 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
12175 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
12177 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
12178 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
12181 # if old comment is contained in new comment,
12182 # only compare the common part.
12183 if ( length($new_csc) > length($old_csc) ) {
12184 $new_csc = substr( $new_csc, 0, length($old_csc) );
12187 # if the new comment is shorter and has been limited,
12188 # only compare the common part.
12189 if ( length($new_csc) < length($old_csc)
12190 && $new_trailing_dots )
12192 $old_csc = substr( $old_csc, 0, length($new_csc) );
12195 # any remaining difference?
12196 if ( $new_csc ne $old_csc ) {
12198 # just leave the old comment if we are below the threshold
12199 # for creating side comments
12200 if ( $block_line_count <
12201 $rOpts->{'closing-side-comment-interval'} )
12206 # otherwise we'll make a note of it
12210 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
12213 # save the old side comment in a new trailing block comment
12214 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
12217 $cscw_block_comment =
12218 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
12223 # No differences.. we can safely delete old comment if we
12224 # are below the threshold
12225 if ( $block_line_count <
12226 $rOpts->{'closing-side-comment-interval'} )
12229 unstore_token_to_go()
12230 if ( $types_to_go[$max_index_to_go] eq '#' );
12231 unstore_token_to_go()
12232 if ( $types_to_go[$max_index_to_go] eq 'b' );
12237 # switch to the new csc (unless we deleted it!)
12238 $tokens_to_go[$max_index_to_go] = $token if $token;
12241 # handle case of NO existing closing side comment
12244 # insert the new side comment into the output token stream
12246 my $block_type = '';
12247 my $type_sequence = '';
12248 my $container_environment =
12249 $container_environment_to_go[$max_index_to_go];
12250 my $level = $levels_to_go[$max_index_to_go];
12251 my $slevel = $nesting_depth_to_go[$max_index_to_go];
12252 my $no_internal_newlines = 0;
12254 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
12255 my $ci_level = $ci_levels_to_go[$max_index_to_go];
12256 my $in_continued_quote = 0;
12258 # first insert a blank token
12259 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
12261 # then the side comment
12262 insert_new_token_to_go( $token, $type, $slevel,
12263 $no_internal_newlines );
12266 return $cscw_block_comment;
12269 sub previous_nonblank_token {
12273 return "" if ( $im < 0 );
12274 if ( $types_to_go[$im] eq 'b' ) { $im--; }
12275 return "" if ( $im < 0 );
12276 $name = $tokens_to_go[$im];
12278 # prepend any sub name to an isolated -> to avoid unwanted alignments
12279 # [test case is test8/penco.pl]
12280 if ( $name eq '->' ) {
12282 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
12283 $name = $tokens_to_go[$im] . $name;
12289 sub send_lines_to_vertical_aligner {
12291 my ( $ri_first, $ri_last, $do_not_pad ) = @_;
12293 my $rindentation_list = [0]; # ref to indentations for each line
12295 # define the array @matching_token_to_go for the output tokens
12296 # which will be non-blank for each special token (such as =>)
12297 # for which alignment is required.
12298 set_vertical_alignment_markers( $ri_first, $ri_last );
12300 # flush if necessary to avoid unwanted alignment
12301 my $must_flush = 0;
12302 if ( @$ri_first > 1 ) {
12304 # flush before a long if statement
12305 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
12310 Perl::Tidy::VerticalAligner::flush();
12313 undo_ci( $ri_first, $ri_last );
12315 set_logical_padding( $ri_first, $ri_last );
12317 # loop to prepare each line for shipment
12318 my $n_last_line = @$ri_first - 1;
12320 for my $n ( 0 .. $n_last_line ) {
12321 my $ibeg = $$ri_first[$n];
12322 my $iend = $$ri_last[$n];
12324 my ( $rtokens, $rfields, $rpatterns ) =
12325 make_alignment_patterns( $ibeg, $iend );
12327 # Set flag to show how much level changes between this line
12328 # and the next line, if we have it.
12330 if ( $n < $n_last_line ) {
12331 my $ibegp = $$ri_first[ $n + 1 ];
12332 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
12335 my ( $indentation, $lev, $level_end, $terminal_type,
12336 $is_semicolon_terminated, $is_outdented_line )
12337 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
12338 $ri_first, $ri_last, $rindentation_list, $ljump );
12340 # we will allow outdenting of long lines..
12341 my $outdent_long_lines = (
12343 # which are long quotes, if allowed
12344 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
12346 # which are long block comments, if allowed
12348 $types_to_go[$ibeg] eq '#'
12349 && $rOpts->{'outdent-long-comments'}
12351 # but not if this is a static block comment
12352 && !$is_static_block_comment
12357 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
12359 my $rvertical_tightness_flags =
12360 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
12361 $ri_first, $ri_last );
12363 # flush an outdented line to avoid any unwanted vertical alignment
12364 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12366 # Set a flag at the final ':' of a ternary chain to request
12367 # vertical alignment of the final term. Here is a
12368 # slightly complex example:
12370 # $self->{_text} = (
12372 # : $type eq 'item' ? "the $section entry"
12373 # : "the section on $section"
12377 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
12378 # : ' elsewhere in this document'
12381 my $is_terminal_ternary = 0;
12382 if ( $tokens_to_go[$ibeg] eq ':'
12383 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
12385 my $last_leading_type = ":";
12387 my $iprev = $$ri_first[ $n - 1 ];
12388 $last_leading_type = $types_to_go[$iprev];
12390 if ( $terminal_type ne ';'
12391 && $n_last_line > $n
12392 && $level_end == $lev )
12394 my $inext = $$ri_first[ $n + 1 ];
12395 $level_end = $levels_to_go[$inext];
12396 $terminal_type = $types_to_go[$inext];
12399 $is_terminal_ternary = $last_leading_type eq ':'
12400 && ( ( $terminal_type eq ';' && $level_end <= $lev )
12401 || ( $terminal_type ne ':' && $level_end < $lev ) )
12403 # the terminal term must not contain any ternary terms, as in
12405 # $Is_MSWin32 ? ".\\echo$$"
12406 # : $Is_MacOS ? ":echo$$"
12407 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
12409 && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
12412 # send this new line down the pipe
12413 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
12414 Perl::Tidy::VerticalAligner::valign_input(
12421 $forced_breakpoint_to_go[$iend] || $in_comma_list,
12422 $outdent_long_lines,
12423 $is_terminal_ternary,
12424 $is_semicolon_terminated,
12426 $rvertical_tightness_flags,
12430 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
12432 # flush an outdented line to avoid any unwanted vertical alignment
12433 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12437 # Set flag indicating if this line ends in an opening
12438 # token and is very short, so that a blank line is not
12439 # needed if the subsequent line is a comment.
12440 # Examples of what we are looking for:
12446 $last_output_short_opening_token
12448 # line ends in opening token
12449 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
12453 # line has either single opening token
12456 # or is a single token followed by opening token.
12457 # Note that sub identifiers have blanks like 'sub doit'
12458 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
12461 # and limit total to 10 character widths
12462 && token_sequence_length( $ibeg, $iend ) <= 10;
12464 } # end of loop to output each line
12466 # remember indentation of lines containing opening containers for
12467 # later use by sub set_adjusted_indentation
12468 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
12471 { # begin make_alignment_patterns
12473 my %block_type_map;
12478 # map related block names into a common name to
12480 %block_type_map = (
12491 # map certain keywords to the same 'if' class to align
12492 # long if/elsif sequences. [elsif.pl]
12498 'default' => 'given',
12499 'case' => 'switch',
12501 # treat an 'undef' similar to numbers and quotes
12506 sub make_alignment_patterns {
12508 # Here we do some important preliminary work for the
12509 # vertical aligner. We create three arrays for one
12510 # output line. These arrays contain strings that can
12511 # be tested by the vertical aligner to see if
12512 # consecutive lines can be aligned vertically.
12514 # The three arrays are indexed on the vertical
12515 # alignment fields and are:
12516 # @tokens - a list of any vertical alignment tokens for this line.
12517 # These are tokens, such as '=' '&&' '#' etc which
12518 # we want to might align vertically. These are
12519 # decorated with various information such as
12520 # nesting depth to prevent unwanted vertical
12521 # alignment matches.
12522 # @fields - the actual text of the line between the vertical alignment
12524 # @patterns - a modified list of token types, one for each alignment
12525 # field. These should normally each match before alignment is
12526 # allowed, even when the alignment tokens match.
12527 my ( $ibeg, $iend ) = @_;
12531 my $i_start = $ibeg;
12535 my @container_name = ("");
12536 my @multiple_comma_arrows = (undef);
12538 my $j = 0; # field index
12541 for $i ( $ibeg .. $iend ) {
12543 # Keep track of containers balanced on this line only.
12544 # These are used below to prevent unwanted cross-line alignments.
12545 # Unbalanced containers already avoid aligning across
12546 # container boundaries.
12547 if ( $tokens_to_go[$i] eq '(' ) {
12549 # if container is balanced on this line...
12550 my $i_mate = $mate_index_to_go[$i];
12551 if ( $i_mate > $i && $i_mate <= $iend ) {
12553 my $seqno = $type_sequence_to_go[$i];
12554 my $count = comma_arrow_count($seqno);
12555 $multiple_comma_arrows[$depth] = $count && $count > 1;
12557 # Append the previous token name to make the container name
12558 # more unique. This name will also be given to any commas
12559 # within this container, and it helps avoid undesirable
12560 # alignments of different types of containers.
12561 my $name = previous_nonblank_token($i);
12563 $container_name[$depth] = "+" . $name;
12565 # Make the container name even more unique if necessary.
12566 # If we are not vertically aligning this opening paren,
12567 # append a character count to avoid bad alignment because
12568 # it usually looks bad to align commas within containers
12569 # for which the opening parens do not align. Here
12570 # is an example very BAD alignment of commas (because
12571 # the atan2 functions are not all aligned):
12573 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
12574 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
12575 # $X * atan2( $X, 1 ) -
12576 # $Y * atan2( $Y, 1 );
12578 # On the other hand, it is usually okay to align commas if
12579 # opening parens align, such as:
12580 # glVertex3d( $cx + $s * $xs, $cy, $z );
12581 # glVertex3d( $cx, $cy + $s * $ys, $z );
12582 # glVertex3d( $cx - $s * $xs, $cy, $z );
12583 # glVertex3d( $cx, $cy - $s * $ys, $z );
12585 # To distinguish between these situations, we will
12586 # append the length of the line from the previous matching
12587 # token, or beginning of line, to the function name. This
12588 # will allow the vertical aligner to reject undesirable
12591 # if we are not aligning on this paren...
12592 if ( $matching_token_to_go[$i] eq '' ) {
12594 # Sum length from previous alignment, or start of line.
12596 ( $i_start == $ibeg )
12597 ? total_line_length( $i_start, $i - 1 )
12598 : token_sequence_length( $i_start, $i - 1 );
12600 # tack length onto the container name to make unique
12601 $container_name[$depth] .= "-" . $len;
12605 elsif ( $tokens_to_go[$i] eq ')' ) {
12606 $depth-- if $depth > 0;
12609 # if we find a new synchronization token, we are done with
12611 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
12613 my $tok = my $raw_tok = $matching_token_to_go[$i];
12615 # make separators in different nesting depths unique
12616 # by appending the nesting depth digit.
12617 if ( $raw_tok ne '#' ) {
12618 $tok .= "$nesting_depth_to_go[$i]";
12621 # also decorate commas with any container name to avoid
12622 # unwanted cross-line alignments.
12623 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
12624 if ( $container_name[$depth] ) {
12625 $tok .= $container_name[$depth];
12629 # Patch to avoid aligning leading and trailing if, unless.
12630 # Mark trailing if, unless statements with container names.
12631 # This makes them different from leading if, unless which
12632 # are not so marked at present. If we ever need to name
12633 # them too, we could use ci to distinguish them.
12634 # Example problem to avoid:
12635 # return ( 2, "DBERROR" )
12636 # if ( $retval == 2 );
12637 # if ( scalar @_ ) {
12638 # my ( $a, $b, $c, $d, $e, $f ) = @_;
12640 if ( $raw_tok eq '(' ) {
12641 my $ci = $ci_levels_to_go[$ibeg];
12642 if ( $container_name[$depth] =~ /^\+(if|unless)/
12645 $tok .= $container_name[$depth];
12649 # Decorate block braces with block types to avoid
12650 # unwanted alignments such as the following:
12651 # foreach ( @{$routput_array} ) { $fh->print($_) }
12652 # eval { $fh->close() };
12653 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
12654 my $block_type = $block_type_to_go[$i];
12656 # map certain related block types to allow
12657 # else blocks to align
12658 $block_type = $block_type_map{$block_type}
12659 if ( defined( $block_type_map{$block_type} ) );
12661 # remove sub names to allow one-line sub braces to align
12662 # regardless of name
12663 if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
12665 # allow all control-type blocks to align
12666 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
12668 $tok .= $block_type;
12671 # concatenate the text of the consecutive tokens to form
12674 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
12676 # store the alignment token for this field
12677 push( @tokens, $tok );
12679 # get ready for the next batch
12682 $patterns[$j] = "";
12685 # continue accumulating tokens
12686 # handle non-keywords..
12687 if ( $types_to_go[$i] ne 'k' ) {
12688 my $type = $types_to_go[$i];
12690 # Mark most things before arrows as a quote to
12691 # get them to line up. Testfile: mixed.pl.
12692 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
12693 my $next_type = $types_to_go[ $i + 1 ];
12694 my $i_next_nonblank =
12695 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12697 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
12700 # Patch to ignore leading minus before words,
12701 # by changing pattern 'mQ' into just 'Q',
12702 # so that we can align things like this:
12703 # Button => "Print letter \"~$_\"",
12704 # -command => [ sub { print "$_[0]\n" }, $_ ],
12705 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
12709 # patch to make numbers and quotes align
12710 if ( $type eq 'n' ) { $type = 'Q' }
12712 # patch to ignore any ! in patterns
12713 if ( $type eq '!' ) { $type = '' }
12715 $patterns[$j] .= $type;
12718 # for keywords we have to use the actual text
12721 my $tok = $tokens_to_go[$i];
12723 # but map certain keywords to a common string to allow
12725 $tok = $keyword_map{$tok}
12726 if ( defined( $keyword_map{$tok} ) );
12727 $patterns[$j] .= $tok;
12731 # done with this line .. join text of tokens to make the last field
12732 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
12733 return ( \@tokens, \@fields, \@patterns );
12736 } # end make_alignment_patterns
12738 { # begin unmatched_indexes
12740 # closure to keep track of unbalanced containers.
12741 # arrays shared by the routines in this block:
12742 my @unmatched_opening_indexes_in_this_batch;
12743 my @unmatched_closing_indexes_in_this_batch;
12744 my %comma_arrow_count;
12746 sub is_unbalanced_batch {
12747 @unmatched_opening_indexes_in_this_batch +
12748 @unmatched_closing_indexes_in_this_batch;
12751 sub comma_arrow_count {
12753 return $comma_arrow_count{$seqno};
12756 sub match_opening_and_closing_tokens {
12758 # Match up indexes of opening and closing braces, etc, in this batch.
12759 # This has to be done after all tokens are stored because unstoring
12760 # of tokens would otherwise cause trouble.
12762 @unmatched_opening_indexes_in_this_batch = ();
12763 @unmatched_closing_indexes_in_this_batch = ();
12764 %comma_arrow_count = ();
12765 my $comma_arrow_count_contained = 0;
12767 my ( $i, $i_mate, $token );
12768 foreach $i ( 0 .. $max_index_to_go ) {
12769 if ( $type_sequence_to_go[$i] ) {
12770 $token = $tokens_to_go[$i];
12771 if ( $token =~ /^[\(\[\{\?]$/ ) {
12772 push @unmatched_opening_indexes_in_this_batch, $i;
12774 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
12776 $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12777 if ( defined($i_mate) && $i_mate >= 0 ) {
12778 if ( $type_sequence_to_go[$i_mate] ==
12779 $type_sequence_to_go[$i] )
12781 $mate_index_to_go[$i] = $i_mate;
12782 $mate_index_to_go[$i_mate] = $i;
12783 my $seqno = $type_sequence_to_go[$i];
12784 if ( $comma_arrow_count{$seqno} ) {
12785 $comma_arrow_count_contained +=
12786 $comma_arrow_count{$seqno};
12790 push @unmatched_opening_indexes_in_this_batch,
12792 push @unmatched_closing_indexes_in_this_batch, $i;
12796 push @unmatched_closing_indexes_in_this_batch, $i;
12800 elsif ( $tokens_to_go[$i] eq '=>' ) {
12801 if (@unmatched_opening_indexes_in_this_batch) {
12802 my $j = $unmatched_opening_indexes_in_this_batch[-1];
12803 my $seqno = $type_sequence_to_go[$j];
12804 $comma_arrow_count{$seqno}++;
12808 return $comma_arrow_count_contained;
12811 sub save_opening_indentation {
12813 # This should be called after each batch of tokens is output. It
12814 # saves indentations of lines of all unmatched opening tokens.
12815 # These will be used by sub get_opening_indentation.
12817 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
12819 # we no longer need indentations of any saved indentations which
12820 # are unmatched closing tokens in this batch, because we will
12821 # never encounter them again. So we can delete them to keep
12822 # the hash size down.
12823 foreach (@unmatched_closing_indexes_in_this_batch) {
12824 my $seqno = $type_sequence_to_go[$_];
12825 delete $saved_opening_indentation{$seqno};
12828 # we need to save indentations of any unmatched opening tokens
12829 # in this batch because we may need them in a subsequent batch.
12830 foreach (@unmatched_opening_indexes_in_this_batch) {
12831 my $seqno = $type_sequence_to_go[$_];
12832 $saved_opening_indentation{$seqno} = [
12833 lookup_opening_indentation(
12834 $_, $ri_first, $ri_last, $rindentation_list
12839 } # end unmatched_indexes
12841 sub get_opening_indentation {
12843 # get the indentation of the line which output the opening token
12844 # corresponding to a given closing token in the current output batch.
12847 # $i_closing - index in this line of a closing token ')' '}' or ']'
12849 # $ri_first - reference to list of the first index $i for each output
12850 # line in this batch
12851 # $ri_last - reference to list of the last index $i for each output line
12853 # $rindentation_list - reference to a list containing the indentation
12854 # used for each line.
12857 # -the indentation of the line which contained the opening token
12858 # which matches the token at index $i_opening
12859 # -and its offset (number of columns) from the start of the line
12861 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
12863 # first, see if the opening token is in the current batch
12864 my $i_opening = $mate_index_to_go[$i_closing];
12865 my ( $indent, $offset, $is_leading, $exists );
12867 if ( $i_opening >= 0 ) {
12869 # it is..look up the indentation
12870 ( $indent, $offset, $is_leading ) =
12871 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
12872 $rindentation_list );
12875 # if not, it should have been stored in the hash by a previous batch
12877 my $seqno = $type_sequence_to_go[$i_closing];
12879 if ( $saved_opening_indentation{$seqno} ) {
12880 ( $indent, $offset, $is_leading ) =
12881 @{ $saved_opening_indentation{$seqno} };
12884 # some kind of serious error
12885 # (example is badfile.t)
12894 # if no sequence number it must be an unbalanced container
12902 return ( $indent, $offset, $is_leading, $exists );
12905 sub lookup_opening_indentation {
12907 # get the indentation of the line in the current output batch
12908 # which output a selected opening token
12911 # $i_opening - index of an opening token in the current output batch
12912 # whose line indentation we need
12913 # $ri_first - reference to list of the first index $i for each output
12914 # line in this batch
12915 # $ri_last - reference to list of the last index $i for each output line
12917 # $rindentation_list - reference to a list containing the indentation
12918 # used for each line. (NOTE: the first slot in
12919 # this list is the last returned line number, and this is
12920 # followed by the list of indentations).
12923 # -the indentation of the line which contained token $i_opening
12924 # -and its offset (number of columns) from the start of the line
12926 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
12928 my $nline = $rindentation_list->[0]; # line number of previous lookup
12930 # reset line location if necessary
12931 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
12933 # find the correct line
12934 unless ( $i_opening > $ri_last->[-1] ) {
12935 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
12938 # error - token index is out of bounds - shouldn't happen
12941 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
12943 report_definite_bug();
12944 $nline = $#{$ri_last};
12947 $rindentation_list->[0] =
12948 $nline; # save line number to start looking next call
12949 my $ibeg = $ri_start->[$nline];
12950 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
12951 my $is_leading = ( $ibeg == $i_opening );
12952 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
12956 my %is_if_elsif_else_unless_while_until_for_foreach;
12960 # These block types may have text between the keyword and opening
12961 # curly. Note: 'else' does not, but must be included to allow trailing
12962 # if/elsif text to be appended.
12963 # patch for SWITCH/CASE: added 'case' and 'when'
12964 @_ = qw(if elsif else unless while until for foreach case when);
12965 @is_if_elsif_else_unless_while_until_for_foreach{@_} =
12969 sub set_adjusted_indentation {
12971 # This routine has the final say regarding the actual indentation of
12972 # a line. It starts with the basic indentation which has been
12973 # defined for the leading token, and then takes into account any
12974 # options that the user has set regarding special indenting and
12977 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
12978 $rindentation_list, $level_jump )
12981 # we need to know the last token of this line
12982 my ( $terminal_type, $i_terminal ) =
12983 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
12985 my $is_outdented_line = 0;
12987 my $is_semicolon_terminated = $terminal_type eq ';'
12988 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
12990 ##########################################################
12991 # Section 1: set a flag and a default indentation
12993 # Most lines are indented according to the initial token.
12994 # But it is common to outdent to the level just after the
12995 # terminal token in certain cases...
12996 # adjust_indentation flag:
12997 # 0 - do not adjust
12999 # 2 - vertically align with opening token
13001 ##########################################################
13002 my $adjust_indentation = 0;
13003 my $default_adjust_indentation = $adjust_indentation;
13006 $opening_indentation, $opening_offset,
13007 $is_leading, $opening_exists
13010 # if we are at a closing token of some type..
13011 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
13013 # get the indentation of the line containing the corresponding
13016 $opening_indentation, $opening_offset,
13017 $is_leading, $opening_exists
13019 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13020 $rindentation_list );
13022 # First set the default behavior:
13025 # default behavior is to outdent closing lines
13026 # of the form: "); }; ]; )->xxx;"
13027 $is_semicolon_terminated
13029 # and 'cuddled parens' of the form: ")->pack("
13031 $terminal_type eq '('
13032 && $types_to_go[$ibeg] eq ')'
13033 && ( $nesting_depth_to_go[$iend] + 1 ==
13034 $nesting_depth_to_go[$ibeg] )
13037 # and when the next line is at a lower indentation level
13038 # PATCH: and only if the style allows undoing continuation
13039 # for all closing token types. We should really wait until
13040 # the indentation of the next line is known and then make
13041 # a decision, but that would require another pass.
13042 || ( $level_jump < 0 && !$some_closing_token_indentation )
13045 $adjust_indentation = 1;
13048 # outdent something like '),'
13050 $terminal_type eq ','
13052 # allow just one character before the comma
13053 && $i_terminal == $ibeg + 1
13055 # require LIST environment; otherwise, we may outdent too much -
13056 # this can happen in calls without parentheses (overload.t);
13057 && $container_environment_to_go[$i_terminal] eq 'LIST'
13060 $adjust_indentation = 1;
13063 # undo continuation indentation of a terminal closing token if
13064 # it is the last token before a level decrease. This will allow
13065 # a closing token to line up with its opening counterpart, and
13066 # avoids a indentation jump larger than 1 level.
13067 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
13068 && $i_terminal == $ibeg )
13070 my $ci = $ci_levels_to_go[$ibeg];
13071 my $lev = $levels_to_go[$ibeg];
13072 my $next_type = $types_to_go[ $ibeg + 1 ];
13073 my $i_next_nonblank =
13074 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
13075 if ( $i_next_nonblank <= $max_index_to_go
13076 && $levels_to_go[$i_next_nonblank] < $lev )
13078 $adjust_indentation = 1;
13081 # Patch for RT #96101, in which closing brace of anonymous subs
13082 # was not outdented. We should look ahead and see if there is
13083 # a level decrease at the next token (i.e., a closing token),
13084 # but right now we do not have that information. For now
13085 # we see if we are in a list, and this works well.
13086 # See test files 'sub*.t' for good test cases.
13087 if ( $block_type_to_go[$ibeg] =~ /^sub\s*\(?/
13088 && $container_environment_to_go[$i_terminal] eq 'LIST'
13089 && !$rOpts->{'indent-closing-brace'} )
13092 $opening_indentation, $opening_offset,
13093 $is_leading, $opening_exists
13095 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13096 $rindentation_list );
13097 my $indentation = $leading_spaces_to_go[$ibeg];
13098 if ( defined($opening_indentation)
13099 && get_SPACES($indentation) >
13100 get_SPACES($opening_indentation) )
13102 $adjust_indentation = 1;
13107 # YVES patch 1 of 2:
13108 # Undo ci of line with leading closing eval brace,
13109 # but not beyond the indention of the line with
13110 # the opening brace.
13111 if ( $block_type_to_go[$ibeg] eq 'eval'
13112 && !$rOpts->{'line-up-parentheses'}
13113 && !$rOpts->{'indent-closing-brace'} )
13116 $opening_indentation, $opening_offset,
13117 $is_leading, $opening_exists
13119 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13120 $rindentation_list );
13121 my $indentation = $leading_spaces_to_go[$ibeg];
13122 if ( defined($opening_indentation)
13123 && get_SPACES($indentation) >
13124 get_SPACES($opening_indentation) )
13126 $adjust_indentation = 1;
13130 $default_adjust_indentation = $adjust_indentation;
13132 # Now modify default behavior according to user request:
13133 # handle option to indent non-blocks of the form ); }; ];
13134 # But don't do special indentation to something like ')->pack('
13135 if ( !$block_type_to_go[$ibeg] ) {
13136 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
13138 if ( $i_terminal <= $ibeg + 1
13139 || $is_semicolon_terminated )
13141 $adjust_indentation = 2;
13144 $adjust_indentation = 0;
13147 elsif ( $cti == 2 ) {
13148 if ($is_semicolon_terminated) {
13149 $adjust_indentation = 3;
13152 $adjust_indentation = 0;
13155 elsif ( $cti == 3 ) {
13156 $adjust_indentation = 3;
13160 # handle option to indent blocks
13163 $rOpts->{'indent-closing-brace'}
13165 $i_terminal == $ibeg # isolated terminal '}'
13166 || $is_semicolon_terminated
13170 $adjust_indentation = 3;
13175 # if at ');', '};', '>;', and '];' of a terminal qw quote
13176 elsif ($$rpatterns[0] =~ /^qb*;$/
13177 && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
13179 if ( $closing_token_indentation{$1} == 0 ) {
13180 $adjust_indentation = 1;
13183 $adjust_indentation = 3;
13187 # if line begins with a ':', align it with any
13188 # previous line leading with corresponding ?
13189 elsif ( $types_to_go[$ibeg] eq ':' ) {
13191 $opening_indentation, $opening_offset,
13192 $is_leading, $opening_exists
13194 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
13195 $rindentation_list );
13196 if ($is_leading) { $adjust_indentation = 2; }
13199 ##########################################################
13200 # Section 2: set indentation according to flag set above
13202 # Select the indentation object to define leading
13203 # whitespace. If we are outdenting something like '} } );'
13204 # then we want to use one level below the last token
13205 # ($i_terminal) in order to get it to fully outdent through
13207 ##########################################################
13210 my $level_end = $levels_to_go[$iend];
13212 if ( $adjust_indentation == 0 ) {
13213 $indentation = $leading_spaces_to_go[$ibeg];
13214 $lev = $levels_to_go[$ibeg];
13216 elsif ( $adjust_indentation == 1 ) {
13217 $indentation = $reduced_spaces_to_go[$i_terminal];
13218 $lev = $levels_to_go[$i_terminal];
13221 # handle indented closing token which aligns with opening token
13222 elsif ( $adjust_indentation == 2 ) {
13224 # handle option to align closing token with opening token
13225 $lev = $levels_to_go[$ibeg];
13227 # calculate spaces needed to align with opening token
13229 get_SPACES($opening_indentation) + $opening_offset;
13231 # Indent less than the previous line.
13233 # Problem: For -lp we don't exactly know what it was if there
13234 # were recoverable spaces sent to the aligner. A good solution
13235 # would be to force a flush of the vertical alignment buffer, so
13236 # that we would know. For now, this rule is used for -lp:
13238 # When the last line did not start with a closing token we will
13239 # be optimistic that the aligner will recover everything wanted.
13241 # This rule will prevent us from breaking a hierarchy of closing
13242 # tokens, and in a worst case will leave a closing paren too far
13243 # indented, but this is better than frequently leaving it not
13245 my $last_spaces = get_SPACES($last_indentation_written);
13246 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
13248 get_RECOVERABLE_SPACES($last_indentation_written);
13251 # reset the indentation to the new space count if it works
13252 # only options are all or none: nothing in-between looks good
13253 $lev = $levels_to_go[$ibeg];
13254 if ( $space_count < $last_spaces ) {
13255 if ($rOpts_line_up_parentheses) {
13256 my $lev = $levels_to_go[$ibeg];
13258 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13261 $indentation = $space_count;
13265 # revert to default if it doesn't work
13267 $space_count = leading_spaces_to_go($ibeg);
13268 if ( $default_adjust_indentation == 0 ) {
13269 $indentation = $leading_spaces_to_go[$ibeg];
13271 elsif ( $default_adjust_indentation == 1 ) {
13272 $indentation = $reduced_spaces_to_go[$i_terminal];
13273 $lev = $levels_to_go[$i_terminal];
13278 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
13281 # handle -icb (indented closing code block braces)
13282 # Updated method for indented block braces: indent one full level if
13283 # there is no continuation indentation. This will occur for major
13284 # structures such as sub, if, else, but not for things like map
13287 # Note: only code blocks without continuation indentation are
13288 # handled here (if, else, unless, ..). In the following snippet,
13289 # the terminal brace of the sort block will have continuation
13290 # indentation as shown so it will not be handled by the coding
13291 # here. We would have to undo the continuation indentation to do
13292 # this, but it probably looks ok as is. This is a possible future
13293 # update for semicolon terminated lines.
13295 # if ($sortby eq 'date' or $sortby eq 'size') {
13297 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
13302 if ( $block_type_to_go[$ibeg]
13303 && $ci_levels_to_go[$i_terminal] == 0 )
13305 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
13306 $indentation = $spaces + $rOpts_indent_columns;
13308 # NOTE: for -lp we could create a new indentation object, but
13309 # there is probably no need to do it
13312 # handle -icp and any -icb block braces which fall through above
13313 # test such as the 'sort' block mentioned above.
13316 # There are currently two ways to handle -icp...
13317 # One way is to use the indentation of the previous line:
13318 # $indentation = $last_indentation_written;
13320 # The other way is to use the indentation that the previous line
13321 # would have had if it hadn't been adjusted:
13322 $indentation = $last_unadjusted_indentation;
13324 # Current method: use the minimum of the two. This avoids
13325 # inconsistent indentation.
13326 if ( get_SPACES($last_indentation_written) <
13327 get_SPACES($indentation) )
13329 $indentation = $last_indentation_written;
13333 # use previous indentation but use own level
13334 # to cause list to be flushed properly
13335 $lev = $levels_to_go[$ibeg];
13338 # remember indentation except for multi-line quotes, which get
13340 unless ( $ibeg == 0 && $starting_in_quote ) {
13341 $last_indentation_written = $indentation;
13342 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
13343 $last_leading_token = $tokens_to_go[$ibeg];
13346 # be sure lines with leading closing tokens are not outdented more
13347 # than the line which contained the corresponding opening token.
13349 #############################################################
13350 # updated per bug report in alex_bug.pl: we must not
13351 # mess with the indentation of closing logical braces so
13352 # we must treat something like '} else {' as if it were
13353 # an isolated brace my $is_isolated_block_brace = (
13354 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
13355 #############################################################
13356 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
13357 && ( $iend == $ibeg
13358 || $is_if_elsif_else_unless_while_until_for_foreach{
13359 $block_type_to_go[$ibeg]
13362 # only do this for a ':; which is aligned with its leading '?'
13363 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
13364 if ( defined($opening_indentation)
13365 && !$is_isolated_block_brace
13366 && !$is_unaligned_colon )
13368 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
13369 $indentation = $opening_indentation;
13373 # remember the indentation of each line of this batch
13374 push @{$rindentation_list}, $indentation;
13376 # outdent lines with certain leading tokens...
13379 # must be first word of this batch
13385 # certain leading keywords if requested
13387 $rOpts->{'outdent-keywords'}
13388 && $types_to_go[$ibeg] eq 'k'
13389 && $outdent_keyword{ $tokens_to_go[$ibeg] }
13392 # or labels if requested
13393 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
13395 # or static block comments if requested
13396 || ( $types_to_go[$ibeg] eq '#'
13397 && $rOpts->{'outdent-static-block-comments'}
13398 && $is_static_block_comment )
13403 my $space_count = leading_spaces_to_go($ibeg);
13404 if ( $space_count > 0 ) {
13405 $space_count -= $rOpts_continuation_indentation;
13406 $is_outdented_line = 1;
13407 if ( $space_count < 0 ) { $space_count = 0 }
13409 # do not promote a spaced static block comment to non-spaced;
13410 # this is not normally necessary but could be for some
13411 # unusual user inputs (such as -ci = -i)
13412 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
13416 if ($rOpts_line_up_parentheses) {
13418 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13421 $indentation = $space_count;
13426 return ( $indentation, $lev, $level_end, $terminal_type,
13427 $is_semicolon_terminated, $is_outdented_line );
13431 sub set_vertical_tightness_flags {
13433 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
13435 # Define vertical tightness controls for the nth line of a batch.
13436 # We create an array of parameters which tell the vertical aligner
13437 # if we should combine this line with the next line to achieve the
13438 # desired vertical tightness. The array of parameters contains:
13440 # [0] type: 1=opening non-block 2=closing non-block
13441 # 3=opening block brace 4=closing block brace
13443 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
13444 # if closing: spaces of padding to use
13445 # [2] sequence number of container
13446 # [3] valid flag: do not append if this flag is false. Will be
13447 # true if appropriate -vt flag is set. Otherwise, Will be
13448 # made true only for 2 line container in parens with -lp
13450 # These flags are used by sub set_leading_whitespace in
13451 # the vertical aligner
13453 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
13455 #--------------------------------------------------------------
13456 # Vertical Tightness Flags Section 1:
13457 # Handle Lines 1 .. n-1 but not the last line
13458 # For non-BLOCK tokens, we will need to examine the next line
13459 # too, so we won't consider the last line.
13460 #--------------------------------------------------------------
13461 if ( $n < $n_last_line ) {
13463 #--------------------------------------------------------------
13464 # Vertical Tightness Flags Section 1a:
13465 # Look for Type 1, last token of this line is a non-block opening token
13466 #--------------------------------------------------------------
13467 my $ibeg_next = $$ri_first[ $n + 1 ];
13468 my $token_end = $tokens_to_go[$iend];
13469 my $iend_next = $$ri_last[ $n + 1 ];
13471 $type_sequence_to_go[$iend]
13472 && !$block_type_to_go[$iend]
13473 && $is_opening_token{$token_end}
13475 $opening_vertical_tightness{$token_end} > 0
13477 # allow 2-line method call to be closed up
13478 || ( $rOpts_line_up_parentheses
13479 && $token_end eq '('
13481 && $types_to_go[ $iend - 1 ] ne 'b' )
13486 # avoid multiple jumps in nesting depth in one line if
13488 my $ovt = $opening_vertical_tightness{$token_end};
13489 my $iend_next = $$ri_last[ $n + 1 ];
13492 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
13493 $nesting_depth_to_go[$ibeg_next] )
13497 # If -vt flag has not been set, mark this as invalid
13498 # and aligner will validate it if it sees the closing paren
13500 my $valid_flag = $ovt;
13501 @{$rvertical_tightness_flags} =
13502 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
13506 #--------------------------------------------------------------
13507 # Vertical Tightness Flags Section 1b:
13508 # Look for Type 2, first token of next line is a non-block closing
13509 # token .. and be sure this line does not have a side comment
13510 #--------------------------------------------------------------
13511 my $token_next = $tokens_to_go[$ibeg_next];
13512 if ( $type_sequence_to_go[$ibeg_next]
13513 && !$block_type_to_go[$ibeg_next]
13514 && $is_closing_token{$token_next}
13515 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
13517 my $ovt = $opening_vertical_tightness{$token_next};
13518 my $cvt = $closing_vertical_tightness{$token_next};
13521 # never append a trailing line like )->pack(
13522 # because it will throw off later alignment
13524 $nesting_depth_to_go[$ibeg_next] ==
13525 $nesting_depth_to_go[ $iend_next + 1 ] + 1
13530 $container_environment_to_go[$ibeg_next] ne 'LIST'
13534 # allow closing up 2-line method calls
13535 || ( $rOpts_line_up_parentheses
13536 && $token_next eq ')' )
13543 # decide which trailing closing tokens to append..
13545 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
13547 my $str = join( '',
13548 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
13550 # append closing token if followed by comment or ';'
13551 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
13555 my $valid_flag = $cvt;
13556 @{$rvertical_tightness_flags} = (
13558 $tightness{$token_next} == 2 ? 0 : 1,
13559 $type_sequence_to_go[$ibeg_next], $valid_flag,
13565 #--------------------------------------------------------------
13566 # Vertical Tightness Flags Section 1c:
13567 # Implement the Opening Token Right flag (Type 2)..
13568 # If requested, move an isolated trailing opening token to the end of
13569 # the previous line which ended in a comma. We could do this
13570 # in sub recombine_breakpoints but that would cause problems
13571 # with -lp formatting. The problem is that indentation will
13572 # quickly move far to the right in nested expressions. By
13573 # doing it after indentation has been set, we avoid changes
13574 # to the indentation. Actual movement of the token takes place
13575 # in sub valign_output_step_B.
13576 #--------------------------------------------------------------
13578 $opening_token_right{ $tokens_to_go[$ibeg_next] }
13580 # previous line is not opening
13581 # (use -sot to combine with it)
13582 && !$is_opening_token{$token_end}
13584 # previous line ended in one of these
13585 # (add other cases if necessary; '=>' and '.' are not necessary
13586 && !$block_type_to_go[$ibeg_next]
13588 # this is a line with just an opening token
13589 && ( $iend_next == $ibeg_next
13590 || $iend_next == $ibeg_next + 2
13591 && $types_to_go[$iend_next] eq '#' )
13593 # looks bad if we align vertically with the wrong container
13594 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
13597 my $valid_flag = 1;
13598 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13599 @{$rvertical_tightness_flags} =
13600 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
13603 #--------------------------------------------------------------
13604 # Vertical Tightness Flags Section 1d:
13605 # Stacking of opening and closing tokens (Type 2)
13606 #--------------------------------------------------------------
13608 my $token_beg_next = $tokens_to_go[$ibeg_next];
13610 # patch to make something like 'qw(' behave like an opening paren
13612 if ( $types_to_go[$ibeg_next] eq 'q' ) {
13613 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
13614 $token_beg_next = $1;
13618 if ( $is_closing_token{$token_end}
13619 && $is_closing_token{$token_beg_next} )
13621 $stackable = $stack_closing_token{$token_beg_next}
13622 unless ( $block_type_to_go[$ibeg_next] )
13623 ; # shouldn't happen; just checking
13625 elsif ($is_opening_token{$token_end}
13626 && $is_opening_token{$token_beg_next} )
13628 $stackable = $stack_opening_token{$token_beg_next}
13629 unless ( $block_type_to_go[$ibeg_next] )
13630 ; # shouldn't happen; just checking
13635 my $is_semicolon_terminated;
13636 if ( $n + 1 == $n_last_line ) {
13637 my ( $terminal_type, $i_terminal ) = terminal_type(
13638 \@types_to_go, \@block_type_to_go,
13639 $ibeg_next, $iend_next
13641 $is_semicolon_terminated = $terminal_type eq ';'
13642 && $nesting_depth_to_go[$iend_next] <
13643 $nesting_depth_to_go[$ibeg_next];
13646 # this must be a line with just an opening token
13647 # or end in a semicolon
13649 $is_semicolon_terminated
13650 || ( $iend_next == $ibeg_next
13651 || $iend_next == $ibeg_next + 2
13652 && $types_to_go[$iend_next] eq '#' )
13655 my $valid_flag = 1;
13656 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13657 @{$rvertical_tightness_flags} =
13658 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
13664 #--------------------------------------------------------------
13665 # Vertical Tightness Flags Section 2:
13666 # Handle type 3, opening block braces on last line of the batch
13667 # Check for a last line with isolated opening BLOCK curly
13668 #--------------------------------------------------------------
13669 elsif ($rOpts_block_brace_vertical_tightness
13671 && $types_to_go[$iend] eq '{'
13672 && $block_type_to_go[$iend] =~
13673 /$block_brace_vertical_tightness_pattern/o )
13675 @{$rvertical_tightness_flags} =
13676 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
13679 #--------------------------------------------------------------
13680 # Vertical Tightness Flags Section 3:
13681 # Handle type 4, a closing block brace on the last line of the batch Check
13682 # for a last line with isolated closing BLOCK curly
13683 #--------------------------------------------------------------
13684 elsif ($rOpts_stack_closing_block_brace
13686 && $block_type_to_go[$iend]
13687 && $types_to_go[$iend] eq '}' )
13689 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
13690 @{$rvertical_tightness_flags} =
13691 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
13694 # pack in the sequence numbers of the ends of this line
13695 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
13696 $rvertical_tightness_flags->[5] = get_seqno($iend);
13697 return $rvertical_tightness_flags;
13702 # get opening and closing sequence numbers of a token for the vertical
13703 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
13704 # to be treated somewhat like opening and closing tokens for stacking
13705 # tokens by the vertical aligner.
13707 my $seqno = $type_sequence_to_go[$ii];
13708 if ( $types_to_go[$ii] eq 'q' ) {
13711 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
13714 if ( !$ending_in_quote ) {
13715 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
13723 my %is_vertical_alignment_type;
13724 my %is_vertical_alignment_keyword;
13725 my %is_terminal_alignment_type;
13729 # Removed =~ from list to improve chances of alignment
13731 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
13732 { ? : => && || // ~~ !~~
13734 @is_vertical_alignment_type{@_} = (1) x scalar(@_);
13736 # only align these at end of line
13738 @is_terminal_alignment_type{@_} = (1) x scalar(@_);
13740 # eq and ne were removed from this list to improve alignment chances
13741 @_ = qw(if unless and or err for foreach while until);
13742 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
13745 sub set_vertical_alignment_markers {
13747 # This routine takes the first step toward vertical alignment of the
13748 # lines of output text. It looks for certain tokens which can serve as
13749 # vertical alignment markers (such as an '=').
13751 # Method: We look at each token $i in this output batch and set
13752 # $matching_token_to_go[$i] equal to those tokens at which we would
13753 # accept vertical alignment.
13755 # nothing to do if we aren't allowed to change whitespace
13756 if ( !$rOpts_add_whitespace ) {
13757 for my $i ( 0 .. $max_index_to_go ) {
13758 $matching_token_to_go[$i] = '';
13763 my ( $ri_first, $ri_last ) = @_;
13765 # remember the index of last nonblank token before any sidecomment
13766 my $i_terminal = $max_index_to_go;
13767 if ( $types_to_go[$i_terminal] eq '#' ) {
13768 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
13769 if ( $i_terminal > 0 ) { --$i_terminal }
13773 # look at each line of this batch..
13774 my $last_vertical_alignment_before_index;
13775 my $vert_last_nonblank_type;
13776 my $vert_last_nonblank_token;
13777 my $vert_last_nonblank_block_type;
13778 my $max_line = @$ri_first - 1;
13779 my ( $i, $type, $token, $block_type, $alignment_type );
13780 my ( $ibeg, $iend, $line );
13782 foreach $line ( 0 .. $max_line ) {
13783 $ibeg = $$ri_first[$line];
13784 $iend = $$ri_last[$line];
13785 $last_vertical_alignment_before_index = -1;
13786 $vert_last_nonblank_type = '';
13787 $vert_last_nonblank_token = '';
13788 $vert_last_nonblank_block_type = '';
13790 # look at each token in this output line..
13791 foreach $i ( $ibeg .. $iend ) {
13792 $alignment_type = '';
13793 $type = $types_to_go[$i];
13794 $block_type = $block_type_to_go[$i];
13795 $token = $tokens_to_go[$i];
13797 # check for flag indicating that we should not align
13799 if ( $matching_token_to_go[$i] ) {
13800 $matching_token_to_go[$i] = '';
13804 #--------------------------------------------------------
13805 # First see if we want to align BEFORE this token
13806 #--------------------------------------------------------
13808 # The first possible token that we can align before
13809 # is index 2 because: 1) it doesn't normally make sense to
13810 # align before the first token and 2) the second
13811 # token must be a blank if we are to align before
13813 if ( $i < $ibeg + 2 ) { }
13815 # must follow a blank token
13816 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
13818 # align a side comment --
13819 elsif ( $type eq '#' ) {
13823 # it is a static side comment
13825 $rOpts->{'static-side-comments'}
13826 && $token =~ /$static_side_comment_pattern/o
13829 # or a closing side comment
13830 || ( $vert_last_nonblank_block_type
13832 /$closing_side_comment_prefix_pattern/o )
13835 $alignment_type = $type;
13836 } ## Example of a static side comment
13839 # otherwise, do not align two in a row to create a
13841 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
13843 # align before one of these keywords
13844 # (within a line, since $i>1)
13845 elsif ( $type eq 'k' ) {
13847 # /^(if|unless|and|or|eq|ne)$/
13848 if ( $is_vertical_alignment_keyword{$token} ) {
13849 $alignment_type = $token;
13853 # align before one of these types..
13854 # Note: add '.' after new vertical aligner is operational
13855 elsif ( $is_vertical_alignment_type{$type} ) {
13856 $alignment_type = $token;
13858 # Do not align a terminal token. Although it might
13859 # occasionally look ok to do this, this has been found to be
13860 # a good general rule. The main problems are:
13861 # (1) that the terminal token (such as an = or :) might get
13862 # moved far to the right where it is hard to see because
13863 # nothing follows it, and
13864 # (2) doing so may prevent other good alignments.
13865 # Current exceptions are && and ||
13866 if ( $i == $iend || $i >= $i_terminal ) {
13867 $alignment_type = ""
13868 unless ( $is_terminal_alignment_type{$type} );
13871 # Do not align leading ': (' or '. ('. This would prevent
13872 # alignment in something like the following:
13874 # ( $input_line_number < 10 ) ? " "
13875 # : ( $input_line_number < 100 ) ? " "
13879 # ( $case_matters ? $accessor : " lc($accessor) " )
13880 # . ( $yesno ? " eq " : " ne " )
13881 if ( $i == $ibeg + 2
13882 && $types_to_go[$ibeg] =~ /^[\.\:]$/
13883 && $types_to_go[ $i - 1 ] eq 'b' )
13885 $alignment_type = "";
13888 # For a paren after keyword, only align something like this:
13890 # elsif ( $b ) { &b }
13891 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
13892 $alignment_type = ""
13893 unless $vert_last_nonblank_token =~
13894 /^(if|unless|elsif)$/;
13897 # be sure the alignment tokens are unique
13898 # This didn't work well: reason not determined
13899 # if ($token ne $type) {$alignment_type .= $type}
13902 # NOTE: This is deactivated because it causes the previous
13903 # if/elsif alignment to fail
13904 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
13905 #{ $alignment_type = $type; }
13907 if ($alignment_type) {
13908 $last_vertical_alignment_before_index = $i;
13911 #--------------------------------------------------------
13912 # Next see if we want to align AFTER the previous nonblank
13913 #--------------------------------------------------------
13915 # We want to line up ',' and interior ';' tokens, with the added
13916 # space AFTER these tokens. (Note: interior ';' is included
13917 # because it may occur in short blocks).
13920 # we haven't already set it
13923 # and its not the first token of the line
13926 # and it follows a blank
13927 && $types_to_go[ $i - 1 ] eq 'b'
13929 # and previous token IS one of these:
13930 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
13932 # and it's NOT one of these
13933 && ( $type !~ /^[b\#\)\]\}]$/ )
13935 # then go ahead and align
13939 $alignment_type = $vert_last_nonblank_type;
13942 #--------------------------------------------------------
13943 # then store the value
13944 #--------------------------------------------------------
13945 $matching_token_to_go[$i] = $alignment_type;
13946 if ( $type ne 'b' ) {
13947 $vert_last_nonblank_type = $type;
13948 $vert_last_nonblank_token = $token;
13949 $vert_last_nonblank_block_type = $block_type;
13956 sub terminal_type {
13958 # returns type of last token on this line (terminal token), as follows:
13959 # returns # for a full-line comment
13960 # returns ' ' for a blank line
13961 # otherwise returns final token type
13963 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
13965 # check for full-line comment..
13966 if ( $$rtype[$ibeg] eq '#' ) {
13967 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
13971 # start at end and walk backwards..
13972 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
13974 # skip past any side comment and blanks
13975 next if ( $$rtype[$i] eq 'b' );
13976 next if ( $$rtype[$i] eq '#' );
13978 # found it..make sure it is a BLOCK termination,
13979 # but hide a terminal } after sort/grep/map because it is not
13980 # necessarily the end of the line. (terminal.t)
13981 my $terminal_type = $$rtype[$i];
13983 $terminal_type eq '}'
13984 && ( !$$rblock_type[$i]
13985 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
13988 $terminal_type = 'b';
13990 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
13994 return wantarray ? ( ' ', $ibeg ) : ' ';
13998 { # set_bond_strengths
14000 my %is_good_keyword_breakpoint;
14001 my %is_lt_gt_le_ge;
14003 my %binary_bond_strength;
14010 sub bias_table_key {
14011 my ( $type, $token ) = @_;
14012 my $bias_table_key = $type;
14013 if ( $type eq 'k' ) {
14014 $bias_table_key = $token;
14015 if ( $token eq 'err' ) { $bias_table_key = 'or' }
14017 return $bias_table_key;
14020 sub set_bond_strengths {
14024 @_ = qw(if unless while until for foreach);
14025 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
14027 @_ = qw(lt gt le ge);
14028 @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
14030 # The decision about where to break a line depends upon a "bond
14031 # strength" between tokens. The LOWER the bond strength, the MORE
14032 # likely a break. A bond strength may be any value but to simplify
14033 # things there are several pre-defined strength levels:
14035 # NO_BREAK => 10000;
14036 # VERY_STRONG => 100;
14040 # VERY_WEAK => 0.55;
14042 # The strength values are based on trial-and-error, and need to be
14043 # tweaked occasionally to get desired results. Some comments:
14045 # 1. Only relative strengths are important. small differences
14046 # in strengths can make big formatting differences.
14047 # 2. Each indentation level adds one unit of bond strength.
14048 # 3. A value of NO_BREAK makes an unbreakable bond
14049 # 4. A value of VERY_WEAK is the strength of a ','
14050 # 5. Values below NOMINAL are considered ok break points.
14051 # 6. Values above NOMINAL are considered poor break points.
14053 # The bond strengths should roughly follow precedence order where
14054 # possible. If you make changes, please check the results very
14055 # carefully on a variety of scripts. Testing with the -extrude
14056 # options is particularly helpful in exercising all of the rules.
14058 # Wherever possible, bond strengths are defined in the following
14059 # tables. There are two main stages to setting bond strengths and
14060 # two types of tables:
14062 # The first stage involves looking at each token individually and
14063 # defining left and right bond strengths, according to if we want
14064 # to break to the left or right side, and how good a break point it
14065 # is. For example tokens like =, ||, && make good break points and
14066 # will have low strengths, but one might want to break on either
14067 # side to put them at the end of one line or beginning of the next.
14069 # The second stage involves looking at certain pairs of tokens and
14070 # defining a bond strength for that particular pair. This second
14071 # stage has priority.
14073 #---------------------------------------------------------------
14074 # Bond Strength BEGIN Section 1.
14075 # Set left and right bond strengths of individual tokens.
14076 #---------------------------------------------------------------
14078 # NOTE: NO_BREAK's set in this section first are HINTS which will
14079 # probably not be honored. Essential NO_BREAKS's should be set in
14080 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
14081 # of this subroutine.
14083 # Note that we are setting defaults in this section. The user
14084 # cannot change bond strengths but can cause the left and right
14085 # bond strengths of any token type to be swapped through the use of
14086 # the -wba and -wbb flags. In this way the user can determine if a
14087 # breakpoint token should appear at the end of one line or the
14088 # beginning of the next line.
14090 # The hash keys in this section are token types, plus the text of
14091 # certain keywords like 'or', 'and'.
14093 # no break around possible filehandle
14094 $left_bond_strength{'Z'} = NO_BREAK;
14095 $right_bond_strength{'Z'} = NO_BREAK;
14097 # never put a bare word on a new line:
14098 # example print (STDERR, "bla"); will fail with break after (
14099 $left_bond_strength{'w'} = NO_BREAK;
14101 # blanks always have infinite strength to force breaks after
14103 $right_bond_strength{'b'} = NO_BREAK;
14105 # try not to break on exponentation
14106 @_ = qw" ** .. ... <=> ";
14107 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14108 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14110 # The comma-arrow has very low precedence but not a good break point
14111 $left_bond_strength{'=>'} = NO_BREAK;
14112 $right_bond_strength{'=>'} = NOMINAL;
14114 # ok to break after label
14115 $left_bond_strength{'J'} = NO_BREAK;
14116 $right_bond_strength{'J'} = NOMINAL;
14117 $left_bond_strength{'j'} = STRONG;
14118 $right_bond_strength{'j'} = STRONG;
14119 $left_bond_strength{'A'} = STRONG;
14120 $right_bond_strength{'A'} = STRONG;
14122 $left_bond_strength{'->'} = STRONG;
14123 $right_bond_strength{'->'} = VERY_STRONG;
14125 $left_bond_strength{'CORE::'} = NOMINAL;
14126 $right_bond_strength{'CORE::'} = NO_BREAK;
14128 # breaking AFTER modulus operator is ok:
14130 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14131 @right_bond_strength{@_} =
14132 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
14134 # Break AFTER math operators * and /
14136 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14137 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
14139 # Break AFTER weakest math operators + and -
14140 # Make them weaker than * but a bit stronger than '.'
14142 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14143 @right_bond_strength{@_} =
14144 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
14146 # breaking BEFORE these is just ok:
14148 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14149 @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
14151 # breaking before the string concatenation operator seems best
14152 # because it can be hard to see at the end of a line
14153 $right_bond_strength{'.'} = STRONG;
14154 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
14157 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14158 @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
14160 # make these a little weaker than nominal so that they get
14161 # favored for end-of-line characters
14162 @_ = qw"!= == =~ !~ ~~ !~~";
14163 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14164 @right_bond_strength{@_} =
14165 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
14167 # break AFTER these
14168 @_ = qw" < > | & >= <=";
14169 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
14170 @right_bond_strength{@_} =
14171 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
14173 # breaking either before or after a quote is ok
14174 # but bias for breaking before a quote
14175 $left_bond_strength{'Q'} = NOMINAL;
14176 $right_bond_strength{'Q'} = NOMINAL + 0.02;
14177 $left_bond_strength{'q'} = NOMINAL;
14178 $right_bond_strength{'q'} = NOMINAL;
14180 # starting a line with a keyword is usually ok
14181 $left_bond_strength{'k'} = NOMINAL;
14183 # we usually want to bond a keyword strongly to what immediately
14184 # follows, rather than leaving it stranded at the end of a line
14185 $right_bond_strength{'k'} = STRONG;
14187 $left_bond_strength{'G'} = NOMINAL;
14188 $right_bond_strength{'G'} = STRONG;
14190 # assignment operators
14192 = **= += *= &= <<= &&=
14193 -= /= |= >>= ||= //=
14198 # Default is to break AFTER various assignment operators
14199 @left_bond_strength{@_} = (STRONG) x scalar(@_);
14200 @right_bond_strength{@_} =
14201 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
14203 # Default is to break BEFORE '&&' and '||' and '//'
14204 # set strength of '||' to same as '=' so that chains like
14205 # $a = $b || $c || $d will break before the first '||'
14206 $right_bond_strength{'||'} = NOMINAL;
14207 $left_bond_strength{'||'} = $right_bond_strength{'='};
14209 # same thing for '//'
14210 $right_bond_strength{'//'} = NOMINAL;
14211 $left_bond_strength{'//'} = $right_bond_strength{'='};
14213 # set strength of && a little higher than ||
14214 $right_bond_strength{'&&'} = NOMINAL;
14215 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
14217 $left_bond_strength{';'} = VERY_STRONG;
14218 $right_bond_strength{';'} = VERY_WEAK;
14219 $left_bond_strength{'f'} = VERY_STRONG;
14221 # make right strength of for ';' a little less than '='
14222 # to make for contents break after the ';' to avoid this:
14223 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
14224 # $number_of_fields )
14225 # and make it weaker than ',' and 'and' too
14226 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
14228 # The strengths of ?/: should be somewhere between
14229 # an '=' and a quote (NOMINAL),
14230 # make strength of ':' slightly less than '?' to help
14231 # break long chains of ? : after the colons
14232 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
14233 $right_bond_strength{':'} = NO_BREAK;
14234 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
14235 $right_bond_strength{'?'} = NO_BREAK;
14237 $left_bond_strength{','} = VERY_STRONG;
14238 $right_bond_strength{','} = VERY_WEAK;
14240 # remaining digraphs and trigraphs not defined above
14241 @_ = qw( :: <> ++ --);
14242 @left_bond_strength{@_} = (WEAK) x scalar(@_);
14243 @right_bond_strength{@_} = (STRONG) x scalar(@_);
14245 # Set bond strengths of certain keywords
14246 # make 'or', 'err', 'and' slightly weaker than a ','
14247 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
14248 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
14249 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
14250 $left_bond_strength{'xor'} = NOMINAL;
14251 $right_bond_strength{'and'} = NOMINAL;
14252 $right_bond_strength{'or'} = NOMINAL;
14253 $right_bond_strength{'err'} = NOMINAL;
14254 $right_bond_strength{'xor'} = STRONG;
14256 #---------------------------------------------------------------
14257 # Bond Strength BEGIN Section 2.
14258 # Set binary rules for bond strengths between certain token types.
14259 #---------------------------------------------------------------
14261 # We have a little problem making tables which apply to the
14262 # container tokens. Here is a list of container tokens and
14265 # type tokens // meaning
14266 # { {, [, ( // indent
14267 # } }, ], ) // outdent
14268 # [ [ // left non-structural [ (enclosing an array index)
14269 # ] ] // right non-structural square bracket
14270 # ( ( // left non-structural paren
14271 # ) ) // right non-structural paren
14272 # L { // left non-structural curly brace (enclosing a key)
14273 # R } // right non-structural curly brace
14275 # Some rules apply to token types and some to just the token
14276 # itself. We solve the problem by combining type and token into a
14277 # new hash key for the container types.
14279 # If a rule applies to a token 'type' then we need to make rules
14280 # for each of these 'type.token' combinations:
14291 # If a rule applies to a token then we need to make rules for
14292 # these 'type.token' combinations:
14301 # allow long lines before final { in an if statement, as in:
14306 # Otherwise, the line before the { tends to be too short.
14308 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
14309 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
14311 # break on something like '} (', but keep this stronger than a ','
14312 # example is in 'howe.pl'
14313 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14314 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14316 # keep matrix and hash indices together
14317 # but make them a little below STRONG to allow breaking open
14318 # something like {'some-word'}{'some-very-long-word'} at the }{
14320 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14321 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14322 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14323 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14325 # increase strength to the point where a break in the following
14326 # will be after the opening paren rather than at the arrow:
14328 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
14330 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14331 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14332 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14333 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14334 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14335 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14337 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14338 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14339 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14340 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14342 #---------------------------------------------------------------
14343 # Binary NO_BREAK rules
14344 #---------------------------------------------------------------
14346 # use strict requires that bare word and => not be separated
14347 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
14348 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
14350 # Never break between a bareword and a following paren because
14351 # perl may give an error. For example, if a break is placed
14352 # between 'to_filehandle' and its '(' the following line will
14353 # give a syntax error [Carp.pm]: my( $no) =fileno(
14354 # to_filehandle( $in)) ;
14355 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
14356 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
14357 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
14358 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
14360 # use strict requires that bare word within braces not start new
14362 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
14364 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
14366 # use strict requires that bare word and => not be separated
14367 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
14369 # use strict does not allow separating type info from trailing { }
14370 # testfile is readmail.pl
14371 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
14372 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
14374 # As a defensive measure, do not break between a '(' and a
14375 # filehandle. In some cases, this can cause an error. For
14376 # example, the following program works:
14383 # But this program fails:
14391 # This is normally only a problem with the 'extrude' option
14392 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
14393 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
14395 # never break between sub name and opening paren
14396 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
14397 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
14399 # keep '}' together with ';'
14400 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
14402 # Breaking before a ++ can cause perl to guess wrong. For
14403 # example the following line will cause a syntax error
14404 # with -extrude if we break between '$i' and '++' [fixstyle2]
14405 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
14406 $nobreak_lhs{'++'} = NO_BREAK;
14408 # Do not break before a possible file handle
14409 $nobreak_lhs{'Z'} = NO_BREAK;
14411 # use strict hates bare words on any new line. For
14412 # example, a break before the underscore here provokes the
14413 # wrath of use strict:
14414 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
14415 $nobreak_rhs{'F'} = NO_BREAK;
14416 $nobreak_rhs{'CORE::'} = NO_BREAK;
14418 #---------------------------------------------------------------
14419 # Bond Strength BEGIN Section 3.
14420 # Define tables and values for applying a small bias to the above
14422 #---------------------------------------------------------------
14423 # Adding a small 'bias' to strengths is a simple way to make a line
14424 # break at the first of a sequence of identical terms. For
14425 # example, to force long string of conditional operators to break
14426 # with each line ending in a ':', we can add a small number to the
14427 # bond strength of each ':' (colon.t)
14428 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
14429 $delta_bias = 0.0001; # a very small strength level
14433 # patch-its always ok to break at end of line
14434 $nobreak_to_go[$max_index_to_go] = 0;
14436 # we start a new set of bias values for each line
14438 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
14439 my $code_bias = -.01; # bias for closing block braces
14444 my $last_nonblank_type = $type;
14445 my $last_nonblank_token = $token;
14446 my $list_str = $left_bond_strength{'?'};
14448 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
14449 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
14452 # main loop to compute bond strengths between each pair of tokens
14453 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
14454 $last_type = $type;
14455 if ( $type ne 'b' ) {
14456 $last_nonblank_type = $type;
14457 $last_nonblank_token = $token;
14459 $type = $types_to_go[$i];
14461 # strength on both sides of a blank is the same
14462 if ( $type eq 'b' && $last_type ne 'b' ) {
14463 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
14467 $token = $tokens_to_go[$i];
14468 $block_type = $block_type_to_go[$i];
14470 $next_type = $types_to_go[$i_next];
14471 $next_token = $tokens_to_go[$i_next];
14472 $total_nesting_depth = $nesting_depth_to_go[$i_next];
14473 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14474 $next_nonblank_type = $types_to_go[$i_next_nonblank];
14475 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14477 # We are computing the strength of the bond between the current
14478 # token and the NEXT token.
14480 #---------------------------------------------------------------
14481 # Bond Strength Section 1:
14482 # First Approximation.
14483 # Use minimum of individual left and right tabulated bond
14485 #---------------------------------------------------------------
14486 my $bsr = $right_bond_strength{$type};
14487 my $bsl = $left_bond_strength{$next_nonblank_type};
14489 # define right bond strengths of certain keywords
14490 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
14491 $bsr = $right_bond_strength{$token};
14493 elsif ( $token eq 'ne' or $token eq 'eq' ) {
14497 # set terminal bond strength to the nominal value
14498 # this will cause good preceding breaks to be retained
14499 if ( $i_next_nonblank > $max_index_to_go ) {
14503 # define right bond strengths of certain keywords
14504 if ( $next_nonblank_type eq 'k'
14505 && defined( $left_bond_strength{$next_nonblank_token} ) )
14507 $bsl = $left_bond_strength{$next_nonblank_token};
14509 elsif ($next_nonblank_token eq 'ne'
14510 or $next_nonblank_token eq 'eq' )
14514 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
14515 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
14518 # Use the minimum of the left and right strengths. Note: it might
14519 # seem that we would want to keep a NO_BREAK if either token has
14520 # this value. This didn't work, for example because in an arrow
14521 # list, it prevents the comma from separating from the following
14522 # bare word (which is probably quoted by its arrow). So necessary
14523 # NO_BREAK's have to be handled as special cases in the final
14525 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
14526 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
14527 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
14528 my $bond_str_1 = $bond_str;
14530 #---------------------------------------------------------------
14531 # Bond Strength Section 2:
14532 # Apply hardwired rules..
14533 #---------------------------------------------------------------
14535 # Patch to put terminal or clauses on a new line: Weaken the bond
14536 # at an || followed by die or similar keyword to make the terminal
14537 # or clause fall on a new line, like this:
14539 # my $class = shift
14540 # || die "Cannot add broadcast: No class identifier found";
14542 # Otherwise the break will be at the previous '=' since the || and
14543 # = have the same starting strength and the or is biased, like
14547 # shift || die "Cannot add broadcast: No class identifier found";
14549 # In any case if the user places a break at either the = or the ||
14550 # it should remain there.
14551 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
14552 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
14553 if ( $want_break_before{$token} && $i > 0 ) {
14554 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
14557 $bond_str -= $delta_bias;
14562 # good to break after end of code blocks
14563 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
14565 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
14566 $code_bias += $delta_bias;
14569 if ( $type eq 'k' ) {
14571 # allow certain control keywords to stand out
14572 if ( $next_nonblank_type eq 'k'
14573 && $is_last_next_redo_return{$token} )
14575 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
14578 # Don't break after keyword my. This is a quick fix for a
14579 # rare problem with perl. An example is this line from file
14582 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
14583 # $this->{'question'} ) )
14585 if ( $token eq 'my' ) {
14586 $bond_str = NO_BREAK;
14591 # good to break before 'if', 'unless', etc
14592 if ( $is_if_brace_follower{$next_nonblank_token} ) {
14593 $bond_str = VERY_WEAK;
14596 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
14598 # FIXME: needs more testing
14599 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
14600 $bond_str = $list_str if ( $bond_str > $list_str );
14603 # keywords like 'unless', 'if', etc, within statements
14605 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
14606 $bond_str = VERY_WEAK / 1.05;
14610 # try not to break before a comma-arrow
14611 elsif ( $next_nonblank_type eq '=>' ) {
14612 if ( $bond_str < STRONG ) { $bond_str = STRONG }
14615 #---------------------------------------------------------------
14616 # Additional hardwired NOBREAK rules
14617 #---------------------------------------------------------------
14619 # map1.t -- correct for a quirk in perl
14621 && $next_nonblank_type eq 'i'
14622 && $last_nonblank_type eq 'k'
14623 && $is_sort_map_grep{$last_nonblank_token} )
14625 # /^(sort|map|grep)$/ )
14627 $bond_str = NO_BREAK;
14630 # extrude.t: do not break before paren at:
14632 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
14633 $bond_str = NO_BREAK;
14636 # in older version of perl, use strict can cause problems with
14637 # breaks before bare words following opening parens. For example,
14638 # this will fail under older versions if a break is made between
14639 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
14640 # command"); close MAIL;
14641 if ( $type eq '{' ) {
14643 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
14645 # but it's fine to break if the word is followed by a '=>'
14646 # or if it is obviously a sub call
14647 my $i_next_next_nonblank = $i_next_nonblank + 1;
14648 my $next_next_type = $types_to_go[$i_next_next_nonblank];
14649 if ( $next_next_type eq 'b'
14650 && $i_next_nonblank < $max_index_to_go )
14652 $i_next_next_nonblank++;
14653 $next_next_type = $types_to_go[$i_next_next_nonblank];
14656 # We'll check for an old breakpoint and keep a leading
14657 # bareword if it was that way in the input file.
14658 # Presumably it was ok that way. For example, the
14659 # following would remain unchanged:
14662 # January, February, March, April,
14663 # May, June, July, August,
14664 # September, October, November, December,
14667 # This should be sufficient:
14669 !$old_breakpoint_to_go[$i]
14670 && ( $next_next_type eq ','
14671 || $next_next_type eq '}' )
14674 $bond_str = NO_BREAK;
14679 # Do not break between a possible filehandle and a ? or / and do
14680 # not introduce a break after it if there is no blank
14682 elsif ( $type eq 'Z' ) {
14687 # if there is no blank and we do not want one. Examples:
14688 # print $x++ # do not break after $x
14689 # print HTML"HELLO" # break ok after HTML
14692 && defined( $want_left_space{$next_type} )
14693 && $want_left_space{$next_type} == WS_NO
14696 # or we might be followed by the start of a quote
14697 || $next_nonblank_type =~ /^[\/\?]$/
14700 $bond_str = NO_BREAK;
14704 # Breaking before a ? before a quote can cause trouble if
14705 # they are not separated by a blank.
14706 # Example: a syntax error occurs if you break before the ? here
14707 # my$logic=join$all?' && ':' || ',@regexps;
14708 # From: Professional_Perl_Programming_Code/multifind.pl
14709 if ( $next_nonblank_type eq '?' ) {
14710 $bond_str = NO_BREAK
14711 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
14714 # Breaking before a . followed by a number
14715 # can cause trouble if there is no intervening space
14716 # Example: a syntax error occurs if you break before the .2 here
14717 # $str .= pack($endian.2, ensurrogate($ord));
14718 # From: perl58/Unicode.pm
14719 elsif ( $next_nonblank_type eq '.' ) {
14720 $bond_str = NO_BREAK
14721 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
14724 # patch to put cuddled elses back together when on multiple
14725 # lines, as in: } \n else \n { \n
14726 if ($rOpts_cuddled_else) {
14728 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
14729 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
14731 $bond_str = NO_BREAK;
14734 my $bond_str_2 = $bond_str;
14736 #---------------------------------------------------------------
14737 # End of hardwired rules
14738 #---------------------------------------------------------------
14740 #---------------------------------------------------------------
14741 # Bond Strength Section 3:
14742 # Apply table rules. These have priority over the above
14744 #---------------------------------------------------------------
14746 my $tabulated_bond_str;
14748 my $rtype = $next_nonblank_type;
14749 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
14750 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
14751 $rtype = $next_nonblank_type . $next_nonblank_token;
14754 if ( $binary_bond_strength{$ltype}{$rtype} ) {
14755 $bond_str = $binary_bond_strength{$ltype}{$rtype};
14756 $tabulated_bond_str = $bond_str;
14759 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
14760 $bond_str = NO_BREAK;
14761 $tabulated_bond_str = $bond_str;
14763 my $bond_str_3 = $bond_str;
14765 # If the hardwired rules conflict with the tabulated bond
14766 # strength then there is an inconsistency that should be fixed
14767 FORMATTER_DEBUG_FLAG_BOND_TABLES
14768 && $tabulated_bond_str
14770 && $bond_str_1 != $bond_str_2
14771 && $bond_str_2 != $tabulated_bond_str
14774 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
14777 #-----------------------------------------------------------------
14778 # Bond Strength Section 4:
14779 # Modify strengths of certain tokens which often occur in sequence
14780 # by adding a small bias to each one in turn so that the breaks
14781 # occur from left to right.
14783 # Note that we only changing strengths by small amounts here,
14784 # and usually increasing, so we should not be altering any NO_BREAKs.
14785 # Other routines which check for NO_BREAKs will use a tolerance
14786 # of one to avoid any problem.
14787 #-----------------------------------------------------------------
14789 # The bias tables use special keys
14790 my $left_key = bias_table_key( $type, $token );
14792 bias_table_key( $next_nonblank_type, $next_nonblank_token );
14794 # add any bias set by sub scan_list at old comma break points.
14795 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
14798 elsif ( defined( $bias{$left_key} ) ) {
14799 if ( !$want_break_before{$left_key} ) {
14800 $bias{$left_key} += $delta_bias;
14801 $bond_str += $bias{$left_key};
14806 if ( defined( $bias{$right_key} ) ) {
14807 if ( $want_break_before{$right_key} ) {
14809 # for leading '.' align all but 'short' quotes; the idea
14810 # is to not place something like "\n" on a single line.
14811 if ( $right_key eq '.' ) {
14813 $last_nonblank_type eq '.'
14816 $rOpts_short_concatenation_item_length )
14817 && ( $token !~ /^[\)\]\}]$/ )
14820 $bias{$right_key} += $delta_bias;
14824 $bias{$right_key} += $delta_bias;
14826 $bond_str += $bias{$right_key};
14829 my $bond_str_4 = $bond_str;
14831 #---------------------------------------------------------------
14832 # Bond Strength Section 5:
14833 # Fifth Approximation.
14834 # Take nesting depth into account by adding the nesting depth
14835 # to the bond strength.
14836 #---------------------------------------------------------------
14839 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
14840 if ( $total_nesting_depth > 0 ) {
14841 $strength = $bond_str + $total_nesting_depth;
14844 $strength = $bond_str;
14848 $strength = NO_BREAK;
14851 # always break after side comment
14852 if ( $type eq '#' ) { $strength = 0 }
14854 $bond_strength_to_go[$i] = $strength;
14856 FORMATTER_DEBUG_FLAG_BOND && do {
14857 my $str = substr( $token, 0, 15 );
14858 $str .= ' ' x ( 16 - length($str) );
14860 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
14863 } ## end sub set_bond_strengths
14866 sub pad_array_to_go {
14868 # to simplify coding in scan_list and set_bond_strengths, it helps
14869 # to create some extra blank tokens at the end of the arrays
14870 $tokens_to_go[ $max_index_to_go + 1 ] = '';
14871 $tokens_to_go[ $max_index_to_go + 2 ] = '';
14872 $types_to_go[ $max_index_to_go + 1 ] = 'b';
14873 $types_to_go[ $max_index_to_go + 2 ] = 'b';
14874 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
14875 $nesting_depth_to_go[$max_index_to_go];
14878 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
14879 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
14881 # shouldn't happen:
14882 unless ( get_saw_brace_error() ) {
14884 "Program bug in scan_list: hit nesting error which should have been caught\n"
14886 report_definite_bug();
14890 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
14895 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
14896 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
14900 { # begin scan_list
14903 $block_type, $current_depth,
14905 $i_last_nonblank_token, $last_colon_sequence_number,
14906 $last_nonblank_token, $last_nonblank_type,
14907 $last_nonblank_block_type, $last_old_breakpoint_count,
14908 $minimum_depth, $next_nonblank_block_type,
14909 $next_nonblank_token, $next_nonblank_type,
14910 $old_breakpoint_count, $starting_breakpoint_count,
14911 $starting_depth, $token,
14912 $type, $type_sequence,
14916 @breakpoint_stack, @breakpoint_undo_stack,
14917 @comma_index, @container_type,
14918 @identifier_count_stack, @index_before_arrow,
14919 @interrupted_list, @item_count_stack,
14920 @last_comma_index, @last_dot_index,
14921 @last_nonblank_type, @old_breakpoint_count_stack,
14922 @opening_structure_index_stack, @rfor_semicolon_list,
14923 @has_old_logical_breakpoints, @rand_or_list,
14927 # routine to define essential variables when we go 'up' to
14929 sub check_for_new_minimum_depth {
14931 if ( $depth < $minimum_depth ) {
14933 $minimum_depth = $depth;
14935 # these arrays need not retain values between calls
14936 $breakpoint_stack[$depth] = $starting_breakpoint_count;
14937 $container_type[$depth] = "";
14938 $identifier_count_stack[$depth] = 0;
14939 $index_before_arrow[$depth] = -1;
14940 $interrupted_list[$depth] = 1;
14941 $item_count_stack[$depth] = 0;
14942 $last_nonblank_type[$depth] = "";
14943 $opening_structure_index_stack[$depth] = -1;
14945 $breakpoint_undo_stack[$depth] = undef;
14946 $comma_index[$depth] = undef;
14947 $last_comma_index[$depth] = undef;
14948 $last_dot_index[$depth] = undef;
14949 $old_breakpoint_count_stack[$depth] = undef;
14950 $has_old_logical_breakpoints[$depth] = 0;
14951 $rand_or_list[$depth] = [];
14952 $rfor_semicolon_list[$depth] = [];
14953 $i_equals[$depth] = -1;
14955 # these arrays must retain values between calls
14956 if ( !defined( $has_broken_sublist[$depth] ) ) {
14957 $dont_align[$depth] = 0;
14958 $has_broken_sublist[$depth] = 0;
14959 $want_comma_break[$depth] = 0;
14964 # routine to decide which commas to break at within a container;
14966 # $bp_count = number of comma breakpoints set
14967 # $do_not_break_apart = a flag indicating if container need not
14969 sub set_comma_breakpoints {
14973 my $do_not_break_apart = 0;
14976 if ( $item_count_stack[$dd] ) {
14978 # handle commas not in containers...
14979 if ( $dont_align[$dd] ) {
14980 do_uncontained_comma_breaks($dd);
14983 # handle commas within containers...
14985 my $fbc = $forced_breakpoint_count;
14987 # always open comma lists not preceded by keywords,
14988 # barewords, identifiers (that is, anything that doesn't
14989 # look like a function call)
14990 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
14992 set_comma_breakpoints_do(
14994 $opening_structure_index_stack[$dd],
14996 $item_count_stack[$dd],
14997 $identifier_count_stack[$dd],
14999 $next_nonblank_type,
15000 $container_type[$dd],
15001 $interrupted_list[$dd],
15002 \$do_not_break_apart,
15005 $bp_count = $forced_breakpoint_count - $fbc;
15006 $do_not_break_apart = 0 if $must_break_open;
15009 return ( $bp_count, $do_not_break_apart );
15012 sub do_uncontained_comma_breaks {
15014 # Handle commas not in containers...
15015 # This is a catch-all routine for commas that we
15016 # don't know what to do with because the don't fall
15017 # within containers. We will bias the bond strength
15018 # to break at commas which ended lines in the input
15019 # file. This usually works better than just trying
15020 # to put as many items on a line as possible. A
15021 # downside is that if the input file is garbage it
15022 # won't work very well. However, the user can always
15023 # prevent following the old breakpoints with the
15027 my $old_comma_break_count = 0;
15028 foreach my $ii ( @{ $comma_index[$dd] } ) {
15029 if ( $old_breakpoint_to_go[$ii] ) {
15030 $old_comma_break_count++;
15031 $bond_strength_to_go[$ii] = $bias;
15033 # reduce bias magnitude to force breaks in order
15038 # Also put a break before the first comma if
15039 # (1) there was a break there in the input, and
15040 # (2) there was exactly one old break before the first comma break
15041 # (3) OLD: there are multiple old comma breaks
15042 # (3) NEW: there are one or more old comma breaks (see return example)
15044 # For example, we will follow the user and break after
15045 # 'print' in this snippet:
15047 # "conformability (Not the same dimension)\n",
15048 # "\t", $have, " is ", text_unit($hu), "\n",
15049 # "\t", $want, " is ", text_unit($wu), "\n",
15052 # Another example, just one comma, where we will break after
15055 # $x * cos($a) - $y * sin($a),
15056 # $x * sin($a) + $y * cos($a);
15058 # Breaking a print statement:
15060 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
15061 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
15063 # But we will not force a break after the opening paren here
15064 # (causes a blinker):
15065 # $heap->{stream}->set_output_filter(
15066 # poe::filter::reference->new('myotherfreezer') ),
15069 my $i_first_comma = $comma_index[$dd]->[0];
15070 if ( $old_breakpoint_to_go[$i_first_comma] ) {
15071 my $level_comma = $levels_to_go[$i_first_comma];
15074 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
15075 if ( $old_breakpoint_to_go[$ii] ) {
15077 last if ( $obp_count > 1 );
15079 if ( $levels_to_go[$ii] == $level_comma );
15083 # Changed rule from multiple old commas to just one here:
15084 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
15086 # Do not to break before an opening token because
15087 # it can lead to "blinkers".
15088 my $ibreakm = $ibreak;
15089 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
15090 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
15092 set_forced_breakpoint($ibreak);
15098 my %is_logical_container;
15101 @_ = qw# if elsif unless while and or err not && | || ? : ! #;
15102 @is_logical_container{@_} = (1) x scalar(@_);
15105 sub set_for_semicolon_breakpoints {
15107 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
15108 set_forced_breakpoint($_);
15112 sub set_logical_breakpoints {
15115 $item_count_stack[$dd] == 0
15116 && $is_logical_container{ $container_type[$dd] }
15118 || $has_old_logical_breakpoints[$dd]
15122 # Look for breaks in this order:
15125 foreach my $i ( 0 .. 3 ) {
15126 if ( $rand_or_list[$dd][$i] ) {
15127 foreach ( @{ $rand_or_list[$dd][$i] } ) {
15128 set_forced_breakpoint($_);
15131 # break at any 'if' and 'unless' too
15132 foreach ( @{ $rand_or_list[$dd][4] } ) {
15133 set_forced_breakpoint($_);
15135 $rand_or_list[$dd] = [];
15142 sub is_unbreakable_container {
15144 # never break a container of one of these types
15145 # because bad things can happen (map1.t)
15147 $is_sort_map_grep{ $container_type[$dd] };
15152 # This routine is responsible for setting line breaks for all lists,
15153 # so that hierarchical structure can be displayed and so that list
15154 # items can be vertically aligned. The output of this routine is
15155 # stored in the array @forced_breakpoint_to_go, which is used to set
15156 # final breakpoints.
15158 $starting_depth = $nesting_depth_to_go[0];
15161 $current_depth = $starting_depth;
15163 $last_colon_sequence_number = -1;
15164 $last_nonblank_token = ';';
15165 $last_nonblank_type = ';';
15166 $last_nonblank_block_type = ' ';
15167 $last_old_breakpoint_count = 0;
15168 $minimum_depth = $current_depth + 1; # forces update in check below
15169 $old_breakpoint_count = 0;
15170 $starting_breakpoint_count = $forced_breakpoint_count;
15173 $type_sequence = '';
15175 my $total_depth_variation = 0;
15176 my $i_old_assignment_break;
15177 my $depth_last = $starting_depth;
15179 check_for_new_minimum_depth($current_depth);
15181 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
15182 my $want_previous_breakpoint = -1;
15184 my $saw_good_breakpoint;
15185 my $i_line_end = -1;
15186 my $i_line_start = -1;
15188 # loop over all tokens in this batch
15189 while ( ++$i <= $max_index_to_go ) {
15190 if ( $type ne 'b' ) {
15191 $i_last_nonblank_token = $i - 1;
15192 $last_nonblank_type = $type;
15193 $last_nonblank_token = $token;
15194 $last_nonblank_block_type = $block_type;
15195 } ## end if ( $type ne 'b' )
15196 $type = $types_to_go[$i];
15197 $block_type = $block_type_to_go[$i];
15198 $token = $tokens_to_go[$i];
15199 $type_sequence = $type_sequence_to_go[$i];
15200 my $next_type = $types_to_go[ $i + 1 ];
15201 my $next_token = $tokens_to_go[ $i + 1 ];
15202 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15203 $next_nonblank_type = $types_to_go[$i_next_nonblank];
15204 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15205 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15207 # set break if flag was set
15208 if ( $want_previous_breakpoint >= 0 ) {
15209 set_forced_breakpoint($want_previous_breakpoint);
15210 $want_previous_breakpoint = -1;
15213 $last_old_breakpoint_count = $old_breakpoint_count;
15214 if ( $old_breakpoint_to_go[$i] ) {
15216 $i_line_start = $i_next_nonblank;
15218 $old_breakpoint_count++;
15220 # Break before certain keywords if user broke there and
15221 # this is a 'safe' break point. The idea is to retain
15222 # any preferred breaks for sequential list operations,
15223 # like a schwartzian transform.
15224 if ($rOpts_break_at_old_keyword_breakpoints) {
15226 $next_nonblank_type eq 'k'
15227 && $is_keyword_returning_list{$next_nonblank_token}
15228 && ( $type =~ /^[=\)\]\}Riw]$/
15230 && $is_keyword_returning_list{$token} )
15234 # we actually have to set this break next time through
15235 # the loop because if we are at a closing token (such
15236 # as '}') which forms a one-line block, this break might
15238 $want_previous_breakpoint = $i;
15239 } ## end if ( $next_nonblank_type...)
15240 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
15242 # Break before attributes if user broke there
15243 if ($rOpts_break_at_old_attribute_breakpoints) {
15244 if ( $next_nonblank_type eq 'A' ) {
15245 $want_previous_breakpoint = $i;
15249 # remember an = break as possible good break point
15250 if ( $is_assignment{$type} ) {
15251 $i_old_assignment_break = $i;
15253 elsif ( $is_assignment{$next_nonblank_type} ) {
15254 $i_old_assignment_break = $i_next_nonblank;
15256 } ## end if ( $old_breakpoint_to_go...)
15257 next if ( $type eq 'b' );
15258 $depth = $nesting_depth_to_go[ $i + 1 ];
15260 $total_depth_variation += abs( $depth - $depth_last );
15261 $depth_last = $depth;
15263 # safety check - be sure we always break after a comment
15264 # Shouldn't happen .. an error here probably means that the
15265 # nobreak flag did not get turned off correctly during
15267 if ( $type eq '#' ) {
15268 if ( $i != $max_index_to_go ) {
15270 "Non-fatal program bug: backup logic needed to break after a comment\n"
15272 report_definite_bug();
15273 $nobreak_to_go[$i] = 0;
15274 set_forced_breakpoint($i);
15275 } ## end if ( $i != $max_index_to_go)
15276 } ## end if ( $type eq '#' )
15278 # Force breakpoints at certain tokens in long lines.
15279 # Note that such breakpoints will be undone later if these tokens
15280 # are fully contained within parens on a line.
15283 # break before a keyword within a line
15287 # if one of these keywords:
15288 && $token =~ /^(if|unless|while|until|for)$/
15290 # but do not break at something like '1 while'
15291 && ( $last_nonblank_type ne 'n' || $i > 2 )
15293 # and let keywords follow a closing 'do' brace
15294 && $last_nonblank_block_type ne 'do'
15299 # or container is broken (by side-comment, etc)
15300 || ( $next_nonblank_token eq '('
15301 && $mate_index_to_go[$i_next_nonblank] < $i )
15305 set_forced_breakpoint( $i - 1 );
15306 } ## end if ( $type eq 'k' && $i...)
15308 # remember locations of '||' and '&&' for possible breaks if we
15309 # decide this is a long logical expression.
15310 if ( $type eq '||' ) {
15311 push @{ $rand_or_list[$depth][2] }, $i;
15312 ++$has_old_logical_breakpoints[$depth]
15313 if ( ( $i == $i_line_start || $i == $i_line_end )
15314 && $rOpts_break_at_old_logical_breakpoints );
15315 } ## end if ( $type eq '||' )
15316 elsif ( $type eq '&&' ) {
15317 push @{ $rand_or_list[$depth][3] }, $i;
15318 ++$has_old_logical_breakpoints[$depth]
15319 if ( ( $i == $i_line_start || $i == $i_line_end )
15320 && $rOpts_break_at_old_logical_breakpoints );
15321 } ## end elsif ( $type eq '&&' )
15322 elsif ( $type eq 'f' ) {
15323 push @{ $rfor_semicolon_list[$depth] }, $i;
15325 elsif ( $type eq 'k' ) {
15326 if ( $token eq 'and' ) {
15327 push @{ $rand_or_list[$depth][1] }, $i;
15328 ++$has_old_logical_breakpoints[$depth]
15329 if ( ( $i == $i_line_start || $i == $i_line_end )
15330 && $rOpts_break_at_old_logical_breakpoints );
15331 } ## end if ( $token eq 'and' )
15333 # break immediately at 'or's which are probably not in a logical
15334 # block -- but we will break in logical breaks below so that
15335 # they do not add to the forced_breakpoint_count
15336 elsif ( $token eq 'or' ) {
15337 push @{ $rand_or_list[$depth][0] }, $i;
15338 ++$has_old_logical_breakpoints[$depth]
15339 if ( ( $i == $i_line_start || $i == $i_line_end )
15340 && $rOpts_break_at_old_logical_breakpoints );
15341 if ( $is_logical_container{ $container_type[$depth] } ) {
15344 if ($is_long_line) { set_forced_breakpoint($i) }
15345 elsif ( ( $i == $i_line_start || $i == $i_line_end )
15346 && $rOpts_break_at_old_logical_breakpoints )
15348 $saw_good_breakpoint = 1;
15350 } ## end else [ if ( $is_logical_container...)]
15351 } ## end elsif ( $token eq 'or' )
15352 elsif ( $token eq 'if' || $token eq 'unless' ) {
15353 push @{ $rand_or_list[$depth][4] }, $i;
15354 if ( ( $i == $i_line_start || $i == $i_line_end )
15355 && $rOpts_break_at_old_logical_breakpoints )
15357 set_forced_breakpoint($i);
15359 } ## end elsif ( $token eq 'if' ||...)
15360 } ## end elsif ( $type eq 'k' )
15361 elsif ( $is_assignment{$type} ) {
15362 $i_equals[$depth] = $i;
15365 if ($type_sequence) {
15367 # handle any postponed closing breakpoints
15368 if ( $token =~ /^[\)\]\}\:]$/ ) {
15369 if ( $type eq ':' ) {
15370 $last_colon_sequence_number = $type_sequence;
15372 # retain break at a ':' line break
15373 if ( ( $i == $i_line_start || $i == $i_line_end )
15374 && $rOpts_break_at_old_ternary_breakpoints )
15377 set_forced_breakpoint($i);
15379 # break at previous '='
15380 if ( $i_equals[$depth] > 0 ) {
15381 set_forced_breakpoint( $i_equals[$depth] );
15382 $i_equals[$depth] = -1;
15384 } ## end if ( ( $i == $i_line_start...))
15385 } ## end if ( $type eq ':' )
15386 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
15387 my $inc = ( $type eq ':' ) ? 0 : 1;
15388 set_forced_breakpoint( $i - $inc );
15389 delete $postponed_breakpoint{$type_sequence};
15391 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
15393 # set breaks at ?/: if they will get separated (and are
15394 # not a ?/: chain), or if the '?' is at the end of the
15396 elsif ( $token eq '?' ) {
15397 my $i_colon = $mate_index_to_go[$i];
15399 $i_colon <= 0 # the ':' is not in this batch
15400 || $i == 0 # this '?' is the first token of the line
15402 $max_index_to_go # or this '?' is the last token
15406 # don't break at a '?' if preceded by ':' on
15407 # this line of previous ?/: pair on this line.
15408 # This is an attempt to preserve a chain of ?/:
15409 # expressions (elsif2.t). And don't break if
15410 # this has a side comment.
15411 set_forced_breakpoint($i)
15413 $type_sequence == (
15414 $last_colon_sequence_number +
15415 TYPE_SEQUENCE_INCREMENT
15417 || $tokens_to_go[$max_index_to_go] eq '#'
15419 set_closing_breakpoint($i);
15420 } ## end if ( $i_colon <= 0 ||...)
15421 } ## end elsif ( $token eq '?' )
15422 } ## end if ($type_sequence)
15424 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
15426 #------------------------------------------------------------
15427 # Handle Increasing Depth..
15429 # prepare for a new list when depth increases
15430 # token $i is a '(','{', or '['
15431 #------------------------------------------------------------
15432 if ( $depth > $current_depth ) {
15434 $breakpoint_stack[$depth] = $forced_breakpoint_count;
15435 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
15436 $has_broken_sublist[$depth] = 0;
15437 $identifier_count_stack[$depth] = 0;
15438 $index_before_arrow[$depth] = -1;
15439 $interrupted_list[$depth] = 0;
15440 $item_count_stack[$depth] = 0;
15441 $last_comma_index[$depth] = undef;
15442 $last_dot_index[$depth] = undef;
15443 $last_nonblank_type[$depth] = $last_nonblank_type;
15444 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
15445 $opening_structure_index_stack[$depth] = $i;
15446 $rand_or_list[$depth] = [];
15447 $rfor_semicolon_list[$depth] = [];
15448 $i_equals[$depth] = -1;
15449 $want_comma_break[$depth] = 0;
15450 $container_type[$depth] =
15451 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
15452 ? $last_nonblank_token
15454 $has_old_logical_breakpoints[$depth] = 0;
15456 # if line ends here then signal closing token to break
15457 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
15459 set_closing_breakpoint($i);
15462 # Not all lists of values should be vertically aligned..
15463 $dont_align[$depth] =
15465 # code BLOCKS are handled at a higher level
15466 ( $block_type ne "" )
15468 # certain paren lists
15469 || ( $type eq '(' ) && (
15471 # it does not usually look good to align a list of
15472 # identifiers in a parameter list, as in:
15473 # my($var1, $var2, ...)
15474 # (This test should probably be refined, for now I'm just
15475 # testing for any keyword)
15476 ( $last_nonblank_type eq 'k' )
15478 # a trailing '(' usually indicates a non-list
15479 || ( $next_nonblank_type eq '(' )
15482 # patch to outdent opening brace of long if/for/..
15483 # statements (like this one). See similar coding in
15484 # set_continuation breaks. We have also catch it here for
15485 # short line fragments which otherwise will not go through
15486 # set_continuation_breaks.
15490 # if we have the ')' but not its '(' in this batch..
15491 && ( $last_nonblank_token eq ')' )
15492 && $mate_index_to_go[$i_last_nonblank_token] < 0
15494 # and user wants brace to left
15495 && !$rOpts->{'opening-brace-always-on-right'}
15497 && ( $type eq '{' ) # should be true
15498 && ( $token eq '{' ) # should be true
15501 set_forced_breakpoint( $i - 1 );
15502 } ## end if ( $block_type && ( ...))
15503 } ## end if ( $depth > $current_depth)
15505 #------------------------------------------------------------
15506 # Handle Decreasing Depth..
15508 # finish off any old list when depth decreases
15509 # token $i is a ')','}', or ']'
15510 #------------------------------------------------------------
15511 elsif ( $depth < $current_depth ) {
15513 check_for_new_minimum_depth($depth);
15515 # force all outer logical containers to break after we see on
15517 $has_old_logical_breakpoints[$depth] ||=
15518 $has_old_logical_breakpoints[$current_depth];
15520 # Patch to break between ') {' if the paren list is broken.
15521 # There is similar logic in set_continuation_breaks for
15522 # non-broken lists.
15524 && $next_nonblank_block_type
15525 && $interrupted_list[$current_depth]
15526 && $next_nonblank_type eq '{'
15527 && !$rOpts->{'opening-brace-always-on-right'} )
15529 set_forced_breakpoint($i);
15530 } ## end if ( $token eq ')' && ...
15532 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
15534 # set breaks at commas if necessary
15535 my ( $bp_count, $do_not_break_apart ) =
15536 set_comma_breakpoints($current_depth);
15538 my $i_opening = $opening_structure_index_stack[$current_depth];
15539 my $saw_opening_structure = ( $i_opening >= 0 );
15541 # this term is long if we had to break at interior commas..
15542 my $is_long_term = $bp_count > 0;
15544 # If this is a short container with one or more comma arrows,
15545 # then we will mark it as a long term to open it if requested.
15546 # $rOpts_comma_arrow_breakpoints =
15547 # 0 - open only if comma precedes closing brace
15548 # 1 - stable: except for one line blocks
15549 # 2 - try to form 1 line blocks
15551 # 4 - always open up if vt=0
15552 # 5 - stable: even for one line blocks if vt=0
15553 if ( !$is_long_term
15554 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
15555 && $index_before_arrow[ $depth + 1 ] > 0
15556 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
15559 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
15560 || ( $rOpts_comma_arrow_breakpoints == 0
15561 && $last_nonblank_token eq ',' )
15562 || ( $rOpts_comma_arrow_breakpoints == 5
15563 && $old_breakpoint_to_go[$i_opening] );
15564 } ## end if ( !$is_long_term &&...)
15566 # mark term as long if the length between opening and closing
15567 # parens exceeds allowed line length
15568 if ( !$is_long_term && $saw_opening_structure ) {
15569 my $i_opening_minus = find_token_starting_list($i_opening);
15571 # Note: we have to allow for one extra space after a
15572 # closing token so that we do not strand a comma or
15573 # semicolon, hence the '>=' here (oneline.t)
15575 excess_line_length( $i_opening_minus, $i ) >= 0;
15576 } ## end if ( !$is_long_term &&...)
15578 # We've set breaks after all comma-arrows. Now we have to
15579 # undo them if this can be a one-line block
15580 # (the only breakpoints set will be due to comma-arrows)
15583 # user doesn't require breaking after all comma-arrows
15584 ( $rOpts_comma_arrow_breakpoints != 0 )
15585 && ( $rOpts_comma_arrow_breakpoints != 4 )
15587 # and if the opening structure is in this batch
15588 && $saw_opening_structure
15590 # and either on the same old line
15592 $old_breakpoint_count_stack[$current_depth] ==
15593 $last_old_breakpoint_count
15595 # or user wants to form long blocks with arrows
15596 || $rOpts_comma_arrow_breakpoints == 2
15599 # and we made some breakpoints between the opening and closing
15600 && ( $breakpoint_undo_stack[$current_depth] <
15601 $forced_breakpoint_undo_count )
15603 # and this block is short enough to fit on one line
15604 # Note: use < because need 1 more space for possible comma
15609 undo_forced_breakpoint_stack(
15610 $breakpoint_undo_stack[$current_depth] );
15611 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
15613 # now see if we have any comma breakpoints left
15614 my $has_comma_breakpoints =
15615 ( $breakpoint_stack[$current_depth] !=
15616 $forced_breakpoint_count );
15618 # update broken-sublist flag of the outer container
15619 $has_broken_sublist[$depth] =
15620 $has_broken_sublist[$depth]
15621 || $has_broken_sublist[$current_depth]
15623 || $has_comma_breakpoints;
15625 # Having come to the closing ')', '}', or ']', now we have to decide if we
15626 # should 'open up' the structure by placing breaks at the opening and
15627 # closing containers. This is a tricky decision. Here are some of the
15628 # basic considerations:
15630 # -If this is a BLOCK container, then any breakpoints will have already
15631 # been set (and according to user preferences), so we need do nothing here.
15633 # -If we have a comma-separated list for which we can align the list items,
15634 # then we need to do so because otherwise the vertical aligner cannot
15635 # currently do the alignment.
15637 # -If this container does itself contain a container which has been broken
15638 # open, then it should be broken open to properly show the structure.
15640 # -If there is nothing to align, and no other reason to break apart,
15641 # then do not do it.
15643 # We will not break open the parens of a long but 'simple' logical expression.
15646 # This is an example of a simple logical expression and its formatting:
15648 # if ( $bigwasteofspace1 && $bigwasteofspace2
15649 # || $bigwasteofspace3 && $bigwasteofspace4 )
15651 # Most people would prefer this than the 'spacey' version:
15654 # $bigwasteofspace1 && $bigwasteofspace2
15655 # || $bigwasteofspace3 && $bigwasteofspace4
15658 # To illustrate the rules for breaking logical expressions, consider:
15662 # and ( exists $ids_excl_uc{$id_uc}
15663 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
15665 # This is on the verge of being difficult to read. The current default is to
15666 # open it up like this:
15671 # and ( exists $ids_excl_uc{$id_uc}
15672 # or grep $id_uc =~ /$_/, @ids_excl_uc )
15675 # This is a compromise which tries to avoid being too dense and to spacey.
15676 # A more spaced version would be:
15682 # exists $ids_excl_uc{$id_uc}
15683 # or grep $id_uc =~ /$_/, @ids_excl_uc
15687 # Some people might prefer the spacey version -- an option could be added. The
15688 # innermost expression contains a long block '( exists $ids_... ')'.
15690 # Here is how the logic goes: We will force a break at the 'or' that the
15691 # innermost expression contains, but we will not break apart its opening and
15692 # closing containers because (1) it contains no multi-line sub-containers itself,
15693 # and (2) there is no alignment to be gained by breaking it open like this
15696 # exists $ids_excl_uc{$id_uc}
15697 # or grep $id_uc =~ /$_/, @ids_excl_uc
15700 # (although this looks perfectly ok and might be good for long expressions). The
15701 # outer 'if' container, though, contains a broken sub-container, so it will be
15702 # broken open to avoid too much density. Also, since it contains no 'or's, there
15703 # will be a forced break at its 'and'.
15705 # set some flags telling something about this container..
15706 my $is_simple_logical_expression = 0;
15707 if ( $item_count_stack[$current_depth] == 0
15708 && $saw_opening_structure
15709 && $tokens_to_go[$i_opening] eq '('
15710 && $is_logical_container{ $container_type[$current_depth] }
15714 # This seems to be a simple logical expression with
15715 # no existing breakpoints. Set a flag to prevent
15717 if ( !$has_comma_breakpoints ) {
15718 $is_simple_logical_expression = 1;
15721 # This seems to be a simple logical expression with
15722 # breakpoints (broken sublists, for example). Break
15723 # at all 'or's and '||'s.
15725 set_logical_breakpoints($current_depth);
15727 } ## end if ( $item_count_stack...)
15730 && @{ $rfor_semicolon_list[$current_depth] } )
15732 set_for_semicolon_breakpoints($current_depth);
15734 # open up a long 'for' or 'foreach' container to allow
15735 # leading term alignment unless -lp is used.
15736 $has_comma_breakpoints = 1
15737 unless $rOpts_line_up_parentheses;
15738 } ## end if ( $is_long_term && ...)
15742 # breaks for code BLOCKS are handled at a higher level
15745 # we do not need to break at the top level of an 'if'
15747 && !$is_simple_logical_expression
15749 ## modification to keep ': (' containers vertically tight;
15750 ## but probably better to let user set -vt=1 to avoid
15751 ## inconsistency with other paren types
15752 ## && ($container_type[$current_depth] ne ':')
15754 # otherwise, we require one of these reasons for breaking:
15757 # - this term has forced line breaks
15758 $has_comma_breakpoints
15760 # - the opening container is separated from this batch
15761 # for some reason (comment, blank line, code block)
15762 # - this is a non-paren container spanning multiple lines
15763 || !$saw_opening_structure
15765 # - this is a long block contained in another breakable
15768 && $container_environment_to_go[$i_opening] ne
15774 # For -lp option, we must put a breakpoint before
15775 # the token which has been identified as starting
15776 # this indentation level. This is necessary for
15777 # proper alignment.
15778 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
15780 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
15781 if ( $i_opening + 1 < $max_index_to_go
15782 && $types_to_go[ $i_opening + 1 ] eq 'b' )
15784 $item = $leading_spaces_to_go[ $i_opening + 2 ];
15786 if ( defined($item) ) {
15787 my $i_start_2 = $item->get_STARTING_INDEX();
15789 defined($i_start_2)
15791 # we are breaking after an opening brace, paren,
15792 # so don't break before it too
15793 && $i_start_2 ne $i_opening
15797 # Only break for breakpoints at the same
15798 # indentation level as the opening paren
15799 my $test1 = $nesting_depth_to_go[$i_opening];
15800 my $test2 = $nesting_depth_to_go[$i_start_2];
15801 if ( $test2 == $test1 ) {
15802 set_forced_breakpoint( $i_start_2 - 1 );
15804 } ## end if ( defined($i_start_2...))
15805 } ## end if ( defined($item) )
15806 } ## end if ( $rOpts_line_up_parentheses...)
15808 # break after opening structure.
15809 # note: break before closing structure will be automatic
15810 if ( $minimum_depth <= $current_depth ) {
15812 set_forced_breakpoint($i_opening)
15813 unless ( $do_not_break_apart
15814 || is_unbreakable_container($current_depth) );
15816 # break at ',' of lower depth level before opening token
15817 if ( $last_comma_index[$depth] ) {
15818 set_forced_breakpoint( $last_comma_index[$depth] );
15821 # break at '.' of lower depth level before opening token
15822 if ( $last_dot_index[$depth] ) {
15823 set_forced_breakpoint( $last_dot_index[$depth] );
15826 # break before opening structure if preceded by another
15827 # closing structure and a comma. This is normally
15828 # done by the previous closing brace, but not
15829 # if it was a one-line block.
15830 if ( $i_opening > 2 ) {
15832 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
15836 if ( $types_to_go[$i_prev] eq ','
15837 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
15839 set_forced_breakpoint($i_prev);
15842 # also break before something like ':(' or '?('
15845 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
15847 my $token_prev = $tokens_to_go[$i_prev];
15848 if ( $want_break_before{$token_prev} ) {
15849 set_forced_breakpoint($i_prev);
15851 } ## end elsif ( $types_to_go[$i_prev...])
15852 } ## end if ( $i_opening > 2 )
15853 } ## end if ( $minimum_depth <=...)
15855 # break after comma following closing structure
15856 if ( $next_type eq ',' ) {
15857 set_forced_breakpoint( $i + 1 );
15860 # break before an '=' following closing structure
15862 $is_assignment{$next_nonblank_type}
15863 && ( $breakpoint_stack[$current_depth] !=
15864 $forced_breakpoint_count )
15867 set_forced_breakpoint($i);
15868 } ## end if ( $is_assignment{$next_nonblank_type...})
15870 # break at any comma before the opening structure Added
15871 # for -lp, but seems to be good in general. It isn't
15872 # obvious how far back to look; the '5' below seems to
15873 # work well and will catch the comma in something like
15874 # push @list, myfunc( $param, $param, ..
15876 my $icomma = $last_comma_index[$depth];
15877 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
15878 unless ( $forced_breakpoint_to_go[$icomma] ) {
15879 set_forced_breakpoint($icomma);
15882 } # end logic to open up a container
15884 # Break open a logical container open if it was already open
15885 elsif ($is_simple_logical_expression
15886 && $has_old_logical_breakpoints[$current_depth] )
15888 set_logical_breakpoints($current_depth);
15891 # Handle long container which does not get opened up
15892 elsif ($is_long_term) {
15894 # must set fake breakpoint to alert outer containers that
15896 set_fake_breakpoint();
15897 } ## end elsif ($is_long_term)
15899 } ## end elsif ( $depth < $current_depth)
15901 #------------------------------------------------------------
15902 # Handle this token
15903 #------------------------------------------------------------
15905 $current_depth = $depth;
15907 # handle comma-arrow
15908 if ( $type eq '=>' ) {
15909 next if ( $last_nonblank_type eq '=>' );
15910 next if $rOpts_break_at_old_comma_breakpoints;
15911 next if $rOpts_comma_arrow_breakpoints == 3;
15912 $want_comma_break[$depth] = 1;
15913 $index_before_arrow[$depth] = $i_last_nonblank_token;
15915 } ## end if ( $type eq '=>' )
15917 elsif ( $type eq '.' ) {
15918 $last_dot_index[$depth] = $i;
15921 # Turn off alignment if we are sure that this is not a list
15922 # environment. To be safe, we will do this if we see certain
15923 # non-list tokens, such as ';', and also the environment is
15924 # not a list. Note that '=' could be in any of the = operators
15925 # (lextest.t). We can't just use the reported environment
15926 # because it can be incorrect in some cases.
15927 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
15928 && $container_environment_to_go[$i] ne 'LIST' )
15930 $dont_align[$depth] = 1;
15931 $want_comma_break[$depth] = 0;
15932 $index_before_arrow[$depth] = -1;
15933 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
15935 # now just handle any commas
15936 next unless ( $type eq ',' );
15938 $last_dot_index[$depth] = undef;
15939 $last_comma_index[$depth] = $i;
15941 # break here if this comma follows a '=>'
15942 # but not if there is a side comment after the comma
15943 if ( $want_comma_break[$depth] ) {
15945 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
15946 if ($rOpts_comma_arrow_breakpoints) {
15947 $want_comma_break[$depth] = 0;
15948 ##$index_before_arrow[$depth] = -1;
15953 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15955 # break before the previous token if it looks safe
15956 # Example of something that we will not try to break before:
15957 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
15958 # Also we don't want to break at a binary operator (like +):
15962 # $y - $R, -fill => 'black',
15964 my $ibreak = $index_before_arrow[$depth] - 1;
15966 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
15968 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
15969 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
15970 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
15972 # don't break pointer calls, such as the following:
15973 # File::Spec->curdir => 1,
15974 # (This is tokenized as adjacent 'w' tokens)
15975 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
15977 # And don't break before a comma, as in the following:
15978 # ( LONGER_THAN,=> 1,
15979 # EIGHTY_CHARACTERS,=> 2,
15980 # CAUSES_FORMATTING,=> 3,
15983 # This example is for -tso but should be general rule
15984 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
15985 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
15987 set_forced_breakpoint($ibreak);
15989 } ## end if ( $types_to_go[$ibreak...])
15990 } ## end if ( $ibreak > 0 && $tokens_to_go...)
15992 $want_comma_break[$depth] = 0;
15993 $index_before_arrow[$depth] = -1;
15995 # handle list which mixes '=>'s and ','s:
15996 # treat any list items so far as an interrupted list
15997 $interrupted_list[$depth] = 1;
15999 } ## end if ( $want_comma_break...)
16001 # break after all commas above starting depth
16002 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
16003 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
16007 # add this comma to the list..
16008 my $item_count = $item_count_stack[$depth];
16009 if ( $item_count == 0 ) {
16011 # but do not form a list with no opening structure
16014 # open INFILE_COPY, ">$input_file_copy"
16015 # or die ("very long message");
16017 if ( ( $opening_structure_index_stack[$depth] < 0 )
16018 && $container_environment_to_go[$i] eq 'BLOCK' )
16020 $dont_align[$depth] = 1;
16022 } ## end if ( $item_count == 0 )
16024 $comma_index[$depth][$item_count] = $i;
16025 ++$item_count_stack[$depth];
16026 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
16027 $identifier_count_stack[$depth]++;
16029 } ## end while ( ++$i <= $max_index_to_go)
16031 #-------------------------------------------
16032 # end of loop over all tokens in this batch
16033 #-------------------------------------------
16035 # set breaks for any unfinished lists ..
16036 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
16038 $interrupted_list[$dd] = 1;
16039 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
16040 set_comma_breakpoints($dd);
16041 set_logical_breakpoints($dd)
16042 if ( $has_old_logical_breakpoints[$dd] );
16043 set_for_semicolon_breakpoints($dd);
16045 # break open container...
16046 my $i_opening = $opening_structure_index_stack[$dd];
16047 set_forced_breakpoint($i_opening)
16049 is_unbreakable_container($dd)
16051 # Avoid a break which would place an isolated ' or "
16054 && $i_opening >= $max_index_to_go - 2
16055 && $token =~ /^['"]$/ )
16057 } ## end for ( my $dd = $current_depth...)
16059 # Return a flag indicating if the input file had some good breakpoints.
16060 # This flag will be used to force a break in a line shorter than the
16061 # allowed line length.
16062 if ( $has_old_logical_breakpoints[$current_depth] ) {
16063 $saw_good_breakpoint = 1;
16066 # A complex line with one break at an = has a good breakpoint.
16067 # This is not complex ($total_depth_variation=0):
16071 # This is complex ($total_depth_variation=6):
16073 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
16074 elsif ($i_old_assignment_break
16075 && $total_depth_variation > 4
16076 && $old_breakpoint_count == 1 )
16078 $saw_good_breakpoint = 1;
16079 } ## end elsif ( $i_old_assignment_break...)
16081 return $saw_good_breakpoint;
16082 } ## end sub scan_list
16085 sub find_token_starting_list {
16087 # When testing to see if a block will fit on one line, some
16088 # previous token(s) may also need to be on the line; particularly
16089 # if this is a sub call. So we will look back at least one
16090 # token. NOTE: This isn't perfect, but not critical, because
16091 # if we mis-identify a block, it will be wrapped and therefore
16092 # fixed the next time it is formatted.
16093 my $i_opening_paren = shift;
16094 my $i_opening_minus = $i_opening_paren;
16095 my $im1 = $i_opening_paren - 1;
16096 my $im2 = $i_opening_paren - 2;
16097 my $im3 = $i_opening_paren - 3;
16098 my $typem1 = $types_to_go[$im1];
16099 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
16100 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
16101 $i_opening_minus = $i_opening_paren;
16103 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
16104 $i_opening_minus = $im1 if $im1 >= 0;
16106 # walk back to improve length estimate
16107 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
16108 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
16109 $i_opening_minus = $j;
16111 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
16113 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
16114 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
16115 $i_opening_minus = $im2;
16117 return $i_opening_minus;
16120 { # begin set_comma_breakpoints_do
16122 my %is_keyword_with_special_leading_term;
16126 # These keywords have prototypes which allow a special leading item
16127 # followed by a list
16129 qw(formline grep kill map printf sprintf push chmod join pack unshift);
16130 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
16133 sub set_comma_breakpoints_do {
16135 # Given a list with some commas, set breakpoints at some of the
16136 # commas, if necessary, to make it easy to read. This list is
16139 $depth, $i_opening_paren, $i_closing_paren,
16140 $item_count, $identifier_count, $rcomma_index,
16141 $next_nonblank_type, $list_type, $interrupted,
16142 $rdo_not_break_apart, $must_break_open,
16145 # nothing to do if no commas seen
16146 return if ( $item_count < 1 );
16147 my $i_first_comma = $$rcomma_index[0];
16148 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
16149 my $i_last_comma = $i_true_last_comma;
16150 if ( $i_last_comma >= $max_index_to_go ) {
16151 $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
16152 return if ( $item_count < 1 );
16155 #---------------------------------------------------------------
16156 # find lengths of all items in the list to calculate page layout
16157 #---------------------------------------------------------------
16158 my $comma_count = $item_count;
16164 my @max_length = ( 0, 0 );
16165 my $first_term_length;
16166 my $i = $i_opening_paren;
16169 for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
16170 $is_odd = 1 - $is_odd;
16171 $i_prev_plus = $i + 1;
16172 $i = $$rcomma_index[$j];
16175 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
16177 ( $types_to_go[$i_prev_plus] eq 'b' )
16180 push @i_term_begin, $i_term_begin;
16181 push @i_term_end, $i_term_end;
16182 push @i_term_comma, $i;
16184 # note: currently adding 2 to all lengths (for comma and space)
16186 2 + token_sequence_length( $i_term_begin, $i_term_end );
16187 push @item_lengths, $length;
16190 $first_term_length = $length;
16194 if ( $length > $max_length[$is_odd] ) {
16195 $max_length[$is_odd] = $length;
16200 # now we have to make a distinction between the comma count and item
16201 # count, because the item count will be one greater than the comma
16202 # count if the last item is not terminated with a comma
16204 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
16205 ? $i_last_comma + 1
16208 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
16209 ? $i_closing_paren - 2
16210 : $i_closing_paren - 1;
16211 my $i_effective_last_comma = $i_last_comma;
16213 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
16215 if ( $last_item_length > 0 ) {
16217 # add 2 to length because other lengths include a comma and a blank
16218 $last_item_length += 2;
16219 push @item_lengths, $last_item_length;
16220 push @i_term_begin, $i_b + 1;
16221 push @i_term_end, $i_e;
16222 push @i_term_comma, undef;
16224 my $i_odd = $item_count % 2;
16226 if ( $last_item_length > $max_length[$i_odd] ) {
16227 $max_length[$i_odd] = $last_item_length;
16231 $i_effective_last_comma = $i_e + 1;
16233 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
16234 $identifier_count++;
16238 #---------------------------------------------------------------
16239 # End of length calculations
16240 #---------------------------------------------------------------
16242 #---------------------------------------------------------------
16243 # Compound List Rule 1:
16244 # Break at (almost) every comma for a list containing a broken
16245 # sublist. This has higher priority than the Interrupted List
16247 #---------------------------------------------------------------
16248 if ( $has_broken_sublist[$depth] ) {
16250 # Break at every comma except for a comma between two
16251 # simple, small terms. This prevents long vertical
16252 # columns of, say, just 0's.
16253 my $small_length = 10; # 2 + actual maximum length wanted
16255 # We'll insert a break in long runs of small terms to
16256 # allow alignment in uniform tables.
16257 my $skipped_count = 0;
16258 my $columns = table_columns_available($i_first_comma);
16259 my $fields = int( $columns / $small_length );
16260 if ( $rOpts_maximum_fields_per_table
16261 && $fields > $rOpts_maximum_fields_per_table )
16263 $fields = $rOpts_maximum_fields_per_table;
16265 my $max_skipped_count = $fields - 1;
16267 my $is_simple_last_term = 0;
16268 my $is_simple_next_term = 0;
16269 foreach my $j ( 0 .. $item_count ) {
16270 $is_simple_last_term = $is_simple_next_term;
16271 $is_simple_next_term = 0;
16272 if ( $j < $item_count
16273 && $i_term_end[$j] == $i_term_begin[$j]
16274 && $item_lengths[$j] <= $small_length )
16276 $is_simple_next_term = 1;
16279 if ( $is_simple_last_term
16280 && $is_simple_next_term
16281 && $skipped_count < $max_skipped_count )
16286 $skipped_count = 0;
16287 my $i = $i_term_comma[ $j - 1 ];
16288 last unless defined $i;
16289 set_forced_breakpoint($i);
16293 # always break at the last comma if this list is
16294 # interrupted; we wouldn't want to leave a terminal '{', for
16296 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
16300 #my ( $a, $b, $c ) = caller();
16301 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
16302 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
16303 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
16305 #---------------------------------------------------------------
16306 # Interrupted List Rule:
16307 # A list is forced to use old breakpoints if it was interrupted
16308 # by side comments or blank lines, or requested by user.
16309 #---------------------------------------------------------------
16310 if ( $rOpts_break_at_old_comma_breakpoints
16312 || $i_opening_paren < 0 )
16314 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
16318 #---------------------------------------------------------------
16319 # Looks like a list of items. We have to look at it and size it up.
16320 #---------------------------------------------------------------
16322 my $opening_token = $tokens_to_go[$i_opening_paren];
16323 my $opening_environment =
16324 $container_environment_to_go[$i_opening_paren];
16326 #-------------------------------------------------------------------
16327 # Return if this will fit on one line
16328 #-------------------------------------------------------------------
16330 my $i_opening_minus = find_token_starting_list($i_opening_paren);
16332 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
16334 #-------------------------------------------------------------------
16335 # Now we know that this block spans multiple lines; we have to set
16336 # at least one breakpoint -- real or fake -- as a signal to break
16337 # open any outer containers.
16338 #-------------------------------------------------------------------
16339 set_fake_breakpoint();
16341 # be sure we do not extend beyond the current list length
16342 if ( $i_effective_last_comma >= $max_index_to_go ) {
16343 $i_effective_last_comma = $max_index_to_go - 1;
16346 # Set a flag indicating if we need to break open to keep -lp
16347 # items aligned. This is necessary if any of the list terms
16348 # exceeds the available space after the '('.
16349 my $need_lp_break_open = $must_break_open;
16350 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
16351 my $columns_if_unbroken =
16352 maximum_line_length($i_opening_minus) -
16353 total_line_length( $i_opening_minus, $i_opening_paren );
16354 $need_lp_break_open =
16355 ( $max_length[0] > $columns_if_unbroken )
16356 || ( $max_length[1] > $columns_if_unbroken )
16357 || ( $first_term_length > $columns_if_unbroken );
16360 # Specify if the list must have an even number of fields or not.
16361 # It is generally safest to assume an even number, because the
16362 # list items might be a hash list. But if we can be sure that
16363 # it is not a hash, then we can allow an odd number for more
16365 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
16367 if ( $identifier_count >= $item_count - 1
16368 || $is_assignment{$next_nonblank_type}
16369 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
16375 # do we have a long first term which should be
16376 # left on a line by itself?
16377 my $use_separate_first_term = (
16378 $odd_or_even == 1 # only if we can use 1 field/line
16379 && $item_count > 3 # need several items
16380 && $first_term_length >
16381 2 * $max_length[0] - 2 # need long first term
16382 && $first_term_length >
16383 2 * $max_length[1] - 2 # need long first term
16386 # or do we know from the type of list that the first term should
16388 if ( !$use_separate_first_term ) {
16389 if ( $is_keyword_with_special_leading_term{$list_type} ) {
16390 $use_separate_first_term = 1;
16392 # should the container be broken open?
16393 if ( $item_count < 3 ) {
16394 if ( $i_first_comma - $i_opening_paren < 4 ) {
16395 $$rdo_not_break_apart = 1;
16398 elsif ($first_term_length < 20
16399 && $i_first_comma - $i_opening_paren < 4 )
16401 my $columns = table_columns_available($i_first_comma);
16402 if ( $first_term_length < $columns ) {
16403 $$rdo_not_break_apart = 1;
16410 if ($use_separate_first_term) {
16412 # ..set a break and update starting values
16413 $use_separate_first_term = 1;
16414 set_forced_breakpoint($i_first_comma);
16415 $i_opening_paren = $i_first_comma;
16416 $i_first_comma = $$rcomma_index[1];
16418 return if $comma_count == 1;
16419 shift @item_lengths;
16420 shift @i_term_begin;
16422 shift @i_term_comma;
16425 # if not, update the metrics to include the first term
16427 if ( $first_term_length > $max_length[0] ) {
16428 $max_length[0] = $first_term_length;
16432 # Field width parameters
16433 my $pair_width = ( $max_length[0] + $max_length[1] );
16435 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
16437 # Number of free columns across the page width for laying out tables
16438 my $columns = table_columns_available($i_first_comma);
16440 # Estimated maximum number of fields which fit this space
16441 # This will be our first guess
16442 my $number_of_fields_max =
16443 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
16445 my $number_of_fields = $number_of_fields_max;
16447 # Find the best-looking number of fields
16448 # and make this our second guess if possible
16449 my ( $number_of_fields_best, $ri_ragged_break_list,
16450 $new_identifier_count )
16451 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
16454 if ( $number_of_fields_best != 0
16455 && $number_of_fields_best < $number_of_fields_max )
16457 $number_of_fields = $number_of_fields_best;
16460 # ----------------------------------------------------------------------
16461 # If we are crowded and the -lp option is being used, try to
16462 # undo some indentation
16463 # ----------------------------------------------------------------------
16465 $rOpts_line_up_parentheses
16467 $number_of_fields == 0
16468 || ( $number_of_fields == 1
16469 && $number_of_fields != $number_of_fields_best )
16473 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
16474 if ( $available_spaces > 0 ) {
16476 my $spaces_wanted = $max_width - $columns; # for 1 field
16478 if ( $number_of_fields_best == 0 ) {
16479 $number_of_fields_best =
16480 get_maximum_fields_wanted( \@item_lengths );
16483 if ( $number_of_fields_best != 1 ) {
16484 my $spaces_wanted_2 =
16485 1 + $pair_width - $columns; # for 2 fields
16486 if ( $available_spaces > $spaces_wanted_2 ) {
16487 $spaces_wanted = $spaces_wanted_2;
16491 if ( $spaces_wanted > 0 ) {
16492 my $deleted_spaces =
16493 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
16496 if ( $deleted_spaces > 0 ) {
16497 $columns = table_columns_available($i_first_comma);
16498 $number_of_fields_max =
16499 maximum_number_of_fields( $columns, $odd_or_even,
16500 $max_width, $pair_width );
16501 $number_of_fields = $number_of_fields_max;
16503 if ( $number_of_fields_best == 1
16504 && $number_of_fields >= 1 )
16506 $number_of_fields = $number_of_fields_best;
16513 # try for one column if two won't work
16514 if ( $number_of_fields <= 0 ) {
16515 $number_of_fields = int( $columns / $max_width );
16518 # The user can place an upper bound on the number of fields,
16519 # which can be useful for doing maintenance on tables
16520 if ( $rOpts_maximum_fields_per_table
16521 && $number_of_fields > $rOpts_maximum_fields_per_table )
16523 $number_of_fields = $rOpts_maximum_fields_per_table;
16526 # How many columns (characters) and lines would this container take
16527 # if no additional whitespace were added?
16528 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
16529 $i_effective_last_comma + 1 );
16530 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
16531 my $packed_lines = 1 + int( $packed_columns / $columns );
16533 # are we an item contained in an outer list?
16534 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
16536 if ( $number_of_fields <= 0 ) {
16538 # #---------------------------------------------------------------
16539 # # We're in trouble. We can't find a single field width that works.
16540 # # There is no simple answer here; we may have a single long list
16542 # #---------------------------------------------------------------
16544 # In many cases, it may be best to not force a break if there is just one
16545 # comma, because the standard continuation break logic will do a better
16548 # In the common case that all but one of the terms can fit
16549 # on a single line, it may look better not to break open the
16550 # containing parens. Consider, for example
16554 # sort { $color_value{$::a} <=> $color_value{$::b}; }
16557 # which will look like this with the container broken:
16561 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
16564 # Here is an example of this rule for a long last term:
16566 # log_message( 0, 256, 128,
16567 # "Number of routes in adj-RIB-in to be considered: $peercount" );
16569 # And here is an example with a long first term:
16572 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
16573 # $r, $pu, $ps, $cu, $cs, $tt
16575 # if $style eq 'all';
16577 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
16578 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
16579 my $long_first_term =
16580 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
16582 # break at every comma ...
16585 # if requested by user or is best looking
16586 $number_of_fields_best == 1
16588 # or if this is a sublist of a larger list
16589 || $in_hierarchical_list
16591 # or if multiple commas and we don't have a long first or last
16593 || ( $comma_count > 1
16594 && !( $long_last_term || $long_first_term ) )
16597 foreach ( 0 .. $comma_count - 1 ) {
16598 set_forced_breakpoint( $$rcomma_index[$_] );
16601 elsif ($long_last_term) {
16603 set_forced_breakpoint($i_last_comma);
16604 $$rdo_not_break_apart = 1 unless $must_break_open;
16606 elsif ($long_first_term) {
16608 set_forced_breakpoint($i_first_comma);
16612 # let breaks be defined by default bond strength logic
16617 # --------------------------------------------------------
16618 # We have a tentative field count that seems to work.
16619 # How many lines will this require?
16620 # --------------------------------------------------------
16621 my $formatted_lines = $item_count / ($number_of_fields);
16622 if ( $formatted_lines != int $formatted_lines ) {
16623 $formatted_lines = 1 + int $formatted_lines;
16626 # So far we've been trying to fill out to the right margin. But
16627 # compact tables are easier to read, so let's see if we can use fewer
16628 # fields without increasing the number of lines.
16629 $number_of_fields =
16630 compactify_table( $item_count, $number_of_fields, $formatted_lines,
16633 # How many spaces across the page will we fill?
16634 my $columns_per_line =
16635 ( int $number_of_fields / 2 ) * $pair_width +
16636 ( $number_of_fields % 2 ) * $max_width;
16638 my $formatted_columns;
16640 if ( $number_of_fields > 1 ) {
16641 $formatted_columns =
16642 ( $pair_width * ( int( $item_count / 2 ) ) +
16643 ( $item_count % 2 ) * $max_width );
16646 $formatted_columns = $max_width * $item_count;
16648 if ( $formatted_columns < $packed_columns ) {
16649 $formatted_columns = $packed_columns;
16652 my $unused_columns = $formatted_columns - $packed_columns;
16654 # set some empirical parameters to help decide if we should try to
16655 # align; high sparsity does not look good, especially with few lines
16656 my $sparsity = ($unused_columns) / ($formatted_columns);
16657 my $max_allowed_sparsity =
16658 ( $item_count < 3 ) ? 0.1
16659 : ( $packed_lines == 1 ) ? 0.15
16660 : ( $packed_lines == 2 ) ? 0.4
16663 # Begin check for shortcut methods, which avoid treating a list
16664 # as a table for relatively small parenthesized lists. These
16665 # are usually easier to read if not formatted as tables.
16667 $packed_lines <= 2 # probably can fit in 2 lines
16668 && $item_count < 9 # doesn't have too many items
16669 && $opening_environment eq 'BLOCK' # not a sub-container
16670 && $opening_token eq '(' # is paren list
16674 # Shortcut method 1: for -lp and just one comma:
16675 # This is a no-brainer, just break at the comma.
16677 $rOpts_line_up_parentheses # -lp
16678 && $item_count == 2 # two items, one comma
16679 && !$must_break_open
16682 my $i_break = $$rcomma_index[0];
16683 set_forced_breakpoint($i_break);
16684 $$rdo_not_break_apart = 1;
16685 set_non_alignment_flags( $comma_count, $rcomma_index );
16690 # method 2 is for most small ragged lists which might look
16691 # best if not displayed as a table.
16693 ( $number_of_fields == 2 && $item_count == 3 )
16695 $new_identifier_count > 0 # isn't all quotes
16696 && $sparsity > 0.15
16697 ) # would be fairly spaced gaps if aligned
16701 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16702 $ri_ragged_break_list );
16703 ++$break_count if ($use_separate_first_term);
16705 # NOTE: we should really use the true break count here,
16706 # which can be greater if there are large terms and
16707 # little space, but usually this will work well enough.
16708 unless ($must_break_open) {
16710 if ( $break_count <= 1 ) {
16711 $$rdo_not_break_apart = 1;
16713 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16715 $$rdo_not_break_apart = 1;
16718 set_non_alignment_flags( $comma_count, $rcomma_index );
16722 } # end shortcut methods
16726 FORMATTER_DEBUG_FLAG_SPARSE && do {
16728 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
16732 #---------------------------------------------------------------
16733 # Compound List Rule 2:
16734 # If this list is too long for one line, and it is an item of a
16735 # larger list, then we must format it, regardless of sparsity
16736 # (ian.t). One reason that we have to do this is to trigger
16737 # Compound List Rule 1, above, which causes breaks at all commas of
16738 # all outer lists. In this way, the structure will be properly
16740 #---------------------------------------------------------------
16742 # Decide if this list is too long for one line unless broken
16743 my $total_columns = table_columns_available($i_opening_paren);
16744 my $too_long = $packed_columns > $total_columns;
16746 # For a paren list, include the length of the token just before the
16747 # '(' because this is likely a sub call, and we would have to
16748 # include the sub name on the same line as the list. This is still
16749 # imprecise, but not too bad. (steve.t)
16750 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
16752 $too_long = excess_line_length( $i_opening_minus,
16753 $i_effective_last_comma + 1 ) > 0;
16756 # FIXME: For an item after a '=>', try to include the length of the
16757 # thing before the '=>'. This is crude and should be improved by
16758 # actually looking back token by token.
16759 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
16760 my $i_opening_minus = $i_opening_paren - 4;
16761 if ( $i_opening_minus >= 0 ) {
16762 $too_long = excess_line_length( $i_opening_minus,
16763 $i_effective_last_comma + 1 ) > 0;
16767 # Always break lists contained in '[' and '{' if too long for 1 line,
16768 # and always break lists which are too long and part of a more complex
16770 my $must_break_open_container = $must_break_open
16772 && ( $in_hierarchical_list || $opening_token ne '(' ) );
16774 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
16776 #---------------------------------------------------------------
16777 # The main decision:
16778 # Now decide if we will align the data into aligned columns. Do not
16779 # attempt to align columns if this is a tiny table or it would be
16780 # too spaced. It seems that the more packed lines we have, the
16781 # sparser the list that can be allowed and still look ok.
16782 #---------------------------------------------------------------
16784 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
16785 || ( $formatted_lines < 2 )
16786 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
16790 #---------------------------------------------------------------
16791 # too sparse: would look ugly if aligned in a table;
16792 #---------------------------------------------------------------
16794 # use old breakpoints if this is a 'big' list
16795 # FIXME: goal is to improve set_ragged_breakpoints so that
16796 # this is not necessary.
16797 if ( $packed_lines > 2 && $item_count > 10 ) {
16798 write_logfile_entry("List sparse: using old breakpoints\n");
16799 copy_old_breakpoints( $i_first_comma, $i_last_comma );
16802 # let the continuation logic handle it if 2 lines
16805 my $break_count = set_ragged_breakpoints( \@i_term_comma,
16806 $ri_ragged_break_list );
16807 ++$break_count if ($use_separate_first_term);
16809 unless ($must_break_open_container) {
16810 if ( $break_count <= 1 ) {
16811 $$rdo_not_break_apart = 1;
16813 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16815 $$rdo_not_break_apart = 1;
16818 set_non_alignment_flags( $comma_count, $rcomma_index );
16823 #---------------------------------------------------------------
16824 # go ahead and format as a table
16825 #---------------------------------------------------------------
16826 write_logfile_entry(
16827 "List: auto formatting with $number_of_fields fields/row\n");
16829 my $j_first_break =
16830 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
16833 my $j = $j_first_break ;
16834 $j < $comma_count ;
16835 $j += $number_of_fields
16838 my $i = $$rcomma_index[$j];
16839 set_forced_breakpoint($i);
16845 sub set_non_alignment_flags {
16847 # set flag which indicates that these commas should not be
16849 my ( $comma_count, $rcomma_index ) = @_;
16850 foreach ( 0 .. $comma_count - 1 ) {
16851 $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
16855 sub study_list_complexity {
16857 # Look for complex tables which should be formatted with one term per line.
16858 # Returns the following:
16860 # \@i_ragged_break_list = list of good breakpoints to avoid lines
16861 # which are hard to read
16862 # $number_of_fields_best = suggested number of fields based on
16863 # complexity; = 0 if any number may be used.
16865 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
16866 my $item_count = @{$ri_term_begin};
16867 my $complex_item_count = 0;
16868 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
16869 my $i_max = @{$ritem_lengths} - 1;
16870 ##my @item_complexity;
16872 my $i_last_last_break = -3;
16873 my $i_last_break = -2;
16874 my @i_ragged_break_list;
16876 my $definitely_complex = 30;
16877 my $definitely_simple = 12;
16878 my $quote_count = 0;
16880 for my $i ( 0 .. $i_max ) {
16881 my $ib = $ri_term_begin->[$i];
16882 my $ie = $ri_term_end->[$i];
16884 # define complexity: start with the actual term length
16885 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
16887 ##TBD: join types here and check for variations
16888 ##my $str=join "", @tokens_to_go[$ib..$ie];
16891 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
16895 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
16899 if ( $ib eq $ie ) {
16900 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
16901 $complex_item_count++;
16902 $weighted_length *= 2;
16908 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
16909 $complex_item_count++;
16910 $weighted_length *= 2;
16912 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
16913 $weighted_length += 4;
16917 # add weight for extra tokens.
16918 $weighted_length += 2 * ( $ie - $ib );
16920 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
16921 ## print "# COMPLEXITY:$weighted_length $BUB\n";
16923 ##push @item_complexity, $weighted_length;
16925 # now mark a ragged break after this item it if it is 'long and
16927 if ( $weighted_length >= $definitely_complex ) {
16929 # if we broke after the previous term
16930 # then break before it too
16931 if ( $i_last_break == $i - 1
16933 && $i_last_last_break != $i - 2 )
16936 ## FIXME: don't strand a small term
16937 pop @i_ragged_break_list;
16938 push @i_ragged_break_list, $i - 2;
16939 push @i_ragged_break_list, $i - 1;
16942 push @i_ragged_break_list, $i;
16943 $i_last_last_break = $i_last_break;
16944 $i_last_break = $i;
16947 # don't break before a small last term -- it will
16948 # not look good on a line by itself.
16949 elsif ($i == $i_max
16950 && $i_last_break == $i - 1
16951 && $weighted_length <= $definitely_simple )
16953 pop @i_ragged_break_list;
16957 my $identifier_count = $i_max + 1 - $quote_count;
16959 # Need more tuning here..
16960 if ( $max_width > 12
16961 && $complex_item_count > $item_count / 2
16962 && $number_of_fields_best != 2 )
16964 $number_of_fields_best = 1;
16967 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
16970 sub get_maximum_fields_wanted {
16972 # Not all tables look good with more than one field of items.
16973 # This routine looks at a table and decides if it should be
16974 # formatted with just one field or not.
16975 # This coding is still under development.
16976 my ($ritem_lengths) = @_;
16978 my $number_of_fields_best = 0;
16980 # For just a few items, we tentatively assume just 1 field.
16981 my $item_count = @{$ritem_lengths};
16982 if ( $item_count <= 5 ) {
16983 $number_of_fields_best = 1;
16986 # For larger tables, look at it both ways and see what looks best
16990 my @max_length = ( 0, 0 );
16991 my @last_length_2 = ( undef, undef );
16992 my @first_length_2 = ( undef, undef );
16993 my $last_length = undef;
16994 my $total_variation_1 = 0;
16995 my $total_variation_2 = 0;
16996 my @total_variation_2 = ( 0, 0 );
16997 for ( my $j = 0 ; $j < $item_count ; $j++ ) {
16999 $is_odd = 1 - $is_odd;
17000 my $length = $ritem_lengths->[$j];
17001 if ( $length > $max_length[$is_odd] ) {
17002 $max_length[$is_odd] = $length;
17005 if ( defined($last_length) ) {
17006 my $dl = abs( $length - $last_length );
17007 $total_variation_1 += $dl;
17009 $last_length = $length;
17011 my $ll = $last_length_2[$is_odd];
17012 if ( defined($ll) ) {
17013 my $dl = abs( $length - $ll );
17014 $total_variation_2[$is_odd] += $dl;
17017 $first_length_2[$is_odd] = $length;
17019 $last_length_2[$is_odd] = $length;
17021 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
17023 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
17024 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
17025 $number_of_fields_best = 1;
17028 return ($number_of_fields_best);
17031 sub table_columns_available {
17032 my $i_first_comma = shift;
17034 maximum_line_length($i_first_comma) -
17035 leading_spaces_to_go($i_first_comma);
17037 # Patch: the vertical formatter does not line up lines whose lengths
17038 # exactly equal the available line length because of allowances
17039 # that must be made for side comments. Therefore, the number of
17040 # available columns is reduced by 1 character.
17045 sub maximum_number_of_fields {
17047 # how many fields will fit in the available space?
17048 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
17049 my $max_pairs = int( $columns / $pair_width );
17050 my $number_of_fields = $max_pairs * 2;
17051 if ( $odd_or_even == 1
17052 && $max_pairs * $pair_width + $max_width <= $columns )
17054 $number_of_fields++;
17056 return $number_of_fields;
17059 sub compactify_table {
17061 # given a table with a certain number of fields and a certain number
17062 # of lines, see if reducing the number of fields will make it look
17064 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
17065 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
17069 $min_fields = $number_of_fields ;
17070 $min_fields >= $odd_or_even
17071 && $min_fields * $formatted_lines >= $item_count ;
17072 $min_fields -= $odd_or_even
17075 $number_of_fields = $min_fields;
17078 return $number_of_fields;
17081 sub set_ragged_breakpoints {
17083 # Set breakpoints in a list that cannot be formatted nicely as a
17085 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
17087 my $break_count = 0;
17088 foreach (@$ri_ragged_break_list) {
17089 my $j = $ri_term_comma->[$_];
17091 set_forced_breakpoint($j);
17095 return $break_count;
17098 sub copy_old_breakpoints {
17099 my ( $i_first_comma, $i_last_comma ) = @_;
17100 for my $i ( $i_first_comma .. $i_last_comma ) {
17101 if ( $old_breakpoint_to_go[$i] ) {
17102 set_forced_breakpoint($i);
17108 my ( $i, $j ) = @_;
17109 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
17111 FORMATTER_DEBUG_FLAG_NOBREAK && do {
17112 my ( $a, $b, $c ) = caller();
17114 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
17117 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
17120 # shouldn't happen; non-critical error
17122 FORMATTER_DEBUG_FLAG_NOBREAK && do {
17123 my ( $a, $b, $c ) = caller();
17125 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
17130 sub set_fake_breakpoint {
17132 # Just bump up the breakpoint count as a signal that there are breaks.
17133 # This is useful if we have breaks but may want to postpone deciding where
17135 $forced_breakpoint_count++;
17138 sub set_forced_breakpoint {
17141 return unless defined $i && $i >= 0;
17143 # when called with certain tokens, use bond strengths to decide
17144 # if we break before or after it
17145 my $token = $tokens_to_go[$i];
17147 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
17148 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
17151 # breaks are forced before 'if' and 'unless'
17152 elsif ( $is_if_unless{$token} ) { $i-- }
17154 if ( $i >= 0 && $i <= $max_index_to_go ) {
17155 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
17157 FORMATTER_DEBUG_FLAG_FORCE && do {
17158 my ( $a, $b, $c ) = caller();
17160 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
17163 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
17164 $forced_breakpoint_to_go[$i_nonblank] = 1;
17166 if ( $i_nonblank > $index_max_forced_break ) {
17167 $index_max_forced_break = $i_nonblank;
17169 $forced_breakpoint_count++;
17170 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
17173 # if we break at an opening container..break at the closing
17174 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
17175 set_closing_breakpoint($i_nonblank);
17181 sub clear_breakpoint_undo_stack {
17182 $forced_breakpoint_undo_count = 0;
17185 sub undo_forced_breakpoint_stack {
17187 my $i_start = shift;
17188 if ( $i_start < 0 ) {
17190 my ( $a, $b, $c ) = caller();
17192 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
17196 while ( $forced_breakpoint_undo_count > $i_start ) {
17198 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
17199 if ( $i >= 0 && $i <= $max_index_to_go ) {
17200 $forced_breakpoint_to_go[$i] = 0;
17201 $forced_breakpoint_count--;
17203 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17204 my ( $a, $b, $c ) = caller();
17206 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
17210 # shouldn't happen, but not a critical error
17212 FORMATTER_DEBUG_FLAG_UNDOBP && do {
17213 my ( $a, $b, $c ) = caller();
17215 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
17221 { # begin recombine_breakpoints
17232 @is_amp_amp{@_} = (1) x scalar(@_);
17235 @is_ternary{@_} = (1) x scalar(@_);
17237 @_ = qw( + - * / );
17238 @is_math_op{@_} = (1) x scalar(@_);
17241 @is_plus_minus{@_} = (1) x scalar(@_);
17244 @is_mult_div{@_} = (1) x scalar(@_);
17247 sub DUMP_BREAKPOINTS {
17249 # Debug routine to dump current breakpoints...not normally called
17250 # We are given indexes to the current lines:
17251 # $ri_beg = ref to array of BEGinning indexes of each line
17252 # $ri_end = ref to array of ENDing indexes of each line
17253 my ( $ri_beg, $ri_end, $msg ) = @_;
17254 print STDERR "----Dumping breakpoints from: $msg----\n";
17255 for my $n ( 0 .. @{$ri_end} - 1 ) {
17256 my $ibeg = $$ri_beg[$n];
17257 my $iend = $$ri_end[$n];
17259 foreach my $i ( $ibeg .. $iend ) {
17260 $text .= $tokens_to_go[$i];
17262 print STDERR "$n ($ibeg:$iend) $text\n";
17264 print STDERR "----\n";
17267 sub recombine_breakpoints {
17269 # sub set_continuation_breaks is very liberal in setting line breaks
17270 # for long lines, always setting breaks at good breakpoints, even
17271 # when that creates small lines. Sometimes small line fragments
17272 # are produced which would look better if they were combined.
17273 # That's the task of this routine.
17275 # We are given indexes to the current lines:
17276 # $ri_beg = ref to array of BEGinning indexes of each line
17277 # $ri_end = ref to array of ENDing indexes of each line
17278 my ( $ri_beg, $ri_end ) = @_;
17280 # Make a list of all good joining tokens between the lines
17283 my $nmax = @$ri_end - 1;
17284 for my $n ( 1 .. $nmax ) {
17285 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17286 my $iend_1 = $$ri_end[ $n - 1 ];
17287 my $iend_2 = $$ri_end[$n];
17288 my $ibeg_2 = $$ri_beg[$n];
17290 my ( $itok, $itokp, $itokm );
17292 foreach my $itest ( $iend_1, $ibeg_2 ) {
17293 my $type = $types_to_go[$itest];
17294 if ( $is_math_op{$type}
17295 || $is_amp_amp{$type}
17296 || $is_assignment{$type}
17302 $joint[$n] = [$itok];
17305 my $more_to_do = 1;
17307 # We keep looping over all of the lines of this batch
17308 # until there are no more possible recombinations
17309 my $nmax_last = @$ri_end;
17310 while ($more_to_do) {
17314 my $nmax = @$ri_end - 1;
17316 # Safety check for infinite loop
17317 unless ( $nmax < $nmax_last ) {
17319 # Shouldn't happen because splice below decreases nmax on each
17322 "Program bug-infinite loop in recombine breakpoints\n";
17324 $nmax_last = $nmax;
17326 my $previous_outdentable_closing_paren;
17327 my $leading_amp_count = 0;
17328 my $this_line_is_semicolon_terminated;
17330 # loop over all remaining lines in this batch
17331 for $n ( 1 .. $nmax ) {
17333 #----------------------------------------------------------
17334 # If we join the current pair of lines,
17335 # line $n-1 will become the left part of the joined line
17336 # line $n will become the right part of the joined line
17338 # Here are Indexes of the endpoint tokens of the two lines:
17340 # -----line $n-1--- | -----line $n-----
17341 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
17344 # We want to decide if we should remove the line break
17345 # between the tokens at $iend_1 and $ibeg_2
17347 # We will apply a number of ad-hoc tests to see if joining
17348 # here will look ok. The code will just issue a 'next'
17349 # command if the join doesn't look good. If we get through
17350 # the gauntlet of tests, the lines will be recombined.
17351 #----------------------------------------------------------
17353 # beginning and ending tokens of the lines we are working on
17354 my $ibeg_1 = $$ri_beg[ $n - 1 ];
17355 my $iend_1 = $$ri_end[ $n - 1 ];
17356 my $iend_2 = $$ri_end[$n];
17357 my $ibeg_2 = $$ri_beg[$n];
17358 my $ibeg_nmax = $$ri_beg[$nmax];
17360 my $type_iend_1 = $types_to_go[$iend_1];
17361 my $type_iend_2 = $types_to_go[$iend_2];
17362 my $type_ibeg_1 = $types_to_go[$ibeg_1];
17363 my $type_ibeg_2 = $types_to_go[$ibeg_2];
17365 # some beginning indexes of other lines, which may not exist
17366 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
17367 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
17368 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
17372 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
17373 # $nesting_depth_to_go[$ibeg_1] );
17375 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
17377 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
17380 # If line $n is the last line, we set some flags and
17381 # do any special checks for it
17382 if ( $n == $nmax ) {
17384 # a terminal '{' should stay where it is
17385 next if $type_ibeg_2 eq '{';
17387 # set flag if statement $n ends in ';'
17388 $this_line_is_semicolon_terminated = $type_iend_2 eq ';'
17390 # with possible side comment
17391 || ( $type_iend_2 eq '#'
17392 && $iend_2 - $ibeg_2 >= 2
17393 && $types_to_go[ $iend_2 - 2 ] eq ';'
17394 && $types_to_go[ $iend_2 - 1 ] eq 'b' );
17397 #----------------------------------------------------------
17398 # Recombine Section 1:
17399 # Examine the special token joining this line pair, if any.
17400 # Put as many tests in this section to avoid duplicate code and
17401 # to make formatting independent of whether breaks are to the
17402 # left or right of an operator.
17403 #----------------------------------------------------------
17405 my ($itok) = @{ $joint[$n] };
17408 # FIXME: Patch - may not be necessary
17410 $type_iend_1 eq 'b'
17415 $type_iend_2 eq 'b'
17420 my $type = $types_to_go[$itok];
17422 if ( $type eq ':' ) {
17424 # do not join at a colon unless it disobeys the break request
17425 if ( $itok eq $iend_1 ) {
17426 next unless $want_break_before{$type};
17429 $leading_amp_count++;
17430 next if $want_break_before{$type};
17434 # handle math operators + - * /
17435 elsif ( $is_math_op{$type} ) {
17437 # Combine these lines if this line is a single
17438 # number, or if it is a short term with same
17439 # operator as the previous line. For example, in
17440 # the following code we will combine all of the
17441 # short terms $A, $B, $C, $D, $E, $F, together
17442 # instead of leaving them one per line:
17444 # $A * $B * $C * $D * $E * $F *
17445 # ( 2. * $eps * $sigma * $area ) *
17446 # ( 1. / $tcold**3 - 1. / $thot**3 );
17448 # This can be important in math-intensive code.
17452 my $itokp = min( $inext_to_go[$itok], $iend_2 );
17453 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
17454 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
17455 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
17457 # check for a number on the right
17458 if ( $types_to_go[$itokp] eq 'n' ) {
17460 # ok if nothing else on right
17461 if ( $itokp == $iend_2 ) {
17466 # look one more token to right..
17467 # okay if math operator or some termination
17469 ( ( $itokpp == $iend_2 )
17470 && $is_math_op{ $types_to_go[$itokpp] } )
17471 || $types_to_go[$itokpp] =~ /^[#,;]$/;
17475 # check for a number on the left
17476 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
17478 # okay if nothing else to left
17479 if ( $itokm == $ibeg_1 ) {
17483 # otherwise look one more token to left
17486 # okay if math operator, comma, or assignment
17487 $good_combo = ( $itokmm == $ibeg_1 )
17488 && ( $is_math_op{ $types_to_go[$itokmm] }
17489 || $types_to_go[$itokmm] =~ /^[,]$/
17490 || $is_assignment{ $types_to_go[$itokmm] }
17495 # look for a single short token either side of the
17497 if ( !$good_combo ) {
17499 # Slight adjustment factor to make results
17500 # independent of break before or after operator in
17501 # long summed lists. (An operator and a space make
17503 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
17507 # numbers or id's on both sides of this joint
17508 $types_to_go[$itokp] =~ /^[in]$/
17509 && $types_to_go[$itokm] =~ /^[in]$/
17511 # one of the two lines must be short:
17514 # no more than 2 nonblank tokens right of
17519 && token_sequence_length( $itokp, $iend_2 )
17521 $rOpts_short_concatenation_item_length
17524 # no more than 2 nonblank tokens left of
17529 && token_sequence_length( $ibeg_1, $itokm )
17531 $rOpts_short_concatenation_item_length
17536 # keep pure terms; don't mix +- with */
17538 $is_plus_minus{$type}
17539 && ( $is_mult_div{ $types_to_go[$itokmm] }
17540 || $is_mult_div{ $types_to_go[$itokpp] } )
17543 $is_mult_div{$type}
17544 && ( $is_plus_minus{ $types_to_go[$itokmm] }
17545 || $is_plus_minus{ $types_to_go[$itokpp] } )
17551 # it is also good to combine if we can reduce to 2 lines
17552 if ( !$good_combo ) {
17554 # index on other line where same token would be in a
17557 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
17562 && $types_to_go[$iother] ne $type;
17565 next unless ($good_combo);
17569 elsif ( $is_amp_amp{$type} ) {
17573 elsif ( $is_assignment{$type} ) {
17575 } ## end assignment
17578 #----------------------------------------------------------
17579 # Recombine Section 2:
17580 # Examine token at $iend_1 (right end of first line of pair)
17581 #----------------------------------------------------------
17583 # an isolated '}' may join with a ';' terminated segment
17584 if ( $type_iend_1 eq '}' ) {
17586 # Check for cases where combining a semicolon terminated
17587 # statement with a previous isolated closing paren will
17588 # allow the combined line to be outdented. This is
17589 # generally a good move. For example, we can join up
17590 # the last two lines here:
17592 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17593 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17599 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
17600 # $size, $atime, $mtime, $ctime, $blksize, $blocks
17603 # which makes the parens line up.
17605 # Another example, from Joe Matarazzo, probably looks best
17606 # with the 'or' clause appended to the trailing paren:
17607 # $self->some_method(
17610 # ) or die "Some_method didn't work";
17612 # But we do not want to do this for something like the -lp
17613 # option where the paren is not outdentable because the
17614 # trailing clause will be far to the right.
17616 # The logic here is synchronized with the logic in sub
17617 # sub set_adjusted_indentation, which actually does
17620 $previous_outdentable_closing_paren =
17621 $this_line_is_semicolon_terminated
17623 # only one token on last line
17624 && $ibeg_1 == $iend_1
17626 # must be structural paren
17627 && $tokens_to_go[$iend_1] eq ')'
17629 # style must allow outdenting,
17630 && !$closing_token_indentation{')'}
17632 # only leading '&&', '||', and ':' if no others seen
17633 # (but note: our count made below could be wrong
17634 # due to intervening comments)
17635 && ( $leading_amp_count == 0
17636 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
17638 # but leading colons probably line up with a
17639 # previous colon or question (count could be wrong).
17640 && $type_ibeg_2 ne ':'
17642 # only one step in depth allowed. this line must not
17643 # begin with a ')' itself.
17644 && ( $nesting_depth_to_go[$iend_1] ==
17645 $nesting_depth_to_go[$iend_2] + 1 );
17647 # YVES patch 2 of 2:
17648 # Allow cuddled eval chains, like this:
17655 # This patch works together with a patch in
17656 # setting adjusted indentation (where the closing eval
17657 # brace is outdented if possible).
17658 # The problem is that an 'eval' block has continuation
17659 # indentation and it looks better to undo it in some
17660 # cases. If we do not use this patch we would get:
17668 # The alternative, for uncuddled style, is to create
17669 # a patch in set_adjusted_indentation which undoes
17670 # the indentation of a leading line like 'or do {'.
17671 # This doesn't work well with -icb through
17673 $block_type_to_go[$iend_1] eq 'eval'
17674 && !$rOpts->{'line-up-parentheses'}
17675 && !$rOpts->{'indent-closing-brace'}
17676 && $tokens_to_go[$iend_2] eq '{'
17678 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
17679 || ( $type_ibeg_2 eq 'k'
17680 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
17681 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
17685 $previous_outdentable_closing_paren ||= 1;
17690 $previous_outdentable_closing_paren
17692 # handle '.' and '?' specially below
17693 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
17698 # honor breaks at opening brace
17699 # Added to prevent recombining something like this:
17700 # } || eval { package main;
17701 elsif ( $type_iend_1 eq '{' ) {
17702 next if $forced_breakpoint_to_go[$iend_1];
17705 # do not recombine lines with ending &&, ||,
17706 elsif ( $is_amp_amp{$type_iend_1} ) {
17707 next unless $want_break_before{$type_iend_1};
17710 # Identify and recombine a broken ?/: chain
17711 elsif ( $type_iend_1 eq '?' ) {
17713 # Do not recombine different levels
17715 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
17717 # do not recombine unless next line ends in :
17718 next unless $type_iend_2 eq ':';
17721 # for lines ending in a comma...
17722 elsif ( $type_iend_1 eq ',' ) {
17724 # Do not recombine at comma which is following the
17726 # TODO: might be best to make a special flag
17727 next if ( $old_breakpoint_to_go[$iend_1] );
17729 # an isolated '},' may join with an identifier + ';'
17730 # this is useful for the class of a 'bless' statement (bless.t)
17731 if ( $type_ibeg_1 eq '}'
17732 && $type_ibeg_2 eq 'i' )
17735 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
17736 && ( $iend_2 == ( $ibeg_2 + 1 ) )
17737 && $this_line_is_semicolon_terminated );
17739 # override breakpoint
17740 $forced_breakpoint_to_go[$iend_1] = 0;
17746 # do not recombine after a comma unless this will leave
17748 next unless ( $n + 1 >= $nmax );
17750 # do not recombine if there is a change in indentation depth
17753 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
17755 # do not recombine a "complex expression" after a
17756 # comma. "complex" means no parens.
17758 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
17759 if ( $tokens_to_go[$ii] eq '(' ) {
17764 next if $saw_paren;
17769 elsif ( $type_iend_1 eq '(' ) {
17771 # No longer doing this
17774 elsif ( $type_iend_1 eq ')' ) {
17776 # No longer doing this
17779 # keep a terminal for-semicolon
17780 elsif ( $type_iend_1 eq 'f' ) {
17784 # if '=' at end of line ...
17785 elsif ( $is_assignment{$type_iend_1} ) {
17787 # keep break after = if it was in input stream
17788 # this helps prevent 'blinkers'
17789 next if $old_breakpoint_to_go[$iend_1]
17791 # don't strand an isolated '='
17792 && $iend_1 != $ibeg_1;
17794 my $is_short_quote =
17795 ( $type_ibeg_2 eq 'Q'
17796 && $ibeg_2 == $iend_2
17797 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
17798 $rOpts_short_concatenation_item_length );
17800 ( $type_ibeg_1 eq '?'
17801 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
17803 # always join an isolated '=', a short quote, or if this
17804 # will put ?/: at start of adjacent lines
17805 if ( $ibeg_1 != $iend_1
17806 && !$is_short_quote
17813 # unless we can reduce this to two lines
17816 # or three lines, the last with a leading semicolon
17817 || ( $nmax == $n + 2
17818 && $types_to_go[$ibeg_nmax] eq ';' )
17820 # or the next line ends with a here doc
17821 || $type_iend_2 eq 'h'
17823 # or the next line ends in an open paren or brace
17824 # and the break hasn't been forced [dima.t]
17825 || ( !$forced_breakpoint_to_go[$iend_1]
17826 && $type_iend_2 eq '{' )
17829 # do not recombine if the two lines might align well
17830 # this is a very approximate test for this
17832 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
17837 # Recombine if we can make two lines
17840 # -lp users often prefer this:
17841 # my $title = function($env, $env, $sysarea,
17842 # "bubba Borrower Entry");
17843 # so we will recombine if -lp is used we have
17845 && ( !$rOpts_line_up_parentheses
17846 || $type_iend_2 ne ',' )
17850 # otherwise, scan the rhs line up to last token for
17851 # complexity. Note that we are not counting the last
17852 # token in case it is an opening paren.
17854 my $depth = $nesting_depth_to_go[$ibeg_2];
17855 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
17856 if ( $nesting_depth_to_go[$i] != $depth ) {
17858 last if ( $tv > 1 );
17860 $depth = $nesting_depth_to_go[$i];
17863 # ok to recombine if no level changes before last token
17866 # otherwise, do not recombine if more than two
17868 next if ( $tv > 1 );
17870 # check total complexity of the two adjacent lines
17871 # that will occur if we do this join
17873 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
17874 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
17875 if ( $nesting_depth_to_go[$i] != $depth ) {
17877 last if ( $tv > 2 );
17879 $depth = $nesting_depth_to_go[$i];
17882 # do not recombine if total is more than 2 level changes
17883 next if ( $tv > 2 );
17888 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
17889 $forced_breakpoint_to_go[$iend_1] = 0;
17894 elsif ( $type_iend_1 eq 'k' ) {
17896 # make major control keywords stand out
17901 #/^(last|next|redo|return)$/
17902 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
17904 # but only if followed by multiple lines
17908 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
17910 unless $want_break_before{ $tokens_to_go[$iend_1] };
17914 #----------------------------------------------------------
17915 # Recombine Section 3:
17916 # Examine token at $ibeg_2 (left end of second line of pair)
17917 #----------------------------------------------------------
17919 # join lines identified above as capable of
17920 # causing an outdented line with leading closing paren
17921 # Note that we are skipping the rest of this section
17922 if ($previous_outdentable_closing_paren) {
17923 $forced_breakpoint_to_go[$iend_1] = 0;
17926 # handle lines with leading &&, ||
17927 elsif ( $is_amp_amp{$type_ibeg_2} ) {
17929 $leading_amp_count++;
17931 # ok to recombine if it follows a ? or :
17932 # and is followed by an open paren..
17934 ( $is_ternary{$type_ibeg_1}
17935 && $tokens_to_go[$iend_2] eq '(' )
17937 # or is followed by a ? or : at same depth
17939 # We are looking for something like this. We can
17940 # recombine the && line with the line above to make the
17941 # structure more clear:
17943 # exists $G->{Attr}->{V}
17944 # && exists $G->{Attr}->{V}->{$u}
17945 # ? %{ $G->{Attr}->{V}->{$u} }
17948 # We should probably leave something like this alone:
17950 # exists $G->{Attr}->{E}
17951 # && exists $G->{Attr}->{E}->{$u}
17952 # && exists $G->{Attr}->{E}->{$u}->{$v}
17953 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
17955 # so that we either have all of the &&'s (or ||'s)
17956 # on one line, as in the first example, or break at
17957 # each one as in the second example. However, it
17958 # sometimes makes things worse to check for this because
17959 # it prevents multiple recombinations. So this is not done.
17961 && $is_ternary{ $types_to_go[$ibeg_3] }
17962 && $nesting_depth_to_go[$ibeg_3] ==
17963 $nesting_depth_to_go[$ibeg_2] );
17965 next if !$ok && $want_break_before{$type_ibeg_2};
17966 $forced_breakpoint_to_go[$iend_1] = 0;
17968 # tweak the bond strength to give this joint priority
17973 # Identify and recombine a broken ?/: chain
17974 elsif ( $type_ibeg_2 eq '?' ) {
17976 # Do not recombine different levels
17977 my $lev = $levels_to_go[$ibeg_2];
17978 next if ( $lev ne $levels_to_go[$ibeg_1] );
17980 # Do not recombine a '?' if either next line or
17981 # previous line does not start with a ':'. The reasons
17982 # are that (1) no alignment of the ? will be possible
17983 # and (2) the expression is somewhat complex, so the
17984 # '?' is harder to see in the interior of the line.
17985 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
17986 my $precedes_colon =
17987 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
17988 next unless ( $follows_colon || $precedes_colon );
17990 # we will always combining a ? line following a : line
17991 if ( !$follows_colon ) {
17993 # ...otherwise recombine only if it looks like a chain.
17994 # we will just look at a few nearby lines to see if
17995 # this looks like a chain.
17996 my $local_count = 0;
17997 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
18000 && $types_to_go[$ii] eq ':'
18001 && $levels_to_go[$ii] == $lev;
18003 next unless ( $local_count > 1 );
18005 $forced_breakpoint_to_go[$iend_1] = 0;
18008 # do not recombine lines with leading '.'
18009 elsif ( $type_ibeg_2 eq '.' ) {
18010 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
18014 # ... unless there is just one and we can reduce
18015 # this to two lines if we do. For example, this
18019 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
18021 # looks better than this:
18022 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
18023 # . '$args .= $pat;'
18028 && $type_ibeg_1 ne $type_ibeg_2
18031 # ... or this would strand a short quote , like this
18032 # . "some long quote"
18035 || ( $types_to_go[$i_next_nonblank] eq 'Q'
18036 && $i_next_nonblank >= $iend_2 - 1
18037 && $token_lengths_to_go[$i_next_nonblank] <
18038 $rOpts_short_concatenation_item_length )
18042 # handle leading keyword..
18043 elsif ( $type_ibeg_2 eq 'k' ) {
18045 # handle leading "or"
18046 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
18049 $this_line_is_semicolon_terminated
18052 # following 'if' or 'unless' or 'or'
18053 $type_ibeg_1 eq 'k'
18054 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
18056 # important: only combine a very simple or
18057 # statement because the step below may have
18058 # combined a trailing 'and' with this or,
18059 # and we do not want to then combine
18060 # everything together
18061 && ( $iend_2 - $ibeg_2 <= 7 )
18065 $forced_breakpoint_to_go[$iend_1] = 0
18066 unless $old_breakpoint_to_go[$iend_1];
18069 # handle leading 'and'
18070 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
18072 # Decide if we will combine a single terminal 'and'
18073 # after an 'if' or 'unless'.
18075 # This looks best with the 'and' on the same
18076 # line as the 'if':
18079 # if $seconds and $nu < 2;
18081 # But this looks better as shown:
18084 # if !$this->{Parents}{$_}
18085 # or $this->{Parents}{$_} eq $_;
18089 $this_line_is_semicolon_terminated
18092 # following 'if' or 'unless' or 'or'
18093 $type_ibeg_1 eq 'k'
18094 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
18095 || $tokens_to_go[$ibeg_1] eq 'or' )
18100 # handle leading "if" and "unless"
18101 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
18103 # FIXME: This is still experimental..may not be too useful
18106 $this_line_is_semicolon_terminated
18108 # previous line begins with 'and' or 'or'
18109 && $type_ibeg_1 eq 'k'
18110 && $is_and_or{ $tokens_to_go[$ibeg_1] }
18115 # handle all other leading keywords
18118 # keywords look best at start of lines,
18119 # but combine things like "1 while"
18120 unless ( $is_assignment{$type_iend_1} ) {
18122 if ( ( $type_iend_1 ne 'k' )
18123 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
18128 # similar treatment of && and || as above for 'and' and 'or':
18129 # NOTE: This block of code is currently bypassed because
18130 # of a previous block but is retained for possible future use.
18131 elsif ( $is_amp_amp{$type_ibeg_2} ) {
18133 # maybe looking at something like:
18134 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
18138 $this_line_is_semicolon_terminated
18140 # previous line begins with an 'if' or 'unless' keyword
18141 && $type_ibeg_1 eq 'k'
18142 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
18147 # handle line with leading = or similar
18148 elsif ( $is_assignment{$type_ibeg_2} ) {
18149 next unless ( $n == 1 || $n == $nmax );
18150 next if $old_breakpoint_to_go[$iend_1];
18154 # unless we can reduce this to two lines
18157 # or three lines, the last with a leading semicolon
18158 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
18160 # or the next line ends with a here doc
18161 || $type_iend_2 eq 'h'
18163 # or this is a short line ending in ;
18164 || ( $n == $nmax && $this_line_is_semicolon_terminated )
18166 $forced_breakpoint_to_go[$iend_1] = 0;
18169 #----------------------------------------------------------
18170 # Recombine Section 4:
18171 # Combine the lines if we arrive here and it is possible
18172 #----------------------------------------------------------
18174 # honor hard breakpoints
18175 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
18177 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
18179 # combined line cannot be too long
18180 my $excess = excess_line_length( $ibeg_1, $iend_2 );
18181 next if ( $excess > 0 );
18183 # Require a few extra spaces before recombining lines if we are
18184 # at an old breakpoint unless this is a simple list or terminal
18185 # line. The goal is to avoid oscillating between two
18186 # quasi-stable end states. For example this snippet caused
18190 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
18194 if ( $old_breakpoint_to_go[$iend_1]
18195 && !$this_line_is_semicolon_terminated
18198 && $type_iend_2 ne ',' );
18200 # do not recombine if we would skip in indentation levels
18201 if ( $n < $nmax ) {
18202 my $if_next = $$ri_beg[ $n + 1 ];
18205 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
18206 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
18208 # but an isolated 'if (' is undesirable
18211 && $iend_1 - $ibeg_1 <= 2
18212 && $type_ibeg_1 eq 'k'
18213 && $tokens_to_go[$ibeg_1] eq 'if'
18214 && $tokens_to_go[$iend_1] ne '('
18220 next if ( $bs >= NO_BREAK - 1 );
18222 # remember the pair with the greatest bond strength
18229 if ( $bs > $bs_best ) {
18236 # recombine the pair with the greatest bond strength
18238 splice @$ri_beg, $n_best, 1;
18239 splice @$ri_end, $n_best - 1, 1;
18240 splice @joint, $n_best, 1;
18242 # keep going if we are still making progress
18246 return ( $ri_beg, $ri_end );
18248 } # end recombine_breakpoints
18250 sub break_all_chain_tokens {
18252 # scan the current breakpoints looking for breaks at certain "chain
18253 # operators" (. : && || + etc) which often occur repeatedly in a long
18254 # statement. If we see a break at any one, break at all similar tokens
18255 # within the same container.
18257 my ( $ri_left, $ri_right ) = @_;
18259 my %saw_chain_type;
18260 my %left_chain_type;
18261 my %right_chain_type;
18262 my %interior_chain_type;
18263 my $nmax = @$ri_right - 1;
18265 # scan the left and right end tokens of all lines
18267 for my $n ( 0 .. $nmax ) {
18268 my $il = $$ri_left[$n];
18269 my $ir = $$ri_right[$n];
18270 my $typel = $types_to_go[$il];
18271 my $typer = $types_to_go[$ir];
18272 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
18273 $typer = '+' if ( $typer eq '-' );
18274 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
18275 $typer = '*' if ( $typer eq '/' );
18276 my $tokenl = $tokens_to_go[$il];
18277 my $tokenr = $tokens_to_go[$ir];
18279 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
18280 next if ( $typel eq '?' );
18281 push @{ $left_chain_type{$typel} }, $il;
18282 $saw_chain_type{$typel} = 1;
18285 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
18286 next if ( $typer eq '?' );
18287 push @{ $right_chain_type{$typer} }, $ir;
18288 $saw_chain_type{$typer} = 1;
18292 return unless $count;
18294 # now look for any interior tokens of the same types
18296 for my $n ( 0 .. $nmax ) {
18297 my $il = $$ri_left[$n];
18298 my $ir = $$ri_right[$n];
18299 for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
18300 my $type = $types_to_go[$i];
18301 $type = '+' if ( $type eq '-' );
18302 $type = '*' if ( $type eq '/' );
18303 if ( $saw_chain_type{$type} ) {
18304 push @{ $interior_chain_type{$type} }, $i;
18309 return unless $count;
18311 # now make a list of all new break points
18314 # loop over all chain types
18315 foreach my $type ( keys %saw_chain_type ) {
18317 # quit if just ONE continuation line with leading . For example--
18318 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
18320 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
18322 # loop over all interior chain tokens
18323 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
18325 # loop over all left end tokens of same type
18326 if ( $left_chain_type{$type} ) {
18327 next if $nobreak_to_go[ $itest - 1 ];
18328 foreach my $i ( @{ $left_chain_type{$type} } ) {
18329 next unless in_same_container( $i, $itest );
18330 push @insert_list, $itest - 1;
18332 # Break at matching ? if this : is at a different level.
18333 # For example, the ? before $THRf_DEAD in the following
18334 # should get a break if its : gets a break.
18337 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
18338 # : ( $_ & 4 ) ? $THRf_R_DETACHED
18339 # : $THRf_R_JOINABLE;
18341 && $levels_to_go[$i] != $levels_to_go[$itest] )
18343 my $i_question = $mate_index_to_go[$itest];
18344 if ( $i_question > 0 ) {
18345 push @insert_list, $i_question - 1;
18352 # loop over all right end tokens of same type
18353 if ( $right_chain_type{$type} ) {
18354 next if $nobreak_to_go[$itest];
18355 foreach my $i ( @{ $right_chain_type{$type} } ) {
18356 next unless in_same_container( $i, $itest );
18357 push @insert_list, $itest;
18359 # break at matching ? if this : is at a different level
18361 && $levels_to_go[$i] != $levels_to_go[$itest] )
18363 my $i_question = $mate_index_to_go[$itest];
18364 if ( $i_question >= 0 ) {
18365 push @insert_list, $i_question;
18374 # insert any new break points
18375 if (@insert_list) {
18376 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18382 # Look for assignment operators that could use a breakpoint.
18383 # For example, in the following snippet
18385 # $HOME = $ENV{HOME}
18388 # || die "no home directory for user $<";
18390 # we could break at the = to get this, which is a little nicer:
18395 # || die "no home directory for user $<";
18397 # The logic here follows the logic in set_logical_padding, which
18398 # will add the padding in the second line to improve alignment.
18400 my ( $ri_left, $ri_right ) = @_;
18401 my $nmax = @$ri_right - 1;
18402 return unless ( $nmax >= 2 );
18404 # scan the left ends of first two lines
18407 for my $n ( 1 .. 2 ) {
18408 my $il = $$ri_left[$n];
18409 my $typel = $types_to_go[$il];
18410 my $tokenl = $tokens_to_go[$il];
18412 my $has_leading_op = ( $tokenl =~ /^\w/ )
18413 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
18414 : $is_chain_operator{$typel}; # and, or
18415 return unless ($has_leading_op);
18418 unless ( $tokenl eq $tokbeg
18419 && $nesting_depth_to_go[$il] eq $depth_beg );
18422 $depth_beg = $nesting_depth_to_go[$il];
18425 # now look for any interior tokens of the same types
18426 my $il = $$ri_left[0];
18427 my $ir = $$ri_right[0];
18429 # now make a list of all new break points
18431 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
18432 my $type = $types_to_go[$i];
18433 if ( $is_assignment{$type}
18434 && $nesting_depth_to_go[$i] eq $depth_beg )
18436 if ( $want_break_before{$type} ) {
18437 push @insert_list, $i - 1;
18440 push @insert_list, $i;
18445 # Break after a 'return' followed by a chain of operators
18446 # return ( $^O !~ /win32|dos/i )
18447 # && ( $^O ne 'VMS' )
18448 # && ( $^O ne 'OS2' )
18449 # && ( $^O ne 'MacOS' );
18452 # ( $^O !~ /win32|dos/i )
18453 # && ( $^O ne 'VMS' )
18454 # && ( $^O ne 'OS2' )
18455 # && ( $^O ne 'MacOS' );
18457 if ( $types_to_go[$i] eq 'k'
18458 && $tokens_to_go[$i] eq 'return'
18460 && $nesting_depth_to_go[$i] eq $depth_beg )
18462 push @insert_list, $i;
18465 return unless (@insert_list);
18467 # One final check...
18468 # scan second and third lines and be sure there are no assignments
18469 # we want to avoid breaking at an = to make something like this:
18471 # $html_icons{"$type-$state"}
18472 # or $icon = $html_icons{$type}
18473 # or $icon = $html_icons{$state} )
18474 for my $n ( 1 .. 2 ) {
18475 my $il = $$ri_left[$n];
18476 my $ir = $$ri_right[$n];
18477 for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
18478 my $type = $types_to_go[$i];
18480 if ( $is_assignment{$type}
18481 && $nesting_depth_to_go[$i] eq $depth_beg );
18485 # ok, insert any new break point
18486 if (@insert_list) {
18487 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18491 sub insert_final_breaks {
18493 my ( $ri_left, $ri_right ) = @_;
18495 my $nmax = @$ri_right - 1;
18497 # scan the left and right end tokens of all lines
18499 my $i_first_colon = -1;
18500 for my $n ( 0 .. $nmax ) {
18501 my $il = $$ri_left[$n];
18502 my $ir = $$ri_right[$n];
18503 my $typel = $types_to_go[$il];
18504 my $typer = $types_to_go[$ir];
18505 return if ( $typel eq '?' );
18506 return if ( $typer eq '?' );
18507 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
18508 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
18511 # For long ternary chains,
18512 # if the first : we see has its # ? is in the interior
18513 # of a preceding line, then see if there are any good
18514 # breakpoints before the ?.
18515 if ( $i_first_colon > 0 ) {
18516 my $i_question = $mate_index_to_go[$i_first_colon];
18517 if ( $i_question > 0 ) {
18519 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
18520 my $token = $tokens_to_go[$ii];
18521 my $type = $types_to_go[$ii];
18523 # For now, a good break is either a comma or a 'return'.
18524 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
18525 && in_same_container( $ii, $i_question ) )
18527 push @insert_list, $ii;
18532 # insert any new break points
18533 if (@insert_list) {
18534 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18540 sub in_same_container {
18542 # check to see if tokens at i1 and i2 are in the
18543 # same container, and not separated by a comma, ? or :
18544 my ( $i1, $i2 ) = @_;
18545 my $type = $types_to_go[$i1];
18546 my $depth = $nesting_depth_to_go[$i1];
18547 return unless ( $nesting_depth_to_go[$i2] == $depth );
18548 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
18550 ###########################################################
18551 # This is potentially a very slow routine and not critical.
18552 # For safety just give up for large differences.
18553 # See test file 'infinite_loop.txt'
18554 # TODO: replace this loop with a data structure
18555 ###########################################################
18556 return if ( $i2 - $i1 > 200 );
18558 for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
18559 next if ( $nesting_depth_to_go[$i] > $depth );
18560 return if ( $nesting_depth_to_go[$i] < $depth );
18562 my $tok = $tokens_to_go[$i];
18563 $tok = ',' if $tok eq '=>'; # treat => same as ,
18565 # Example: we would not want to break at any of these .'s
18566 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
18567 if ( $type ne ':' ) {
18568 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
18571 return if ( $tok =~ /^[\,]$/ );
18577 sub set_continuation_breaks {
18579 # Define an array of indexes for inserting newline characters to
18580 # keep the line lengths below the maximum desired length. There is
18581 # an implied break after the last token, so it need not be included.
18584 # This routine is part of series of routines which adjust line
18585 # lengths. It is only called if a statement is longer than the
18586 # maximum line length, or if a preliminary scanning located
18587 # desirable break points. Sub scan_list has already looked at
18588 # these tokens and set breakpoints (in array
18589 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
18590 # after commas, after opening parens, and before closing parens).
18591 # This routine will honor these breakpoints and also add additional
18592 # breakpoints as necessary to keep the line length below the maximum
18593 # requested. It bases its decision on where the 'bond strength' is
18596 # Output: returns references to the arrays:
18599 # which contain the indexes $i of the first and last tokens on each
18602 # In addition, the array:
18603 # $forced_breakpoint_to_go[$i]
18604 # may be updated to be =1 for any index $i after which there must be
18605 # a break. This signals later routines not to undo the breakpoint.
18607 my $saw_good_break = shift;
18608 my @i_first = (); # the first index to output
18609 my @i_last = (); # the last index to output
18610 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
18611 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
18613 set_bond_strengths();
18616 my $imax = $max_index_to_go;
18617 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
18618 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
18619 my $i_begin = $imin; # index for starting next iteration
18621 my $leading_spaces = leading_spaces_to_go($imin);
18622 my $line_count = 0;
18623 my $last_break_strength = NO_BREAK;
18624 my $i_last_break = -1;
18625 my $max_bias = 0.001;
18626 my $tiny_bias = 0.0001;
18627 my $leading_alignment_token = "";
18628 my $leading_alignment_type = "";
18630 # see if any ?/:'s are in order
18631 my $colons_in_order = 1;
18633 my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
18634 my $colon_count = @colon_list;
18635 foreach (@colon_list) {
18636 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
18640 # This is a sufficient but not necessary condition for colon chain
18641 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
18643 #-------------------------------------------------------
18644 # BEGINNING of main loop to set continuation breakpoints
18645 # Keep iterating until we reach the end
18646 #-------------------------------------------------------
18647 while ( $i_begin <= $imax ) {
18648 my $lowest_strength = NO_BREAK;
18649 my $starting_sum = $summed_lengths_to_go[$i_begin];
18652 my $lowest_next_token = '';
18653 my $lowest_next_type = 'b';
18654 my $i_lowest_next_nonblank = -1;
18656 #-------------------------------------------------------
18657 # BEGINNING of inner loop to find the best next breakpoint
18658 #-------------------------------------------------------
18659 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
18660 my $type = $types_to_go[$i_test];
18661 my $token = $tokens_to_go[$i_test];
18662 my $next_type = $types_to_go[ $i_test + 1 ];
18663 my $next_token = $tokens_to_go[ $i_test + 1 ];
18664 my $i_next_nonblank = $inext_to_go[$i_test];
18665 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
18666 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18667 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18668 my $strength = $bond_strength_to_go[$i_test];
18669 my $maximum_line_length = maximum_line_length($i_begin);
18671 # use old breaks as a tie-breaker. For example to
18672 # prevent blinkers with -pbp in this code:
18675 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
18678 # At the same time try to prevent a leading * in this code
18679 # with the default formatting:
18682 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
18683 ## * ( $x**( $a - 1 ) )
18684 ## * ( ( 1 - $x )**( $b - 1 ) );
18686 # reduce strength a bit to break ties at an old breakpoint ...
18688 $old_breakpoint_to_go[$i_test]
18690 # which is a 'good' breakpoint, meaning ...
18691 # we don't want to break before it
18692 && !$want_break_before{$type}
18694 # and either we want to break before the next token
18695 # or the next token is not short (i.e. not a '*', '/' etc.)
18696 && $i_next_nonblank <= $imax
18697 && ( $want_break_before{$next_nonblank_type}
18698 || $token_lengths_to_go[$i_next_nonblank] > 2
18699 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
18702 $strength -= $tiny_bias;
18705 # otherwise increase strength a bit if this token would be at the
18706 # maximum line length. This is necessary to avoid blinking
18707 # in the above example when the -iob flag is added.
18711 $summed_lengths_to_go[ $i_test + 1 ] -
18713 if ( $len >= $maximum_line_length ) {
18714 $strength += $tiny_bias;
18718 my $must_break = 0;
18720 # Force an immediate break at certain operators
18721 # with lower level than the start of the line,
18722 # unless we've already seen a better break.
18724 ##############################################
18725 # Note on an issue with a preceding ?
18726 ##############################################
18727 # We don't include a ? in the above list, but there may
18728 # be a break at a previous ? if the line is long.
18729 # Because of this we do not want to force a break if
18730 # there is a previous ? on this line. For now the best way
18731 # to do this is to not break if we have seen a lower strength
18732 # point, which is probably a ?.
18734 # Example of unwanted breaks we are avoiding at a '.' following a ?
18735 # from pod2html using perltidy -gnu:
18737 # ? "\n<A NAME=\""
18739 # . "\">\n$text</A>\n"
18740 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
18743 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
18744 || ( $next_nonblank_type eq 'k'
18745 && $next_nonblank_token =~ /^(and|or)$/ )
18747 && ( $nesting_depth_to_go[$i_begin] >
18748 $nesting_depth_to_go[$i_next_nonblank] )
18749 && ( $strength <= $lowest_strength )
18752 set_forced_breakpoint($i_next_nonblank);
18757 # Try to put a break where requested by scan_list
18758 $forced_breakpoint_to_go[$i_test]
18760 # break between ) { in a continued line so that the '{' can
18762 # See similar logic in scan_list which catches instances
18763 # where a line is just something like ') {'. We have to
18764 # be careful because the corresponding block keyword might
18765 # not be on the first line, such as 'for' here:
18769 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
18775 && ( $token eq ')' )
18776 && ( $next_nonblank_type eq '{' )
18777 && ($next_nonblank_block_type)
18778 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
18780 # RT #104427: Dont break before opening sub brace because
18781 # sub block breaks handled at higher level, unless
18782 # it looks like the preceeding list is long and broken
18784 $next_nonblank_block_type =~ /^sub/
18785 && ( $nesting_depth_to_go[$i_begin] ==
18786 $nesting_depth_to_go[$i_next_nonblank] )
18789 && !$rOpts->{'opening-brace-always-on-right'}
18792 # There is an implied forced break at a terminal opening brace
18793 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
18797 # Forced breakpoints must sometimes be overridden, for example
18798 # because of a side comment causing a NO_BREAK. It is easier
18799 # to catch this here than when they are set.
18800 if ( $strength < NO_BREAK - 1 ) {
18801 $strength = $lowest_strength - $tiny_bias;
18806 # quit if a break here would put a good terminal token on
18807 # the next line and we already have a possible break
18810 && ( $next_nonblank_type =~ /^[\;\,]$/ )
18814 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
18816 ) > $maximum_line_length
18820 last if ( $i_lowest >= 0 );
18823 # Avoid a break which would strand a single punctuation
18824 # token. For example, we do not want to strand a leading
18825 # '.' which is followed by a long quoted string.
18826 # But note that we do want to do this with -extrude (l=1)
18827 # so please test any changes to this code on -extrude.
18830 && ( $i_test == $i_begin )
18831 && ( $i_test < $imax )
18832 && ( $token eq $type )
18836 $summed_lengths_to_go[ $i_test + 1 ] -
18838 ) < $maximum_line_length
18842 $i_test = min( $imax, $inext_to_go[$i_test] );
18846 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
18849 # break at previous best break if it would have produced
18850 # a leading alignment of certain common tokens, and it
18851 # is different from the latest candidate break
18853 if ($leading_alignment_type);
18855 # Force at least one breakpoint if old code had good
18856 # break It is only called if a breakpoint is required or
18857 # desired. This will probably need some adjustments
18858 # over time. A goal is to try to be sure that, if a new
18859 # side comment is introduced into formatted text, then
18860 # the same breakpoints will occur. scbreak.t
18863 $i_test == $imax # we are at the end
18864 && !$forced_breakpoint_count #
18865 && $saw_good_break # old line had good break
18866 && $type =~ /^[#;\{]$/ # and this line ends in
18867 # ';' or side comment
18868 && $i_last_break < 0 # and we haven't made a break
18869 && $i_lowest >= 0 # and we saw a possible break
18870 && $i_lowest < $imax - 1 # (but not just before this ;)
18871 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
18874 # Do not skip past an important break point in a short final
18875 # segment. For example, without this check we would miss the
18876 # break at the final / in the following code:
18879 # ( $tau * $mass_pellet * $q_0 *
18880 # ( 1. - exp( -$t_stop / $tau ) ) -
18881 # 4. * $pi * $factor * $k_ice *
18882 # ( $t_melt - $t_ice ) *
18885 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
18887 if ( $line_count > 2
18888 && $i_lowest < $i_test
18889 && $i_test > $imax - 2
18890 && $nesting_depth_to_go[$i_begin] >
18891 $nesting_depth_to_go[$i_lowest]
18892 && $lowest_strength < $last_break_strength - .5 * WEAK )
18894 # Make this break for math operators for now
18895 my $ir = $inext_to_go[$i_lowest];
18896 my $il = $iprev_to_go[$ir];
18898 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
18899 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
18902 # Update the minimum bond strength location
18903 $lowest_strength = $strength;
18904 $i_lowest = $i_test;
18905 $lowest_next_token = $next_nonblank_token;
18906 $lowest_next_type = $next_nonblank_type;
18907 $i_lowest_next_nonblank = $i_next_nonblank;
18908 last if $must_break;
18910 # set flags to remember if a break here will produce a
18911 # leading alignment of certain common tokens
18912 if ( $line_count > 0
18914 && ( $lowest_strength - $last_break_strength <= $max_bias )
18917 my $i_last_end = $iprev_to_go[$i_begin];
18918 my $tok_beg = $tokens_to_go[$i_begin];
18919 my $type_beg = $types_to_go[$i_begin];
18922 # check for leading alignment of certain tokens
18924 $tok_beg eq $next_nonblank_token
18925 && $is_chain_operator{$tok_beg}
18926 && ( $type_beg eq 'k'
18927 || $type_beg eq $tok_beg )
18928 && $nesting_depth_to_go[$i_begin] >=
18929 $nesting_depth_to_go[$i_next_nonblank]
18932 || ( $tokens_to_go[$i_last_end] eq $token
18933 && $is_chain_operator{$token}
18934 && ( $type eq 'k' || $type eq $token )
18935 && $nesting_depth_to_go[$i_last_end] >=
18936 $nesting_depth_to_go[$i_test] )
18939 $leading_alignment_token = $next_nonblank_token;
18940 $leading_alignment_type = $next_nonblank_type;
18945 my $too_long = ( $i_test >= $imax );
18946 if ( !$too_long ) {
18949 $summed_lengths_to_go[ $i_test + 2 ] -
18951 $too_long = $next_length > $maximum_line_length;
18953 # To prevent blinkers we will avoid leaving a token exactly at
18954 # the line length limit unless it is the last token or one of
18955 # several "good" types.
18957 # The following code was a blinker with -pbp before this
18959 ## $last_nonblank_token eq '('
18960 ## && $is_indirect_object_taker{ $paren_type
18961 ## [$paren_depth] }
18962 # The issue causing the problem is that if the
18963 # term [$paren_depth] gets broken across a line then
18964 # the whitespace routine doesn't see both opening and closing
18965 # brackets and will format like '[ $paren_depth ]'. This
18966 # leads to an oscillation in length depending if we break
18967 # before the closing bracket or not.
18969 && $i_test + 1 < $imax
18970 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
18972 $too_long = $next_length >= $maximum_line_length;
18976 FORMATTER_DEBUG_FLAG_BREAK
18979 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
18980 my $i_testp2 = $i_test + 2;
18981 if ( $i_testp2 > $max_index_to_go + 1 ) {
18982 $i_testp2 = $max_index_to_go + 1;
18984 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
18985 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
18987 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
18990 # allow one extra terminal token after exceeding line length
18991 # if it would strand this token.
18992 if ( $rOpts_fuzzy_line_length
18994 && $i_lowest == $i_test
18995 && $token_lengths_to_go[$i_test] > 1
18996 && $next_nonblank_type =~ /^[\;\,]$/ )
19003 ( $i_test == $imax ) # we're done if no more tokens,
19005 ( $i_lowest >= 0 ) # or no more space and we have a break
19011 #-------------------------------------------------------
19012 # END of inner loop to find the best next breakpoint
19013 # Now decide exactly where to put the breakpoint
19014 #-------------------------------------------------------
19016 # it's always ok to break at imax if no other break was found
19017 if ( $i_lowest < 0 ) { $i_lowest = $imax }
19019 # semi-final index calculation
19020 my $i_next_nonblank = $inext_to_go[$i_lowest];
19021 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
19022 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
19024 #-------------------------------------------------------
19025 # ?/: rule 1 : if a break here will separate a '?' on this
19026 # line from its closing ':', then break at the '?' instead.
19027 #-------------------------------------------------------
19029 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
19030 next unless ( $tokens_to_go[$i] eq '?' );
19032 # do not break if probable sequence of ?/: statements
19033 next if ($is_colon_chain);
19035 # do not break if statement is broken by side comment
19038 $tokens_to_go[$max_index_to_go] eq '#'
19039 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
19040 $max_index_to_go ) !~ /^[\;\}]$/
19043 # no break needed if matching : is also on the line
19045 if ( $mate_index_to_go[$i] >= 0
19046 && $mate_index_to_go[$i] <= $i_next_nonblank );
19049 if ( $want_break_before{'?'} ) { $i_lowest-- }
19053 #-------------------------------------------------------
19054 # END of inner loop to find the best next breakpoint:
19055 # Break the line after the token with index i=$i_lowest
19056 #-------------------------------------------------------
19058 # final index calculation
19059 $i_next_nonblank = $inext_to_go[$i_lowest];
19060 $next_nonblank_type = $types_to_go[$i_next_nonblank];
19061 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
19063 FORMATTER_DEBUG_FLAG_BREAK
19065 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
19067 #-------------------------------------------------------
19068 # ?/: rule 2 : if we break at a '?', then break at its ':'
19070 # Note: this rule is also in sub scan_list to handle a break
19071 # at the start and end of a line (in case breaks are dictated
19072 # by side comments).
19073 #-------------------------------------------------------
19074 if ( $next_nonblank_type eq '?' ) {
19075 set_closing_breakpoint($i_next_nonblank);
19077 elsif ( $types_to_go[$i_lowest] eq '?' ) {
19078 set_closing_breakpoint($i_lowest);
19081 #-------------------------------------------------------
19082 # ?/: rule 3 : if we break at a ':' then we save
19083 # its location for further work below. We may need to go
19084 # back and break at its '?'.
19085 #-------------------------------------------------------
19086 if ( $next_nonblank_type eq ':' ) {
19087 push @i_colon_breaks, $i_next_nonblank;
19089 elsif ( $types_to_go[$i_lowest] eq ':' ) {
19090 push @i_colon_breaks, $i_lowest;
19093 # here we should set breaks for all '?'/':' pairs which are
19094 # separated by this line
19098 # save this line segment, after trimming blanks at the ends
19100 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
19102 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
19104 # set a forced breakpoint at a container opening, if necessary, to
19105 # signal a break at a closing container. Excepting '(' for now.
19106 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
19107 && !$forced_breakpoint_to_go[$i_lowest] )
19109 set_closing_breakpoint($i_lowest);
19112 # get ready to go again
19113 $i_begin = $i_lowest + 1;
19114 $last_break_strength = $lowest_strength;
19115 $i_last_break = $i_lowest;
19116 $leading_alignment_token = "";
19117 $leading_alignment_type = "";
19118 $lowest_next_token = '';
19119 $lowest_next_type = 'b';
19121 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
19125 # update indentation size
19126 if ( $i_begin <= $imax ) {
19127 $leading_spaces = leading_spaces_to_go($i_begin);
19131 #-------------------------------------------------------
19132 # END of main loop to set continuation breakpoints
19133 # Now go back and make any necessary corrections
19134 #-------------------------------------------------------
19136 #-------------------------------------------------------
19137 # ?/: rule 4 -- if we broke at a ':', then break at
19138 # corresponding '?' unless this is a chain of ?: expressions
19139 #-------------------------------------------------------
19140 if (@i_colon_breaks) {
19142 # using a simple method for deciding if we are in a ?/: chain --
19143 # this is a chain if it has multiple ?/: pairs all in order;
19145 # Note that if line starts in a ':' we count that above as a break
19146 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
19148 unless ($is_chain) {
19149 my @insert_list = ();
19150 foreach (@i_colon_breaks) {
19151 my $i_question = $mate_index_to_go[$_];
19152 if ( $i_question >= 0 ) {
19153 if ( $want_break_before{'?'} ) {
19154 $i_question = $iprev_to_go[$i_question];
19157 if ( $i_question >= 0 ) {
19158 push @insert_list, $i_question;
19161 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
19165 return ( \@i_first, \@i_last, $colon_count );
19168 sub insert_additional_breaks {
19170 # this routine will add line breaks at requested locations after
19171 # sub set_continuation_breaks has made preliminary breaks.
19173 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
19176 my $line_number = 0;
19178 foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
19180 $i_f = $$ri_first[$line_number];
19181 $i_l = $$ri_last[$line_number];
19182 while ( $i_break_left >= $i_l ) {
19185 # shouldn't happen unless caller passes bad indexes
19186 if ( $line_number >= @$ri_last ) {
19188 "Non-fatal program bug: couldn't set break at $i_break_left\n"
19190 report_definite_bug();
19193 $i_f = $$ri_first[$line_number];
19194 $i_l = $$ri_last[$line_number];
19197 # Do not leave a blank at the end of a line; back up if necessary
19198 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
19200 my $i_break_right = $inext_to_go[$i_break_left];
19201 if ( $i_break_left >= $i_f
19202 && $i_break_left < $i_l
19203 && $i_break_right > $i_f
19204 && $i_break_right <= $i_l )
19206 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
19207 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
19212 sub set_closing_breakpoint {
19214 # set a breakpoint at a matching closing token
19215 # at present, this is only used to break at a ':' which matches a '?'
19216 my $i_break = shift;
19218 if ( $mate_index_to_go[$i_break] >= 0 ) {
19220 # CAUTION: infinite recursion possible here:
19221 # set_closing_breakpoint calls set_forced_breakpoint, and
19222 # set_forced_breakpoint call set_closing_breakpoint
19223 # ( test files attrib.t, BasicLyx.pm.html).
19224 # Don't reduce the '2' in the statement below
19225 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
19227 # break before } ] and ), but sub set_forced_breakpoint will decide
19228 # to break before or after a ? and :
19229 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
19230 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
19234 my $type_sequence = $type_sequence_to_go[$i_break];
19235 if ($type_sequence) {
19236 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
19237 $postponed_breakpoint{$type_sequence} = 1;
19242 sub compare_indentation_levels {
19244 # check to see if output line tabbing agrees with input line
19245 # this can be very useful for debugging a script which has an extra
19247 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
19248 if ( $guessed_indentation_level ne $structural_indentation_level ) {
19249 $last_tabbing_disagreement = $input_line_number;
19251 if ($in_tabbing_disagreement) {
19254 $tabbing_disagreement_count++;
19256 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19257 write_logfile_entry(
19258 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
19261 $in_tabbing_disagreement = $input_line_number;
19262 $first_tabbing_disagreement = $in_tabbing_disagreement
19263 unless ($first_tabbing_disagreement);
19268 if ($in_tabbing_disagreement) {
19270 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19271 write_logfile_entry(
19272 "End indentation disagreement from input line $in_tabbing_disagreement\n"
19275 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
19276 write_logfile_entry(
19277 "No further tabbing disagreements will be noted\n");
19280 $in_tabbing_disagreement = 0;
19285 #####################################################################
19287 # the Perl::Tidy::IndentationItem class supplies items which contain
19288 # how much whitespace should be used at the start of a line
19290 #####################################################################
19292 package Perl::Tidy::IndentationItem;
19294 # Indexes for indentation items
19295 use constant SPACES => 0; # total leading white spaces
19296 use constant LEVEL => 1; # the indentation 'level'
19297 use constant CI_LEVEL => 2; # the 'continuation level'
19298 use constant AVAILABLE_SPACES => 3; # how many left spaces available
19300 use constant CLOSED => 4; # index where we saw closing '}'
19301 use constant COMMA_COUNT => 5; # how many commas at this level?
19302 use constant SEQUENCE_NUMBER => 6; # output batch number
19303 use constant INDEX => 7; # index in output batch list
19304 use constant HAVE_CHILD => 8; # any dependents?
19305 use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
19306 # we would like to move to get
19307 # alignment (negative if left)
19308 use constant ALIGN_PAREN => 10; # do we want to try to align
19309 # with an opening structure?
19310 use constant MARKED => 11; # if visited by corrector logic
19311 use constant STACK_DEPTH => 12; # indentation nesting depth
19312 use constant STARTING_INDEX => 13; # first token index of this level
19313 use constant ARROW_COUNT => 14; # how many =>'s
19317 # Create an 'indentation_item' which describes one level of leading
19318 # whitespace when the '-lp' indentation is used. We return
19319 # a reference to an anonymous array of associated variables.
19320 # See above constants for storage scheme.
19322 $class, $spaces, $level,
19323 $ci_level, $available_spaces, $index,
19324 $gnu_sequence_number, $align_paren, $stack_depth,
19328 my $arrow_count = 0;
19329 my $comma_count = 0;
19330 my $have_child = 0;
19331 my $want_right_spaces = 0;
19334 $spaces, $level, $ci_level,
19335 $available_spaces, $closed, $comma_count,
19336 $gnu_sequence_number, $index, $have_child,
19337 $want_right_spaces, $align_paren, $marked,
19338 $stack_depth, $starting_index, $arrow_count,
19342 sub permanently_decrease_AVAILABLE_SPACES {
19344 # make a permanent reduction in the available indentation spaces
19345 # at one indentation item. NOTE: if there are child nodes, their
19346 # total SPACES must be reduced by the caller.
19348 my ( $item, $spaces_needed ) = @_;
19349 my $available_spaces = $item->get_AVAILABLE_SPACES();
19350 my $deleted_spaces =
19351 ( $available_spaces > $spaces_needed )
19353 : $available_spaces;
19354 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19355 $item->decrease_SPACES($deleted_spaces);
19356 $item->set_RECOVERABLE_SPACES(0);
19358 return $deleted_spaces;
19361 sub tentatively_decrease_AVAILABLE_SPACES {
19363 # We are asked to tentatively delete $spaces_needed of indentation
19364 # for a indentation item. We may want to undo this later. NOTE: if
19365 # there are child nodes, their total SPACES must be reduced by the
19367 my ( $item, $spaces_needed ) = @_;
19368 my $available_spaces = $item->get_AVAILABLE_SPACES();
19369 my $deleted_spaces =
19370 ( $available_spaces > $spaces_needed )
19372 : $available_spaces;
19373 $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19374 $item->decrease_SPACES($deleted_spaces);
19375 $item->increase_RECOVERABLE_SPACES($deleted_spaces);
19376 return $deleted_spaces;
19379 sub get_STACK_DEPTH {
19381 return $self->[STACK_DEPTH];
19386 return $self->[SPACES];
19391 return $self->[MARKED];
19395 my ( $self, $value ) = @_;
19396 if ( defined($value) ) {
19397 $self->[MARKED] = $value;
19399 return $self->[MARKED];
19402 sub get_AVAILABLE_SPACES {
19404 return $self->[AVAILABLE_SPACES];
19407 sub decrease_SPACES {
19408 my ( $self, $value ) = @_;
19409 if ( defined($value) ) {
19410 $self->[SPACES] -= $value;
19412 return $self->[SPACES];
19415 sub decrease_AVAILABLE_SPACES {
19416 my ( $self, $value ) = @_;
19417 if ( defined($value) ) {
19418 $self->[AVAILABLE_SPACES] -= $value;
19420 return $self->[AVAILABLE_SPACES];
19423 sub get_ALIGN_PAREN {
19425 return $self->[ALIGN_PAREN];
19428 sub get_RECOVERABLE_SPACES {
19430 return $self->[RECOVERABLE_SPACES];
19433 sub set_RECOVERABLE_SPACES {
19434 my ( $self, $value ) = @_;
19435 if ( defined($value) ) {
19436 $self->[RECOVERABLE_SPACES] = $value;
19438 return $self->[RECOVERABLE_SPACES];
19441 sub increase_RECOVERABLE_SPACES {
19442 my ( $self, $value ) = @_;
19443 if ( defined($value) ) {
19444 $self->[RECOVERABLE_SPACES] += $value;
19446 return $self->[RECOVERABLE_SPACES];
19451 return $self->[CI_LEVEL];
19456 return $self->[LEVEL];
19459 sub get_SEQUENCE_NUMBER {
19461 return $self->[SEQUENCE_NUMBER];
19466 return $self->[INDEX];
19469 sub get_STARTING_INDEX {
19471 return $self->[STARTING_INDEX];
19474 sub set_HAVE_CHILD {
19475 my ( $self, $value ) = @_;
19476 if ( defined($value) ) {
19477 $self->[HAVE_CHILD] = $value;
19479 return $self->[HAVE_CHILD];
19482 sub get_HAVE_CHILD {
19484 return $self->[HAVE_CHILD];
19487 sub set_ARROW_COUNT {
19488 my ( $self, $value ) = @_;
19489 if ( defined($value) ) {
19490 $self->[ARROW_COUNT] = $value;
19492 return $self->[ARROW_COUNT];
19495 sub get_ARROW_COUNT {
19497 return $self->[ARROW_COUNT];
19500 sub set_COMMA_COUNT {
19501 my ( $self, $value ) = @_;
19502 if ( defined($value) ) {
19503 $self->[COMMA_COUNT] = $value;
19505 return $self->[COMMA_COUNT];
19508 sub get_COMMA_COUNT {
19510 return $self->[COMMA_COUNT];
19514 my ( $self, $value ) = @_;
19515 if ( defined($value) ) {
19516 $self->[CLOSED] = $value;
19518 return $self->[CLOSED];
19523 return $self->[CLOSED];
19526 #####################################################################
19528 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
19529 # contain a single output line
19531 #####################################################################
19533 package Perl::Tidy::VerticalAligner::Line;
19540 use constant JMAX => 0;
19541 use constant JMAX_ORIGINAL_LINE => 1;
19542 use constant RTOKENS => 2;
19543 use constant RFIELDS => 3;
19544 use constant RPATTERNS => 4;
19545 use constant INDENTATION => 5;
19546 use constant LEADING_SPACE_COUNT => 6;
19547 use constant OUTDENT_LONG_LINES => 7;
19548 use constant LIST_TYPE => 8;
19549 use constant IS_HANGING_SIDE_COMMENT => 9;
19550 use constant RALIGNMENTS => 10;
19551 use constant MAXIMUM_LINE_LENGTH => 11;
19552 use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
19555 $_index_map{jmax} = JMAX;
19556 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
19557 $_index_map{rtokens} = RTOKENS;
19558 $_index_map{rfields} = RFIELDS;
19559 $_index_map{rpatterns} = RPATTERNS;
19560 $_index_map{indentation} = INDENTATION;
19561 $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
19562 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
19563 $_index_map{list_type} = LIST_TYPE;
19564 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
19565 $_index_map{ralignments} = RALIGNMENTS;
19566 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
19567 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
19569 my @_default_data = ();
19570 $_default_data[JMAX] = undef;
19571 $_default_data[JMAX_ORIGINAL_LINE] = undef;
19572 $_default_data[RTOKENS] = undef;
19573 $_default_data[RFIELDS] = undef;
19574 $_default_data[RPATTERNS] = undef;
19575 $_default_data[INDENTATION] = undef;
19576 $_default_data[LEADING_SPACE_COUNT] = undef;
19577 $_default_data[OUTDENT_LONG_LINES] = undef;
19578 $_default_data[LIST_TYPE] = undef;
19579 $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
19580 $_default_data[RALIGNMENTS] = [];
19581 $_default_data[MAXIMUM_LINE_LENGTH] = undef;
19582 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
19586 # methods to count object population
19588 sub get_count { $_count; }
19589 sub _increment_count { ++$_count }
19590 sub _decrement_count { --$_count }
19593 # Constructor may be called as a class method
19595 my ( $caller, %arg ) = @_;
19596 my $caller_is_obj = ref($caller);
19597 my $class = $caller_is_obj || $caller;
19599 my $self = bless [], $class;
19601 $self->[RALIGNMENTS] = [];
19604 foreach ( keys %_index_map ) {
19605 $index = $_index_map{$_};
19606 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19607 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19608 else { $self->[$index] = $_default_data[$index] }
19611 $self->_increment_count();
19616 $_[0]->_decrement_count();
19619 sub get_jmax { $_[0]->[JMAX] }
19620 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
19621 sub get_rtokens { $_[0]->[RTOKENS] }
19622 sub get_rfields { $_[0]->[RFIELDS] }
19623 sub get_rpatterns { $_[0]->[RPATTERNS] }
19624 sub get_indentation { $_[0]->[INDENTATION] }
19625 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
19626 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
19627 sub get_list_type { $_[0]->[LIST_TYPE] }
19628 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
19629 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
19631 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
19632 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
19633 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
19634 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
19636 sub get_starting_column {
19637 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
19640 sub increment_column {
19641 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
19643 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
19645 sub current_field_width {
19649 return $self->get_column($j);
19652 return $self->get_column($j) - $self->get_column( $j - 1 );
19656 sub field_width_growth {
19659 return $self->get_column($j) - $self->get_starting_column($j);
19662 sub starting_field_width {
19666 return $self->get_starting_column($j);
19669 return $self->get_starting_column($j) -
19670 $self->get_starting_column( $j - 1 );
19674 sub increase_field_width {
19677 my ( $j, $pad ) = @_;
19678 my $jmax = $self->get_jmax();
19679 for my $k ( $j .. $jmax ) {
19680 $self->increment_column( $k, $pad );
19684 sub get_available_space_on_right {
19686 my $jmax = $self->get_jmax();
19687 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
19690 sub set_jmax { $_[0]->[JMAX] = $_[1] }
19691 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
19692 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
19693 sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
19694 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
19695 sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
19696 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
19697 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
19698 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
19699 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
19700 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
19704 #####################################################################
19706 # the Perl::Tidy::VerticalAligner::Alignment class holds information
19707 # on a single column being aligned
19709 #####################################################################
19710 package Perl::Tidy::VerticalAligner::Alignment;
19718 # Symbolic array indexes
19719 use constant COLUMN => 0; # the current column number
19720 use constant STARTING_COLUMN => 1; # column number when created
19721 use constant MATCHING_TOKEN => 2; # what token we are matching
19722 use constant STARTING_LINE => 3; # the line index of creation
19723 use constant ENDING_LINE => 4; # the most recent line to use it
19724 use constant SAVED_COLUMN => 5; # the most recent line to use it
19725 use constant SERIAL_NUMBER => 6; # unique number for this alignment
19726 # (just its index in an array)
19728 # Correspondence between variables and array indexes
19730 $_index_map{column} = COLUMN;
19731 $_index_map{starting_column} = STARTING_COLUMN;
19732 $_index_map{matching_token} = MATCHING_TOKEN;
19733 $_index_map{starting_line} = STARTING_LINE;
19734 $_index_map{ending_line} = ENDING_LINE;
19735 $_index_map{saved_column} = SAVED_COLUMN;
19736 $_index_map{serial_number} = SERIAL_NUMBER;
19738 my @_default_data = ();
19739 $_default_data[COLUMN] = undef;
19740 $_default_data[STARTING_COLUMN] = undef;
19741 $_default_data[MATCHING_TOKEN] = undef;
19742 $_default_data[STARTING_LINE] = undef;
19743 $_default_data[ENDING_LINE] = undef;
19744 $_default_data[SAVED_COLUMN] = undef;
19745 $_default_data[SERIAL_NUMBER] = undef;
19747 # class population count
19750 sub get_count { $_count; }
19751 sub _increment_count { ++$_count }
19752 sub _decrement_count { --$_count }
19757 my ( $caller, %arg ) = @_;
19758 my $caller_is_obj = ref($caller);
19759 my $class = $caller_is_obj || $caller;
19761 my $self = bless [], $class;
19763 foreach ( keys %_index_map ) {
19764 my $index = $_index_map{$_};
19765 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19766 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
19767 else { $self->[$index] = $_default_data[$index] }
19769 $self->_increment_count();
19774 $_[0]->_decrement_count();
19777 sub get_column { return $_[0]->[COLUMN] }
19778 sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
19779 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
19780 sub get_starting_line { return $_[0]->[STARTING_LINE] }
19781 sub get_ending_line { return $_[0]->[ENDING_LINE] }
19782 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
19784 sub set_column { $_[0]->[COLUMN] = $_[1] }
19785 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
19786 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
19787 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
19788 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
19789 sub increment_column { $_[0]->[COLUMN] += $_[1] }
19791 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
19792 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
19796 package Perl::Tidy::VerticalAligner;
19798 # The Perl::Tidy::VerticalAligner package collects output lines and
19799 # attempts to line up certain common tokens, such as => and #, which are
19800 # identified by the calling routine.
19802 # There are two main routines: valign_input and flush. Append acts as a
19803 # storage buffer, collecting lines into a group which can be vertically
19804 # aligned. When alignment is no longer possible or desirable, it dumps
19805 # the group to flush.
19807 # valign_input -----> flush
19815 # Caution: these debug flags produce a lot of output
19816 # They should all be 0 except when debugging small scripts
19818 use constant VALIGN_DEBUG_FLAG_APPEND => 0;
19819 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
19820 use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
19821 use constant VALIGN_DEBUG_FLAG_TABS => 0;
19823 my $debug_warning = sub {
19824 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
19827 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
19828 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
19829 VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
19830 VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
19835 $vertical_aligner_self
19837 $maximum_alignment_index
19841 $previous_minimum_jmax_seen
19842 $previous_maximum_jmax_seen
19843 $maximum_line_index
19848 $last_level_written
19849 $last_leading_space_count
19853 $last_comment_column
19854 $last_side_comment_line_number
19855 $last_side_comment_length
19856 $last_side_comment_level
19857 $outdented_line_count
19858 $first_outdented_line_at
19859 $last_outdented_line_at
19860 $diagnostics_object
19862 $file_writer_object
19863 @side_comment_history
19864 $comment_leading_space_count
19865 $is_matching_terminal_line
19866 $consecutive_block_comments
19873 $cached_line_leading_space_count
19874 $cached_seqno_string
19876 $valign_buffer_filling
19880 $last_nonblank_seqno_string
19884 $rOpts_maximum_line_length
19885 $rOpts_variable_maximum_line_length
19886 $rOpts_continuation_indentation
19887 $rOpts_indent_columns
19889 $rOpts_entab_leading_whitespace
19892 $rOpts_fixed_position_side_comment
19893 $rOpts_minimum_space_to_comment
19901 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
19904 # variables describing the entire space group:
19905 $ralignment_list = [];
19907 $last_level_written = -1;
19908 $extra_indent_ok = 0; # can we move all lines to the right?
19909 $last_side_comment_length = 0;
19910 $maximum_jmax_seen = 0;
19911 $minimum_jmax_seen = 0;
19912 $previous_minimum_jmax_seen = 0;
19913 $previous_maximum_jmax_seen = 0;
19915 # variables describing each line of the group
19916 @group_lines = (); # list of all lines in group
19918 $outdented_line_count = 0;
19919 $first_outdented_line_at = 0;
19920 $last_outdented_line_at = 0;
19921 $last_side_comment_line_number = 0;
19922 $last_side_comment_level = -1;
19923 $is_matching_terminal_line = 0;
19925 # most recent 3 side comments; [ line number, column ]
19926 $side_comment_history[0] = [ -300, 0 ];
19927 $side_comment_history[1] = [ -200, 0 ];
19928 $side_comment_history[2] = [ -100, 0 ];
19930 # valign_output_step_B cache:
19931 $cached_line_text = "";
19932 $cached_line_type = 0;
19933 $cached_line_flag = 0;
19935 $cached_line_valid = 0;
19936 $cached_line_leading_space_count = 0;
19937 $cached_seqno_string = "";
19939 # string of sequence numbers joined together
19940 $seqno_string = "";
19941 $last_nonblank_seqno_string = "";
19943 # frequently used parameters
19944 $rOpts_indent_columns = $rOpts->{'indent-columns'};
19945 $rOpts_tabs = $rOpts->{'tabs'};
19946 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
19947 $rOpts_fixed_position_side_comment =
19948 $rOpts->{'fixed-position-side-comment'};
19949 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
19950 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
19951 $rOpts_variable_maximum_line_length =
19952 $rOpts->{'variable-maximum-line-length'};
19953 $rOpts_valign = $rOpts->{'valign'};
19955 $consecutive_block_comments = 0;
19956 forget_side_comment();
19958 initialize_for_new_group();
19960 $vertical_aligner_self = {};
19961 bless $vertical_aligner_self, $class;
19962 return $vertical_aligner_self;
19965 sub initialize_for_new_group {
19966 $maximum_line_index = -1; # lines in the current group
19967 $maximum_alignment_index = -1; # alignments in current group
19968 $zero_count = 0; # count consecutive lines without tokens
19969 $current_line = undef; # line being matched for alignment
19970 $group_maximum_gap = 0; # largest gap introduced
19972 $marginal_match = 0;
19973 $comment_leading_space_count = 0;
19974 $last_leading_space_count = 0;
19977 # interface to Perl::Tidy::Diagnostics routines
19978 sub write_diagnostics {
19979 if ($diagnostics_object) {
19980 $diagnostics_object->write_diagnostics(@_);
19984 # interface to Perl::Tidy::Logger routines
19986 if ($logger_object) {
19987 $logger_object->warning(@_);
19991 sub write_logfile_entry {
19992 if ($logger_object) {
19993 $logger_object->write_logfile_entry(@_);
19997 sub report_definite_bug {
19998 if ($logger_object) {
19999 $logger_object->report_definite_bug();
20005 # return the number of leading spaces associated with an indentation
20006 # variable $indentation is either a constant number of spaces or an
20007 # object with a get_SPACES method.
20008 my $indentation = shift;
20009 return ref($indentation) ? $indentation->get_SPACES() : $indentation;
20012 sub get_RECOVERABLE_SPACES {
20014 # return the number of spaces (+ means shift right, - means shift left)
20015 # that we would like to shift a group of lines with the same indentation
20016 # to get them to line up with their opening parens
20017 my $indentation = shift;
20018 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
20021 sub get_STACK_DEPTH {
20023 my $indentation = shift;
20024 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
20027 sub make_alignment {
20028 my ( $col, $token ) = @_;
20030 # make one new alignment at column $col which aligns token $token
20031 ++$maximum_alignment_index;
20032 my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
20034 starting_column => $col,
20035 matching_token => $token,
20036 starting_line => $maximum_line_index,
20037 ending_line => $maximum_line_index,
20038 serial_number => $maximum_alignment_index,
20040 $ralignment_list->[$maximum_alignment_index] = $alignment;
20044 sub dump_alignments {
20046 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
20047 for my $i ( 0 .. $maximum_alignment_index ) {
20048 my $column = $ralignment_list->[$i]->get_column();
20049 my $starting_column = $ralignment_list->[$i]->get_starting_column();
20050 my $matching_token = $ralignment_list->[$i]->get_matching_token();
20051 my $starting_line = $ralignment_list->[$i]->get_starting_line();
20052 my $ending_line = $ralignment_list->[$i]->get_ending_line();
20054 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
20058 sub save_alignment_columns {
20059 for my $i ( 0 .. $maximum_alignment_index ) {
20060 $ralignment_list->[$i]->save_column();
20064 sub restore_alignment_columns {
20065 for my $i ( 0 .. $maximum_alignment_index ) {
20066 $ralignment_list->[$i]->restore_column();
20070 sub forget_side_comment {
20071 $last_comment_column = 0;
20074 sub maximum_line_length_for_level {
20076 # return maximum line length for line starting with a given level
20077 my $maximum_line_length = $rOpts_maximum_line_length;
20078 if ($rOpts_variable_maximum_line_length) {
20080 if ( $level < 0 ) { $level = 0 }
20081 $maximum_line_length += $level * $rOpts_indent_columns;
20083 return $maximum_line_length;
20088 # Place one line in the current vertical group.
20090 # The input parameters are:
20091 # $level = indentation level of this line
20092 # $rfields = reference to array of fields
20093 # $rpatterns = reference to array of patterns, one per field
20094 # $rtokens = reference to array of tokens starting fields 1,2,..
20096 # Here is an example of what this package does. In this example,
20097 # we are trying to line up both the '=>' and the '#'.
20099 # '18' => 'grave', # \`
20100 # '19' => 'acute', # `'
20101 # '20' => 'caron', # \v
20102 # <-tabs-><f1-><--field 2 ---><-f3->
20105 # col1 col2 col3 col4
20107 # The calling routine has already broken the entire line into 3 fields as
20108 # indicated. (So the work of identifying promising common tokens has
20109 # already been done).
20111 # In this example, there will be 2 tokens being matched: '=>' and '#'.
20112 # They are the leading parts of fields 2 and 3, but we do need to know
20113 # what they are so that we can dump a group of lines when these tokens
20116 # The fields contain the actual characters of each field. The patterns
20117 # are like the fields, but they contain mainly token types instead
20118 # of tokens, so they have fewer characters. They are used to be
20119 # sure we are matching fields of similar type.
20121 # In this example, there will be 4 column indexes being adjusted. The
20122 # first one is always at zero. The interior columns are at the start of
20123 # the matching tokens, and the last one tracks the maximum line length.
20125 # Each time a new line comes in, it joins the current vertical
20126 # group if possible. Otherwise it causes the current group to be dumped
20127 # and a new group is started.
20129 # For each new group member, the column locations are increased, as
20130 # necessary, to make room for the new fields. When the group is finally
20131 # output, these column numbers are used to compute the amount of spaces of
20132 # padding needed for each field.
20134 # Programming note: the fields are assumed not to have any tab characters.
20135 # Tabs have been previously removed except for tabs in quoted strings and
20136 # side comments. Tabs in these fields can mess up the column counting.
20137 # The log file warns the user if there are any such tabs.
20140 $level, $level_end,
20141 $indentation, $rfields,
20142 $rtokens, $rpatterns,
20143 $is_forced_break, $outdent_long_lines,
20144 $is_terminal_ternary, $is_terminal_statement,
20145 $do_not_pad, $rvertical_tightness_flags,
20149 # number of fields is $jmax
20150 # number of tokens between fields is $jmax-1
20151 my $jmax = $#{$rfields};
20153 my $leading_space_count = get_SPACES($indentation);
20155 # set outdented flag to be sure we either align within statements or
20156 # across statement boundaries, but not both.
20157 my $is_outdented = $last_leading_space_count > $leading_space_count;
20158 $last_leading_space_count = $leading_space_count;
20160 # Patch: undo for hanging side comment
20161 my $is_hanging_side_comment =
20162 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
20163 $is_outdented = 0 if $is_hanging_side_comment;
20165 # Forget side comment alignment after seeing 2 or more block comments
20166 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
20167 if ($is_block_comment) {
20168 $consecutive_block_comments++;
20171 if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
20172 $consecutive_block_comments = 0;
20175 VALIGN_DEBUG_FLAG_APPEND0 && do {
20177 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
20180 # Validate cached line if necessary: If we can produce a container
20181 # with just 2 lines total by combining an existing cached opening
20182 # token with the closing token to follow, then we will mark both
20183 # cached flags as valid.
20184 if ($rvertical_tightness_flags) {
20185 if ( $maximum_line_index <= 0
20186 && $cached_line_type
20188 && $rvertical_tightness_flags->[2]
20189 && $rvertical_tightness_flags->[2] == $cached_seqno )
20191 $rvertical_tightness_flags->[3] ||= 1;
20192 $cached_line_valid ||= 1;
20196 # do not join an opening block brace with an unbalanced line
20197 # unless requested with a flag value of 2
20198 if ( $cached_line_type == 3
20199 && $maximum_line_index < 0
20200 && $cached_line_flag < 2
20201 && $level_jump != 0 )
20203 $cached_line_valid = 0;
20206 # patch until new aligner is finished
20207 if ($do_not_pad) { my_flush() }
20209 # shouldn't happen:
20210 if ( $level < 0 ) { $level = 0 }
20212 # do not align code across indentation level changes
20213 # or if vertical alignment is turned off for debugging
20214 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
20216 # we are allowed to shift a group of lines to the right if its
20217 # level is greater than the previous and next group
20219 ( $level < $group_level && $last_level_written < $group_level );
20223 # If we know that this line will get flushed out by itself because
20224 # of level changes, we can leave the extra_indent_ok flag set.
20225 # That way, if we get an external flush call, we will still be
20226 # able to do some -lp alignment if necessary.
20227 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
20229 $group_level = $level;
20231 # wait until after the above flush to get the leading space
20232 # count because it may have been changed if the -icp flag is in
20234 $leading_space_count = get_SPACES($indentation);
20238 # --------------------------------------------------------------------
20239 # Patch to collect outdentable block COMMENTS
20240 # --------------------------------------------------------------------
20241 my $is_blank_line = "";
20242 if ( $group_type eq 'COMMENT' ) {
20246 && $outdent_long_lines
20247 && $leading_space_count == $comment_leading_space_count
20252 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20260 # --------------------------------------------------------------------
20261 # add dummy fields for terminal ternary
20262 # --------------------------------------------------------------------
20263 my $j_terminal_match;
20264 if ( $is_terminal_ternary && $current_line ) {
20265 $j_terminal_match =
20266 fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
20267 $jmax = @{$rfields} - 1;
20270 # --------------------------------------------------------------------
20271 # add dummy fields for else statement
20272 # --------------------------------------------------------------------
20273 if ( $rfields->[0] =~ /^else\s*$/
20275 && $level_jump == 0 )
20277 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
20278 $jmax = @{$rfields} - 1;
20281 # --------------------------------------------------------------------
20282 # Step 1. Handle simple line of code with no fields to match.
20283 # --------------------------------------------------------------------
20284 if ( $jmax <= 0 ) {
20287 if ( $maximum_line_index >= 0
20288 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
20291 # flush the current group if it has some aligned columns..
20292 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
20294 # flush current group if we are just collecting side comments..
20297 # ...and we haven't seen a comment lately
20298 ( $zero_count > 3 )
20300 # ..or if this new line doesn't fit to the left of the comments
20301 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
20302 $group_lines[0]->get_column(0) )
20309 # patch to start new COMMENT group if this comment may be outdented
20310 if ( $is_block_comment
20311 && $outdent_long_lines
20312 && $maximum_line_index < 0 )
20314 $group_type = 'COMMENT';
20315 $comment_leading_space_count = $leading_space_count;
20316 $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20320 # just write this line directly if no current group, no side comment,
20321 # and no space recovery is needed.
20322 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
20324 valign_output_step_B( $leading_space_count, $$rfields[0], 0,
20325 $outdent_long_lines, $rvertical_tightness_flags, $level );
20333 # programming check: (shouldn't happen)
20334 # an error here implies an incorrect call was made
20335 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
20337 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
20339 report_definite_bug();
20342 # --------------------------------------------------------------------
20343 # create an object to hold this line
20344 # --------------------------------------------------------------------
20345 my $new_line = new Perl::Tidy::VerticalAligner::Line(
20347 jmax_original_line => $jmax,
20348 rtokens => $rtokens,
20349 rfields => $rfields,
20350 rpatterns => $rpatterns,
20351 indentation => $indentation,
20352 leading_space_count => $leading_space_count,
20353 outdent_long_lines => $outdent_long_lines,
20355 is_hanging_side_comment => $is_hanging_side_comment,
20356 maximum_line_length => maximum_line_length_for_level($level),
20357 rvertical_tightness_flags => $rvertical_tightness_flags,
20360 # Initialize a global flag saying if the last line of the group should
20361 # match end of group and also terminate the group. There should be no
20362 # returns between here and where the flag is handled at the bottom.
20363 my $col_matching_terminal = 0;
20364 if ( defined($j_terminal_match) ) {
20366 # remember the column of the terminal ? or { to match with
20367 $col_matching_terminal = $current_line->get_column($j_terminal_match);
20369 # set global flag for sub decide_if_aligned
20370 $is_matching_terminal_line = 1;
20373 # --------------------------------------------------------------------
20374 # It simplifies things to create a zero length side comment
20376 # --------------------------------------------------------------------
20377 make_side_comment( $new_line, $level_end );
20379 # --------------------------------------------------------------------
20380 # Decide if this is a simple list of items.
20381 # There are 3 list types: none, comma, comma-arrow.
20382 # We use this below to be less restrictive in deciding what to align.
20383 # --------------------------------------------------------------------
20384 if ($is_forced_break) {
20385 decide_if_list($new_line);
20388 if ($current_line) {
20390 # --------------------------------------------------------------------
20391 # Allow hanging side comment to join current group, if any
20392 # This will help keep side comments aligned, because otherwise we
20393 # will have to start a new group, making alignment less likely.
20394 # --------------------------------------------------------------------
20395 join_hanging_comment( $new_line, $current_line )
20396 if $is_hanging_side_comment;
20398 # --------------------------------------------------------------------
20399 # If there is just one previous line, and it has more fields
20400 # than the new line, try to join fields together to get a match with
20401 # the new line. At the present time, only a single leading '=' is
20402 # allowed to be compressed out. This is useful in rare cases where
20403 # a table is forced to use old breakpoints because of side comments,
20404 # and the table starts out something like this:
20405 # my %MonthChars = ('0', 'Jan', # side comment
20408 # Eliminating the '=' field will allow the remaining fields to line up.
20409 # This situation does not occur if there are no side comments
20410 # because scan_list would put a break after the opening '('.
20411 # --------------------------------------------------------------------
20412 eliminate_old_fields( $new_line, $current_line );
20414 # --------------------------------------------------------------------
20415 # If the new line has more fields than the current group,
20416 # see if we can match the first fields and combine the remaining
20417 # fields of the new line.
20418 # --------------------------------------------------------------------
20419 eliminate_new_fields( $new_line, $current_line );
20421 # --------------------------------------------------------------------
20422 # Flush previous group unless all common tokens and patterns match..
20423 # --------------------------------------------------------------------
20424 check_match( $new_line, $current_line );
20426 # --------------------------------------------------------------------
20427 # See if there is space for this line in the current group (if any)
20428 # --------------------------------------------------------------------
20429 if ($current_line) {
20430 check_fit( $new_line, $current_line );
20434 # --------------------------------------------------------------------
20435 # Append this line to the current group (or start new group)
20436 # --------------------------------------------------------------------
20437 add_to_group($new_line);
20439 # Future update to allow this to vary:
20440 $current_line = $new_line if ( $maximum_line_index == 0 );
20442 # output this group if it ends in a terminal else or ternary line
20443 if ( defined($j_terminal_match) ) {
20445 # if there is only one line in the group (maybe due to failure to match
20446 # perfectly with previous lines), then align the ? or { of this
20447 # terminal line with the previous one unless that would make the line
20449 if ( $maximum_line_index == 0 ) {
20450 my $col_now = $current_line->get_column($j_terminal_match);
20451 my $pad = $col_matching_terminal - $col_now;
20452 my $padding_available =
20453 $current_line->get_available_space_on_right();
20454 if ( $pad > 0 && $pad <= $padding_available ) {
20455 $current_line->increase_field_width( $j_terminal_match, $pad );
20459 $is_matching_terminal_line = 0;
20462 # --------------------------------------------------------------------
20463 # Step 8. Some old debugging stuff
20464 # --------------------------------------------------------------------
20465 VALIGN_DEBUG_FLAG_APPEND && do {
20466 print STDOUT "APPEND fields:";
20467 dump_array(@$rfields);
20468 print STDOUT "APPEND tokens:";
20469 dump_array(@$rtokens);
20470 print STDOUT "APPEND patterns:";
20471 dump_array(@$rpatterns);
20478 sub join_hanging_comment {
20481 my $jmax = $line->get_jmax();
20482 return 0 unless $jmax == 1; # must be 2 fields
20483 my $rtokens = $line->get_rtokens();
20484 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
20485 my $rfields = $line->get_rfields();
20486 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
20487 my $old_line = shift;
20488 my $maximum_field_index = $old_line->get_jmax();
20490 unless $maximum_field_index > $jmax; # the current line has more fields
20491 my $rpatterns = $line->get_rpatterns();
20493 $line->set_is_hanging_side_comment(1);
20494 $jmax = $maximum_field_index;
20495 $line->set_jmax($jmax);
20496 $$rfields[$jmax] = $$rfields[1];
20497 $$rtokens[ $jmax - 1 ] = $$rtokens[0];
20498 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
20499 for ( my $j = 1 ; $j < $jmax ; $j++ ) {
20500 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
20501 $$rtokens[ $j - 1 ] = "";
20502 $$rpatterns[ $j - 1 ] = "";
20507 sub eliminate_old_fields {
20509 my $new_line = shift;
20510 my $jmax = $new_line->get_jmax();
20511 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
20512 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
20514 # there must be one previous line
20515 return unless ( $maximum_line_index == 0 );
20517 my $old_line = shift;
20518 my $maximum_field_index = $old_line->get_jmax();
20520 ###############################################
20521 # this line must have fewer fields
20522 return unless $maximum_field_index > $jmax;
20523 ###############################################
20525 # Identify specific cases where field elimination is allowed:
20526 # case=1: both lines have comma-separated lists, and the first
20527 # line has an equals
20528 # case=2: both lines have leading equals
20530 # case 1 is the default
20533 # See if case 2: both lines have leading '='
20534 # We'll require similar leading patterns in this case
20535 my $old_rtokens = $old_line->get_rtokens();
20536 my $rtokens = $new_line->get_rtokens();
20537 my $rpatterns = $new_line->get_rpatterns();
20538 my $old_rpatterns = $old_line->get_rpatterns();
20539 if ( $rtokens->[0] =~ /^=\d*$/
20540 && $old_rtokens->[0] eq $rtokens->[0]
20541 && $old_rpatterns->[0] eq $rpatterns->[0] )
20546 # not too many fewer fields in new line for case 1
20547 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
20549 # case 1 must have side comment
20550 my $old_rfields = $old_line->get_rfields();
20553 && length( $$old_rfields[$maximum_field_index] ) == 0 );
20555 my $rfields = $new_line->get_rfields();
20557 my $hid_equals = 0;
20559 my @new_alignments = ();
20560 my @new_fields = ();
20561 my @new_matching_patterns = ();
20562 my @new_matching_tokens = ();
20566 my $current_field = '';
20567 my $current_pattern = '';
20569 # loop over all old tokens
20571 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
20572 $current_field .= $$old_rfields[$k];
20573 $current_pattern .= $$old_rpatterns[$k];
20574 last if ( $j > $jmax - 1 );
20576 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
20578 $new_fields[$j] = $current_field;
20579 $new_matching_patterns[$j] = $current_pattern;
20580 $current_field = '';
20581 $current_pattern = '';
20582 $new_matching_tokens[$j] = $$old_rtokens[$k];
20583 $new_alignments[$j] = $old_line->get_alignment($k);
20588 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
20589 last if ( $case == 2 ); # avoid problems with stuff
20590 # like: $a=$b=$c=$d;
20594 if ( $in_match && $case == 1 )
20595 ; # disallow gaps in matching field types in case 1
20599 # Modify the current state if we are successful.
20600 # We must exactly reach the ends of both lists for success.
20601 if ( ( $j == $jmax )
20602 && ( $current_field eq '' )
20603 && ( $case != 1 || $hid_equals ) )
20605 $k = $maximum_field_index;
20606 $current_field .= $$old_rfields[$k];
20607 $current_pattern .= $$old_rpatterns[$k];
20608 $new_fields[$j] = $current_field;
20609 $new_matching_patterns[$j] = $current_pattern;
20611 $new_alignments[$j] = $old_line->get_alignment($k);
20612 $maximum_field_index = $j;
20614 $old_line->set_alignments(@new_alignments);
20615 $old_line->set_jmax($jmax);
20616 $old_line->set_rtokens( \@new_matching_tokens );
20617 $old_line->set_rfields( \@new_fields );
20618 $old_line->set_rpatterns( \@$rpatterns );
20622 # create an empty side comment if none exists
20623 sub make_side_comment {
20624 my $new_line = shift;
20625 my $level_end = shift;
20626 my $jmax = $new_line->get_jmax();
20627 my $rtokens = $new_line->get_rtokens();
20629 # if line does not have a side comment...
20630 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
20631 my $rfields = $new_line->get_rfields();
20632 my $rpatterns = $new_line->get_rpatterns();
20633 $$rtokens[$jmax] = '#';
20634 $$rfields[ ++$jmax ] = '';
20635 $$rpatterns[$jmax] = '#';
20636 $new_line->set_jmax($jmax);
20637 $new_line->set_jmax_original_line($jmax);
20640 # line has a side comment..
20643 # don't remember old side comment location for very long
20644 my $line_number = $vertical_aligner_self->get_output_line_number();
20645 my $rfields = $new_line->get_rfields();
20647 $line_number - $last_side_comment_line_number > 12
20649 # and don't remember comment location across block level changes
20650 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
20653 forget_side_comment();
20655 $last_side_comment_line_number = $line_number;
20656 $last_side_comment_level = $level_end;
20660 sub decide_if_list {
20664 # A list will be taken to be a line with a forced break in which all
20665 # of the field separators are commas or comma-arrows (except for the
20668 # List separator tokens are things like ',3' or '=>2',
20669 # where the trailing digit is the nesting depth. Allow braces
20670 # to allow nested list items.
20671 my $rtokens = $line->get_rtokens();
20672 my $test_token = $$rtokens[0];
20673 if ( $test_token =~ /^(\,|=>)/ ) {
20674 my $list_type = $test_token;
20675 my $jmax = $line->get_jmax();
20677 foreach ( 1 .. $jmax - 2 ) {
20678 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
20683 $line->set_list_type($list_type);
20687 sub eliminate_new_fields {
20689 return unless ( $maximum_line_index >= 0 );
20690 my ( $new_line, $old_line ) = @_;
20691 my $jmax = $new_line->get_jmax();
20693 my $old_rtokens = $old_line->get_rtokens();
20694 my $rtokens = $new_line->get_rtokens();
20695 my $is_assignment =
20696 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
20698 # must be monotonic variation
20699 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
20701 # must be more fields in the new line
20702 my $maximum_field_index = $old_line->get_jmax();
20703 return unless ( $maximum_field_index < $jmax );
20705 unless ($is_assignment) {
20707 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
20708 ; # only if monotonic
20710 # never combine fields of a comma list
20712 unless ( $maximum_field_index > 1 )
20713 && ( $new_line->get_list_type() !~ /^,/ );
20716 my $rfields = $new_line->get_rfields();
20717 my $rpatterns = $new_line->get_rpatterns();
20718 my $old_rpatterns = $old_line->get_rpatterns();
20720 # loop over all OLD tokens except comment and check match
20723 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
20724 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
20725 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
20732 # first tokens agree, so combine extra new tokens
20734 for $k ( $maximum_field_index .. $jmax - 1 ) {
20736 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
20737 $$rfields[$k] = "";
20738 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
20739 $$rpatterns[$k] = "";
20742 $$rtokens[ $maximum_field_index - 1 ] = '#';
20743 $$rfields[$maximum_field_index] = $$rfields[$jmax];
20744 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
20745 $jmax = $maximum_field_index;
20747 $new_line->set_jmax($jmax);
20750 sub fix_terminal_ternary {
20752 # Add empty fields as necessary to align a ternary term
20757 # : $year % 100 ? 1
20758 # : $year % 400 ? 0
20761 # returns 1 if the terminal item should be indented
20763 my ( $rfields, $rtokens, $rpatterns ) = @_;
20765 my $jmax = @{$rfields} - 1;
20766 my $old_line = $group_lines[$maximum_line_index];
20767 my $rfields_old = $old_line->get_rfields();
20769 my $rpatterns_old = $old_line->get_rpatterns();
20770 my $rtokens_old = $old_line->get_rtokens();
20771 my $maximum_field_index = $old_line->get_jmax();
20773 # look for the question mark after the :
20775 my $depth_question;
20777 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
20778 my $tok = $rtokens_old->[$j];
20779 if ( $tok =~ /^\?(\d+)$/ ) {
20780 $depth_question = $1;
20782 # depth must be correct
20783 next unless ( $depth_question eq $group_level );
20786 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
20787 $pad = " " x length($1);
20790 return; # shouldn't happen
20795 return unless ( defined($jquestion) ); # shouldn't happen
20797 # Now splice the tokens and patterns of the previous line
20798 # into the else line to insure a match. Add empty fields
20800 my $jadd = $jquestion;
20802 # Work on copies of the actual arrays in case we have
20803 # to return due to an error
20804 my @fields = @{$rfields};
20805 my @patterns = @{$rpatterns};
20806 my @tokens = @{$rtokens};
20808 VALIGN_DEBUG_FLAG_TERNARY && do {
20810 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
20811 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
20812 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
20813 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
20814 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
20815 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
20818 # handle cases of leading colon on this line
20819 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
20821 my ( $colon, $therest ) = ( $1, $2 );
20823 # Handle sub-case of first field with leading colon plus additional code
20824 # This is the usual situation as at the '1' below:
20826 # : $year % 400 ? 0
20830 # Split the first field after the leading colon and insert padding.
20831 # Note that this padding will remain even if the terminal value goes
20832 # out on a separate line. This does not seem to look to bad, so no
20833 # mechanism has been included to undo it.
20834 my $field1 = shift @fields;
20835 unshift @fields, ( $colon, $pad . $therest );
20837 # change the leading pattern from : to ?
20838 return unless ( $patterns[0] =~ s/^\:/?/ );
20840 # install leading tokens and patterns of existing line
20841 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20842 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20844 # insert appropriate number of empty fields
20845 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20848 # handle sub-case of first field just equal to leading colon.
20849 # This can happen for example in the example below where
20850 # the leading '(' would create a new alignment token
20851 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
20852 # : ( $mname = $name . '->' );
20855 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
20857 # prepend a leading ? onto the second pattern
20858 $patterns[1] = "?b" . $patterns[1];
20860 # pad the second field
20861 $fields[1] = $pad . $fields[1];
20863 # install leading tokens and patterns of existing line, replacing
20864 # leading token and inserting appropriate number of empty fields
20865 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
20866 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
20867 splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20871 # Handle case of no leading colon on this line. This will
20872 # be the case when -wba=':' is used. For example,
20873 # $year % 400 ? 0 :
20877 # install leading tokens and patterns of existing line
20878 $patterns[0] = '?' . 'b' . $patterns[0];
20879 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
20880 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20882 # insert appropriate number of empty fields
20883 $jadd = $jquestion + 1;
20884 $fields[0] = $pad . $fields[0];
20885 splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
20888 VALIGN_DEBUG_FLAG_TERNARY && do {
20890 print STDOUT "MODIFIED TOKENS=<@tokens>\n";
20891 print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
20892 print STDOUT "MODIFIED FIELDS=<@fields>\n";
20895 # all ok .. update the arrays
20896 @{$rfields} = @fields;
20897 @{$rtokens} = @tokens;
20898 @{$rpatterns} = @patterns;
20900 # force a flush after this line
20904 sub fix_terminal_else {
20906 # Add empty fields as necessary to align a balanced terminal
20907 # else block to a previous if/elsif/unless block,
20910 # if ( 1 || $x ) { print "ok 13\n"; }
20911 # else { print "not ok 13\n"; }
20913 # returns 1 if the else block should be indented
20915 my ( $rfields, $rtokens, $rpatterns ) = @_;
20916 my $jmax = @{$rfields} - 1;
20917 return unless ( $jmax > 0 );
20919 # check for balanced else block following if/elsif/unless
20920 my $rfields_old = $current_line->get_rfields();
20922 # TBD: add handling for 'case'
20923 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
20925 # look for the opening brace after the else, and extract the depth
20926 my $tok_brace = $rtokens->[0];
20928 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
20930 # probably: "else # side_comment"
20933 my $rpatterns_old = $current_line->get_rpatterns();
20934 my $rtokens_old = $current_line->get_rtokens();
20935 my $maximum_field_index = $current_line->get_jmax();
20937 # be sure the previous if/elsif is followed by an opening paren
20939 my $tok_paren = '(' . $depth_brace;
20940 my $tok_test = $rtokens_old->[$jparen];
20941 return unless ( $tok_test eq $tok_paren ); # shouldn't happen
20943 # Now find the opening block brace
20945 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
20946 my $tok = $rtokens_old->[$j];
20947 if ( $tok eq $tok_brace ) {
20952 return unless ( defined($jbrace) ); # shouldn't happen
20954 # Now splice the tokens and patterns of the previous line
20955 # into the else line to insure a match. Add empty fields
20957 my $jadd = $jbrace - $jparen;
20958 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
20959 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
20960 splice( @{$rfields}, 1, 0, ('') x $jadd );
20962 # force a flush after this line if it does not follow a case
20964 unless ( $rfields_old->[0] =~ /^case\s*$/ );
20967 { # sub check_match
20968 my %is_good_alignment;
20972 # Vertically aligning on certain "good" tokens is usually okay
20973 # so we can be less restrictive in marginal cases.
20974 @_ = qw( { ? => = );
20976 @is_good_alignment{@_} = (1) x scalar(@_);
20981 # See if the current line matches the current vertical alignment group.
20982 # If not, flush the current group.
20983 my $new_line = shift;
20984 my $old_line = shift;
20986 # uses global variables:
20987 # $previous_minimum_jmax_seen
20988 # $maximum_jmax_seen
20989 # $maximum_line_index
20991 my $jmax = $new_line->get_jmax();
20992 my $maximum_field_index = $old_line->get_jmax();
20994 # flush if this line has too many fields
20995 if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
20997 # flush if adding this line would make a non-monotonic field count
20999 ( $maximum_field_index > $jmax ) # this has too few fields
21001 ( $previous_minimum_jmax_seen <
21002 $jmax ) # and wouldn't be monotonic
21003 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
21010 # otherwise see if this line matches the current group
21011 my $jmax_original_line = $new_line->get_jmax_original_line();
21012 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
21013 my $rtokens = $new_line->get_rtokens();
21014 my $rfields = $new_line->get_rfields();
21015 my $rpatterns = $new_line->get_rpatterns();
21016 my $list_type = $new_line->get_list_type();
21018 my $group_list_type = $old_line->get_list_type();
21019 my $old_rpatterns = $old_line->get_rpatterns();
21020 my $old_rtokens = $old_line->get_rtokens();
21022 my $jlimit = $jmax - 1;
21023 if ( $maximum_field_index > $jmax ) {
21024 $jlimit = $jmax_original_line;
21025 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
21028 # handle comma-separated lists ..
21029 if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
21030 for my $j ( 0 .. $jlimit ) {
21031 my $old_tok = $$old_rtokens[$j];
21032 next unless $old_tok;
21033 my $new_tok = $$rtokens[$j];
21034 next unless $new_tok;
21036 # lists always match ...
21037 # unless they would align any '=>'s with ','s
21039 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
21040 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
21044 # do detailed check for everything else except hanging side comments
21045 elsif ( !$is_hanging_side_comment ) {
21047 my $leading_space_count = $new_line->get_leading_space_count();
21051 my $saw_good_alignment;
21053 for my $j ( 0 .. $jlimit ) {
21055 my $old_tok = $$old_rtokens[$j];
21056 my $new_tok = $$rtokens[$j];
21058 # Note on encoding used for alignment tokens:
21059 # -------------------------------------------
21060 # Tokens are "decorated" with information which can help
21061 # prevent unwanted alignments. Consider for example the
21062 # following two lines:
21063 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
21064 # local ( $i, $f ) = &'bdiv( $xn, $xd );
21065 # There are three alignment tokens in each line, a comma,
21066 # an =, and a comma. In the first line these three tokens
21068 # ,4+local-18 =3 ,4+split-7
21069 # and in the second line they are encoded as
21070 # ,4+local-18 =3 ,4+&'bdiv-8
21071 # Tokens always at least have token name and nesting
21072 # depth. So in this example the ='s are at depth 3 and
21073 # the ,'s are at depth 4. This prevents aligning tokens
21074 # of different depths. Commas contain additional
21075 # information, as follows:
21076 # , {depth} + {container name} - {spaces to opening paren}
21077 # This allows us to reject matching the rightmost commas
21078 # in the above two lines, since they are for different
21079 # function calls. This encoding is done in
21080 # 'sub send_lines_to_vertical_aligner'.
21082 # Pick off actual token.
21083 # Everything up to the first digit is the actual token.
21084 my $alignment_token = $new_tok;
21085 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
21087 # see if the decorated tokens match
21088 my $tokens_match = $new_tok eq $old_tok
21090 # Exception for matching terminal : of ternary statement..
21091 # consider containers prefixed by ? and : a match
21092 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
21094 # No match if the alignment tokens differ...
21095 if ( !$tokens_match ) {
21097 # ...Unless this is a side comment
21101 # and there is either at least one alignment token
21102 # or this is a single item following a list. This
21103 # latter rule is required for 'December' to join
21104 # the following list:
21106 # '', 'January', 'February', 'March',
21107 # 'April', 'May', 'June', 'July',
21108 # 'August', 'September', 'October', 'November',
21111 # If it doesn't then the -lp formatting will fail.
21112 && ( $j > 0 || $old_tok =~ /^,/ )
21115 $marginal_match = 1
21116 if ( $marginal_match == 0
21117 && $maximum_line_index == 0 );
21124 # Calculate amount of padding required to fit this in.
21125 # $pad is the number of spaces by which we must increase
21126 # the current field to squeeze in this field.
21128 length( $$rfields[$j] ) - $old_line->current_field_width($j);
21129 if ( $j == 0 ) { $pad += $leading_space_count; }
21131 # remember max pads to limit marginal cases
21132 if ( $alignment_token ne '#' ) {
21133 if ( $pad > $max_pad ) { $max_pad = $pad }
21134 if ( $pad < $min_pad ) { $min_pad = $pad }
21136 if ( $is_good_alignment{$alignment_token} ) {
21137 $saw_good_alignment = 1;
21140 # If patterns don't match, we have to be careful...
21141 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
21143 # flag this as a marginal match since patterns differ
21144 $marginal_match = 1
21145 if ( $marginal_match == 0 && $maximum_line_index == 0 );
21147 # We have to be very careful about aligning commas
21148 # when the pattern's don't match, because it can be
21149 # worse to create an alignment where none is needed
21150 # than to omit one. Here's an example where the ','s
21151 # are not in named containers. The first line below
21152 # should not match the next two:
21153 # ( $a, $b ) = ( $b, $r );
21154 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
21155 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
21156 if ( $alignment_token eq ',' ) {
21158 # do not align commas unless they are in named containers
21159 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
21162 # do not align parens unless patterns match;
21163 # large ugly spaces can occur in math expressions.
21164 elsif ( $alignment_token eq '(' ) {
21166 # But we can allow a match if the parens don't
21167 # require any padding.
21168 if ( $pad != 0 ) { goto NO_MATCH }
21171 # Handle an '=' alignment with different patterns to
21173 elsif ( $alignment_token eq '=' ) {
21175 # It is best to be a little restrictive when
21176 # aligning '=' tokens. Here is an example of
21177 # two lines that we will not align:
21180 # The problem is that one is a 'my' declaration,
21181 # and the other isn't, so they're not very similar.
21182 # We will filter these out by comparing the first
21183 # letter of the pattern. This is crude, but works
21186 substr( $$old_rpatterns[$j], 0, 1 ) ne
21187 substr( $$rpatterns[$j], 0, 1 ) )
21192 # If we pass that test, we'll call it a marginal match.
21193 # Here is an example of a marginal match:
21195 # $op = compile_bblock($op);
21196 # The left tokens are both identifiers, but
21197 # one accesses a hash and the other doesn't.
21198 # We'll let this be a tentative match and undo
21199 # it later if we don't find more than 2 lines
21201 elsif ( $maximum_line_index == 0 ) {
21203 2; # =2 prevents being undone below
21208 # Don't let line with fewer fields increase column widths
21210 if ( $maximum_field_index > $jmax ) {
21212 # Exception: suspend this rule to allow last lines to join
21213 if ( $pad > 0 ) { goto NO_MATCH; }
21215 } ## end for my $j ( 0 .. $jlimit)
21217 # Turn off the "marginal match" flag in some cases...
21218 # A "marginal match" occurs when the alignment tokens agree
21219 # but there are differences in the other tokens (patterns).
21220 # If we leave the marginal match flag set, then the rule is that we
21221 # will align only if there are more than two lines in the group.
21222 # We will turn of the flag if we almost have a match
21223 # and either we have seen a good alignment token or we
21224 # just need a small pad (2 spaces) to fit. These rules are
21225 # the result of experimentation. Tokens which misaligned by just
21226 # one or two characters are annoying. On the other hand,
21227 # large gaps to less important alignment tokens are also annoying.
21228 if ( $marginal_match == 1
21229 && $jmax == $maximum_field_index
21230 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
21233 $marginal_match = 0;
21235 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
21238 # We have a match (even if marginal).
21239 # If the current line has fewer fields than the current group
21240 # but otherwise matches, copy the remaining group fields to
21241 # make it a perfect match.
21242 if ( $maximum_field_index > $jmax ) {
21243 my $comment = $$rfields[$jmax];
21244 for $jmax ( $jlimit .. $maximum_field_index ) {
21245 $$rtokens[$jmax] = $$old_rtokens[$jmax];
21246 $$rfields[ ++$jmax ] = '';
21247 $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
21249 $$rfields[$jmax] = $comment;
21250 $new_line->set_jmax($jmax);
21255 ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
21263 return unless ( $maximum_line_index >= 0 );
21264 my $new_line = shift;
21265 my $old_line = shift;
21267 my $jmax = $new_line->get_jmax();
21268 my $leading_space_count = $new_line->get_leading_space_count();
21269 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
21270 my $rtokens = $new_line->get_rtokens();
21271 my $rfields = $new_line->get_rfields();
21272 my $rpatterns = $new_line->get_rpatterns();
21274 my $group_list_type = $group_lines[0]->get_list_type();
21276 my $padding_so_far = 0;
21277 my $padding_available = $old_line->get_available_space_on_right();
21279 # save current columns in case this doesn't work
21280 save_alignment_columns();
21282 my ( $j, $pad, $eight );
21283 my $maximum_field_index = $old_line->get_jmax();
21284 for $j ( 0 .. $jmax ) {
21286 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
21289 $pad += $leading_space_count;
21292 # remember largest gap of the group, excluding gap to side comment
21294 && $group_maximum_gap < -$pad
21296 && $j < $jmax - 1 )
21298 $group_maximum_gap = -$pad;
21303 ## This patch helps sometimes, but it doesn't check to see if
21304 ## the line is too long even without the side comment. It needs
21306 ##don't let a long token with no trailing side comment push
21307 ##side comments out, or end a group. (sidecmt1.t)
21308 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
21310 # This line will need space; lets see if we want to accept it..
21313 # not if this won't fit
21314 ( $pad > $padding_available )
21316 # previously, there were upper bounds placed on padding here
21317 # (maximum_whitespace_columns), but they were not really helpful
21322 # revert to starting state then flush; things didn't work out
21323 restore_alignment_columns();
21328 # patch to avoid excessive gaps in previous lines,
21329 # due to a line of fewer fields.
21330 # return join( ".",
21331 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
21332 # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
21333 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
21335 # looks ok, squeeze this field in
21336 $old_line->increase_field_width( $j, $pad );
21337 $padding_available -= $pad;
21339 # remember largest gap of the group, excluding gap to side comment
21340 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
21341 $group_maximum_gap = $pad;
21348 # The current line either starts a new alignment group or is
21349 # accepted into the current alignment group.
21350 my $new_line = shift;
21351 $group_lines[ ++$maximum_line_index ] = $new_line;
21353 # initialize field lengths if starting new group
21354 if ( $maximum_line_index == 0 ) {
21356 my $jmax = $new_line->get_jmax();
21357 my $rfields = $new_line->get_rfields();
21358 my $rtokens = $new_line->get_rtokens();
21360 my $col = $new_line->get_leading_space_count();
21362 for $j ( 0 .. $jmax ) {
21363 $col += length( $$rfields[$j] );
21365 # create initial alignments for the new group
21367 if ( $j < $jmax ) { $token = $$rtokens[$j] }
21368 my $alignment = make_alignment( $col, $token );
21369 $new_line->set_alignment( $j, $alignment );
21372 $maximum_jmax_seen = $jmax;
21373 $minimum_jmax_seen = $jmax;
21376 # use previous alignments otherwise
21378 my @new_alignments =
21379 $group_lines[ $maximum_line_index - 1 ]->get_alignments();
21380 $new_line->set_alignments(@new_alignments);
21383 # remember group jmax extremes for next call to valign_input
21384 $previous_minimum_jmax_seen = $minimum_jmax_seen;
21385 $previous_maximum_jmax_seen = $maximum_jmax_seen;
21390 # debug routine to dump array contents
21392 print STDOUT "(@_)\n";
21395 # flush() sends the current Perl::Tidy::VerticalAligner group down the
21396 # pipeline to Perl::Tidy::FileWriter.
21398 # This is the external flush, which also empties the buffer and cache
21401 # the buffer must be emptied first, then any cached text
21402 dump_valign_buffer();
21404 if ( $maximum_line_index < 0 ) {
21405 if ($cached_line_type) {
21406 $seqno_string = $cached_seqno_string;
21407 valign_output_step_C( $cached_line_text,
21408 $cached_line_leading_space_count,
21409 $last_level_written );
21410 $cached_line_type = 0;
21411 $cached_line_text = "";
21412 $cached_seqno_string = "";
21420 sub reduce_valign_buffer_indentation {
21423 if ( $valign_buffer_filling && $diff ) {
21424 my $max_valign_buffer = @valign_buffer;
21425 for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) {
21426 my ( $line, $leading_space_count, $level ) =
21427 @{ $valign_buffer[$i] };
21428 my $ws = substr( $line, 0, $diff );
21429 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21430 $line = substr( $line, $diff );
21432 if ( $leading_space_count >= $diff ) {
21433 $leading_space_count -= $diff;
21434 $level = level_change( $leading_space_count, $diff, $level );
21436 $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
21443 # compute decrease in level when we remove $diff spaces from the
21445 my ( $leading_space_count, $diff, $level ) = @_;
21446 if ($rOpts_indent_columns) {
21448 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
21449 my $nlev = int( $leading_space_count / $rOpts_indent_columns );
21450 $level -= ( $olev - $nlev );
21451 if ( $level < 0 ) { $level = 0 }
21456 sub dump_valign_buffer {
21457 if (@valign_buffer) {
21458 foreach (@valign_buffer) {
21459 valign_output_step_D( @{$_} );
21461 @valign_buffer = ();
21463 $valign_buffer_filling = "";
21466 # This is the internal flush, which leaves the cache intact
21469 return if ( $maximum_line_index < 0 );
21471 # handle a group of comment lines
21472 if ( $group_type eq 'COMMENT' ) {
21474 VALIGN_DEBUG_FLAG_APPEND0 && do {
21475 my ( $a, $b, $c ) = caller();
21477 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
21480 my $leading_space_count = $comment_leading_space_count;
21481 my $leading_string = get_leading_string($leading_space_count);
21483 # zero leading space count if any lines are too long
21484 my $max_excess = 0;
21485 for my $i ( 0 .. $maximum_line_index ) {
21486 my $str = $group_lines[$i];
21489 $leading_space_count -
21490 maximum_line_length_for_level($group_level);
21491 if ( $excess > $max_excess ) {
21492 $max_excess = $excess;
21496 if ( $max_excess > 0 ) {
21497 $leading_space_count -= $max_excess;
21498 if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
21499 $last_outdented_line_at =
21500 $file_writer_object->get_output_line_number();
21501 unless ($outdented_line_count) {
21502 $first_outdented_line_at = $last_outdented_line_at;
21504 $outdented_line_count += ( $maximum_line_index + 1 );
21507 # write the group of lines
21508 my $outdent_long_lines = 0;
21509 for my $i ( 0 .. $maximum_line_index ) {
21510 valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
21511 $outdent_long_lines, "", $group_level );
21515 # handle a group of code lines
21518 VALIGN_DEBUG_FLAG_APPEND0 && do {
21519 my $group_list_type = $group_lines[0]->get_list_type();
21520 my ( $a, $b, $c ) = caller();
21521 my $maximum_field_index = $group_lines[0]->get_jmax();
21523 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
21527 # some small groups are best left unaligned
21528 my $do_not_align = decide_if_aligned();
21530 # optimize side comment location
21531 $do_not_align = adjust_side_comment($do_not_align);
21533 # recover spaces for -lp option if possible
21534 my $extra_leading_spaces = get_extra_leading_spaces();
21536 # all lines of this group have the same basic leading spacing
21537 my $group_leader_length = $group_lines[0]->get_leading_space_count();
21539 # add extra leading spaces if helpful
21540 my $min_ci_gap = improve_continuation_indentation( $do_not_align,
21541 $group_leader_length );
21543 # loop to output all lines
21544 for my $i ( 0 .. $maximum_line_index ) {
21545 my $line = $group_lines[$i];
21546 valign_output_step_A( $line, $min_ci_gap, $do_not_align,
21547 $group_leader_length, $extra_leading_spaces );
21550 initialize_for_new_group();
21553 sub decide_if_aligned {
21555 # Do not try to align two lines which are not really similar
21556 return unless $maximum_line_index == 1;
21557 return if ($is_matching_terminal_line);
21559 my $group_list_type = $group_lines[0]->get_list_type();
21561 my $do_not_align = (
21563 # always align lists
21568 # don't align if it was just a marginal match
21571 # don't align two lines with big gap
21572 || $group_maximum_gap > 12
21574 # or lines with differing number of alignment tokens
21575 # TODO: this could be improved. It occasionally rejects
21577 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
21581 # But try to convert them into a simple comment group if the first line
21582 # a has side comment
21583 my $rfields = $group_lines[0]->get_rfields();
21584 my $maximum_field_index = $group_lines[0]->get_jmax();
21586 && ( $maximum_line_index > 0 )
21587 && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
21592 return $do_not_align;
21595 sub adjust_side_comment {
21597 my $do_not_align = shift;
21599 # let's see if we can move the side comment field out a little
21600 # to improve readability (the last field is always a side comment field)
21601 my $have_side_comment = 0;
21602 my $first_side_comment_line = -1;
21603 my $maximum_field_index = $group_lines[0]->get_jmax();
21604 for my $i ( 0 .. $maximum_line_index ) {
21605 my $line = $group_lines[$i];
21607 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
21608 $have_side_comment = 1;
21609 $first_side_comment_line = $i;
21614 my $kmax = $maximum_field_index + 1;
21616 if ($have_side_comment) {
21618 my $line = $group_lines[0];
21620 # the maximum space without exceeding the line length:
21621 my $avail = $line->get_available_space_on_right();
21623 # try to use the previous comment column
21624 my $side_comment_column = $line->get_column( $kmax - 2 );
21625 my $move = $last_comment_column - $side_comment_column;
21627 ## my $sc_line0 = $side_comment_history[0]->[0];
21628 ## my $sc_col0 = $side_comment_history[0]->[1];
21629 ## my $sc_line1 = $side_comment_history[1]->[0];
21630 ## my $sc_col1 = $side_comment_history[1]->[1];
21631 ## my $sc_line2 = $side_comment_history[2]->[0];
21632 ## my $sc_col2 = $side_comment_history[2]->[1];
21634 ## # FUTURE UPDATES:
21635 ## # Be sure to ignore 'do not align' and '} # end comments'
21636 ## # Find first $move > 0 and $move <= $avail as follows:
21637 ## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
21638 ## # 2. try sc_col2 if (line-sc_line2) < 12
21639 ## # 3. try min possible space, plus up to 8,
21640 ## # 4. try min possible space
21642 if ( $kmax > 0 && !$do_not_align ) {
21644 # but if this doesn't work, give up and use the minimum space
21645 if ( $move > $avail ) {
21646 $move = $rOpts_minimum_space_to_comment - 1;
21649 # but we want some minimum space to the comment
21650 my $min_move = $rOpts_minimum_space_to_comment - 1;
21652 && $last_side_comment_length > 0
21653 && ( $first_side_comment_line == 0 )
21654 && $group_level == $last_level_written )
21659 if ( $move < $min_move ) {
21663 # previously, an upper bound was placed on $move here,
21664 # (maximum_space_to_comment), but it was not helpful
21666 # don't exceed the available space
21667 if ( $move > $avail ) { $move = $avail }
21669 # we can only increase space, never decrease
21671 $line->increase_field_width( $maximum_field_index - 1, $move );
21674 # remember this column for the next group
21675 $last_comment_column = $line->get_column( $kmax - 2 );
21679 # try to at least line up the existing side comment location
21680 if ( $kmax > 0 && $move > 0 && $move < $avail ) {
21681 $line->increase_field_width( $maximum_field_index - 1, $move );
21685 # reset side comment column if we can't align
21687 forget_side_comment();
21691 return $do_not_align;
21694 sub improve_continuation_indentation {
21695 my ( $do_not_align, $group_leader_length ) = @_;
21697 # See if we can increase the continuation indentation
21698 # to move all continuation lines closer to the next field
21699 # (unless it is a comment).
21701 # '$min_ci_gap'is the extra indentation that we may need to introduce.
21702 # We will only introduce this to fields which already have some ci.
21703 # Without this variable, we would occasionally get something like this
21706 # use overload '+' => \&plus,
21708 # '*' => \&multiply,
21711 # 'atan2' => \&atan2,
21713 # Whereas with this variable, we can shift variables over to get this:
21715 # use overload '+' => \&plus,
21717 # '*' => \&multiply,
21720 # 'atan2' => \&atan2,
21722 ## Deactivated####################
21723 # The trouble with this patch is that it may, for example,
21724 # move in some 'or's or ':'s, and leave some out, so that the
21725 # left edge alignment suffers.
21727 ###########################################
21729 my $maximum_field_index = $group_lines[0]->get_jmax();
21731 my $min_ci_gap = maximum_line_length_for_level($group_level);
21732 if ( $maximum_field_index > 1 && !$do_not_align ) {
21734 for my $i ( 0 .. $maximum_line_index ) {
21735 my $line = $group_lines[$i];
21736 my $leading_space_count = $line->get_leading_space_count();
21737 my $rfields = $line->get_rfields();
21740 $line->get_column(0) -
21741 $leading_space_count -
21742 length( $$rfields[0] );
21744 if ( $leading_space_count > $group_leader_length ) {
21745 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
21749 if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) {
21756 return $min_ci_gap;
21759 sub valign_output_step_A {
21761 ###############################################################
21762 # This is Step A in writing vertically aligned lines.
21763 # The line is prepared according to the alignments which have
21764 # been found and shipped to the next step.
21765 ###############################################################
21767 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
21768 $extra_leading_spaces )
21770 my $rfields = $line->get_rfields();
21771 my $leading_space_count = $line->get_leading_space_count();
21772 my $outdent_long_lines = $line->get_outdent_long_lines();
21773 my $maximum_field_index = $line->get_jmax();
21774 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
21776 # add any extra spaces
21777 if ( $leading_space_count > $group_leader_length ) {
21778 $leading_space_count += $min_ci_gap;
21781 my $str = $$rfields[0];
21783 # loop to concatenate all fields of this line and needed padding
21784 my $total_pad_count = 0;
21786 for $j ( 1 .. $maximum_field_index ) {
21788 # skip zero-length side comments
21790 if ( ( $j == $maximum_field_index )
21791 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
21794 # compute spaces of padding before this field
21795 my $col = $line->get_column( $j - 1 );
21796 $pad = $col - ( length($str) + $leading_space_count );
21798 if ($do_not_align) {
21800 ( $j < $maximum_field_index )
21802 : $rOpts_minimum_space_to_comment - 1;
21805 # if the -fpsc flag is set, move the side comment to the selected
21806 # column if and only if it is possible, ignoring constraints on
21807 # line length and minimum space to comment
21808 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
21810 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
21811 if ( $newpad >= 0 ) { $pad = $newpad; }
21814 # accumulate the padding
21815 if ( $pad > 0 ) { $total_pad_count += $pad; }
21818 if ( !defined $$rfields[$j] ) {
21819 write_diagnostics("UNDEFined field at j=$j\n");
21822 # only add padding when we have a finite field;
21823 # this avoids extra terminal spaces if we have empty fields
21824 if ( length( $$rfields[$j] ) > 0 ) {
21825 $str .= ' ' x $total_pad_count;
21826 $total_pad_count = 0;
21827 $str .= $$rfields[$j];
21830 $total_pad_count = 0;
21833 # update side comment history buffer
21834 if ( $j == $maximum_field_index ) {
21835 my $lineno = $file_writer_object->get_output_line_number();
21836 shift @side_comment_history;
21837 push @side_comment_history, [ $lineno, $col ];
21841 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
21843 # ship this line off
21844 valign_output_step_B( $leading_space_count + $extra_leading_spaces,
21845 $str, $side_comment_length, $outdent_long_lines,
21846 $rvertical_tightness_flags, $group_level );
21849 sub get_extra_leading_spaces {
21851 #----------------------------------------------------------
21852 # Define any extra indentation space (for the -lp option).
21854 # If a list has side comments, sub scan_list must dump the
21855 # list before it sees everything. When this happens, it sets
21856 # the indentation to the standard scheme, but notes how
21857 # many spaces it would have liked to use. We may be able
21858 # to recover that space here in the event that all of the
21859 # lines of a list are back together again.
21860 #----------------------------------------------------------
21862 my $extra_leading_spaces = 0;
21863 if ($extra_indent_ok) {
21864 my $object = $group_lines[0]->get_indentation();
21865 if ( ref($object) ) {
21866 my $extra_indentation_spaces_wanted =
21867 get_RECOVERABLE_SPACES($object);
21869 # all indentation objects must be the same
21871 for $i ( 1 .. $maximum_line_index ) {
21872 if ( $object != $group_lines[$i]->get_indentation() ) {
21873 $extra_indentation_spaces_wanted = 0;
21878 if ($extra_indentation_spaces_wanted) {
21880 # the maximum space without exceeding the line length:
21881 my $avail = $group_lines[0]->get_available_space_on_right();
21882 $extra_leading_spaces =
21883 ( $avail > $extra_indentation_spaces_wanted )
21884 ? $extra_indentation_spaces_wanted
21887 # update the indentation object because with -icp the terminal
21888 # ');' will use the same adjustment.
21889 $object->permanently_decrease_AVAILABLE_SPACES(
21890 -$extra_leading_spaces );
21894 return $extra_leading_spaces;
21897 sub combine_fields {
21899 # combine all fields except for the comment field ( sidecmt.t )
21900 # Uses global variables:
21902 # $maximum_line_index
21904 my $maximum_field_index = $group_lines[0]->get_jmax();
21905 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
21906 my $line = $group_lines[$j];
21907 my $rfields = $line->get_rfields();
21908 foreach ( 1 .. $maximum_field_index - 1 ) {
21909 $$rfields[0] .= $$rfields[$_];
21911 $$rfields[1] = $$rfields[$maximum_field_index];
21913 $line->set_jmax(1);
21914 $line->set_column( 0, 0 );
21915 $line->set_column( 1, 0 );
21918 $maximum_field_index = 1;
21920 for $j ( 0 .. $maximum_line_index ) {
21921 my $line = $group_lines[$j];
21922 my $rfields = $line->get_rfields();
21923 for $k ( 0 .. $maximum_field_index ) {
21924 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
21926 $pad += $group_lines[$j]->get_leading_space_count();
21929 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
21935 sub get_output_line_number {
21937 # the output line number reported to a caller is the number of items
21938 # written plus the number of items in the buffer
21940 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
21943 sub valign_output_step_B {
21945 ###############################################################
21946 # This is Step B in writing vertically aligned lines.
21947 # Vertical tightness is applied according to preset flags.
21948 # In particular this routine handles stacking of opening
21949 # and closing tokens.
21950 ###############################################################
21952 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
21953 $rvertical_tightness_flags, $level )
21956 # handle outdenting of long lines:
21957 if ($outdent_long_lines) {
21960 $side_comment_length +
21961 $leading_space_count -
21962 maximum_line_length_for_level($level);
21963 if ( $excess > 0 ) {
21964 $leading_space_count = 0;
21965 $last_outdented_line_at =
21966 $file_writer_object->get_output_line_number();
21968 unless ($outdented_line_count) {
21969 $first_outdented_line_at = $last_outdented_line_at;
21971 $outdented_line_count++;
21975 # Make preliminary leading whitespace. It could get changed
21976 # later by entabbing, so we have to keep track of any changes
21977 # to the leading_space_count from here on.
21978 my $leading_string =
21979 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
21981 # Unpack any recombination data; it was packed by
21982 # sub send_lines_to_vertical_aligner. Contents:
21984 # [0] type: 1=opening non-block 2=closing non-block
21985 # 3=opening block brace 4=closing block brace
21986 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
21987 # if closing: spaces of padding to use
21988 # [2] sequence number of container
21989 # [3] valid flag: do not append if this flag is false
21991 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21993 if ($rvertical_tightness_flags) {
21995 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21997 ) = @{$rvertical_tightness_flags};
22000 $seqno_string = $seqno_end;
22002 # handle any cached line ..
22003 # either append this line to it or write it out
22004 if ( length($cached_line_text) ) {
22006 # Dump an invalid cached line
22007 if ( !$cached_line_valid ) {
22008 valign_output_step_C( $cached_line_text,
22009 $cached_line_leading_space_count,
22010 $last_level_written );
22013 # Handle cached line ending in OPENING tokens
22014 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
22016 my $gap = $leading_space_count - length($cached_line_text);
22018 # handle option of just one tight opening per line:
22019 if ( $cached_line_flag == 1 ) {
22020 if ( defined($open_or_close) && $open_or_close == 1 ) {
22025 if ( $gap >= 0 && defined($seqno_beg) ) {
22026 $leading_string = $cached_line_text . ' ' x $gap;
22027 $leading_space_count = $cached_line_leading_space_count;
22028 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
22029 $level = $last_level_written;
22032 valign_output_step_C( $cached_line_text,
22033 $cached_line_leading_space_count,
22034 $last_level_written );
22038 # Handle cached line ending in CLOSING tokens
22040 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
22043 # The new line must start with container
22046 # The container combination must be okay..
22049 # okay to combine like types
22050 ( $open_or_close == $cached_line_type )
22052 # closing block brace may append to non-block
22053 || ( $cached_line_type == 2 && $open_or_close == 4 )
22055 # something like ');'
22056 || ( !$open_or_close && $cached_line_type == 2 )
22060 # The combined line must fit
22062 length($test_line) <=
22063 maximum_line_length_for_level($last_level_written) )
22067 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
22069 # Patch to outdent closing tokens ending # in ');'
22070 # If we are joining a line like ');' to a previous stacked
22071 # set of closing tokens, then decide if we may outdent the
22072 # combined stack to the indentation of the ');'. Since we
22073 # should not normally outdent any of the other tokens more than
22074 # the indentation of the lines that contained them, we will
22075 # only do this if all of the corresponding opening
22076 # tokens were on the same line. This can happen with
22077 # -sot and -sct. For example, it is ok here:
22078 # __PACKAGE__->load_components( qw(
22083 # But, for example, we do not outdent in this example because
22084 # that would put the closing sub brace out farther than the
22085 # opening sub brace:
22087 # perltidy -sot -sct
22089 # '<Control-f>' => sub {
22091 # my $e = $c->XEvent;
22092 # itemsUnderArea $c;
22095 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
22097 # The way to tell this is if the stacked sequence numbers
22098 # of this output line are the reverse of the stacked
22099 # sequence numbers of the previous non-blank line of
22100 # sequence numbers. So we can join if the previous
22101 # nonblank string of tokens is the mirror image. For
22102 # example if stack )}] is 13:8:6 then we are looking for a
22103 # leading stack like [{( which is 6:8:13 We only need to
22104 # check the two ends, because the intermediate tokens must
22105 # fall in order. Note on speed: having to split on colons
22106 # and eliminate multiple colons might appear to be slow,
22107 # but it's not an issue because we almost never come
22108 # through here. In a typical file we don't.
22109 $seqno_string =~ s/^:+//;
22110 $last_nonblank_seqno_string =~ s/^:+//;
22111 $seqno_string =~ s/:+/:/g;
22112 $last_nonblank_seqno_string =~ s/:+/:/g;
22114 # how many spaces can we outdent?
22116 $cached_line_leading_space_count - $leading_space_count;
22118 && length($seqno_string)
22119 && length($last_nonblank_seqno_string) ==
22120 length($seqno_string) )
22123 ( split ':', $last_nonblank_seqno_string );
22124 my @seqno_now = ( split ':', $seqno_string );
22125 if ( $seqno_now[-1] == $seqno_last[0]
22126 && $seqno_now[0] == $seqno_last[-1] )
22130 # for absolute safety, be sure we only remove
22132 my $ws = substr( $test_line, 0, $diff );
22133 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
22135 $test_line = substr( $test_line, $diff );
22136 $cached_line_leading_space_count -= $diff;
22137 $last_level_written =
22139 $cached_line_leading_space_count,
22140 $diff, $last_level_written );
22141 reduce_valign_buffer_indentation($diff);
22144 # shouldn't happen, but not critical:
22146 ## ERROR transferring indentation here
22153 $leading_string = "";
22154 $leading_space_count = $cached_line_leading_space_count;
22155 $level = $last_level_written;
22158 valign_output_step_C( $cached_line_text,
22159 $cached_line_leading_space_count,
22160 $last_level_written );
22164 $cached_line_type = 0;
22165 $cached_line_text = "";
22167 # make the line to be written
22168 my $line = $leading_string . $str;
22170 # write or cache this line
22171 if ( !$open_or_close || $side_comment_length > 0 ) {
22172 valign_output_step_C( $line, $leading_space_count, $level );
22175 $cached_line_text = $line;
22176 $cached_line_type = $open_or_close;
22177 $cached_line_flag = $tightness_flag;
22178 $cached_seqno = $seqno;
22179 $cached_line_valid = $valid;
22180 $cached_line_leading_space_count = $leading_space_count;
22181 $cached_seqno_string = $seqno_string;
22184 $last_level_written = $level;
22185 $last_side_comment_length = $side_comment_length;
22186 $extra_indent_ok = 0;
22189 sub valign_output_step_C {
22191 ###############################################################
22192 # This is Step C in writing vertically aligned lines.
22193 # Lines are either stored in a buffer or passed along to the next step.
22194 # The reason for storing lines is that we may later want to reduce their
22195 # indentation when -sot and -sct are both used.
22196 ###############################################################
22199 # Dump any saved lines if we see a line with an unbalanced opening or
22201 dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
22203 # Either store or write this line
22204 if ($valign_buffer_filling) {
22205 push @valign_buffer, [@args];
22208 valign_output_step_D(@args);
22211 # For lines starting or ending with opening or closing tokens..
22212 if ($seqno_string) {
22213 $last_nonblank_seqno_string = $seqno_string;
22215 # Start storing lines when we see a line with multiple stacked opening
22217 # patch for RT #94354, requested by Colin Williams
22218 if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
22221 # This test is efficient but a little subtle: The first test says
22222 # that we have multiple sequence numbers and hence multiple opening
22223 # or closing tokens in this line. The second part of the test
22224 # rejects stacked closing and ternary tokens. So if we get here
22225 # then we should have stacked unbalanced opening tokens.
22227 # Here is a complex example:
22229 # Foo($Bar[0], { # (side comment)
22233 # The first line has sequence 6::4. It does not begin with
22234 # a closing token or ternary, so it passes the test and must be
22235 # stacked opening tokens.
22237 # The last line has sequence 4:6 but is a stack of closing tokens,
22238 # so it gets rejected.
22240 # Note that the sequence number of an opening token for a qw quote
22241 # is a negative number and will be rejected.
22242 # For example, for the following line:
22243 # skip_symbols([qw(
22244 # $seqno_string='10:5:-1'. It would be okay to accept it but
22245 # I decided not to do this after testing.
22247 $valign_buffer_filling = $seqno_string;
22253 sub valign_output_step_D {
22255 ###############################################################
22256 # This is Step D in writing vertically aligned lines.
22257 # Write one vertically aligned line of code to the output object.
22258 ###############################################################
22260 my ( $line, $leading_space_count, $level ) = @_;
22262 # The line is currently correct if there is no tabbing (recommended!)
22263 # We may have to lop off some leading spaces and replace with tabs.
22264 if ( $leading_space_count > 0 ) {
22266 # Nothing to do if no tabs
22267 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22268 || $rOpts_indent_columns <= 0 )
22274 # Handle entab option
22275 elsif ($rOpts_entab_leading_whitespace) {
22277 $leading_space_count % $rOpts_entab_leading_whitespace;
22279 int( $leading_space_count / $rOpts_entab_leading_whitespace );
22280 my $leading_string = "\t" x $tab_count . ' ' x $space_count;
22281 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22282 substr( $line, 0, $leading_space_count ) = $leading_string;
22286 # shouldn't happen - program error counting whitespace
22288 VALIGN_DEBUG_FLAG_TABS
22290 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22295 # Handle option of one tab per level
22297 my $leading_string = ( "\t" x $level );
22299 $leading_space_count - $level * $rOpts_indent_columns;
22301 # shouldn't happen:
22302 if ( $space_count < 0 ) {
22304 # But it could be an outdented comment
22305 if ( $line !~ /^\s*#/ ) {
22306 VALIGN_DEBUG_FLAG_TABS
22308 "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
22311 $leading_string = ( ' ' x $leading_space_count );
22314 $leading_string .= ( ' ' x $space_count );
22316 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22317 substr( $line, 0, $leading_space_count ) = $leading_string;
22321 # shouldn't happen - program error counting whitespace
22322 # we'll skip entabbing
22323 VALIGN_DEBUG_FLAG_TABS
22325 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22330 $file_writer_object->write_code_line( $line . "\n" );
22333 { # begin get_leading_string
22335 my @leading_string_cache;
22337 sub get_leading_string {
22339 # define the leading whitespace string for this line..
22340 my $leading_whitespace_count = shift;
22342 # Handle case of zero whitespace, which includes multi-line quotes
22343 # (which may have a finite level; this prevents tab problems)
22344 if ( $leading_whitespace_count <= 0 ) {
22348 # look for previous result
22349 elsif ( $leading_string_cache[$leading_whitespace_count] ) {
22350 return $leading_string_cache[$leading_whitespace_count];
22353 # must compute a string for this number of spaces
22354 my $leading_string;
22356 # Handle simple case of no tabs
22357 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22358 || $rOpts_indent_columns <= 0 )
22360 $leading_string = ( ' ' x $leading_whitespace_count );
22363 # Handle entab option
22364 elsif ($rOpts_entab_leading_whitespace) {
22366 $leading_whitespace_count % $rOpts_entab_leading_whitespace;
22367 my $tab_count = int(
22368 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
22369 $leading_string = "\t" x $tab_count . ' ' x $space_count;
22372 # Handle option of one tab per level
22374 $leading_string = ( "\t" x $group_level );
22376 $leading_whitespace_count - $group_level * $rOpts_indent_columns;
22378 # shouldn't happen:
22379 if ( $space_count < 0 ) {
22380 VALIGN_DEBUG_FLAG_TABS
22382 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
22385 # -- skip entabbing
22386 $leading_string = ( ' ' x $leading_whitespace_count );
22389 $leading_string .= ( ' ' x $space_count );
22392 $leading_string_cache[$leading_whitespace_count] = $leading_string;
22393 return $leading_string;
22395 } # end get_leading_string
22397 sub report_anything_unusual {
22399 if ( $outdented_line_count > 0 ) {
22400 write_logfile_entry(
22401 "$outdented_line_count long lines were outdented:\n");
22402 write_logfile_entry(
22403 " First at output line $first_outdented_line_at\n");
22405 if ( $outdented_line_count > 1 ) {
22406 write_logfile_entry(
22407 " Last at output line $last_outdented_line_at\n");
22409 write_logfile_entry(
22410 " use -noll to prevent outdenting, -l=n to increase line length\n"
22412 write_logfile_entry("\n");
22416 #####################################################################
22418 # the Perl::Tidy::FileWriter class writes the output file
22420 #####################################################################
22422 package Perl::Tidy::FileWriter;
22424 # Maximum number of little messages; probably need not be changed.
22425 use constant MAX_NAG_MESSAGES => 6;
22427 sub write_logfile_entry {
22429 my $logger_object = $self->{_logger_object};
22430 if ($logger_object) {
22431 $logger_object->write_logfile_entry(@_);
22437 my ( $line_sink_object, $rOpts, $logger_object ) = @_;
22440 _line_sink_object => $line_sink_object,
22441 _logger_object => $logger_object,
22443 _output_line_number => 1,
22444 _consecutive_blank_lines => 0,
22445 _consecutive_nonblank_lines => 0,
22446 _first_line_length_error => 0,
22447 _max_line_length_error => 0,
22448 _last_line_length_error => 0,
22449 _first_line_length_error_at => 0,
22450 _max_line_length_error_at => 0,
22451 _last_line_length_error_at => 0,
22452 _line_length_error_count => 0,
22453 _max_output_line_length => 0,
22454 _max_output_line_length_at => 0,
22460 $self->{_line_sink_object}->tee_on();
22465 $self->{_line_sink_object}->tee_off();
22468 sub get_output_line_number {
22470 return $self->{_output_line_number};
22473 sub decrement_output_line_number {
22475 $self->{_output_line_number}--;
22478 sub get_consecutive_nonblank_lines {
22480 return $self->{_consecutive_nonblank_lines};
22483 sub reset_consecutive_blank_lines {
22485 $self->{_consecutive_blank_lines} = 0;
22488 sub want_blank_line {
22490 unless ( $self->{_consecutive_blank_lines} ) {
22491 $self->write_blank_code_line();
22495 sub require_blank_code_lines {
22497 # write out the requested number of blanks regardless of the value of -mbl
22498 # unless -mbl=0. This allows extra blank lines to be written for subs and
22499 # packages even with the default -mbl=1
22502 my $need = $count - $self->{_consecutive_blank_lines};
22503 my $rOpts = $self->{_rOpts};
22504 my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
22505 for ( my $i = 0 ; $i < $need ; $i++ ) {
22506 $self->write_blank_code_line($forced);
22510 sub write_blank_code_line {
22512 my $forced = shift;
22513 my $rOpts = $self->{_rOpts};
22516 && $self->{_consecutive_blank_lines} >=
22517 $rOpts->{'maximum-consecutive-blank-lines'} );
22518 $self->{_consecutive_blank_lines}++;
22519 $self->{_consecutive_nonblank_lines} = 0;
22520 $self->write_line("\n");
22523 sub write_code_line {
22527 if ( $a =~ /^\s*$/ ) {
22528 my $rOpts = $self->{_rOpts};
22530 if ( $self->{_consecutive_blank_lines} >=
22531 $rOpts->{'maximum-consecutive-blank-lines'} );
22532 $self->{_consecutive_blank_lines}++;
22533 $self->{_consecutive_nonblank_lines} = 0;
22536 $self->{_consecutive_blank_lines} = 0;
22537 $self->{_consecutive_nonblank_lines}++;
22539 $self->write_line($a);
22546 # TODO: go through and see if the test is necessary here
22547 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
22549 $self->{_line_sink_object}->write_line($a);
22551 # This calculation of excess line length ignores any internal tabs
22552 my $rOpts = $self->{_rOpts};
22553 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
22554 if ( $a =~ /^\t+/g ) {
22555 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
22558 # Note that we just incremented output line number to future value
22559 # so we must subtract 1 for current line number
22560 if ( length($a) > 1 + $self->{_max_output_line_length} ) {
22561 $self->{_max_output_line_length} = length($a) - 1;
22562 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
22565 if ( $exceed > 0 ) {
22566 my $output_line_number = $self->{_output_line_number};
22567 $self->{_last_line_length_error} = $exceed;
22568 $self->{_last_line_length_error_at} = $output_line_number - 1;
22569 if ( $self->{_line_length_error_count} == 0 ) {
22570 $self->{_first_line_length_error} = $exceed;
22571 $self->{_first_line_length_error_at} = $output_line_number - 1;
22575 $self->{_last_line_length_error} > $self->{_max_line_length_error} )
22577 $self->{_max_line_length_error} = $exceed;
22578 $self->{_max_line_length_error_at} = $output_line_number - 1;
22581 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
22582 $self->write_logfile_entry(
22583 "Line length exceeded by $exceed characters\n");
22585 $self->{_line_length_error_count}++;
22590 sub report_line_length_errors {
22592 my $rOpts = $self->{_rOpts};
22593 my $line_length_error_count = $self->{_line_length_error_count};
22594 if ( $line_length_error_count == 0 ) {
22595 $self->write_logfile_entry(
22596 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
22597 my $max_output_line_length = $self->{_max_output_line_length};
22598 my $max_output_line_length_at = $self->{_max_output_line_length_at};
22599 $self->write_logfile_entry(
22600 " Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
22606 my $word = ( $line_length_error_count > 1 ) ? "s" : "";
22607 $self->write_logfile_entry(
22608 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
22611 $word = ( $line_length_error_count > 1 ) ? "First" : "";
22612 my $first_line_length_error = $self->{_first_line_length_error};
22613 my $first_line_length_error_at = $self->{_first_line_length_error_at};
22614 $self->write_logfile_entry(
22615 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
22618 if ( $line_length_error_count > 1 ) {
22619 my $max_line_length_error = $self->{_max_line_length_error};
22620 my $max_line_length_error_at = $self->{_max_line_length_error_at};
22621 my $last_line_length_error = $self->{_last_line_length_error};
22622 my $last_line_length_error_at = $self->{_last_line_length_error_at};
22623 $self->write_logfile_entry(
22624 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
22626 $self->write_logfile_entry(
22627 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
22633 #####################################################################
22635 # The Perl::Tidy::Debugger class shows line tokenization
22637 #####################################################################
22639 package Perl::Tidy::Debugger;
22643 my ( $class, $filename ) = @_;
22646 _debug_file => $filename,
22647 _debug_file_opened => 0,
22652 sub really_open_debug_file {
22655 my $debug_file = $self->{_debug_file};
22657 unless ( $fh = IO::File->new("> $debug_file") ) {
22658 Perl::Tidy::Warn("can't open $debug_file: $!\n");
22660 $self->{_debug_file_opened} = 1;
22661 $self->{_fh} = $fh;
22663 "Use -dump-token-types (-dtt) to get a list of token type codes\n";
22666 sub close_debug_file {
22669 my $fh = $self->{_fh};
22670 if ( $self->{_debug_file_opened} ) {
22672 eval { $self->{_fh}->close() };
22676 sub write_debug_entry {
22678 # This is a debug dump routine which may be modified as necessary
22679 # to dump tokens on a line-by-line basis. The output will be written
22680 # to the .DEBUG file when the -D flag is entered.
22682 my $line_of_tokens = shift;
22684 my $input_line = $line_of_tokens->{_line_text};
22685 my $rtoken_type = $line_of_tokens->{_rtoken_type};
22686 my $rtokens = $line_of_tokens->{_rtokens};
22687 my $rlevels = $line_of_tokens->{_rlevels};
22688 my $rslevels = $line_of_tokens->{_rslevels};
22689 my $rblock_type = $line_of_tokens->{_rblock_type};
22690 my $input_line_number = $line_of_tokens->{_line_number};
22691 my $line_type = $line_of_tokens->{_line_type};
22695 my $token_str = "$input_line_number: ";
22696 my $reconstructed_original = "$input_line_number: ";
22697 my $block_str = "$input_line_number: ";
22699 #$token_str .= "$line_type: ";
22700 #$reconstructed_original .= "$line_type: ";
22703 my @next_char = ( '"', '"' );
22705 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
22706 my $fh = $self->{_fh};
22708 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
22711 if ( $$rtoken_type[$j] eq 'k' ) {
22712 $pattern .= $$rtokens[$j];
22715 $pattern .= $$rtoken_type[$j];
22717 $reconstructed_original .= $$rtokens[$j];
22718 $block_str .= "($$rblock_type[$j])";
22719 $num = length( $$rtokens[$j] );
22720 my $type_str = $$rtoken_type[$j];
22722 # be sure there are no blank tokens (shouldn't happen)
22723 # This can only happen if a programming error has been made
22724 # because all valid tokens are non-blank
22725 if ( $type_str eq ' ' ) {
22726 print $fh "BLANK TOKEN on the next line\n";
22727 $type_str = $next_char[$i_next];
22728 $i_next = 1 - $i_next;
22731 if ( length($type_str) == 1 ) {
22732 $type_str = $type_str x $num;
22734 $token_str .= $type_str;
22737 # Write what you want here ...
22738 # print $fh "$input_line\n";
22739 # print $fh "$pattern\n";
22740 print $fh "$reconstructed_original\n";
22741 print $fh "$token_str\n";
22743 #print $fh "$block_str\n";
22746 #####################################################################
22748 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
22749 # method for returning the next line to be parsed, as well as a
22750 # 'peek_ahead()' method
22752 # The input parameter is an object with a 'get_line()' method
22753 # which returns the next line to be parsed
22755 #####################################################################
22757 package Perl::Tidy::LineBuffer;
22762 my $line_source_object = shift;
22765 _line_source_object => $line_source_object,
22766 _rlookahead_buffer => [],
22772 my $buffer_index = shift;
22774 my $line_source_object = $self->{_line_source_object};
22775 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22776 if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
22777 $line = $$rlookahead_buffer[$buffer_index];
22780 $line = $line_source_object->get_line();
22781 push( @$rlookahead_buffer, $line );
22789 my $line_source_object = $self->{_line_source_object};
22790 my $rlookahead_buffer = $self->{_rlookahead_buffer};
22792 if ( scalar(@$rlookahead_buffer) ) {
22793 $line = shift @$rlookahead_buffer;
22796 $line = $line_source_object->get_line();
22801 ########################################################################
22803 # the Perl::Tidy::Tokenizer package is essentially a filter which
22804 # reads lines of perl source code from a source object and provides
22805 # corresponding tokenized lines through its get_line() method. Lines
22806 # flow from the source_object to the caller like this:
22808 # source_object --> LineBuffer_object --> Tokenizer --> calling routine
22809 # get_line() get_line() get_line() line_of_tokens
22811 # The source object can be any object with a get_line() method which
22812 # supplies one line (a character string) perl call.
22813 # The LineBuffer object is created by the Tokenizer.
22814 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
22815 # containing one tokenized line for each call to its get_line() method.
22817 # WARNING: This is not a real class yet. Only one tokenizer my be used.
22819 ########################################################################
22821 package Perl::Tidy::Tokenizer;
22825 # Caution: these debug flags produce a lot of output
22826 # They should all be 0 except when debugging small scripts
22828 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
22829 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
22830 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
22831 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
22832 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
22834 my $debug_warning = sub {
22835 print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
22838 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
22839 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
22840 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
22841 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
22842 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
22848 # PACKAGE VARIABLES for processing an entire FILE.
22852 $last_nonblank_token
22853 $last_nonblank_type
22854 $last_nonblank_block_type
22862 %user_function_prototype
22864 %is_block_list_function
22865 %saw_function_definition
22869 $square_bracket_depth
22874 @nesting_sequence_number
22875 @current_sequence_number
22877 @paren_semicolon_count
22878 @paren_structural_type
22880 @brace_structural_type
22883 @square_bracket_type
22884 @square_bracket_structural_type
22886 @nested_ternary_flag
22887 @nested_statement_type
22888 @starting_line_of_current_depth
22891 # GLOBAL CONSTANTS for routines in this package
22893 %is_indirect_object_taker
22895 %expecting_operator_token
22896 %expecting_operator_types
22897 %expecting_term_types
22898 %expecting_term_token
22900 %is_file_test_operator
22902 %is_valid_token_type
22904 %is_code_block_token
22906 @opening_brace_names
22907 @closing_brace_names
22908 %is_keyword_taking_list
22909 %is_q_qq_qw_qx_qr_s_y_tr_m
22912 # possible values of operator_expected()
22913 use constant TERM => -1;
22914 use constant UNKNOWN => 0;
22915 use constant OPERATOR => 1;
22917 # possible values of context
22918 use constant SCALAR_CONTEXT => -1;
22919 use constant UNKNOWN_CONTEXT => 0;
22920 use constant LIST_CONTEXT => 1;
22922 # Maximum number of little messages; probably need not be changed.
22923 use constant MAX_NAG_MESSAGES => 6;
22927 # methods to count instances
22929 sub get_count { $_count; }
22930 sub _increment_count { ++$_count }
22931 sub _decrement_count { --$_count }
22935 $_[0]->_decrement_count();
22942 # Note: 'tabs' and 'indent_columns' are temporary and should be
22945 source_object => undef,
22946 debugger_object => undef,
22947 diagnostics_object => undef,
22948 logger_object => undef,
22949 starting_level => undef,
22950 indent_columns => 4,
22952 look_for_hash_bang => 0,
22954 look_for_autoloader => 1,
22955 look_for_selfloader => 1,
22956 starting_line_number => 1,
22957 extended_syntax => 0,
22959 my %args = ( %defaults, @_ );
22961 # we are given an object with a get_line() method to supply source lines
22962 my $source_object = $args{source_object};
22964 # we create another object with a get_line() and peek_ahead() method
22965 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
22967 # Tokenizer state data is as follows:
22968 # _rhere_target_list reference to list of here-doc targets
22969 # _here_doc_target the target string for a here document
22970 # _here_quote_character the type of here-doc quoting (" ' ` or none)
22971 # to determine if interpolation is done
22972 # _quote_target character we seek if chasing a quote
22973 # _line_start_quote line where we started looking for a long quote
22974 # _in_here_doc flag indicating if we are in a here-doc
22975 # _in_pod flag set if we are in pod documentation
22976 # _in_error flag set if we saw severe error (binary in script)
22977 # _in_data flag set if we are in __DATA__ section
22978 # _in_end flag set if we are in __END__ section
22979 # _in_format flag set if we are in a format description
22980 # _in_attribute_list flag telling if we are looking for attributes
22981 # _in_quote flag telling if we are chasing a quote
22982 # _starting_level indentation level of first line
22983 # _line_buffer_object object with get_line() method to supply source code
22984 # _diagnostics_object place to write debugging information
22985 # _unexpected_error_count error count used to limit output
22986 # _lower_case_labels_at line numbers where lower case labels seen
22987 $tokenizer_self = {
22988 _rhere_target_list => [],
22990 _here_doc_target => "",
22991 _here_quote_character => "",
22997 _in_attribute_list => 0,
22999 _quote_target => "",
23000 _line_start_quote => -1,
23001 _starting_level => $args{starting_level},
23002 _know_starting_level => defined( $args{starting_level} ),
23003 _tabsize => $args{tabsize},
23004 _indent_columns => $args{indent_columns},
23005 _look_for_hash_bang => $args{look_for_hash_bang},
23006 _trim_qw => $args{trim_qw},
23007 _continuation_indentation => $args{continuation_indentation},
23008 _outdent_labels => $args{outdent_labels},
23009 _last_line_number => $args{starting_line_number} - 1,
23010 _saw_perl_dash_P => 0,
23011 _saw_perl_dash_w => 0,
23012 _saw_use_strict => 0,
23013 _saw_v_string => 0,
23014 _look_for_autoloader => $args{look_for_autoloader},
23015 _look_for_selfloader => $args{look_for_selfloader},
23016 _saw_autoloader => 0,
23017 _saw_selfloader => 0,
23018 _saw_hash_bang => 0,
23021 _saw_negative_indentation => 0,
23022 _started_tokenizing => 0,
23023 _line_buffer_object => $line_buffer_object,
23024 _debugger_object => $args{debugger_object},
23025 _diagnostics_object => $args{diagnostics_object},
23026 _logger_object => $args{logger_object},
23027 _unexpected_error_count => 0,
23028 _started_looking_for_here_target_at => 0,
23029 _nearly_matched_here_target_at => undef,
23031 _rlower_case_labels_at => undef,
23032 _extended_syntax => $args{extended_syntax},
23035 prepare_for_a_new_file();
23036 find_starting_indentation_level();
23038 bless $tokenizer_self, $class;
23040 # This is not a full class yet, so die if an attempt is made to
23041 # create more than one object.
23043 if ( _increment_count() > 1 ) {
23045 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
23048 return $tokenizer_self;
23052 # interface to Perl::Tidy::Logger routines
23054 my $logger_object = $tokenizer_self->{_logger_object};
23055 if ($logger_object) {
23056 $logger_object->warning(@_);
23061 my $logger_object = $tokenizer_self->{_logger_object};
23062 if ($logger_object) {
23063 $logger_object->complain(@_);
23067 sub write_logfile_entry {
23068 my $logger_object = $tokenizer_self->{_logger_object};
23069 if ($logger_object) {
23070 $logger_object->write_logfile_entry(@_);
23074 sub interrupt_logfile {
23075 my $logger_object = $tokenizer_self->{_logger_object};
23076 if ($logger_object) {
23077 $logger_object->interrupt_logfile();
23081 sub resume_logfile {
23082 my $logger_object = $tokenizer_self->{_logger_object};
23083 if ($logger_object) {
23084 $logger_object->resume_logfile();
23088 sub increment_brace_error {
23089 my $logger_object = $tokenizer_self->{_logger_object};
23090 if ($logger_object) {
23091 $logger_object->increment_brace_error();
23095 sub report_definite_bug {
23096 my $logger_object = $tokenizer_self->{_logger_object};
23097 if ($logger_object) {
23098 $logger_object->report_definite_bug();
23102 sub brace_warning {
23103 my $logger_object = $tokenizer_self->{_logger_object};
23104 if ($logger_object) {
23105 $logger_object->brace_warning(@_);
23109 sub get_saw_brace_error {
23110 my $logger_object = $tokenizer_self->{_logger_object};
23111 if ($logger_object) {
23112 $logger_object->get_saw_brace_error();
23119 # interface to Perl::Tidy::Diagnostics routines
23120 sub write_diagnostics {
23121 if ( $tokenizer_self->{_diagnostics_object} ) {
23122 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
23126 sub report_tokenization_errors {
23130 my $level = get_indentation_level();
23131 if ( $level != $tokenizer_self->{_starting_level} ) {
23132 warning("final indentation level: $level\n");
23135 check_final_nesting_depths();
23137 if ( $tokenizer_self->{_look_for_hash_bang}
23138 && !$tokenizer_self->{_saw_hash_bang} )
23141 "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
23144 if ( $tokenizer_self->{_in_format} ) {
23145 warning("hit EOF while in format description\n");
23148 if ( $tokenizer_self->{_in_pod} ) {
23150 # Just write log entry if this is after __END__ or __DATA__
23151 # because this happens to often, and it is not likely to be
23153 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
23154 write_logfile_entry(
23155 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
23161 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
23167 if ( $tokenizer_self->{_in_here_doc} ) {
23168 my $here_doc_target = $tokenizer_self->{_here_doc_target};
23169 my $started_looking_for_here_target_at =
23170 $tokenizer_self->{_started_looking_for_here_target_at};
23171 if ($here_doc_target) {
23173 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
23178 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
23181 my $nearly_matched_here_target_at =
23182 $tokenizer_self->{_nearly_matched_here_target_at};
23183 if ($nearly_matched_here_target_at) {
23185 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
23190 if ( $tokenizer_self->{_in_quote} ) {
23191 my $line_start_quote = $tokenizer_self->{_line_start_quote};
23192 my $quote_target = $tokenizer_self->{_quote_target};
23194 ( $tokenizer_self->{_in_attribute_list} )
23198 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
23202 unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
23203 if ( $] < 5.006 ) {
23204 write_logfile_entry("Suggest including '-w parameter'\n");
23207 write_logfile_entry("Suggest including 'use warnings;'\n");
23211 if ( $tokenizer_self->{_saw_perl_dash_P} ) {
23212 write_logfile_entry("Use of -P parameter for defines is discouraged\n");
23215 unless ( $tokenizer_self->{_saw_use_strict} ) {
23216 write_logfile_entry("Suggest including 'use strict;'\n");
23219 # it is suggested that labels have at least one upper case character
23220 # for legibility and to avoid code breakage as new keywords are introduced
23221 if ( $tokenizer_self->{_rlower_case_labels_at} ) {
23222 my @lower_case_labels_at =
23223 @{ $tokenizer_self->{_rlower_case_labels_at} };
23224 write_logfile_entry(
23225 "Suggest using upper case characters in label(s)\n");
23227 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
23231 sub report_v_string {
23233 # warn if this version can't handle v-strings
23235 unless ( $tokenizer_self->{_saw_v_string} ) {
23236 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
23238 if ( $] < 5.006 ) {
23240 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
23245 sub get_input_line_number {
23246 return $tokenizer_self->{_last_line_number};
23249 # returns the next tokenized line
23254 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
23255 # $square_bracket_depth, $paren_depth
23257 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
23258 $tokenizer_self->{_line_text} = $input_line;
23260 return undef unless ($input_line);
23262 my $input_line_number = ++$tokenizer_self->{_last_line_number};
23264 # Find and remove what characters terminate this line, including any
23266 my $input_line_separator = "";
23267 if ( chomp($input_line) ) { $input_line_separator = $/ }
23269 # TODO: what other characters should be included here?
23270 if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
23271 $input_line_separator = $2 . $input_line_separator;
23274 # for backwards compatibility we keep the line text terminated with
23275 # a newline character
23276 $input_line .= "\n";
23277 $tokenizer_self->{_line_text} = $input_line; # update
23279 # create a data structure describing this line which will be
23280 # returned to the caller.
23282 # _line_type codes are:
23283 # SYSTEM - system-specific code before hash-bang line
23284 # CODE - line of perl code (including comments)
23285 # POD_START - line starting pod, such as '=head'
23286 # POD - pod documentation text
23287 # POD_END - last line of pod section, '=cut'
23288 # HERE - text of here-document
23289 # HERE_END - last line of here-doc (target word)
23290 # FORMAT - format section
23291 # FORMAT_END - last line of format section, '.'
23292 # DATA_START - __DATA__ line
23293 # DATA - unidentified text following __DATA__
23294 # END_START - __END__ line
23295 # END - unidentified text following __END__
23296 # ERROR - we are in big trouble, probably not a perl script
23299 # _curly_brace_depth - depth of curly braces at start of line
23300 # _square_bracket_depth - depth of square brackets at start of line
23301 # _paren_depth - depth of parens at start of line
23302 # _starting_in_quote - this line continues a multi-line quote
23303 # (so don't trim leading blanks!)
23304 # _ending_in_quote - this line ends in a multi-line quote
23305 # (so don't trim trailing blanks!)
23306 my $line_of_tokens = {
23307 _line_type => 'EOF',
23308 _line_text => $input_line,
23309 _line_number => $input_line_number,
23310 _rtoken_type => undef,
23313 _rslevels => undef,
23314 _rblock_type => undef,
23315 _rcontainer_type => undef,
23316 _rcontainer_environment => undef,
23317 _rtype_sequence => undef,
23318 _rnesting_tokens => undef,
23319 _rci_levels => undef,
23320 _rnesting_blocks => undef,
23321 _guessed_indentation_level => 0,
23322 _starting_in_quote => 0, # to be set by subroutine
23323 _ending_in_quote => 0,
23324 _curly_brace_depth => $brace_depth,
23325 _square_bracket_depth => $square_bracket_depth,
23326 _paren_depth => $paren_depth,
23327 _quote_character => '',
23330 # must print line unchanged if we are in a here document
23331 if ( $tokenizer_self->{_in_here_doc} ) {
23333 $line_of_tokens->{_line_type} = 'HERE';
23334 my $here_doc_target = $tokenizer_self->{_here_doc_target};
23335 my $here_quote_character = $tokenizer_self->{_here_quote_character};
23336 my $candidate_target = $input_line;
23337 chomp $candidate_target;
23338 if ( $candidate_target eq $here_doc_target ) {
23339 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23340 $line_of_tokens->{_line_type} = 'HERE_END';
23341 write_logfile_entry("Exiting HERE document $here_doc_target\n");
23343 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23344 if (@$rhere_target_list) { # there can be multiple here targets
23345 ( $here_doc_target, $here_quote_character ) =
23346 @{ shift @$rhere_target_list };
23347 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23348 $tokenizer_self->{_here_quote_character} =
23349 $here_quote_character;
23350 write_logfile_entry(
23351 "Entering HERE document $here_doc_target\n");
23352 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23353 $tokenizer_self->{_started_looking_for_here_target_at} =
23354 $input_line_number;
23357 $tokenizer_self->{_in_here_doc} = 0;
23358 $tokenizer_self->{_here_doc_target} = "";
23359 $tokenizer_self->{_here_quote_character} = "";
23363 # check for error of extra whitespace
23364 # note for PERL6: leading whitespace is allowed
23366 $candidate_target =~ s/\s*$//;
23367 $candidate_target =~ s/^\s*//;
23368 if ( $candidate_target eq $here_doc_target ) {
23369 $tokenizer_self->{_nearly_matched_here_target_at} =
23370 $input_line_number;
23373 return $line_of_tokens;
23376 # must print line unchanged if we are in a format section
23377 elsif ( $tokenizer_self->{_in_format} ) {
23379 if ( $input_line =~ /^\.[\s#]*$/ ) {
23380 write_logfile_entry("Exiting format section\n");
23381 $tokenizer_self->{_in_format} = 0;
23382 $line_of_tokens->{_line_type} = 'FORMAT_END';
23385 $line_of_tokens->{_line_type} = 'FORMAT';
23387 return $line_of_tokens;
23390 # must print line unchanged if we are in pod documentation
23391 elsif ( $tokenizer_self->{_in_pod} ) {
23393 $line_of_tokens->{_line_type} = 'POD';
23394 if ( $input_line =~ /^=cut/ ) {
23395 $line_of_tokens->{_line_type} = 'POD_END';
23396 write_logfile_entry("Exiting POD section\n");
23397 $tokenizer_self->{_in_pod} = 0;
23399 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23401 "Hash-bang in pod can cause older versions of perl to fail! \n"
23405 return $line_of_tokens;
23408 # must print line unchanged if we have seen a severe error (i.e., we
23409 # are seeing illegal tokens and cannot continue. Syntax errors do
23410 # not pass this route). Calling routine can decide what to do, but
23411 # the default can be to just pass all lines as if they were after __END__
23412 elsif ( $tokenizer_self->{_in_error} ) {
23413 $line_of_tokens->{_line_type} = 'ERROR';
23414 return $line_of_tokens;
23417 # print line unchanged if we are __DATA__ section
23418 elsif ( $tokenizer_self->{_in_data} ) {
23420 # ...but look for POD
23421 # Note that the _in_data and _in_end flags remain set
23422 # so that we return to that state after seeing the
23423 # end of a pod section
23424 if ( $input_line =~ /^=(?!cut)/ ) {
23425 $line_of_tokens->{_line_type} = 'POD_START';
23426 write_logfile_entry("Entering POD section\n");
23427 $tokenizer_self->{_in_pod} = 1;
23428 return $line_of_tokens;
23431 $line_of_tokens->{_line_type} = 'DATA';
23432 return $line_of_tokens;
23436 # print line unchanged if we are in __END__ section
23437 elsif ( $tokenizer_self->{_in_end} ) {
23439 # ...but look for POD
23440 # Note that the _in_data and _in_end flags remain set
23441 # so that we return to that state after seeing the
23442 # end of a pod section
23443 if ( $input_line =~ /^=(?!cut)/ ) {
23444 $line_of_tokens->{_line_type} = 'POD_START';
23445 write_logfile_entry("Entering POD section\n");
23446 $tokenizer_self->{_in_pod} = 1;
23447 return $line_of_tokens;
23450 $line_of_tokens->{_line_type} = 'END';
23451 return $line_of_tokens;
23455 # check for a hash-bang line if we haven't seen one
23456 if ( !$tokenizer_self->{_saw_hash_bang} ) {
23457 if ( $input_line =~ /^\#\!.*perl\b/ ) {
23458 $tokenizer_self->{_saw_hash_bang} = $input_line_number;
23460 # check for -w and -P flags
23461 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
23462 $tokenizer_self->{_saw_perl_dash_P} = 1;
23465 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
23466 $tokenizer_self->{_saw_perl_dash_w} = 1;
23469 if ( ( $input_line_number > 1 )
23470 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
23473 # this is helpful for VMS systems; we may have accidentally
23474 # tokenized some DCL commands
23475 if ( $tokenizer_self->{_started_tokenizing} ) {
23477 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
23481 complain("Useless hash-bang after line 1\n");
23485 # Report the leading hash-bang as a system line
23486 # This will prevent -dac from deleting it
23488 $line_of_tokens->{_line_type} = 'SYSTEM';
23489 return $line_of_tokens;
23494 # wait for a hash-bang before parsing if the user invoked us with -x
23495 if ( $tokenizer_self->{_look_for_hash_bang}
23496 && !$tokenizer_self->{_saw_hash_bang} )
23498 $line_of_tokens->{_line_type} = 'SYSTEM';
23499 return $line_of_tokens;
23502 # a first line of the form ': #' will be marked as SYSTEM
23503 # since lines of this form may be used by tcsh
23504 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
23505 $line_of_tokens->{_line_type} = 'SYSTEM';
23506 return $line_of_tokens;
23509 # now we know that it is ok to tokenize the line...
23510 # the line tokenizer will modify any of these private variables:
23511 # _rhere_target_list
23518 my $ending_in_quote_last = $tokenizer_self->{_in_quote};
23519 tokenize_this_line($line_of_tokens);
23521 # Now finish defining the return structure and return it
23522 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
23524 # handle severe error (binary data in script)
23525 if ( $tokenizer_self->{_in_error} ) {
23526 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
23527 warning("Giving up after error\n");
23528 $line_of_tokens->{_line_type} = 'ERROR';
23529 reset_indentation_level(0); # avoid error messages
23530 return $line_of_tokens;
23533 # handle start of pod documentation
23534 if ( $tokenizer_self->{_in_pod} ) {
23536 # This gets tricky..above a __DATA__ or __END__ section, perl
23537 # accepts '=cut' as the start of pod section. But afterwards,
23538 # only pod utilities see it and they may ignore an =cut without
23539 # leading =head. In any case, this isn't good.
23540 if ( $input_line =~ /^=cut\b/ ) {
23541 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
23542 complain("=cut while not in pod ignored\n");
23543 $tokenizer_self->{_in_pod} = 0;
23544 $line_of_tokens->{_line_type} = 'POD_END';
23547 $line_of_tokens->{_line_type} = 'POD_START';
23549 "=cut starts a pod section .. this can fool pod utilities.\n"
23551 write_logfile_entry("Entering POD section\n");
23556 $line_of_tokens->{_line_type} = 'POD_START';
23557 write_logfile_entry("Entering POD section\n");
23560 return $line_of_tokens;
23563 # update indentation levels for log messages
23564 if ( $input_line !~ /^\s*$/ ) {
23565 my $rlevels = $line_of_tokens->{_rlevels};
23566 $line_of_tokens->{_guessed_indentation_level} =
23567 guess_old_indentation_level($input_line);
23570 # see if this line contains here doc targets
23571 my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23572 if (@$rhere_target_list) {
23574 my ( $here_doc_target, $here_quote_character ) =
23575 @{ shift @$rhere_target_list };
23576 $tokenizer_self->{_in_here_doc} = 1;
23577 $tokenizer_self->{_here_doc_target} = $here_doc_target;
23578 $tokenizer_self->{_here_quote_character} = $here_quote_character;
23579 write_logfile_entry("Entering HERE document $here_doc_target\n");
23580 $tokenizer_self->{_started_looking_for_here_target_at} =
23581 $input_line_number;
23584 # NOTE: __END__ and __DATA__ statements are written unformatted
23585 # because they can theoretically contain additional characters
23586 # which are not tokenized (and cannot be read with <DATA> either!).
23587 if ( $tokenizer_self->{_in_data} ) {
23588 $line_of_tokens->{_line_type} = 'DATA_START';
23589 write_logfile_entry("Starting __DATA__ section\n");
23590 $tokenizer_self->{_saw_data} = 1;
23592 # keep parsing after __DATA__ if use SelfLoader was seen
23593 if ( $tokenizer_self->{_saw_selfloader} ) {
23594 $tokenizer_self->{_in_data} = 0;
23595 write_logfile_entry(
23596 "SelfLoader seen, continuing; -nlsl deactivates\n");
23599 return $line_of_tokens;
23602 elsif ( $tokenizer_self->{_in_end} ) {
23603 $line_of_tokens->{_line_type} = 'END_START';
23604 write_logfile_entry("Starting __END__ section\n");
23605 $tokenizer_self->{_saw_end} = 1;
23607 # keep parsing after __END__ if use AutoLoader was seen
23608 if ( $tokenizer_self->{_saw_autoloader} ) {
23609 $tokenizer_self->{_in_end} = 0;
23610 write_logfile_entry(
23611 "AutoLoader seen, continuing; -nlal deactivates\n");
23613 return $line_of_tokens;
23616 # now, finally, we know that this line is type 'CODE'
23617 $line_of_tokens->{_line_type} = 'CODE';
23619 # remember if we have seen any real code
23620 if ( !$tokenizer_self->{_started_tokenizing}
23621 && $input_line !~ /^\s*$/
23622 && $input_line !~ /^\s*#/ )
23624 $tokenizer_self->{_started_tokenizing} = 1;
23627 if ( $tokenizer_self->{_debugger_object} ) {
23628 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
23631 # Note: if keyword 'format' occurs in this line code, it is still CODE
23632 # (keyword 'format' need not start a line)
23633 if ( $tokenizer_self->{_in_format} ) {
23634 write_logfile_entry("Entering format section\n");
23637 if ( $tokenizer_self->{_in_quote}
23638 and ( $tokenizer_self->{_line_start_quote} < 0 ) )
23641 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
23643 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
23645 $tokenizer_self->{_line_start_quote} = $input_line_number;
23646 write_logfile_entry(
23647 "Start multi-line quote or pattern ending in $quote_target\n");
23650 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
23651 and !$tokenizer_self->{_in_quote} )
23653 $tokenizer_self->{_line_start_quote} = -1;
23654 write_logfile_entry("End of multi-line quote or pattern\n");
23657 # we are returning a line of CODE
23658 return $line_of_tokens;
23661 sub find_starting_indentation_level {
23663 # We need to find the indentation level of the first line of the
23664 # script being formatted. Often it will be zero for an entire file,
23665 # but if we are formatting a local block of code (within an editor for
23666 # example) it may not be zero. The user may specify this with the
23667 # -sil=n parameter but normally doesn't so we have to guess.
23669 # USES GLOBAL VARIABLES: $tokenizer_self
23670 my $starting_level = 0;
23672 # use value if given as parameter
23673 if ( $tokenizer_self->{_know_starting_level} ) {
23674 $starting_level = $tokenizer_self->{_starting_level};
23677 # if we know there is a hash_bang line, the level must be zero
23678 elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
23679 $tokenizer_self->{_know_starting_level} = 1;
23682 # otherwise figure it out from the input file
23687 # keep looking at lines until we find a hash bang or piece of code
23690 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23693 # if first line is #! then assume starting level is zero
23694 if ( $i == 1 && $line =~ /^\#\!/ ) {
23695 $starting_level = 0;
23698 next if ( $line =~ /^\s*#/ ); # skip past comments
23699 next if ( $line =~ /^\s*$/ ); # skip past blank lines
23700 $starting_level = guess_old_indentation_level($line);
23703 $msg = "Line $i implies starting-indentation-level = $starting_level\n";
23704 write_logfile_entry("$msg");
23706 $tokenizer_self->{_starting_level} = $starting_level;
23707 reset_indentation_level($starting_level);
23710 sub guess_old_indentation_level {
23713 # Guess the indentation level of an input line.
23715 # For the first line of code this result will define the starting
23716 # indentation level. It will mainly be non-zero when perltidy is applied
23717 # within an editor to a local block of code.
23719 # This is an impossible task in general because we can't know what tabs
23720 # meant for the old script and how many spaces were used for one
23721 # indentation level in the given input script. For example it may have
23722 # been previously formatted with -i=7 -et=3. But we can at least try to
23723 # make sure that perltidy guesses correctly if it is applied repeatedly to
23724 # a block of code within an editor, so that the block stays at the same
23725 # level when perltidy is applied repeatedly.
23727 # USES GLOBAL VARIABLES: $tokenizer_self
23730 # find leading tabs, spaces, and any statement label
23732 if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
23734 # If there are leading tabs, we use the tab scheme for this run, if
23735 # any, so that the code will remain stable when editing.
23736 if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
23738 if ($2) { $spaces += length($2) }
23740 # correct for outdented labels
23741 if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
23742 $spaces += $tokenizer_self->{_continuation_indentation};
23746 # compute indentation using the value of -i for this run.
23747 # If -i=0 is used for this run (which is possible) it doesn't matter
23748 # what we do here but we'll guess that the old run used 4 spaces per level.
23749 my $indent_columns = $tokenizer_self->{_indent_columns};
23750 $indent_columns = 4 if ( !$indent_columns );
23751 $level = int( $spaces / $indent_columns );
23755 # This is a currently unused debug routine
23756 sub dump_functions {
23760 foreach $pkg ( keys %is_user_function ) {
23761 print $fh "\nnon-constant subs in package $pkg\n";
23763 foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
23765 if ( $is_block_list_function{$pkg}{$sub} ) {
23766 $msg = 'block_list';
23769 if ( $is_block_function{$pkg}{$sub} ) {
23772 print $fh "$sub $msg\n";
23776 foreach $pkg ( keys %is_constant ) {
23777 print $fh "\nconstants and constant subs in package $pkg\n";
23779 foreach $sub ( keys %{ $is_constant{$pkg} } ) {
23780 print $fh "$sub\n";
23787 # count number of 1's in a string of 1's and 0's
23788 # example: ones_count("010101010101") gives 6
23789 return ( my $cis = $_[0] ) =~ tr/1/0/;
23792 sub prepare_for_a_new_file {
23794 # previous tokens needed to determine what to expect next
23795 $last_nonblank_token = ';'; # the only possible starting state which
23796 $last_nonblank_type = ';'; # will make a leading brace a code block
23797 $last_nonblank_block_type = '';
23799 # scalars for remembering statement types across multiple lines
23800 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
23801 $in_attribute_list = 0;
23803 # scalars for remembering where we are in the file
23804 $current_package = "main";
23805 $context = UNKNOWN_CONTEXT;
23807 # hashes used to remember function information
23808 %is_constant = (); # user-defined constants
23809 %is_user_function = (); # user-defined functions
23810 %user_function_prototype = (); # their prototypes
23811 %is_block_function = ();
23812 %is_block_list_function = ();
23813 %saw_function_definition = ();
23815 # variables used to track depths of various containers
23816 # and report nesting errors
23819 $square_bracket_depth = 0;
23820 @current_depth[ 0 .. $#closing_brace_names ] =
23821 (0) x scalar @closing_brace_names;
23824 @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
23825 ( 0 .. $#closing_brace_names );
23826 @current_sequence_number = ();
23827 $paren_type[$paren_depth] = '';
23828 $paren_semicolon_count[$paren_depth] = 0;
23829 $paren_structural_type[$brace_depth] = '';
23830 $brace_type[$brace_depth] = ';'; # identify opening brace as code block
23831 $brace_structural_type[$brace_depth] = '';
23832 $brace_context[$brace_depth] = UNKNOWN_CONTEXT;
23833 $brace_package[$paren_depth] = $current_package;
23834 $square_bracket_type[$square_bracket_depth] = '';
23835 $square_bracket_structural_type[$square_bracket_depth] = '';
23837 initialize_tokenizer_state();
23840 { # begin tokenize_this_line
23842 use constant BRACE => 0;
23843 use constant SQUARE_BRACKET => 1;
23844 use constant PAREN => 2;
23845 use constant QUESTION_COLON => 3;
23847 # TV1: scalars for processing one LINE.
23848 # Re-initialized on each entry to sub tokenize_this_line.
23850 $block_type, $container_type, $expecting,
23851 $i, $i_tok, $input_line,
23852 $input_line_number, $last_nonblank_i, $max_token_index,
23853 $next_tok, $next_type, $peeked_ahead,
23854 $prototype, $rhere_target_list, $rtoken_map,
23855 $rtoken_type, $rtokens, $tok,
23856 $type, $type_sequence, $indent_flag,
23859 # TV2: refs to ARRAYS for processing one LINE
23860 # Re-initialized on each call.
23861 my $routput_token_list = []; # stack of output token indexes
23862 my $routput_token_type = []; # token types
23863 my $routput_block_type = []; # types of code block
23864 my $routput_container_type = []; # paren types, such as if, elsif, ..
23865 my $routput_type_sequence = []; # nesting sequential number
23866 my $routput_indent_flag = []; #
23868 # TV3: SCALARS for quote variables. These are initialized with a
23869 # subroutine call and continually updated as lines are processed.
23870 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23871 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
23873 # TV4: SCALARS for multi-line identifiers and
23874 # statements. These are initialized with a subroutine call
23875 # and continually updated as lines are processed.
23876 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
23878 # TV5: SCALARS for tracking indentation level.
23879 # Initialized once and continually updated as lines are
23882 $nesting_token_string, $nesting_type_string,
23883 $nesting_block_string, $nesting_block_flag,
23884 $nesting_list_string, $nesting_list_flag,
23885 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23886 $in_statement_continuation, $level_in_tokenizer,
23887 $slevel_in_tokenizer, $rslevel_stack,
23890 # TV6: SCALARS for remembering several previous
23891 # tokens. Initialized once and continually updated as
23892 # lines are processed.
23894 $last_nonblank_container_type, $last_nonblank_type_sequence,
23895 $last_last_nonblank_token, $last_last_nonblank_type,
23896 $last_last_nonblank_block_type, $last_last_nonblank_container_type,
23897 $last_last_nonblank_type_sequence, $last_nonblank_prototype,
23900 # ----------------------------------------------------------------
23901 # beginning of tokenizer variable access and manipulation routines
23902 # ----------------------------------------------------------------
23904 sub initialize_tokenizer_state {
23906 # TV1: initialized on each call
23907 # TV2: initialized on each call
23911 $quote_character = "";
23914 $quoted_string_1 = "";
23915 $quoted_string_2 = "";
23916 $allowed_quote_modifiers = "";
23919 $id_scan_state = '';
23922 $indented_if_level = 0;
23925 $nesting_token_string = "";
23926 $nesting_type_string = "";
23927 $nesting_block_string = '1'; # initially in a block
23928 $nesting_block_flag = 1;
23929 $nesting_list_string = '0'; # initially not in a list
23930 $nesting_list_flag = 0; # initially not in a list
23931 $ci_string_in_tokenizer = "";
23932 $continuation_string_in_tokenizer = "0";
23933 $in_statement_continuation = 0;
23934 $level_in_tokenizer = 0;
23935 $slevel_in_tokenizer = 0;
23936 $rslevel_stack = [];
23939 $last_nonblank_container_type = '';
23940 $last_nonblank_type_sequence = '';
23941 $last_last_nonblank_token = ';';
23942 $last_last_nonblank_type = ';';
23943 $last_last_nonblank_block_type = '';
23944 $last_last_nonblank_container_type = '';
23945 $last_last_nonblank_type_sequence = '';
23946 $last_nonblank_prototype = "";
23949 sub save_tokenizer_state {
23952 $block_type, $container_type, $expecting,
23953 $i, $i_tok, $input_line,
23954 $input_line_number, $last_nonblank_i, $max_token_index,
23955 $next_tok, $next_type, $peeked_ahead,
23956 $prototype, $rhere_target_list, $rtoken_map,
23957 $rtoken_type, $rtokens, $tok,
23958 $type, $type_sequence, $indent_flag,
23962 $routput_token_list, $routput_token_type,
23963 $routput_block_type, $routput_container_type,
23964 $routput_type_sequence, $routput_indent_flag,
23968 $in_quote, $quote_type,
23969 $quote_character, $quote_pos,
23970 $quote_depth, $quoted_string_1,
23971 $quoted_string_2, $allowed_quote_modifiers,
23975 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
23978 $nesting_token_string, $nesting_type_string,
23979 $nesting_block_string, $nesting_block_flag,
23980 $nesting_list_string, $nesting_list_flag,
23981 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
23982 $in_statement_continuation, $level_in_tokenizer,
23983 $slevel_in_tokenizer, $rslevel_stack,
23987 $last_nonblank_container_type,
23988 $last_nonblank_type_sequence,
23989 $last_last_nonblank_token,
23990 $last_last_nonblank_type,
23991 $last_last_nonblank_block_type,
23992 $last_last_nonblank_container_type,
23993 $last_last_nonblank_type_sequence,
23994 $last_nonblank_prototype,
23996 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
23999 sub restore_tokenizer_state {
24001 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
24003 $block_type, $container_type, $expecting,
24004 $i, $i_tok, $input_line,
24005 $input_line_number, $last_nonblank_i, $max_token_index,
24006 $next_tok, $next_type, $peeked_ahead,
24007 $prototype, $rhere_target_list, $rtoken_map,
24008 $rtoken_type, $rtokens, $tok,
24009 $type, $type_sequence, $indent_flag,
24013 $routput_token_list, $routput_token_type,
24014 $routput_block_type, $routput_container_type,
24015 $routput_type_sequence, $routput_type_sequence,
24019 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
24020 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
24023 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
24027 $nesting_token_string, $nesting_type_string,
24028 $nesting_block_string, $nesting_block_flag,
24029 $nesting_list_string, $nesting_list_flag,
24030 $ci_string_in_tokenizer, $continuation_string_in_tokenizer,
24031 $in_statement_continuation, $level_in_tokenizer,
24032 $slevel_in_tokenizer, $rslevel_stack,
24036 $last_nonblank_container_type,
24037 $last_nonblank_type_sequence,
24038 $last_last_nonblank_token,
24039 $last_last_nonblank_type,
24040 $last_last_nonblank_block_type,
24041 $last_last_nonblank_container_type,
24042 $last_last_nonblank_type_sequence,
24043 $last_nonblank_prototype,
24047 sub get_indentation_level {
24049 # patch to avoid reporting error if indented if is not terminated
24050 if ($indented_if_level) { return $level_in_tokenizer - 1 }
24051 return $level_in_tokenizer;
24054 sub reset_indentation_level {
24055 $level_in_tokenizer = $_[0];
24056 $slevel_in_tokenizer = $_[0];
24057 push @{$rslevel_stack}, $slevel_in_tokenizer;
24061 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
24064 # ------------------------------------------------------------
24065 # end of tokenizer variable access and manipulation routines
24066 # ------------------------------------------------------------
24068 # ------------------------------------------------------------
24069 # beginning of various scanner interface routines
24070 # ------------------------------------------------------------
24071 sub scan_replacement_text {
24073 # check for here-docs in replacement text invoked by
24074 # a substitution operator with executable modifier 'e'.
24077 # $replacement_text
24079 # $rht = reference to any here-doc targets
24080 my ($replacement_text) = @_;
24083 return undef unless ( $replacement_text =~ /<</ );
24085 write_logfile_entry("scanning replacement text for here-doc targets\n");
24087 # save the logger object for error messages
24088 my $logger_object = $tokenizer_self->{_logger_object};
24090 # localize all package variables
24092 $tokenizer_self, $last_nonblank_token,
24093 $last_nonblank_type, $last_nonblank_block_type,
24094 $statement_type, $in_attribute_list,
24095 $current_package, $context,
24096 %is_constant, %is_user_function,
24097 %user_function_prototype, %is_block_function,
24098 %is_block_list_function, %saw_function_definition,
24099 $brace_depth, $paren_depth,
24100 $square_bracket_depth, @current_depth,
24101 @total_depth, $total_depth,
24102 @nesting_sequence_number, @current_sequence_number,
24103 @paren_type, @paren_semicolon_count,
24104 @paren_structural_type, @brace_type,
24105 @brace_structural_type, @brace_context,
24106 @brace_package, @square_bracket_type,
24107 @square_bracket_structural_type, @depth_array,
24108 @starting_line_of_current_depth, @nested_ternary_flag,
24109 @nested_statement_type,
24112 # save all lexical variables
24113 my $rstate = save_tokenizer_state();
24114 _decrement_count(); # avoid error check for multiple tokenizers
24116 # make a new tokenizer
24118 my $rpending_logfile_message;
24119 my $source_object =
24120 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
24121 $rpending_logfile_message );
24122 my $tokenizer = Perl::Tidy::Tokenizer->new(
24123 source_object => $source_object,
24124 logger_object => $logger_object,
24125 starting_line_number => $input_line_number,
24128 # scan the replacement text
24129 1 while ( $tokenizer->get_line() );
24131 # remove any here doc targets
24133 if ( $tokenizer_self->{_in_here_doc} ) {
24137 $tokenizer_self->{_here_doc_target},
24138 $tokenizer_self->{_here_quote_character}
24140 if ( $tokenizer_self->{_rhere_target_list} ) {
24141 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
24142 $tokenizer_self->{_rhere_target_list} = undef;
24144 $tokenizer_self->{_in_here_doc} = undef;
24147 # now its safe to report errors
24148 $tokenizer->report_tokenization_errors();
24150 # restore all tokenizer lexical variables
24151 restore_tokenizer_state($rstate);
24153 # return the here doc targets
24157 sub scan_bare_identifier {
24158 ( $i, $tok, $type, $prototype ) =
24159 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
24160 $rtoken_map, $max_token_index );
24163 sub scan_identifier {
24164 ( $i, $tok, $type, $id_scan_state, $identifier ) =
24165 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
24166 $max_token_index, $expecting, $paren_type[$paren_depth] );
24170 ( $i, $tok, $type, $id_scan_state ) =
24171 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
24172 $id_scan_state, $max_token_index );
24177 ( $i, $type, $number ) =
24178 scan_number_do( $input_line, $i, $rtoken_map, $type,
24179 $max_token_index );
24183 # a sub to warn if token found where term expected
24184 sub error_if_expecting_TERM {
24185 if ( $expecting == TERM ) {
24186 if ( $really_want_term{$last_nonblank_type} ) {
24187 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
24188 $rtoken_type, $input_line );
24194 # a sub to warn if token found where operator expected
24195 sub error_if_expecting_OPERATOR {
24196 if ( $expecting == OPERATOR ) {
24197 my $thing = defined $_[0] ? $_[0] : $tok;
24198 unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
24199 $rtoken_map, $rtoken_type, $input_line );
24200 if ( $i_tok == 0 ) {
24201 interrupt_logfile();
24202 warning("Missing ';' above?\n");
24209 # ------------------------------------------------------------
24210 # end scanner interfaces
24211 # ------------------------------------------------------------
24213 my %is_for_foreach;
24214 @_ = qw(for foreach);
24215 @is_for_foreach{@_} = (1) x scalar(@_);
24219 @is_my_our{@_} = (1) x scalar(@_);
24221 # These keywords may introduce blocks after parenthesized expressions,
24223 # keyword ( .... ) { BLOCK }
24224 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
24225 my %is_blocktype_with_paren;
24227 qw(if elsif unless while until for foreach switch case given when catch);
24228 @is_blocktype_with_paren{@_} = (1) x scalar(@_);
24230 # ------------------------------------------------------------
24231 # begin hash of code for handling most token types
24232 # ------------------------------------------------------------
24233 my $tokenization_code = {
24235 # no special code for these types yet, but syntax checks
24270 error_if_expecting_TERM()
24271 if ( $expecting == TERM );
24274 error_if_expecting_TERM()
24275 if ( $expecting == TERM );
24279 # start looking for a scalar
24280 error_if_expecting_OPERATOR("Scalar")
24281 if ( $expecting == OPERATOR );
24284 if ( $identifier eq '$^W' ) {
24285 $tokenizer_self->{_saw_perl_dash_w} = 1;
24288 # Check for identifier in indirect object slot
24289 # (vorboard.pl, sort.t). Something like:
24290 # /^(print|printf|sort|exec|system)$/
24292 $is_indirect_object_taker{$last_nonblank_token}
24294 || ( ( $last_nonblank_token eq '(' )
24295 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
24296 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
24305 $paren_semicolon_count[$paren_depth] = 0;
24307 $container_type = $want_paren;
24310 elsif ( $statement_type =~ /^sub/ ) {
24311 $container_type = $statement_type;
24314 $container_type = $last_nonblank_token;
24316 # We can check for a syntax error here of unexpected '(',
24317 # but this is going to get messy...
24319 $expecting == OPERATOR
24321 # be sure this is not a method call of the form
24322 # &method(...), $method->(..), &{method}(...),
24323 # $ref[2](list) is ok & short for $ref[2]->(list)
24324 # NOTE: at present, braces in something like &{ xxx }
24325 # are not marked as a block, we might have a method call
24326 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
24331 # ref: camel 3 p 703.
24332 if ( $last_last_nonblank_token eq 'do' ) {
24334 "do SUBROUTINE is deprecated; consider & or -> notation\n"
24339 # if this is an empty list, (), then it is not an
24340 # error; for example, we might have a constant pi and
24341 # invoke it with pi() or just pi;
24342 my ( $next_nonblank_token, $i_next ) =
24343 find_next_nonblank_token( $i, $rtokens,
24344 $max_token_index );
24345 if ( $next_nonblank_token ne ')' ) {
24347 error_if_expecting_OPERATOR('(');
24349 if ( $last_nonblank_type eq 'C' ) {
24351 "$last_nonblank_token has a void prototype\n";
24353 elsif ( $last_nonblank_type eq 'i' ) {
24355 && $last_nonblank_token =~ /^\$/ )
24358 "Do you mean '$last_nonblank_token->(' ?\n";
24362 interrupt_logfile();
24366 } ## end if ( $next_nonblank_token...
24367 } ## end else [ if ( $last_last_nonblank_token...
24368 } ## end if ( $expecting == OPERATOR...
24370 $paren_type[$paren_depth] = $container_type;
24371 ( $type_sequence, $indent_flag ) =
24372 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24374 # propagate types down through nested parens
24375 # for example: the second paren in 'if ((' would be structural
24376 # since the first is.
24378 if ( $last_nonblank_token eq '(' ) {
24379 $type = $last_nonblank_type;
24382 # We exclude parens as structural after a ',' because it
24383 # causes subtle problems with continuation indentation for
24384 # something like this, where the first 'or' will not get
24389 # ( not defined $check )
24391 # or $check eq "new"
24392 # or $check eq "old",
24395 # Likewise, we exclude parens where a statement can start
24396 # because of problems with continuation indentation, like
24399 # ($firstline =~ /^#\!.*perl/)
24400 # and (print $File::Find::name, "\n")
24403 # (ref($usage_fref) =~ /CODE/)
24405 # : (&blast_usage, &blast_params, &blast_general_params);
24411 if ( $last_nonblank_type eq ')' ) {
24413 "Syntax error? found token '$last_nonblank_type' then '('\n"
24416 $paren_structural_type[$paren_depth] = $type;
24420 ( $type_sequence, $indent_flag ) =
24421 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24423 if ( $paren_structural_type[$paren_depth] eq '{' ) {
24427 $container_type = $paren_type[$paren_depth];
24429 # /^(for|foreach)$/
24430 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
24431 my $num_sc = $paren_semicolon_count[$paren_depth];
24432 if ( $num_sc > 0 && $num_sc != 2 ) {
24433 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
24437 if ( $paren_depth > 0 ) { $paren_depth-- }
24440 if ( $last_nonblank_type eq ',' ) {
24441 complain("Repeated ','s \n");
24444 # patch for operator_expected: note if we are in the list (use.t)
24445 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24446 ## FIXME: need to move this elsewhere, perhaps check after a '('
24447 ## elsif ($last_nonblank_token eq '(') {
24448 ## warning("Leading ','s illegal in some versions of perl\n");
24452 $context = UNKNOWN_CONTEXT;
24453 $statement_type = '';
24456 # /^(for|foreach)$/
24457 if ( $is_for_foreach{ $paren_type[$paren_depth] } )
24458 { # mark ; in for loop
24460 # Be careful: we do not want a semicolon such as the
24461 # following to be included:
24463 # for (sort {strcoll($a,$b);} keys %investments) {
24465 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
24466 && $square_bracket_depth ==
24467 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
24471 $paren_semicolon_count[$paren_depth]++;
24477 error_if_expecting_OPERATOR("String")
24478 if ( $expecting == OPERATOR );
24481 $allowed_quote_modifiers = "";
24484 error_if_expecting_OPERATOR("String")
24485 if ( $expecting == OPERATOR );
24488 $allowed_quote_modifiers = "";
24491 error_if_expecting_OPERATOR("String")
24492 if ( $expecting == OPERATOR );
24495 $allowed_quote_modifiers = "";
24500 if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
24502 ( $is_pattern, $msg ) =
24503 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
24504 $max_token_index );
24507 write_diagnostics("DIVIDE:$msg\n");
24508 write_logfile_entry($msg);
24511 else { $is_pattern = ( $expecting == TERM ) }
24516 $allowed_quote_modifiers = '[msixpodualngc]';
24518 else { # not a pattern; check for a /= token
24520 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
24526 #DEBUG - collecting info on what tokens follow a divide
24527 # for development of guessing algorithm
24528 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
24529 # #write_diagnostics( "DIVIDE? $input_line\n" );
24535 # if we just saw a ')', we will label this block with
24536 # its type. We need to do this to allow sub
24537 # code_block_type to determine if this brace starts a
24538 # code block or anonymous hash. (The type of a paren
24539 # pair is the preceding token, such as 'if', 'else',
24541 $container_type = "";
24543 # ATTRS: for a '{' following an attribute list, reset
24544 # things to look like we just saw the sub name
24545 if ( $statement_type =~ /^sub/ ) {
24546 $last_nonblank_token = $statement_type;
24547 $last_nonblank_type = 'i';
24548 $statement_type = "";
24551 # patch for SWITCH/CASE: hide these keywords from an immediately
24552 # following opening brace
24553 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
24554 && $statement_type eq $last_nonblank_token )
24556 $last_nonblank_token = ";";
24559 elsif ( $last_nonblank_token eq ')' ) {
24560 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
24562 # defensive move in case of a nesting error (pbug.t)
24563 # in which this ')' had no previous '('
24564 # this nesting error will have been caught
24565 if ( !defined($last_nonblank_token) ) {
24566 $last_nonblank_token = 'if';
24569 # check for syntax error here;
24570 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
24571 if ( $tokenizer_self->{'_extended_syntax'} ) {
24573 # we append a trailing () to mark this as an unknown
24574 # block type. This allows perltidy to format some
24575 # common extensions of perl syntax.
24576 # This is used by sub code_block_type
24577 $last_nonblank_token .= '()';
24581 join( ' ', sort keys %is_blocktype_with_paren );
24583 "syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
24589 # patch for paren-less for/foreach glitch, part 2.
24590 # see note below under 'qw'
24591 elsif ($last_nonblank_token eq 'qw'
24592 && $is_for_foreach{$want_paren} )
24594 $last_nonblank_token = $want_paren;
24595 if ( $last_last_nonblank_token eq $want_paren ) {
24597 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
24604 # now identify which of the three possible types of
24605 # curly braces we have: hash index container, anonymous
24606 # hash reference, or code block.
24608 # non-structural (hash index) curly brace pair
24609 # get marked 'L' and 'R'
24610 if ( is_non_structural_brace() ) {
24613 # patch for SWITCH/CASE:
24614 # allow paren-less identifier after 'when'
24615 # if the brace is preceded by a space
24616 if ( $statement_type eq 'when'
24617 && $last_nonblank_type eq 'i'
24618 && $last_last_nonblank_type eq 'k'
24619 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
24622 $block_type = $statement_type;
24626 # code and anonymous hash have the same type, '{', but are
24627 # distinguished by 'block_type',
24628 # which will be blank for an anonymous hash
24631 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
24632 $max_token_index );
24634 # patch to promote bareword type to function taking block
24636 && $last_nonblank_type eq 'w'
24637 && $last_nonblank_i >= 0 )
24639 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
24640 $routput_token_type->[$last_nonblank_i] = 'G';
24644 # patch for SWITCH/CASE: if we find a stray opening block brace
24645 # where we might accept a 'case' or 'when' block, then take it
24646 if ( $statement_type eq 'case'
24647 || $statement_type eq 'when' )
24649 if ( !$block_type || $block_type eq '}' ) {
24650 $block_type = $statement_type;
24655 $brace_type[ ++$brace_depth ] = $block_type;
24656 $brace_package[$brace_depth] = $current_package;
24657 $brace_structural_type[$brace_depth] = $type;
24658 $brace_context[$brace_depth] = $context;
24659 ( $type_sequence, $indent_flag ) =
24660 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24663 $block_type = $brace_type[$brace_depth];
24664 if ($block_type) { $statement_type = '' }
24665 if ( defined( $brace_package[$brace_depth] ) ) {
24666 $current_package = $brace_package[$brace_depth];
24669 # can happen on brace error (caught elsewhere)
24672 ( $type_sequence, $indent_flag ) =
24673 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24675 if ( $brace_structural_type[$brace_depth] eq 'L' ) {
24679 # propagate type information for 'do' and 'eval' blocks, and also
24680 # for smartmatch operator. This is necessary to enable us to know
24681 # if an operator or term is expected next.
24682 if ( $is_block_operator{$block_type} ) {
24683 $tok = $block_type;
24686 $context = $brace_context[$brace_depth];
24687 if ( $brace_depth > 0 ) { $brace_depth--; }
24689 '&' => sub { # maybe sub call? start looking
24691 # We have to check for sub call unless we are sure we
24692 # are expecting an operator. This example from s2p
24693 # got mistaken as a q operator in an early version:
24694 # print BODY &q(<<'EOT');
24695 if ( $expecting != OPERATOR ) {
24697 # But only look for a sub call if we are expecting a term or
24698 # if there is no existing space after the &.
24699 # For example we probably don't want & as sub call here:
24700 # Fcntl::S_IRUSR & $mode;
24701 if ( $expecting == TERM || $next_type ne 'b' ) {
24708 '<' => sub { # angle operator or less than?
24710 if ( $expecting != OPERATOR ) {
24712 find_angle_operator_termination( $input_line, $i, $rtoken_map,
24713 $expecting, $max_token_index );
24715 if ( $type eq '<' && $expecting == TERM ) {
24716 error_if_expecting_TERM();
24717 interrupt_logfile();
24718 warning("Unterminated <> operator?\n");
24725 '?' => sub { # ?: conditional or starting pattern?
24729 if ( $expecting == UNKNOWN ) {
24732 ( $is_pattern, $msg ) =
24733 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
24734 $max_token_index );
24736 if ($msg) { write_logfile_entry($msg) }
24738 else { $is_pattern = ( $expecting == TERM ) }
24743 $allowed_quote_modifiers = '[msixpodualngc]';
24746 ( $type_sequence, $indent_flag ) =
24747 increase_nesting_depth( QUESTION_COLON,
24748 $$rtoken_map[$i_tok] );
24751 '*' => sub { # typeglob, or multiply?
24753 if ( $expecting == TERM ) {
24758 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24763 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
24767 if ( $$rtokens[ $i + 1 ] eq '=' ) {
24775 '.' => sub { # what kind of . ?
24777 if ( $expecting != OPERATOR ) {
24779 if ( $type eq '.' ) {
24780 error_if_expecting_TERM()
24781 if ( $expecting == TERM );
24789 # if this is the first nonblank character, call it a label
24790 # since perl seems to just swallow it
24791 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
24795 # ATTRS: check for a ':' which introduces an attribute list
24796 # (this might eventually get its own token type)
24797 elsif ( $statement_type =~ /^sub/ ) {
24799 $in_attribute_list = 1;
24802 # check for scalar attribute, such as
24803 # my $foo : shared = 1;
24804 elsif ($is_my_our{$statement_type}
24805 && $current_depth[QUESTION_COLON] == 0 )
24808 $in_attribute_list = 1;
24811 # otherwise, it should be part of a ?/: operator
24813 ( $type_sequence, $indent_flag ) =
24814 decrease_nesting_depth( QUESTION_COLON,
24815 $$rtoken_map[$i_tok] );
24816 if ( $last_nonblank_token eq '?' ) {
24817 warning("Syntax error near ? :\n");
24821 '+' => sub { # what kind of plus?
24823 if ( $expecting == TERM ) {
24824 my $number = scan_number();
24826 # unary plus is safest assumption if not a number
24827 if ( !defined($number) ) { $type = 'p'; }
24829 elsif ( $expecting == OPERATOR ) {
24832 if ( $next_type eq 'w' ) { $type = 'p' }
24837 error_if_expecting_OPERATOR("Array")
24838 if ( $expecting == OPERATOR );
24841 '%' => sub { # hash or modulo?
24843 # first guess is hash if no following blank
24844 if ( $expecting == UNKNOWN ) {
24845 if ( $next_type ne 'b' ) { $expecting = TERM }
24847 if ( $expecting == TERM ) {
24852 $square_bracket_type[ ++$square_bracket_depth ] =
24853 $last_nonblank_token;
24854 ( $type_sequence, $indent_flag ) =
24855 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24857 # It may seem odd, but structural square brackets have
24858 # type '{' and '}'. This simplifies the indentation logic.
24859 if ( !is_non_structural_brace() ) {
24862 $square_bracket_structural_type[$square_bracket_depth] = $type;
24865 ( $type_sequence, $indent_flag ) =
24866 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24868 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
24873 # propagate type information for smartmatch operator. This is
24874 # necessary to enable us to know if an operator or term is expected
24876 if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
24877 $tok = $square_bracket_type[$square_bracket_depth];
24880 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
24882 '-' => sub { # what kind of minus?
24884 if ( ( $expecting != OPERATOR )
24885 && $is_file_test_operator{$next_tok} )
24887 my ( $next_nonblank_token, $i_next ) =
24888 find_next_nonblank_token( $i + 1, $rtokens,
24889 $max_token_index );
24891 # check for a quoted word like "-w=>xx";
24892 # it is sufficient to just check for a following '='
24893 if ( $next_nonblank_token eq '=' ) {
24902 elsif ( $expecting == TERM ) {
24903 my $number = scan_number();
24905 # maybe part of bareword token? unary is safest
24906 if ( !defined($number) ) { $type = 'm'; }
24909 elsif ( $expecting == OPERATOR ) {
24913 if ( $next_type eq 'w' ) {
24921 # check for special variables like ${^WARNING_BITS}
24922 if ( $expecting == TERM ) {
24924 # FIXME: this should work but will not catch errors
24925 # because we also have to be sure that previous token is
24926 # a type character ($,@,%).
24927 if ( $last_nonblank_token eq '{'
24928 && ( $next_tok =~ /^[A-Za-z_]/ ) )
24931 if ( $next_tok eq 'W' ) {
24932 $tokenizer_self->{_saw_perl_dash_w} = 1;
24934 $tok = $tok . $next_tok;
24940 unless ( error_if_expecting_TERM() ) {
24942 # Something like this is valid but strange:
24944 complain("The '^' seems unusual here\n");
24950 '::' => sub { # probably a sub call
24951 scan_bare_identifier();
24953 '<<' => sub { # maybe a here-doc?
24955 unless ( $i < $max_token_index )
24956 ; # here-doc not possible if end of line
24958 if ( $expecting != OPERATOR ) {
24959 my ( $found_target, $here_doc_target, $here_quote_character,
24962 $found_target, $here_doc_target, $here_quote_character, $i,
24965 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
24966 $max_token_index );
24968 if ($found_target) {
24969 push @{$rhere_target_list},
24970 [ $here_doc_target, $here_quote_character ];
24972 if ( length($here_doc_target) > 80 ) {
24973 my $truncated = substr( $here_doc_target, 0, 80 );
24974 complain("Long here-target: '$truncated' ...\n");
24976 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
24978 "Unconventional here-target: '$here_doc_target'\n"
24982 elsif ( $expecting == TERM ) {
24983 unless ($saw_error) {
24985 # shouldn't happen..
24986 warning("Program bug; didn't find here doc target\n");
24987 report_definite_bug();
24996 # if -> points to a bare word, we must scan for an identifier,
24997 # otherwise something like ->y would look like the y operator
25001 # type = 'pp' for pre-increment, '++' for post-increment
25003 if ( $expecting == TERM ) { $type = 'pp' }
25004 elsif ( $expecting == UNKNOWN ) {
25005 my ( $next_nonblank_token, $i_next ) =
25006 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25007 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
25012 if ( $last_nonblank_type eq $tok ) {
25013 complain("Repeated '=>'s \n");
25016 # patch for operator_expected: note if we are in the list (use.t)
25017 # TODO: make version numbers a new token type
25018 if ( $statement_type eq 'use' ) { $statement_type = '_use' }
25021 # type = 'mm' for pre-decrement, '--' for post-decrement
25024 if ( $expecting == TERM ) { $type = 'mm' }
25025 elsif ( $expecting == UNKNOWN ) {
25026 my ( $next_nonblank_token, $i_next ) =
25027 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25028 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
25033 error_if_expecting_TERM()
25034 if ( $expecting == TERM );
25038 error_if_expecting_TERM()
25039 if ( $expecting == TERM );
25043 error_if_expecting_TERM()
25044 if ( $expecting == TERM );
25048 # ------------------------------------------------------------
25049 # end hash of code for handling individual token types
25050 # ------------------------------------------------------------
25052 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
25054 # These block types terminate statements and do not need a trailing
25056 # patched for SWITCH/CASE/
25057 my %is_zero_continuation_block_type;
25058 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
25059 if elsif else unless while until for foreach switch case given when);
25060 @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
25062 my %is_not_zero_continuation_block_type;
25063 @_ = qw(sort grep map do eval);
25064 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
25066 my %is_logical_container;
25067 @_ = qw(if elsif unless while and or err not && ! || for foreach);
25068 @is_logical_container{@_} = (1) x scalar(@_);
25070 my %is_binary_type;
25072 @is_binary_type{@_} = (1) x scalar(@_);
25074 my %is_binary_keyword;
25075 @_ = qw(and or err eq ne cmp);
25076 @is_binary_keyword{@_} = (1) x scalar(@_);
25078 # 'L' is token for opening { at hash key
25079 my %is_opening_type;
25080 @_ = qw" L { ( [ ";
25081 @is_opening_type{@_} = (1) x scalar(@_);
25083 # 'R' is token for closing } at hash key
25084 my %is_closing_type;
25085 @_ = qw" R } ) ] ";
25086 @is_closing_type{@_} = (1) x scalar(@_);
25088 my %is_redo_last_next_goto;
25089 @_ = qw(redo last next goto);
25090 @is_redo_last_next_goto{@_} = (1) x scalar(@_);
25092 my %is_use_require;
25093 @_ = qw(use require);
25094 @is_use_require{@_} = (1) x scalar(@_);
25096 my %is_sub_package;
25097 @_ = qw(sub package);
25098 @is_sub_package{@_} = (1) x scalar(@_);
25100 # This hash holds the hash key in $tokenizer_self for these keywords:
25101 my %is_format_END_DATA = (
25102 'format' => '_in_format',
25103 '__END__' => '_in_end',
25104 '__DATA__' => '_in_data',
25107 # original ref: camel 3 p 147,
25108 # but perl may accept undocumented flags
25109 # perl 5.10 adds 'p' (preserve)
25110 # Perl version 5.22 added 'n'
25111 # From http://perldoc.perl.org/perlop.html we have
25112 # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
25113 # s/PATTERN/REPLACEMENT/msixpodualngcer
25114 # y/SEARCHLIST/REPLACEMENTLIST/cdsr
25115 # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
25116 # qr/STRING/msixpodualn
25117 my %quote_modifiers = (
25118 's' => '[msixpodualngcer]',
25121 'm' => '[msixpodualngc]',
25122 'qr' => '[msixpodualn]',
25129 # table showing how many quoted things to look for after quote operator..
25130 # s, y, tr have 2 (pattern and replacement)
25131 # others have 1 (pattern only)
25132 my %quote_items = (
25144 sub tokenize_this_line {
25146 # This routine breaks a line of perl code into tokens which are of use in
25147 # indentation and reformatting. One of my goals has been to define tokens
25148 # such that a newline may be inserted between any pair of tokens without
25149 # changing or invalidating the program. This version comes close to this,
25150 # although there are necessarily a few exceptions which must be caught by
25151 # the formatter. Many of these involve the treatment of bare words.
25153 # The tokens and their types are returned in arrays. See previous
25154 # routine for their names.
25156 # See also the array "valid_token_types" in the BEGIN section for an
25159 # To simplify things, token types are either a single character, or they
25160 # are identical to the tokens themselves.
25162 # As a debugging aid, the -D flag creates a file containing a side-by-side
25163 # comparison of the input string and its tokenization for each line of a file.
25164 # This is an invaluable debugging aid.
25166 # In addition to tokens, and some associated quantities, the tokenizer
25167 # also returns flags indication any special line types. These include
25168 # quotes, here_docs, formats.
25170 # -----------------------------------------------------------------------
25172 # How to add NEW_TOKENS:
25174 # New token types will undoubtedly be needed in the future both to keep up
25175 # with changes in perl and to help adapt the tokenizer to other applications.
25177 # Here are some notes on the minimal steps. I wrote these notes while
25178 # adding the 'v' token type for v-strings, which are things like version
25179 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You
25180 # can use your editor to search for the string "NEW_TOKENS" to find the
25181 # appropriate sections to change):
25183 # *. Try to talk somebody else into doing it! If not, ..
25185 # *. Make a backup of your current version in case things don't work out!
25187 # *. Think of a new, unused character for the token type, and add to
25188 # the array @valid_token_types in the BEGIN section of this package.
25189 # For example, I used 'v' for v-strings.
25191 # *. Implement coding to recognize the $type of the token in this routine.
25192 # This is the hardest part, and is best done by imitating or modifying
25193 # some of the existing coding. For example, to recognize v-strings, I
25194 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
25195 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
25197 # *. Update sub operator_expected. This update is critically important but
25198 # the coding is trivial. Look at the comments in that routine for help.
25199 # For v-strings, which should behave like numbers, I just added 'v' to the
25200 # regex used to handle numbers and strings (types 'n' and 'Q').
25202 # *. Implement a 'bond strength' rule in sub set_bond_strengths in
25203 # Perl::Tidy::Formatter for breaking lines around this token type. You can
25204 # skip this step and take the default at first, then adjust later to get
25205 # desired results. For adding type 'v', I looked at sub bond_strength and
25206 # saw that number type 'n' was using default strengths, so I didn't do
25207 # anything. I may tune it up someday if I don't like the way line
25208 # breaks with v-strings look.
25210 # *. Implement a 'whitespace' rule in sub set_white_space_flag in
25211 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
25212 # and saw that type 'n' used spaces on both sides, so I just added 'v'
25213 # to the array @spaces_both_sides.
25215 # *. Update HtmlWriter package so that users can colorize the token as
25216 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
25217 # that package. For v-strings, I initially chose to use a default color
25218 # equal to the default for numbers, but it might be nice to change that
25221 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
25223 # *. Run lots and lots of debug tests. Start with special files designed
25224 # to test the new token type. Run with the -D flag to create a .DEBUG
25225 # file which shows the tokenization. When these work ok, test as many old
25226 # scripts as possible. Start with all of the '.t' files in the 'test'
25227 # directory of the distribution file. Compare .tdy output with previous
25228 # version and updated version to see the differences. Then include as
25229 # many more files as possible. My own technique has been to collect a huge
25230 # number of perl scripts (thousands!) into one directory and run perltidy
25231 # *, then run diff between the output of the previous version and the
25234 # *. For another example, search for the smartmatch operator '~~'
25235 # with your editor to see where updates were made for it.
25237 # -----------------------------------------------------------------------
25239 my $line_of_tokens = shift;
25240 my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
25242 # patch while coding change is underway
25243 # make callers private data to allow access
25244 # $tokenizer_self = $caller_tokenizer_self;
25246 # extract line number for use in error messages
25247 $input_line_number = $line_of_tokens->{_line_number};
25249 # reinitialize for multi-line quote
25250 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
25252 # check for pod documentation
25253 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
25255 # must not be in multi-line quote
25256 # and must not be in an equation
25257 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
25259 $tokenizer_self->{_in_pod} = 1;
25264 $input_line = $untrimmed_input_line;
25268 # trim start of this line unless we are continuing a quoted line
25269 # do not trim end because we might end in a quote (test: deken4.pl)
25270 # Perl::Tidy::Formatter will delete needless trailing blanks
25271 unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
25272 $input_line =~ s/^\s*//; # trim left end
25275 # update the copy of the line for use in error messages
25276 # This must be exactly what we give the pre_tokenizer
25277 $tokenizer_self->{_line_text} = $input_line;
25279 # re-initialize for the main loop
25280 $routput_token_list = []; # stack of output token indexes
25281 $routput_token_type = []; # token types
25282 $routput_block_type = []; # types of code block
25283 $routput_container_type = []; # paren types, such as if, elsif, ..
25284 $routput_type_sequence = []; # nesting sequential number
25286 $rhere_target_list = [];
25288 $tok = $last_nonblank_token;
25289 $type = $last_nonblank_type;
25290 $prototype = $last_nonblank_prototype;
25291 $last_nonblank_i = -1;
25292 $block_type = $last_nonblank_block_type;
25293 $container_type = $last_nonblank_container_type;
25294 $type_sequence = $last_nonblank_type_sequence;
25298 # tokenization is done in two stages..
25299 # stage 1 is a very simple pre-tokenization
25300 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
25302 # a little optimization for a full-line comment
25303 if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
25304 $max_tokens_wanted = 1 # no use tokenizing a comment
25307 # start by breaking the line into pre-tokens
25308 ( $rtokens, $rtoken_map, $rtoken_type ) =
25309 pre_tokenize( $input_line, $max_tokens_wanted );
25311 $max_token_index = scalar(@$rtokens) - 1;
25312 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
25313 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
25314 push( @$rtoken_type, 'b', 'b', 'b' );
25316 # initialize for main loop
25317 for $i ( 0 .. $max_token_index + 3 ) {
25318 $routput_token_type->[$i] = "";
25319 $routput_block_type->[$i] = "";
25320 $routput_container_type->[$i] = "";
25321 $routput_type_sequence->[$i] = "";
25322 $routput_indent_flag->[$i] = 0;
25327 # ------------------------------------------------------------
25328 # begin main tokenization loop
25329 # ------------------------------------------------------------
25331 # we are looking at each pre-token of one line and combining them
25333 while ( ++$i <= $max_token_index ) {
25335 if ($in_quote) { # continue looking for end of a quote
25336 $type = $quote_type;
25338 unless ( @{$routput_token_list} )
25339 { # initialize if continuation line
25340 push( @{$routput_token_list}, $i );
25341 $routput_token_type->[$i] = $type;
25344 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
25346 # scan for the end of the quote or pattern
25348 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25349 $quoted_string_1, $quoted_string_2
25352 $i, $in_quote, $quote_character,
25353 $quote_pos, $quote_depth, $quoted_string_1,
25354 $quoted_string_2, $rtokens, $rtoken_map,
25358 # all done if we didn't find it
25359 last if ($in_quote);
25361 # save pattern and replacement text for rescanning
25362 my $qs1 = $quoted_string_1;
25363 my $qs2 = $quoted_string_2;
25365 # re-initialize for next search
25366 $quote_character = '';
25369 $quoted_string_1 = "";
25370 $quoted_string_2 = "";
25371 last if ( ++$i > $max_token_index );
25373 # look for any modifiers
25374 if ($allowed_quote_modifiers) {
25376 # check for exact quote modifiers
25377 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
25378 my $str = $$rtokens[$i];
25379 my $saw_modifier_e;
25380 while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
25381 my $pos = pos($str);
25382 my $char = substr( $str, $pos - 1, 1 );
25383 $saw_modifier_e ||= ( $char eq 'e' );
25386 # For an 'e' quote modifier we must scan the replacement
25387 # text for here-doc targets.
25388 if ($saw_modifier_e) {
25390 my $rht = scan_replacement_text($qs1);
25392 # Change type from 'Q' to 'h' for quotes with
25393 # here-doc targets so that the formatter (see sub
25394 # print_line_of_tokens) will not make any line
25395 # breaks after this point.
25397 push @{$rhere_target_list}, @{$rht};
25399 if ( $i_tok < 0 ) {
25400 my $ilast = $routput_token_list->[-1];
25401 $routput_token_type->[$ilast] = $type;
25406 if ( defined( pos($str) ) ) {
25409 if ( pos($str) == length($str) ) {
25410 last if ( ++$i > $max_token_index );
25413 # Looks like a joined quote modifier
25414 # and keyword, maybe something like
25415 # s/xxx/yyy/gefor @k=...
25416 # Example is "galgen.pl". Would have to split
25417 # the word and insert a new token in the
25418 # pre-token list. This is so rare that I haven't
25419 # done it. Will just issue a warning citation.
25421 # This error might also be triggered if my quote
25422 # modifier characters are incomplete
25426 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
25427 Please put a space between quote modifiers and trailing keywords.
25430 # print "token $$rtokens[$i]\n";
25431 # my $num = length($str) - pos($str);
25432 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
25433 # print "continuing with new token $$rtokens[$i]\n";
25435 # skipping past this token does least damage
25436 last if ( ++$i > $max_token_index );
25441 # example file: rokicki4.pl
25442 # This error might also be triggered if my quote
25443 # modifier characters are incomplete
25444 write_logfile_entry(
25445 "Note: found word $str at quote modifier location\n"
25451 $allowed_quote_modifiers = "";
25455 unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
25457 # try to catch some common errors
25458 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
25460 if ( $last_nonblank_token eq 'eq' ) {
25461 complain("Should 'eq' be '==' here ?\n");
25463 elsif ( $last_nonblank_token eq 'ne' ) {
25464 complain("Should 'ne' be '!=' here ?\n");
25468 $last_last_nonblank_token = $last_nonblank_token;
25469 $last_last_nonblank_type = $last_nonblank_type;
25470 $last_last_nonblank_block_type = $last_nonblank_block_type;
25471 $last_last_nonblank_container_type =
25472 $last_nonblank_container_type;
25473 $last_last_nonblank_type_sequence =
25474 $last_nonblank_type_sequence;
25475 $last_nonblank_token = $tok;
25476 $last_nonblank_type = $type;
25477 $last_nonblank_prototype = $prototype;
25478 $last_nonblank_block_type = $block_type;
25479 $last_nonblank_container_type = $container_type;
25480 $last_nonblank_type_sequence = $type_sequence;
25481 $last_nonblank_i = $i_tok;
25484 # store previous token type
25485 if ( $i_tok >= 0 ) {
25486 $routput_token_type->[$i_tok] = $type;
25487 $routput_block_type->[$i_tok] = $block_type;
25488 $routput_container_type->[$i_tok] = $container_type;
25489 $routput_type_sequence->[$i_tok] = $type_sequence;
25490 $routput_indent_flag->[$i_tok] = $indent_flag;
25492 my $pre_tok = $$rtokens[$i]; # get the next pre-token
25493 my $pre_type = $$rtoken_type[$i]; # and type
25495 $type = $pre_type; # to be modified as necessary
25496 $block_type = ""; # blank for all tokens except code block braces
25497 $container_type = ""; # blank for all tokens except some parens
25498 $type_sequence = ""; # blank for all tokens except ?/:
25500 $prototype = ""; # blank for all tokens except user defined subs
25503 # this pre-token will start an output token
25504 push( @{$routput_token_list}, $i_tok );
25506 # continue gathering identifier if necessary
25507 # but do not start on blanks and comments
25508 if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
25510 if ( $id_scan_state =~ /^(sub|package)/ ) {
25517 last if ($id_scan_state);
25518 next if ( ( $i > 0 ) || $type );
25520 # didn't find any token; start over
25525 # handle whitespace tokens..
25526 next if ( $type eq 'b' );
25527 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
25528 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
25530 # Build larger tokens where possible, since we are not in a quote.
25532 # First try to assemble digraphs. The following tokens are
25533 # excluded and handled specially:
25534 # '/=' is excluded because the / might start a pattern.
25535 # 'x=' is excluded since it might be $x=, with $ on previous line
25536 # '**' and *= might be typeglobs of punctuation variables
25537 # I have allowed tokens starting with <, such as <=,
25538 # because I don't think these could be valid angle operators.
25539 # test file: storrs4.pl
25540 my $test_tok = $tok . $$rtokens[ $i + 1 ];
25541 my $combine_ok = $is_digraph{$test_tok};
25543 # check for special cases which cannot be combined
25546 # '//' must be defined_or operator if an operator is expected.
25547 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
25548 # could be migrated here for clarity
25550 # Patch for RT#102371, misparsing a // in the following snippet:
25551 # state $b //= ccc();
25552 # The solution is to always accept the digraph (or trigraph) after
25553 # token type 'Z' (possible file handle). The reason is that
25554 # sub operator_expected gives TERM expected here, which is
25555 # wrong in this case.
25556 if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
25557 my $next_type = $$rtokens[ $i + 1 ];
25559 operator_expected( $prev_type, $tok, $next_type );
25561 # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
25562 $combine_ok = 0 if ( $expecting == TERM );
25568 && ( $test_tok ne '/=' ) # might be pattern
25569 && ( $test_tok ne 'x=' ) # might be $x
25570 && ( $test_tok ne '**' ) # typeglob?
25571 && ( $test_tok ne '*=' ) # typeglob?
25577 # Now try to assemble trigraphs. Note that all possible
25578 # perl trigraphs can be constructed by appending a character
25580 $test_tok = $tok . $$rtokens[ $i + 1 ];
25582 if ( $is_trigraph{$test_tok} ) {
25589 $next_tok = $$rtokens[ $i + 1 ];
25590 $next_type = $$rtoken_type[ $i + 1 ];
25592 TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
25595 $last_nonblank_token, $tok,
25596 $next_tok, $brace_depth,
25597 $brace_type[$brace_depth], $paren_depth,
25598 $paren_type[$paren_depth]
25600 print STDOUT "TOKENIZE:(@debug_list)\n";
25603 # turn off attribute list on first non-blank, non-bareword
25604 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
25606 ###############################################################
25607 # We have the next token, $tok.
25608 # Now we have to examine this token and decide what it is
25609 # and define its $type
25611 # section 1: bare words
25612 ###############################################################
25614 if ( $pre_type eq 'w' ) {
25615 $expecting = operator_expected( $prev_type, $tok, $next_type );
25616 my ( $next_nonblank_token, $i_next ) =
25617 find_next_nonblank_token( $i, $rtokens, $max_token_index );
25619 # ATTRS: handle sub and variable attributes
25620 if ($in_attribute_list) {
25622 # treat bare word followed by open paren like qw(
25623 if ( $next_nonblank_token eq '(' ) {
25624 $in_quote = $quote_items{'q'};
25625 $allowed_quote_modifiers = $quote_modifiers{'q'};
25631 # handle bareword not followed by open paren
25638 # quote a word followed by => operator
25639 if ( $next_nonblank_token eq '=' ) {
25641 if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
25642 if ( $is_constant{$current_package}{$tok} ) {
25645 elsif ( $is_user_function{$current_package}{$tok} ) {
25648 $user_function_prototype{$current_package}{$tok};
25650 elsif ( $tok =~ /^v\d+$/ ) {
25652 report_v_string($tok);
25654 else { $type = 'w' }
25660 # quote a bare word within braces..like xxx->{s}; note that we
25661 # must be sure this is not a structural brace, to avoid
25662 # mistaking {s} in the following for a quoted bare word:
25663 # for(@[){s}bla}BLA}
25664 # Also treat q in something like var{-q} as a bare word, not qoute operator
25666 $next_nonblank_token eq '}'
25668 $last_nonblank_type eq 'L'
25669 || ( $last_nonblank_type eq 'm'
25670 && $last_last_nonblank_type eq 'L' )
25678 # a bare word immediately followed by :: is not a keyword;
25679 # use $tok_kw when testing for keywords to avoid a mistake
25681 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
25686 # handle operator x (now we know it isn't $x=)
25687 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
25688 if ( $tok eq 'x' ) {
25690 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
25700 # FIXME: Patch: mark something like x4 as an integer for now
25701 # It gets fixed downstream. This is easier than
25702 # splitting the pretoken.
25707 elsif ( $tok_kw eq 'CORE::' ) {
25708 $type = $tok = $tok_kw;
25711 elsif ( ( $tok eq 'strict' )
25712 and ( $last_nonblank_token eq 'use' ) )
25714 $tokenizer_self->{_saw_use_strict} = 1;
25715 scan_bare_identifier();
25718 elsif ( ( $tok eq 'warnings' )
25719 and ( $last_nonblank_token eq 'use' ) )
25721 $tokenizer_self->{_saw_perl_dash_w} = 1;
25723 # scan as identifier, so that we pick up something like:
25724 # use warnings::register
25725 scan_bare_identifier();
25729 $tok eq 'AutoLoader'
25730 && $tokenizer_self->{_look_for_autoloader}
25732 $last_nonblank_token eq 'use'
25734 # these regexes are from AutoSplit.pm, which we want
25736 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
25737 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
25741 write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
25742 $tokenizer_self->{_saw_autoloader} = 1;
25743 $tokenizer_self->{_look_for_autoloader} = 0;
25744 scan_bare_identifier();
25748 $tok eq 'SelfLoader'
25749 && $tokenizer_self->{_look_for_selfloader}
25750 && ( $last_nonblank_token eq 'use'
25751 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
25752 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
25755 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
25756 $tokenizer_self->{_saw_selfloader} = 1;
25757 $tokenizer_self->{_look_for_selfloader} = 0;
25758 scan_bare_identifier();
25761 elsif ( ( $tok eq 'constant' )
25762 and ( $last_nonblank_token eq 'use' ) )
25764 scan_bare_identifier();
25765 my ( $next_nonblank_token, $i_next ) =
25766 find_next_nonblank_token( $i, $rtokens,
25767 $max_token_index );
25769 if ($next_nonblank_token) {
25771 if ( $is_keyword{$next_nonblank_token} ) {
25773 # Assume qw is used as a quote and okay, as in:
25774 # use constant qw{ DEBUG 0 };
25775 # Not worth trying to parse for just a warning
25777 # NOTE: This warning is deactivated because recent
25778 # versions of perl do not complain here, but
25779 # the coding is retained for reference.
25780 if ( 0 && $next_nonblank_token ne 'qw' ) {
25782 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
25787 # FIXME: could check for error in which next token is
25788 # not a word (number, punctuation, ..)
25790 $is_constant{$current_package}{$next_nonblank_token}
25796 # various quote operators
25797 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
25799 if ( $expecting == OPERATOR ) {
25801 # Be careful not to call an error for a qw quote
25802 # where a parenthesized list is allowed. For example,
25803 # it could also be a for/foreach construct such as
25805 # foreach my $key qw\Uno Due Tres Quadro\ {
25806 # print "Set $key\n";
25810 # Or it could be a function call.
25811 # NOTE: Braces in something like &{ xxx } are not
25812 # marked as a block, we might have a method call.
25813 # &method(...), $method->(..), &{method}(...),
25814 # $ref[2](list) is ok & short for $ref[2]->(list)
25816 # See notes in 'sub code_block_type' and
25817 # 'sub is_non_structural_brace'
25821 && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
25822 || $is_for_foreach{$want_paren} )
25825 error_if_expecting_OPERATOR();
25828 $in_quote = $quote_items{$tok};
25829 $allowed_quote_modifiers = $quote_modifiers{$tok};
25831 # All quote types are 'Q' except possibly qw quotes.
25832 # qw quotes are special in that they may generally be trimmed
25833 # of leading and trailing whitespace. So they are given a
25834 # separate type, 'q', unless requested otherwise.
25836 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
25839 $quote_type = $type;
25842 # check for a statement label
25844 ( $next_nonblank_token eq ':' )
25845 && ( $$rtokens[ $i_next + 1 ] ne ':' )
25846 && ( $i_next <= $max_token_index ) # colon on same line
25850 if ( $tok !~ /[A-Z]/ ) {
25851 push @{ $tokenizer_self->{_rlower_case_labels_at} },
25852 $input_line_number;
25860 # 'sub' || 'package'
25861 elsif ( $is_sub_package{$tok_kw} ) {
25862 error_if_expecting_OPERATOR()
25863 if ( $expecting == OPERATOR );
25867 # Note on token types for format, __DATA__, __END__:
25868 # It simplifies things to give these type ';', so that when we
25869 # start rescanning we will be expecting a token of type TERM.
25870 # We will switch to type 'k' before outputting the tokens.
25871 elsif ( $is_format_END_DATA{$tok_kw} ) {
25872 $type = ';'; # make tokenizer look for TERM next
25873 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
25877 elsif ( $is_keyword{$tok_kw} ) {
25880 # Since for and foreach may not be followed immediately
25881 # by an opening paren, we have to remember which keyword
25882 # is associated with the next '('
25883 if ( $is_for_foreach{$tok} ) {
25884 if ( new_statement_ok() ) {
25885 $want_paren = $tok;
25889 # recognize 'use' statements, which are special
25890 elsif ( $is_use_require{$tok} ) {
25891 $statement_type = $tok;
25892 error_if_expecting_OPERATOR()
25893 if ( $expecting == OPERATOR );
25896 # remember my and our to check for trailing ": shared"
25897 elsif ( $is_my_our{$tok} ) {
25898 $statement_type = $tok;
25901 # Check for misplaced 'elsif' and 'else', but allow isolated
25902 # else or elsif blocks to be formatted. This is indicated
25903 # by a last noblank token of ';'
25904 elsif ( $tok eq 'elsif' ) {
25905 if ( $last_nonblank_token ne ';'
25906 && $last_nonblank_block_type !~
25907 /^(if|elsif|unless)$/ )
25910 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
25914 elsif ( $tok eq 'else' ) {
25916 # patched for SWITCH/CASE
25918 $last_nonblank_token ne ';'
25919 && $last_nonblank_block_type !~
25920 /^(if|elsif|unless|case|when)$/
25922 # patch to avoid an unwanted error message for
25923 # the case of a parenless 'case' (RT 105484):
25924 # switch ( 1 ) { case x { 2 } else { } }
25925 && $statement_type !~
25926 /^(if|elsif|unless|case|when)$/
25930 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
25934 elsif ( $tok eq 'continue' ) {
25935 if ( $last_nonblank_token ne ';'
25936 && $last_nonblank_block_type !~
25937 /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
25940 # note: ';' '{' and '}' in list above
25941 # because continues can follow bare blocks;
25942 # ':' is labeled block
25944 ############################################
25945 # NOTE: This check has been deactivated because
25946 # continue has an alternative usage for given/when
25947 # blocks in perl 5.10
25948 ## warning("'$tok' should follow a block\n");
25949 ############################################
25953 # patch for SWITCH/CASE if 'case' and 'when are
25954 # treated as keywords.
25955 elsif ( $tok eq 'when' || $tok eq 'case' ) {
25956 $statement_type = $tok; # next '{' is block
25960 # indent trailing if/unless/while/until
25961 # outdenting will be handled by later indentation loop
25962 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
25974 ## if ( $tok =~ /^(if|unless|while|until)$/
25975 ## && $next_nonblank_token ne '(' )
25977 ## $indent_flag = 1;
25981 # check for inline label following
25982 # /^(redo|last|next|goto)$/
25983 elsif (( $last_nonblank_type eq 'k' )
25984 && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
25990 # something else --
25993 scan_bare_identifier();
25994 if ( $type eq 'w' ) {
25996 if ( $expecting == OPERATOR ) {
25998 # don't complain about possible indirect object
26002 # sub new($) { ... }
26003 # $b = new A::; # calls A::new
26004 # $c = new A; # same thing but suspicious
26005 # This will call A::new but we have a 'new' in
26006 # main:: which looks like a constant.
26008 if ( $last_nonblank_type eq 'C' ) {
26009 if ( $tok !~ /::$/ ) {
26011 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
26012 Maybe indirectet object notation?
26017 error_if_expecting_OPERATOR("bareword");
26021 # mark bare words immediately followed by a paren as
26023 $next_tok = $$rtokens[ $i + 1 ];
26024 if ( $next_tok eq '(' ) {
26028 # underscore after file test operator is file handle
26029 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
26033 # patch for SWITCH/CASE if 'case' and 'when are
26034 # not treated as keywords:
26038 && $brace_type[$brace_depth] eq 'switch'
26040 || ( $tok eq 'when'
26041 && $brace_type[$brace_depth] eq 'given' )
26044 $statement_type = $tok; # next '{' is block
26045 $type = 'k'; # for keyword syntax coloring
26048 # patch for SWITCH/CASE if switch and given not keywords
26049 # Switch is not a perl 5 keyword, but we will gamble
26050 # and mark switch followed by paren as a keyword. This
26051 # is only necessary to get html syntax coloring nice,
26052 # and does not commit this as being a switch/case.
26053 if ( $next_nonblank_token eq '('
26054 && ( $tok eq 'switch' || $tok eq 'given' ) )
26056 $type = 'k'; # for keyword syntax coloring
26062 ###############################################################
26063 # section 2: strings of digits
26064 ###############################################################
26065 elsif ( $pre_type eq 'd' ) {
26066 $expecting = operator_expected( $prev_type, $tok, $next_type );
26067 error_if_expecting_OPERATOR("Number")
26068 if ( $expecting == OPERATOR );
26069 my $number = scan_number();
26070 if ( !defined($number) ) {
26072 # shouldn't happen - we should always get a number
26073 warning("non-number beginning with digit--program bug\n");
26074 report_definite_bug();
26078 ###############################################################
26079 # section 3: all other tokens
26080 ###############################################################
26083 last if ( $tok eq '#' );
26084 my $code = $tokenization_code->{$tok};
26087 operator_expected( $prev_type, $tok, $next_type );
26094 # -----------------------------
26095 # end of main tokenization loop
26096 # -----------------------------
26098 if ( $i_tok >= 0 ) {
26099 $routput_token_type->[$i_tok] = $type;
26100 $routput_block_type->[$i_tok] = $block_type;
26101 $routput_container_type->[$i_tok] = $container_type;
26102 $routput_type_sequence->[$i_tok] = $type_sequence;
26103 $routput_indent_flag->[$i_tok] = $indent_flag;
26106 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
26107 $last_last_nonblank_token = $last_nonblank_token;
26108 $last_last_nonblank_type = $last_nonblank_type;
26109 $last_last_nonblank_block_type = $last_nonblank_block_type;
26110 $last_last_nonblank_container_type = $last_nonblank_container_type;
26111 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
26112 $last_nonblank_token = $tok;
26113 $last_nonblank_type = $type;
26114 $last_nonblank_block_type = $block_type;
26115 $last_nonblank_container_type = $container_type;
26116 $last_nonblank_type_sequence = $type_sequence;
26117 $last_nonblank_prototype = $prototype;
26120 # reset indentation level if necessary at a sub or package
26121 # in an attempt to recover from a nesting error
26122 if ( $level_in_tokenizer < 0 ) {
26123 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
26124 reset_indentation_level(0);
26125 brace_warning("resetting level to 0 at $1 $2\n");
26129 # all done tokenizing this line ...
26130 # now prepare the final list of tokens and types
26132 my @token_type = (); # stack of output token types
26133 my @block_type = (); # stack of output code block types
26134 my @container_type = (); # stack of output code container types
26135 my @type_sequence = (); # stack of output type sequence numbers
26136 my @tokens = (); # output tokens
26137 my @levels = (); # structural brace levels of output tokens
26138 my @slevels = (); # secondary nesting levels of output tokens
26139 my @nesting_tokens = (); # string of tokens leading to this depth
26140 my @nesting_types = (); # string of token types leading to this depth
26141 my @nesting_blocks = (); # string of block types leading to this depth
26142 my @nesting_lists = (); # string of list types leading to this depth
26143 my @ci_string = (); # string needed to compute continuation indentation
26144 my @container_environment = (); # BLOCK or LIST
26145 my $container_environment = '';
26146 my $im = -1; # previous $i value
26148 my $ci_string_sum = ones_count($ci_string_in_tokenizer);
26150 # Computing Token Indentation
26152 # The final section of the tokenizer forms tokens and also computes
26153 # parameters needed to find indentation. It is much easier to do it
26154 # in the tokenizer than elsewhere. Here is a brief description of how
26155 # indentation is computed. Perl::Tidy computes indentation as the sum
26158 # (1) structural indentation, such as if/else/elsif blocks
26159 # (2) continuation indentation, such as long parameter call lists.
26161 # These are occasionally called primary and secondary indentation.
26163 # Structural indentation is introduced by tokens of type '{', although
26164 # the actual tokens might be '{', '(', or '['. Structural indentation
26165 # is of two types: BLOCK and non-BLOCK. Default structural indentation
26166 # is 4 characters if the standard indentation scheme is used.
26168 # Continuation indentation is introduced whenever a line at BLOCK level
26169 # is broken before its termination. Default continuation indentation
26170 # is 2 characters in the standard indentation scheme.
26172 # Both types of indentation may be nested arbitrarily deep and
26173 # interlaced. The distinction between the two is somewhat arbitrary.
26175 # For each token, we will define two variables which would apply if
26176 # the current statement were broken just before that token, so that
26177 # that token started a new line:
26179 # $level = the structural indentation level,
26180 # $ci_level = the continuation indentation level
26182 # The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
26183 # assuming defaults. However, in some special cases it is customary
26184 # to modify $ci_level from this strict value.
26186 # The total structural indentation is easy to compute by adding and
26187 # subtracting 1 from a saved value as types '{' and '}' are seen. The
26188 # running value of this variable is $level_in_tokenizer.
26190 # The total continuation is much more difficult to compute, and requires
26191 # several variables. These variables are:
26193 # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
26194 # each indentation level, if there are intervening open secondary
26195 # structures just prior to that level.
26196 # $continuation_string_in_tokenizer = a string of 1's and 0's indicating
26197 # if the last token at that level is "continued", meaning that it
26198 # is not the first token of an expression.
26199 # $nesting_block_string = a string of 1's and 0's indicating, for each
26200 # indentation level, if the level is of type BLOCK or not.
26201 # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
26202 # $nesting_list_string = a string of 1's and 0's indicating, for each
26203 # indentation level, if it is appropriate for list formatting.
26204 # If so, continuation indentation is used to indent long list items.
26205 # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
26206 # @{$rslevel_stack} = a stack of total nesting depths at each
26207 # structural indentation level, where "total nesting depth" means
26208 # the nesting depth that would occur if every nesting token -- '{', '[',
26209 # and '(' -- , regardless of context, is used to compute a nesting
26212 #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
26213 #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
26215 my ( $ci_string_i, $level_i, $nesting_block_string_i,
26216 $nesting_list_string_i, $nesting_token_string_i,
26217 $nesting_type_string_i, );
26219 foreach $i ( @{$routput_token_list} )
26220 { # scan the list of pre-tokens indexes
26222 # self-checking for valid token types
26223 my $type = $routput_token_type->[$i];
26224 my $forced_indentation_flag = $routput_indent_flag->[$i];
26226 # See if we should undo the $forced_indentation_flag.
26227 # Forced indentation after 'if', 'unless', 'while' and 'until'
26228 # expressions without trailing parens is optional and doesn't
26229 # always look good. It is usually okay for a trailing logical
26230 # expression, but if the expression is a function call, code block,
26231 # or some kind of list it puts in an unwanted extra indentation
26232 # level which is hard to remove.
26234 # Example where extra indentation looks ok:
26236 # if $det_a < 0 and $det_b > 0
26237 # or $det_a > 0 and $det_b < 0;
26239 # Example where extra indentation is not needed because
26240 # the eval brace also provides indentation:
26241 # print "not " if defined eval {
26242 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
26245 # The following rule works fairly well:
26246 # Undo the flag if the end of this line, or start of the next
26247 # line, is an opening container token or a comma.
26248 # This almost always works, but if not after another pass it will
26250 if ( $forced_indentation_flag && $type eq 'k' ) {
26252 my $ilast = $routput_token_list->[$ixlast];
26253 my $toklast = $routput_token_type->[$ilast];
26254 if ( $toklast eq '#' ) {
26256 $ilast = $routput_token_list->[$ixlast];
26257 $toklast = $routput_token_type->[$ilast];
26259 if ( $toklast eq 'b' ) {
26261 $ilast = $routput_token_list->[$ixlast];
26262 $toklast = $routput_token_type->[$ilast];
26264 if ( $toklast =~ /^[\{,]$/ ) {
26265 $forced_indentation_flag = 0;
26268 ( $toklast, my $i_next ) =
26269 find_next_nonblank_token( $max_token_index, $rtokens,
26270 $max_token_index );
26271 if ( $toklast =~ /^[\{,]$/ ) {
26272 $forced_indentation_flag = 0;
26277 # if we are already in an indented if, see if we should outdent
26278 if ($indented_if_level) {
26280 # don't try to nest trailing if's - shouldn't happen
26281 if ( $type eq 'k' ) {
26282 $forced_indentation_flag = 0;
26285 # check for the normal case - outdenting at next ';'
26286 elsif ( $type eq ';' ) {
26287 if ( $level_in_tokenizer == $indented_if_level ) {
26288 $forced_indentation_flag = -1;
26289 $indented_if_level = 0;
26293 # handle case of missing semicolon
26294 elsif ( $type eq '}' ) {
26295 if ( $level_in_tokenizer == $indented_if_level ) {
26296 $indented_if_level = 0;
26298 # TBD: This could be a subroutine call
26299 $level_in_tokenizer--;
26300 if ( @{$rslevel_stack} > 1 ) {
26301 pop( @{$rslevel_stack} );
26303 if ( length($nesting_block_string) > 1 )
26304 { # true for valid script
26305 chop $nesting_block_string;
26306 chop $nesting_list_string;
26313 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
26314 $level_i = $level_in_tokenizer;
26316 # This can happen by running perltidy on non-scripts
26317 # although it could also be bug introduced by programming change.
26318 # Perl silently accepts a 032 (^Z) and takes it as the end
26319 if ( !$is_valid_token_type{$type} ) {
26320 my $val = ord($type);
26322 "unexpected character decimal $val ($type) in script\n");
26323 $tokenizer_self->{_in_error} = 1;
26326 # ----------------------------------------------------------------
26327 # TOKEN TYPE PATCHES
26328 # output __END__, __DATA__, and format as type 'k' instead of ';'
26329 # to make html colors correct, etc.
26330 my $fix_type = $type;
26331 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
26333 # output anonymous 'sub' as keyword
26334 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
26336 # -----------------------------------------------------------------
26338 $nesting_token_string_i = $nesting_token_string;
26339 $nesting_type_string_i = $nesting_type_string;
26340 $nesting_block_string_i = $nesting_block_string;
26341 $nesting_list_string_i = $nesting_list_string;
26343 # set primary indentation levels based on structural braces
26344 # Note: these are set so that the leading braces have a HIGHER
26345 # level than their CONTENTS, which is convenient for indentation
26346 # Also, define continuation indentation for each token.
26347 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
26350 # use environment before updating
26351 $container_environment =
26352 $nesting_block_flag ? 'BLOCK'
26353 : $nesting_list_flag ? 'LIST'
26356 # if the difference between total nesting levels is not 1,
26357 # there are intervening non-structural nesting types between
26358 # this '{' and the previous unclosed '{'
26359 my $intervening_secondary_structure = 0;
26360 if ( @{$rslevel_stack} ) {
26361 $intervening_secondary_structure =
26362 $slevel_in_tokenizer - $rslevel_stack->[-1];
26365 # Continuation Indentation
26367 # Having tried setting continuation indentation both in the formatter and
26368 # in the tokenizer, I can say that setting it in the tokenizer is much,
26369 # much easier. The formatter already has too much to do, and can't
26370 # make decisions on line breaks without knowing what 'ci' will be at
26371 # arbitrary locations.
26373 # But a problem with setting the continuation indentation (ci) here
26374 # in the tokenizer is that we do not know where line breaks will actually
26375 # be. As a result, we don't know if we should propagate continuation
26376 # indentation to higher levels of structure.
26378 # For nesting of only structural indentation, we never need to do this.
26379 # For example, in a long if statement, like this
26381 # if ( !$output_block_type[$i]
26382 # && ($in_statement_continuation) )
26387 # the second line has ci but we do normally give the lines within the BLOCK
26388 # any ci. This would be true if we had blocks nested arbitrarily deeply.
26390 # But consider something like this, where we have created a break after
26391 # an opening paren on line 1, and the paren is not (currently) a
26392 # structural indentation token:
26394 # my $file = $menubar->Menubutton(
26395 # qw/-text File -underline 0 -menuitems/ => [
26397 # Cascade => '~View',
26401 # The second line has ci, so it would seem reasonable to propagate it
26402 # down, giving the third line 1 ci + 1 indentation. This suggests the
26403 # following rule, which is currently used to propagating ci down: if there
26404 # are any non-structural opening parens (or brackets, or braces), before
26405 # an opening structural brace, then ci is propagated down, and otherwise
26406 # not. The variable $intervening_secondary_structure contains this
26407 # information for the current token, and the string
26408 # "$ci_string_in_tokenizer" is a stack of previous values of this
26411 # save the current states
26412 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
26413 $level_in_tokenizer++;
26415 if ($forced_indentation_flag) {
26417 # break BEFORE '?' when there is forced indentation
26418 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
26419 if ( $type eq 'k' ) {
26420 $indented_if_level = $level_in_tokenizer;
26423 # do not change container environment here if we are not
26424 # at a real list. Adding this check prevents "blinkers"
26425 # often near 'unless" clauses, such as in the following
26430 ## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
26433 $nesting_block_string .= "$nesting_block_flag";
26437 if ( $routput_block_type->[$i] ) {
26438 $nesting_block_flag = 1;
26439 $nesting_block_string .= '1';
26442 $nesting_block_flag = 0;
26443 $nesting_block_string .= '0';
26447 # we will use continuation indentation within containers
26448 # which are not blocks and not logical expressions
26450 if ( !$routput_block_type->[$i] ) {
26452 # propagate flag down at nested open parens
26453 if ( $routput_container_type->[$i] eq '(' ) {
26454 $bit = 1 if $nesting_list_flag;
26457 # use list continuation if not a logical grouping
26458 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
26462 $is_logical_container{ $routput_container_type->[$i]
26466 $nesting_list_string .= $bit;
26467 $nesting_list_flag = $bit;
26469 $ci_string_in_tokenizer .=
26470 ( $intervening_secondary_structure != 0 ) ? '1' : '0';
26471 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26472 $continuation_string_in_tokenizer .=
26473 ( $in_statement_continuation > 0 ) ? '1' : '0';
26475 # Sometimes we want to give an opening brace continuation indentation,
26476 # and sometimes not. For code blocks, we don't do it, so that the leading
26477 # '{' gets outdented, like this:
26479 # if ( !$output_block_type[$i]
26480 # && ($in_statement_continuation) )
26483 # For other types, we will give them continuation indentation. For example,
26484 # here is how a list looks with the opening paren indented:
26487 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
26488 # [ "homer", "marge", "bart" ], );
26490 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
26492 my $total_ci = $ci_string_sum;
26494 !$routput_block_type->[$i] # patch: skip for BLOCK
26495 && ($in_statement_continuation)
26496 && !( $forced_indentation_flag && $type eq ':' )
26499 $total_ci += $in_statement_continuation
26500 unless ( $ci_string_in_tokenizer =~ /1$/ );
26503 $ci_string_i = $total_ci;
26504 $in_statement_continuation = 0;
26507 elsif ($type eq '}'
26509 || $forced_indentation_flag < 0 )
26512 # only a nesting error in the script would prevent popping here
26513 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
26515 $level_i = --$level_in_tokenizer;
26517 # restore previous level values
26518 if ( length($nesting_block_string) > 1 )
26519 { # true for valid script
26520 chop $nesting_block_string;
26521 $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
26522 chop $nesting_list_string;
26523 $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
26525 chop $ci_string_in_tokenizer;
26526 $ci_string_sum = ones_count($ci_string_in_tokenizer);
26528 $in_statement_continuation =
26529 chop $continuation_string_in_tokenizer;
26531 # zero continuation flag at terminal BLOCK '}' which
26532 # ends a statement.
26533 if ( $routput_block_type->[$i] ) {
26535 # ...These include non-anonymous subs
26536 # note: could be sub ::abc { or sub 'abc
26537 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
26539 # note: older versions of perl require the /gc modifier
26540 # here or else the \G does not work.
26541 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
26543 $in_statement_continuation = 0;
26547 # ...and include all block types except user subs with
26548 # block prototypes and these: (sort|grep|map|do|eval)
26549 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
26551 $is_zero_continuation_block_type{
26552 $routput_block_type->[$i]
26555 $in_statement_continuation = 0;
26558 # ..but these are not terminal types:
26559 # /^(sort|grep|map|do|eval)$/ )
26561 $is_not_zero_continuation_block_type{
26562 $routput_block_type->[$i]
26567 # ..and a block introduced by a label
26568 # /^\w+\s*:$/gc ) {
26569 elsif ( $routput_block_type->[$i] =~ /:$/ ) {
26570 $in_statement_continuation = 0;
26573 # user function with block prototype
26575 $in_statement_continuation = 0;
26579 # If we are in a list, then
26580 # we must set continuation indentation at the closing
26581 # paren of something like this (paren after $check):
26584 # ( not defined $check )
26586 # or $check eq "new"
26587 # or $check eq "old",
26589 elsif ( $tok eq ')' ) {
26590 $in_statement_continuation = 1
26591 if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
26594 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
26597 # use environment after updating
26598 $container_environment =
26599 $nesting_block_flag ? 'BLOCK'
26600 : $nesting_list_flag ? 'LIST'
26602 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26603 $nesting_block_string_i = $nesting_block_string;
26604 $nesting_list_string_i = $nesting_list_string;
26607 # not a structural indentation type..
26610 $container_environment =
26611 $nesting_block_flag ? 'BLOCK'
26612 : $nesting_list_flag ? 'LIST'
26615 # zero the continuation indentation at certain tokens so
26616 # that they will be at the same level as its container. For
26617 # commas, this simplifies the -lp indentation logic, which
26618 # counts commas. For ?: it makes them stand out.
26619 if ($nesting_list_flag) {
26620 if ( $type =~ /^[,\?\:]$/ ) {
26621 $in_statement_continuation = 0;
26625 # be sure binary operators get continuation indentation
26627 $container_environment
26628 && ( $type eq 'k' && $is_binary_keyword{$tok}
26629 || $is_binary_type{$type} )
26632 $in_statement_continuation = 1;
26635 # continuation indentation is sum of any open ci from previous
26636 # levels plus the current level
26637 $ci_string_i = $ci_string_sum + $in_statement_continuation;
26639 # update continuation flag ...
26640 # if this isn't a blank or comment..
26641 if ( $type ne 'b' && $type ne '#' ) {
26643 # and we are in a BLOCK
26644 if ($nesting_block_flag) {
26646 # the next token after a ';' and label starts a new stmt
26647 if ( $type eq ';' || $type eq 'J' ) {
26648 $in_statement_continuation = 0;
26651 # otherwise, we are continuing the current statement
26653 $in_statement_continuation = 1;
26657 # if we are not in a BLOCK..
26660 # do not use continuation indentation if not list
26661 # environment (could be within if/elsif clause)
26662 if ( !$nesting_list_flag ) {
26663 $in_statement_continuation = 0;
26666 # otherwise, the token after a ',' starts a new term
26668 # Patch FOR RT#99961; no continuation after a ';'
26669 # This is needed because perltidy currently marks
26670 # a block preceded by a type character like % or @
26671 # as a non block, to simplify formatting. But these
26672 # are actually blocks and can have semicolons.
26673 # See code_block_type() and is_non_structural_brace().
26674 elsif ( $type eq ',' || $type eq ';' ) {
26675 $in_statement_continuation = 0;
26678 # otherwise, we are continuing the current term
26680 $in_statement_continuation = 1;
26686 if ( $level_in_tokenizer < 0 ) {
26687 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
26688 $tokenizer_self->{_saw_negative_indentation} = 1;
26689 warning("Starting negative indentation\n");
26693 # set secondary nesting levels based on all containment token types
26694 # Note: these are set so that the nesting depth is the depth
26695 # of the PREVIOUS TOKEN, which is convenient for setting
26696 # the strength of token bonds
26697 my $slevel_i = $slevel_in_tokenizer;
26700 if ( $is_opening_type{$type} ) {
26701 $slevel_in_tokenizer++;
26702 $nesting_token_string .= $tok;
26703 $nesting_type_string .= $type;
26707 elsif ( $is_closing_type{$type} ) {
26708 $slevel_in_tokenizer--;
26709 my $char = chop $nesting_token_string;
26711 if ( $char ne $matching_start_token{$tok} ) {
26712 $nesting_token_string .= $char . $tok;
26713 $nesting_type_string .= $type;
26716 chop $nesting_type_string;
26720 push( @block_type, $routput_block_type->[$i] );
26721 push( @ci_string, $ci_string_i );
26722 push( @container_environment, $container_environment );
26723 push( @container_type, $routput_container_type->[$i] );
26724 push( @levels, $level_i );
26725 push( @nesting_tokens, $nesting_token_string_i );
26726 push( @nesting_types, $nesting_type_string_i );
26727 push( @slevels, $slevel_i );
26728 push( @token_type, $fix_type );
26729 push( @type_sequence, $routput_type_sequence->[$i] );
26730 push( @nesting_blocks, $nesting_block_string );
26731 push( @nesting_lists, $nesting_list_string );
26733 # now form the previous token
26736 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
26740 substr( $input_line, $$rtoken_map[$im], $num ) );
26746 $num = length($input_line) - $$rtoken_map[$im]; # make the last token
26748 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
26751 $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
26752 $tokenizer_self->{_in_quote} = $in_quote;
26753 $tokenizer_self->{_quote_target} =
26754 $in_quote ? matching_end_token($quote_character) : "";
26755 $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
26757 $line_of_tokens->{_rtoken_type} = \@token_type;
26758 $line_of_tokens->{_rtokens} = \@tokens;
26759 $line_of_tokens->{_rblock_type} = \@block_type;
26760 $line_of_tokens->{_rcontainer_type} = \@container_type;
26761 $line_of_tokens->{_rcontainer_environment} = \@container_environment;
26762 $line_of_tokens->{_rtype_sequence} = \@type_sequence;
26763 $line_of_tokens->{_rlevels} = \@levels;
26764 $line_of_tokens->{_rslevels} = \@slevels;
26765 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
26766 $line_of_tokens->{_rci_levels} = \@ci_string;
26767 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
26771 } # end tokenize_this_line
26773 #########i#############################################################
26774 # Tokenizer routines which assist in identifying token types
26775 #######################################################################
26777 sub operator_expected {
26779 # Many perl symbols have two or more meanings. For example, '<<'
26780 # can be a shift operator or a here-doc operator. The
26781 # interpretation of these symbols depends on the current state of
26782 # the tokenizer, which may either be expecting a term or an
26783 # operator. For this example, a << would be a shift if an operator
26784 # is expected, and a here-doc if a term is expected. This routine
26785 # is called to make this decision for any current token. It returns
26786 # one of three possible values:
26788 # OPERATOR - operator expected (or at least, not a term)
26789 # UNKNOWN - can't tell
26790 # TERM - a term is expected (or at least, not an operator)
26792 # The decision is based on what has been seen so far. This
26793 # information is stored in the "$last_nonblank_type" and
26794 # "$last_nonblank_token" variables. For example, if the
26795 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
26796 # if $last_nonblank_type is 'n' (numeric), we are expecting an
26799 # If a UNKNOWN is returned, the calling routine must guess. A major
26800 # goal of this tokenizer is to minimize the possibility of returning
26801 # UNKNOWN, because a wrong guess can spoil the formatting of a
26804 # adding NEW_TOKENS: it is critically important that this routine be
26805 # updated to allow it to determine if an operator or term is to be
26806 # expected after the new token. Doing this simply involves adding
26807 # the new token character to one of the regexes in this routine or
26808 # to one of the hash lists
26809 # that it uses, which are initialized in the BEGIN section.
26810 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
26813 my ( $prev_type, $tok, $next_type ) = @_;
26815 my $op_expected = UNKNOWN;
26817 ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
26819 # Note: function prototype is available for token type 'U' for future
26820 # program development. It contains the leading and trailing parens,
26821 # and no blanks. It might be used to eliminate token type 'C', for
26822 # example (prototype = '()'). Thus:
26823 # if ($last_nonblank_type eq 'U') {
26824 # print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
26827 # A possible filehandle (or object) requires some care...
26828 if ( $last_nonblank_type eq 'Z' ) {
26831 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
26832 $op_expected = UNKNOWN;
26835 # For possible file handle like "$a", Perl uses weird parsing rules.
26837 # print $a/2,"/hi"; - division
26838 # print $a / 2,"/hi"; - division
26839 # print $a/ 2,"/hi"; - division
26840 # print $a /2,"/hi"; - pattern (and error)!
26841 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
26842 $op_expected = TERM;
26845 # Note when an operation is being done where a
26846 # filehandle might be expected, since a change in whitespace
26847 # could change the interpretation of the statement.
26849 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
26850 complain("operator in print statement not recommended\n");
26851 $op_expected = OPERATOR;
26856 # Check for smartmatch operator before preceding brace or square bracket.
26857 # For example, at the ? after the ] in the following expressions we are
26858 # expecting an operator:
26860 # qr/3/ ~~ ['1234'] ? 1 : 0;
26861 # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
26862 elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
26863 $op_expected = OPERATOR;
26866 # handle something after 'do' and 'eval'
26867 elsif ( $is_block_operator{$last_nonblank_token} ) {
26869 # something like $a = eval "expression";
26871 if ( $last_nonblank_type eq 'k' ) {
26872 $op_expected = TERM; # expression or list mode following keyword
26875 # something like $a = do { BLOCK } / 2;
26876 # or this ? after a smartmatch anonynmous hash or array reference:
26877 # qr/3/ ~~ ['1234'] ? 1 : 0;
26880 $op_expected = OPERATOR; # block mode following }
26884 # handle bare word..
26885 elsif ( $last_nonblank_type eq 'w' ) {
26887 # unfortunately, we can't tell what type of token to expect next
26888 # after most bare words
26889 $op_expected = UNKNOWN;
26892 # operator, but not term possible after these types
26893 # Note: moved ')' from type to token because parens in list context
26894 # get marked as '{' '}' now. This is a minor glitch in the following:
26895 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
26897 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
26898 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
26900 $op_expected = OPERATOR;
26902 # in a 'use' statement, numbers and v-strings are not true
26903 # numbers, so to avoid incorrect error messages, we will
26904 # mark them as unknown for now (use.t)
26905 # TODO: it would be much nicer to create a new token V for VERSION
26906 # number in a use statement. Then this could be a check on type V
26907 # and related patches which change $statement_type for '=>'
26908 # and ',' could be removed. Further, it would clean things up to
26909 # scan the 'use' statement with a separate subroutine.
26910 if ( ( $statement_type eq 'use' )
26911 && ( $last_nonblank_type =~ /^[nv]$/ ) )
26913 $op_expected = UNKNOWN;
26916 # expecting VERSION or {} after package NAMESPACE
26917 elsif ($statement_type =~ /^package\b/
26918 && $last_nonblank_token =~ /^package\b/ )
26920 $op_expected = TERM;
26924 # no operator after many keywords, such as "die", "warn", etc
26925 elsif ( $expecting_term_token{$last_nonblank_token} ) {
26927 # patch for dor.t (defined or).
26928 # perl functions which may be unary operators
26929 # TODO: This list is incomplete, and these should be put
26932 && $next_type eq '/'
26933 && $last_nonblank_type eq 'k'
26934 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
26936 $op_expected = OPERATOR;
26939 $op_expected = TERM;
26943 # no operator after things like + - ** (i.e., other operators)
26944 elsif ( $expecting_term_types{$last_nonblank_type} ) {
26945 $op_expected = TERM;
26948 # a few operators, like "time", have an empty prototype () and so
26949 # take no parameters but produce a value to operate on
26950 elsif ( $expecting_operator_token{$last_nonblank_token} ) {
26951 $op_expected = OPERATOR;
26954 # post-increment and decrement produce values to be operated on
26955 elsif ( $expecting_operator_types{$last_nonblank_type} ) {
26956 $op_expected = OPERATOR;
26959 # no value to operate on after sub block
26960 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
26962 # a right brace here indicates the end of a simple block.
26963 # all non-structural right braces have type 'R'
26964 # all braces associated with block operator keywords have been given those
26965 # keywords as "last_nonblank_token" and caught above.
26966 # (This statement is order dependent, and must come after checking
26967 # $last_nonblank_token).
26968 elsif ( $last_nonblank_type eq '}' ) {
26970 # patch for dor.t (defined or).
26972 && $next_type eq '/'
26973 && $last_nonblank_token eq ']' )
26975 $op_expected = OPERATOR;
26978 $op_expected = TERM;
26982 # something else..what did I forget?
26985 # collecting diagnostics on unknown operator types..see what was missed
26986 $op_expected = UNKNOWN;
26988 "OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
26992 TOKENIZER_DEBUG_FLAG_EXPECT && do {
26994 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
26996 return $op_expected;
26999 sub new_statement_ok {
27001 # return true if the current token can start a new statement
27002 # USES GLOBAL VARIABLES: $last_nonblank_type
27004 return label_ok() # a label would be ok here
27006 || $last_nonblank_type eq 'J'; # or we follow a label
27012 # Decide if a bare word followed by a colon here is a label
27013 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
27014 # $brace_depth, @brace_type
27016 # if it follows an opening or closing code block curly brace..
27017 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
27018 && $last_nonblank_type eq $last_nonblank_token )
27021 # it is a label if and only if the curly encloses a code block
27022 return $brace_type[$brace_depth];
27025 # otherwise, it is a label if and only if it follows a ';' (real or fake)
27028 return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
27032 sub code_block_type {
27034 # Decide if this is a block of code, and its type.
27035 # Must be called only when $type = $token = '{'
27036 # The problem is to distinguish between the start of a block of code
27037 # and the start of an anonymous hash reference
27038 # Returns "" if not code block, otherwise returns 'last_nonblank_token'
27039 # to indicate the type of code block. (For example, 'last_nonblank_token'
27040 # might be 'if' for an if block, 'else' for an else block, etc).
27041 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
27042 # $last_nonblank_block_type, $brace_depth, @brace_type
27044 # handle case of multiple '{'s
27046 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
27048 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
27049 if ( $last_nonblank_token eq '{'
27050 && $last_nonblank_type eq $last_nonblank_token )
27053 # opening brace where a statement may appear is probably
27054 # a code block but might be and anonymous hash reference
27055 if ( $brace_type[$brace_depth] ) {
27056 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27057 $max_token_index );
27060 # cannot start a code block within an anonymous hash
27066 elsif ( $last_nonblank_token eq ';' ) {
27068 # an opening brace where a statement may appear is probably
27069 # a code block but might be and anonymous hash reference
27070 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27071 $max_token_index );
27074 # handle case of '}{'
27075 elsif ($last_nonblank_token eq '}'
27076 && $last_nonblank_type eq $last_nonblank_token )
27079 # a } { situation ...
27080 # could be hash reference after code block..(blktype1.t)
27081 if ($last_nonblank_block_type) {
27082 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27083 $max_token_index );
27086 # must be a block if it follows a closing hash reference
27088 return $last_nonblank_token;
27092 ################################################################
27093 # NOTE: braces after type characters start code blocks, but for
27094 # simplicity these are not identified as such. See also
27095 # sub is_non_structural_brace.
27096 ################################################################
27098 ## elsif ( $last_nonblank_type eq 't' ) {
27099 ## return $last_nonblank_token;
27102 # brace after label:
27103 elsif ( $last_nonblank_type eq 'J' ) {
27104 return $last_nonblank_token;
27107 # otherwise, look at previous token. This must be a code block if
27108 # it follows any of these:
27109 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
27110 elsif ( $is_code_block_token{$last_nonblank_token} ) {
27112 # Bug Patch: Note that the opening brace after the 'if' in the following
27113 # snippet is an anonymous hash ref and not a code block!
27114 # print 'hi' if { x => 1, }->{x};
27115 # We can identify this situation because the last nonblank type
27116 # will be a keyword (instead of a closing peren)
27117 if ( $last_nonblank_token =~ /^(if|unless)$/
27118 && $last_nonblank_type eq 'k' )
27123 return $last_nonblank_token;
27127 # or a sub or package BLOCK
27128 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
27129 && $last_nonblank_token =~ /^(sub|package)\b/ )
27131 return $last_nonblank_token;
27134 elsif ( $statement_type =~ /^(sub|package)\b/ ) {
27135 return $statement_type;
27138 # user-defined subs with block parameters (like grep/map/eval)
27139 elsif ( $last_nonblank_type eq 'G' ) {
27140 return $last_nonblank_token;
27144 elsif ( $last_nonblank_type eq 'w' ) {
27145 return decide_if_code_block( $i, $rtokens, $rtoken_type,
27146 $max_token_index );
27149 # Patch for bug # RT #94338 reported by Daniel Trizen
27150 # for-loop in a parenthesized block-map triggering an error message:
27151 # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
27152 # Check for a code block within a parenthesized function call
27153 elsif ( $last_nonblank_token eq '(' ) {
27154 my $paren_type = $paren_type[$paren_depth];
27155 if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
27157 # We will mark this as a code block but use type 't' instead
27158 # of the name of the contining function. This will allow for
27159 # correct parsing but will usually produce better formatting.
27160 # Braces with block type 't' are not broken open automatically
27161 # in the formatter as are other code block types, and this usually
27163 return 't'; # (Not $paren_type)
27170 # handle unknown syntax ') {'
27171 # we previously appended a '()' to mark this case
27172 elsif ( $last_nonblank_token =~ /\(\)$/ ) {
27173 return $last_nonblank_token;
27176 # anything else must be anonymous hash reference
27182 sub decide_if_code_block {
27184 # USES GLOBAL VARIABLES: $last_nonblank_token
27185 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
27187 my ( $next_nonblank_token, $i_next ) =
27188 find_next_nonblank_token( $i, $rtokens, $max_token_index );
27190 # we are at a '{' where a statement may appear.
27191 # We must decide if this brace starts an anonymous hash or a code
27193 # return "" if anonymous hash, and $last_nonblank_token otherwise
27195 # initialize to be code BLOCK
27196 my $code_block_type = $last_nonblank_token;
27198 # Check for the common case of an empty anonymous hash reference:
27199 # Maybe something like sub { { } }
27200 if ( $next_nonblank_token eq '}' ) {
27201 $code_block_type = "";
27206 # To guess if this '{' is an anonymous hash reference, look ahead
27207 # and test as follows:
27209 # it is a hash reference if next come:
27210 # - a string or digit followed by a comma or =>
27211 # - bareword followed by =>
27212 # otherwise it is a code block
27214 # Examples of anonymous hash ref:
27218 # Examples of code blocks:
27219 # {1; print "hello\n", 1;}
27222 # We are only going to look ahead one more (nonblank/comment) line.
27223 # Strange formatting could cause a bad guess, but that's unlikely.
27227 # Ignore the rest of this line if it is a side comment
27228 if ( $next_nonblank_token ne '#' ) {
27229 @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
27230 @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
27232 my ( $rpre_tokens, $rpre_types ) =
27233 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
27234 # generous, and prevents
27236 # time in mangled files
27237 if ( defined($rpre_types) && @$rpre_types ) {
27238 push @pre_types, @$rpre_types;
27239 push @pre_tokens, @$rpre_tokens;
27242 # put a sentinel token to simplify stopping the search
27243 push @pre_types, '}';
27244 push @pre_types, '}';
27247 $jbeg = 1 if $pre_types[0] eq 'b';
27249 # first look for one of these
27251 # - bareword with leading -
27255 if ( $pre_types[$j] =~ /^[\'\"]/ ) {
27257 # find the closing quote; don't worry about escapes
27258 my $quote_mark = $pre_types[$j];
27259 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
27260 if ( $pre_types[$k] eq $quote_mark ) {
27262 my $next = $pre_types[$j];
27267 elsif ( $pre_types[$j] eq 'd' ) {
27270 elsif ( $pre_types[$j] eq 'w' ) {
27273 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
27276 if ( $j > $jbeg ) {
27278 $j++ if $pre_types[$j] eq 'b';
27280 # Patched for RT #95708
27283 # it is a comma which is not a pattern delimeter except for qw
27285 $pre_types[$j] eq ','
27286 && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
27290 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
27293 $code_block_type = "";
27298 return $code_block_type;
27303 # report unexpected token type and show where it is
27304 # USES GLOBAL VARIABLES: $tokenizer_self
27305 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
27306 $rpretoken_type, $input_line )
27309 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
27310 my $msg = "found $found where $expecting expected";
27311 my $pos = $$rpretoken_map[$i_tok];
27312 interrupt_logfile();
27313 my $input_line_number = $tokenizer_self->{_last_line_number};
27314 my ( $offset, $numbered_line, $underline ) =
27315 make_numbered_line( $input_line_number, $input_line, $pos );
27316 $underline = write_on_underline( $underline, $pos - $offset, '^' );
27319 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
27320 my $pos_prev = $$rpretoken_map[$last_nonblank_i];
27322 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
27323 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
27326 $num = $pos - $pos_prev;
27328 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
27331 write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
27332 $trailer = " (previous token underlined)";
27334 warning( $numbered_line . "\n" );
27335 warning( $underline . "\n" );
27336 warning( $msg . $trailer . "\n" );
27341 sub is_non_structural_brace {
27343 # Decide if a brace or bracket is structural or non-structural
27344 # by looking at the previous token and type
27345 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
27347 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
27348 # Tentatively deactivated because it caused the wrong operator expectation
27350 # $user = @vars[1] / 100;
27351 # Must update sub operator_expected before re-implementing.
27352 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
27356 ################################################################
27357 # NOTE: braces after type characters start code blocks, but for
27358 # simplicity these are not identified as such. See also
27359 # sub code_block_type
27360 ################################################################
27362 ##if ($last_nonblank_type eq 't') {return 0}
27364 # otherwise, it is non-structural if it is decorated
27365 # by type information.
27366 # For example, the '{' here is non-structural: ${xxx}
27368 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
27370 # or if we follow a hash or array closing curly brace or bracket
27371 # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
27372 # because the first '}' would have been given type 'R'
27373 || $last_nonblank_type =~ /^([R\]])$/
27377 #########i#############################################################
27378 # Tokenizer routines for tracking container nesting depths
27379 #######################################################################
27381 # The following routines keep track of nesting depths of the nesting
27382 # types, ( [ { and ?. This is necessary for determining the indentation
27383 # level, and also for debugging programs. Not only do they keep track of
27384 # nesting depths of the individual brace types, but they check that each
27385 # of the other brace types is balanced within matching pairs. For
27386 # example, if the program sees this sequence:
27390 # then it can determine that there is an extra left paren somewhere
27391 # between the { and the }. And so on with every other possible
27392 # combination of outer and inner brace types. For another
27397 # which has an extra ] within the parens.
27399 # The brace types have indexes 0 .. 3 which are indexes into
27402 # The pair ? : are treated as just another nesting type, with ? acting
27403 # as the opening brace and : acting as the closing brace.
27407 # $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
27409 # saves the nesting depth of brace type $b (where $b is either of the other
27410 # nesting types) when brace type $a enters a new depth. When this depth
27411 # decreases, a check is made that the current depth of brace types $b is
27412 # unchanged, or otherwise there must have been an error. This can
27413 # be very useful for localizing errors, particularly when perl runs to
27414 # the end of a large file (such as this one) and announces that there
27415 # is a problem somewhere.
27417 # A numerical sequence number is maintained for every nesting type,
27418 # so that each matching pair can be uniquely identified in a simple
27421 sub increase_nesting_depth {
27422 my ( $aa, $pos ) = @_;
27424 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27425 # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
27428 $current_depth[$aa]++;
27430 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
27431 my $input_line_number = $tokenizer_self->{_last_line_number};
27432 my $input_line = $tokenizer_self->{_line_text};
27434 # Sequence numbers increment by number of items. This keeps
27435 # a unique set of numbers but still allows the relative location
27436 # of any type to be determined.
27437 $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
27438 my $seqno = $nesting_sequence_number[$aa];
27439 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
27441 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
27442 [ $input_line_number, $input_line, $pos ];
27444 for $bb ( 0 .. $#closing_brace_names ) {
27445 next if ( $bb == $aa );
27446 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
27449 # set a flag for indenting a nested ternary statement
27451 if ( $aa == QUESTION_COLON ) {
27452 $nested_ternary_flag[ $current_depth[$aa] ] = 0;
27453 if ( $current_depth[$aa] > 1 ) {
27454 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
27455 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
27456 if ( $pdepth == $total_depth - 1 ) {
27458 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
27463 $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
27464 $statement_type = "";
27465 return ( $seqno, $indent );
27468 sub decrease_nesting_depth {
27470 my ( $aa, $pos ) = @_;
27472 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27473 # @current_sequence_number, @depth_array, @starting_line_of_current_depth
27477 my $input_line_number = $tokenizer_self->{_last_line_number};
27478 my $input_line = $tokenizer_self->{_line_text};
27482 if ( $current_depth[$aa] > 0 ) {
27484 # set a flag for un-indenting after seeing a nested ternary statement
27485 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
27486 if ( $aa == QUESTION_COLON ) {
27487 $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
27489 $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
27491 # check that any brace types $bb contained within are balanced
27492 for $bb ( 0 .. $#closing_brace_names ) {
27493 next if ( $bb == $aa );
27495 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
27496 $current_depth[$bb] )
27499 $current_depth[$bb] -
27500 $depth_array[$aa][$bb][ $current_depth[$aa] ];
27502 # don't whine too many times
27503 my $saw_brace_error = get_saw_brace_error();
27505 $saw_brace_error <= MAX_NAG_MESSAGES
27507 # if too many closing types have occurred, we probably
27508 # already caught this error
27509 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
27512 interrupt_logfile();
27514 $starting_line_of_current_depth[$aa]
27515 [ $current_depth[$aa] ];
27517 my $rel = [ $input_line_number, $input_line, $pos ];
27521 if ( $diff == 1 || $diff == -1 ) {
27529 ? $opening_brace_names[$bb]
27530 : $closing_brace_names[$bb];
27531 write_error_indicator_pair( @$rsl, '^' );
27533 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
27538 $starting_line_of_current_depth[$bb]
27539 [ $current_depth[$bb] ];
27542 " The most recent un-matched $bname is on line $ml\n";
27543 write_error_indicator_pair( @$rml, '^' );
27545 write_error_indicator_pair( @$rel, '^' );
27549 increment_brace_error();
27552 $current_depth[$aa]--;
27556 my $saw_brace_error = get_saw_brace_error();
27557 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
27559 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
27561 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
27563 increment_brace_error();
27565 return ( $seqno, $outdent );
27568 sub check_final_nesting_depths {
27571 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
27573 for $aa ( 0 .. $#closing_brace_names ) {
27575 if ( $current_depth[$aa] ) {
27577 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
27580 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
27581 The most recent un-matched $opening_brace_names[$aa] is on line $sl
27583 indicate_error( $msg, @$rsl, '^' );
27584 increment_brace_error();
27589 #########i#############################################################
27590 # Tokenizer routines for looking ahead in input stream
27591 #######################################################################
27593 sub peek_ahead_for_n_nonblank_pre_tokens {
27595 # returns next n pretokens if they exist
27596 # returns undef's if hits eof without seeing any pretokens
27597 # USES GLOBAL VARIABLES: $tokenizer_self
27598 my $max_pretokens = shift;
27601 my ( $rpre_tokens, $rmap, $rpre_types );
27603 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27605 $line =~ s/^\s*//; # trim leading blanks
27606 next if ( length($line) <= 0 ); # skip blank
27607 next if ( $line =~ /^#/ ); # skip comment
27608 ( $rpre_tokens, $rmap, $rpre_types ) =
27609 pre_tokenize( $line, $max_pretokens );
27612 return ( $rpre_tokens, $rpre_types );
27615 # look ahead for next non-blank, non-comment line of code
27616 sub peek_ahead_for_nonblank_token {
27618 # USES GLOBAL VARIABLES: $tokenizer_self
27619 my ( $rtokens, $max_token_index ) = @_;
27623 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27625 $line =~ s/^\s*//; # trim leading blanks
27626 next if ( length($line) <= 0 ); # skip blank
27627 next if ( $line =~ /^#/ ); # skip comment
27628 my ( $rtok, $rmap, $rtype ) =
27629 pre_tokenize( $line, 2 ); # only need 2 pre-tokens
27630 my $j = $max_token_index + 1;
27633 foreach $tok (@$rtok) {
27634 last if ( $tok =~ "\n" );
27635 $$rtokens[ ++$j ] = $tok;
27642 #########i#############################################################
27643 # Tokenizer guessing routines for ambiguous situations
27644 #######################################################################
27646 sub guess_if_pattern_or_conditional {
27648 # this routine is called when we have encountered a ? following an
27649 # unknown bareword, and we must decide if it starts a pattern or not
27650 # input parameters:
27651 # $i - token index of the ? starting possible pattern
27652 # output parameters:
27653 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
27654 # msg = a warning or diagnostic message
27655 # USES GLOBAL VARIABLES: $last_nonblank_token
27656 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27657 my $is_pattern = 0;
27658 my $msg = "guessing that ? after $last_nonblank_token starts a ";
27660 if ( $i >= $max_token_index ) {
27661 $msg .= "conditional (no end to pattern found on the line)\n";
27666 my $next_token = $$rtokens[$i]; # first token after ?
27668 # look for a possible ending ? on this line..
27670 my $quote_depth = 0;
27671 my $quote_character = '';
27675 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27678 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27679 $quote_pos, $quote_depth, $max_token_index );
27683 # we didn't find an ending ? on this line,
27684 # so we bias towards conditional
27686 $msg .= "conditional (no ending ? on this line)\n";
27688 # we found an ending ?, so we bias towards a pattern
27692 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27694 $msg .= "pattern (found ending ? and pattern expected)\n";
27697 $msg .= "pattern (uncertain, but found ending ?)\n";
27701 return ( $is_pattern, $msg );
27704 sub guess_if_pattern_or_division {
27706 # this routine is called when we have encountered a / following an
27707 # unknown bareword, and we must decide if it starts a pattern or is a
27709 # input parameters:
27710 # $i - token index of the / starting possible pattern
27711 # output parameters:
27712 # $is_pattern = 0 if probably division, =1 if probably a pattern
27713 # msg = a warning or diagnostic message
27714 # USES GLOBAL VARIABLES: $last_nonblank_token
27715 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27716 my $is_pattern = 0;
27717 my $msg = "guessing that / after $last_nonblank_token starts a ";
27719 if ( $i >= $max_token_index ) {
27720 $msg .= "division (no end to pattern found on the line)\n";
27724 my $divide_expected =
27725 numerator_expected( $i, $rtokens, $max_token_index );
27727 my $next_token = $$rtokens[$i]; # first token after slash
27729 # look for a possible ending / on this line..
27731 my $quote_depth = 0;
27732 my $quote_character = '';
27736 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27739 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27740 $quote_pos, $quote_depth, $max_token_index );
27744 # we didn't find an ending / on this line,
27745 # so we bias towards division
27746 if ( $divide_expected >= 0 ) {
27748 $msg .= "division (no ending / on this line)\n";
27751 $msg = "multi-line pattern (division not possible)\n";
27757 # we found an ending /, so we bias towards a pattern
27760 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27762 if ( $divide_expected >= 0 ) {
27764 if ( $i - $ibeg > 60 ) {
27765 $msg .= "division (matching / too distant)\n";
27769 $msg .= "pattern (but division possible too)\n";
27775 $msg .= "pattern (division not possible)\n";
27780 if ( $divide_expected >= 0 ) {
27782 $msg .= "division (pattern not possible)\n";
27787 "pattern (uncertain, but division would not work here)\n";
27792 return ( $is_pattern, $msg );
27795 # try to resolve here-doc vs. shift by looking ahead for
27796 # non-code or the end token (currently only looks for end token)
27797 # returns 1 if it is probably a here doc, 0 if not
27798 sub guess_if_here_doc {
27800 # This is how many lines we will search for a target as part of the
27801 # guessing strategy. It is a constant because there is probably
27802 # little reason to change it.
27803 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
27805 use constant HERE_DOC_WINDOW => 40;
27807 my $next_token = shift;
27808 my $here_doc_expected = 0;
27811 my $msg = "checking <<";
27813 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
27817 if ( $line =~ /^$next_token$/ ) {
27818 $msg .= " -- found target $next_token ahead $k lines\n";
27819 $here_doc_expected = 1; # got it
27822 last if ( $k >= HERE_DOC_WINDOW );
27825 unless ($here_doc_expected) {
27827 if ( !defined($line) ) {
27828 $here_doc_expected = -1; # hit eof without seeing target
27829 $msg .= " -- must be shift; target $next_token not in file\n";
27832 else { # still unsure..taking a wild guess
27834 if ( !$is_constant{$current_package}{$next_token} ) {
27835 $here_doc_expected = 1;
27837 " -- guessing it's a here-doc ($next_token not a constant)\n";
27841 " -- guessing it's a shift ($next_token is a constant)\n";
27845 write_logfile_entry($msg);
27846 return $here_doc_expected;
27849 #########i#############################################################
27850 # Tokenizer Routines for scanning identifiers and related items
27851 #######################################################################
27853 sub scan_bare_identifier_do {
27855 # this routine is called to scan a token starting with an alphanumeric
27856 # variable or package separator, :: or '.
27857 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
27858 # $last_nonblank_type,@paren_type, $paren_depth
27860 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
27864 my $package = undef;
27868 # we have to back up one pretoken at a :: since each : is one pretoken
27869 if ( $tok eq '::' ) { $i_beg-- }
27870 if ( $tok eq '->' ) { $i_beg-- }
27871 my $pos_beg = $$rtoken_map[$i_beg];
27872 pos($input_line) = $pos_beg;
27879 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
27881 my $pos = pos($input_line);
27882 my $numc = $pos - $pos_beg;
27883 $tok = substr( $input_line, $pos_beg, $numc );
27885 # type 'w' includes anything without leading type info
27886 # ($,%,@,*) including something like abc::def::ghi
27890 if ( defined($2) ) { $sub_name = $2; }
27891 if ( defined($1) ) {
27894 # patch: don't allow isolated package name which just ends
27895 # in the old style package separator (single quote). Example:
27897 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
27901 $package =~ s/\'/::/g;
27902 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27903 $package =~ s/::$//;
27906 $package = $current_package;
27908 if ( $is_keyword{$tok} ) {
27913 # if it is a bareword..
27914 if ( $type eq 'w' ) {
27916 # check for v-string with leading 'v' type character
27917 # (This seems to have precedence over filehandle, type 'Y')
27918 if ( $tok =~ /^v\d[_\d]*$/ ) {
27920 # we only have the first part - something like 'v101' -
27922 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
27923 $pos = pos($input_line);
27924 $numc = $pos - $pos_beg;
27925 $tok = substr( $input_line, $pos_beg, $numc );
27929 # warn if this version can't handle v-strings
27930 report_v_string($tok);
27933 elsif ( $is_constant{$package}{$sub_name} ) {
27937 # bareword after sort has implied empty prototype; for example:
27938 # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
27939 # This has priority over whatever the user has specified.
27940 elsif ($last_nonblank_token eq 'sort'
27941 && $last_nonblank_type eq 'k' )
27946 # Note: strangely, perl does not seem to really let you create
27947 # functions which act like eval and do, in the sense that eval
27948 # and do may have operators following the final }, but any operators
27949 # that you create with prototype (&) apparently do not allow
27950 # trailing operators, only terms. This seems strange.
27951 # If this ever changes, here is the update
27952 # to make perltidy behave accordingly:
27954 # elsif ( $is_block_function{$package}{$tok} ) {
27955 # $tok='eval'; # patch to do braces like eval - doesn't work
27958 # FIXME: This could become a separate type to allow for different
27960 elsif ( $is_block_function{$package}{$sub_name} ) {
27964 elsif ( $is_block_list_function{$package}{$sub_name} ) {
27967 elsif ( $is_user_function{$package}{$sub_name} ) {
27969 $prototype = $user_function_prototype{$package}{$sub_name};
27972 # check for indirect object
27975 # added 2001-03-27: must not be followed immediately by '('
27977 ( $input_line !~ m/\G\(/gc )
27982 # preceded by keyword like 'print', 'printf' and friends
27983 $is_indirect_object_taker{$last_nonblank_token}
27985 # or preceded by something like 'print(' or 'printf('
27987 ( $last_nonblank_token eq '(' )
27988 && $is_indirect_object_taker{ $paren_type[$paren_depth]
27996 # may not be indirect object unless followed by a space
27997 if ( $input_line =~ m/\G\s+/gc ) {
28001 # Perl's indirect object notation is a very bad
28002 # thing and can cause subtle bugs, especially for
28003 # beginning programmers. And I haven't even been
28004 # able to figure out a sane warning scheme which
28005 # doesn't get in the way of good scripts.
28007 # Complain if a filehandle has any lower case
28008 # letters. This is suggested good practice.
28009 # Use 'sub_name' because something like
28010 # main::MYHANDLE is ok for filehandle
28011 if ( $sub_name =~ /[a-z]/ ) {
28013 # could be bug caused by older perltidy if
28015 if ( $input_line =~ m/\G\s*\(/gc ) {
28017 "Caution: unknown word '$tok' in indirect object slot\n"
28023 # bareword not followed by a space -- may not be filehandle
28024 # (may be function call defined in a 'use' statement)
28031 # Now we must convert back from character position
28032 # to pre_token index.
28033 # I don't think an error flag can occur here ..but who knows
28036 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28038 warning("scan_bare_identifier: Possibly invalid tokenization\n");
28042 # no match but line not blank - could be syntax error
28043 # perl will take '::' alone without complaint
28047 # change this warning to log message if it becomes annoying
28048 warning("didn't find identifier after leading ::\n");
28050 return ( $i, $tok, $type, $prototype );
28055 # This is the new scanner and will eventually replace scan_identifier.
28056 # Only type 'sub' and 'package' are implemented.
28057 # Token types $ * % @ & -> are not yet implemented.
28059 # Scan identifier following a type token.
28060 # The type of call depends on $id_scan_state: $id_scan_state = ''
28061 # for starting call, in which case $tok must be the token defining
28064 # If the type token is the last nonblank token on the line, a value
28065 # of $id_scan_state = $tok is returned, indicating that further
28066 # calls must be made to get the identifier. If the type token is
28067 # not the last nonblank token on the line, the identifier is
28068 # scanned and handled and a value of '' is returned.
28069 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
28070 # $statement_type, $tokenizer_self
28072 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
28076 my ( $i_beg, $pos_beg );
28078 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
28079 #my ($a,$b,$c) = caller;
28080 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
28082 # on re-entry, start scanning at first token on the line
28083 if ($id_scan_state) {
28088 # on initial entry, start scanning just after type token
28091 $id_scan_state = $tok;
28095 # find $i_beg = index of next nonblank token,
28096 # and handle empty lines
28097 my $blank_line = 0;
28098 my $next_nonblank_token = $$rtokens[$i_beg];
28099 if ( $i_beg > $max_token_index ) {
28104 # only a '#' immediately after a '$' is not a comment
28105 if ( $next_nonblank_token eq '#' ) {
28106 unless ( $tok eq '$' ) {
28111 if ( $next_nonblank_token =~ /^\s/ ) {
28112 ( $next_nonblank_token, $i_beg ) =
28113 find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
28114 $max_token_index );
28115 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
28121 # handle non-blank line; identifier, if any, must follow
28122 unless ($blank_line) {
28124 if ( $id_scan_state eq 'sub' ) {
28125 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
28126 $input_line, $i, $i_beg,
28127 $tok, $type, $rtokens,
28128 $rtoken_map, $id_scan_state, $max_token_index
28132 elsif ( $id_scan_state eq 'package' ) {
28133 ( $i, $tok, $type ) =
28134 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
28135 $rtoken_map, $max_token_index );
28136 $id_scan_state = '';
28140 warning("invalid token in scan_id: $tok\n");
28141 $id_scan_state = '';
28145 if ( $id_scan_state && ( !defined($type) || !$type ) ) {
28147 # shouldn't happen:
28149 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
28151 report_definite_bug();
28154 TOKENIZER_DEBUG_FLAG_NSCAN && do {
28156 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
28158 return ( $i, $tok, $type, $id_scan_state );
28161 sub check_prototype {
28162 my ( $proto, $package, $subname ) = @_;
28163 return unless ( defined($package) && defined($subname) );
28164 if ( defined($proto) ) {
28165 $proto =~ s/^\s*\(\s*//;
28166 $proto =~ s/\s*\)$//;
28168 $is_user_function{$package}{$subname} = 1;
28169 $user_function_prototype{$package}{$subname} = "($proto)";
28171 # prototypes containing '&' must be treated specially..
28172 if ( $proto =~ /\&/ ) {
28174 # right curly braces of prototypes ending in
28175 # '&' may be followed by an operator
28176 if ( $proto =~ /\&$/ ) {
28177 $is_block_function{$package}{$subname} = 1;
28180 # right curly braces of prototypes NOT ending in
28181 # '&' may NOT be followed by an operator
28182 elsif ( $proto !~ /\&$/ ) {
28183 $is_block_list_function{$package}{$subname} = 1;
28188 $is_constant{$package}{$subname} = 1;
28192 $is_user_function{$package}{$subname} = 1;
28196 sub do_scan_package {
28198 # do_scan_package parses a package name
28199 # it is called with $i_beg equal to the index of the first nonblank
28200 # token following a 'package' token.
28201 # USES GLOBAL VARIABLES: $current_package,
28203 # package NAMESPACE
28204 # package NAMESPACE VERSION
28205 # package NAMESPACE BLOCK
28206 # package NAMESPACE VERSION BLOCK
28208 # If VERSION is provided, package sets the $VERSION variable in the given
28209 # namespace to a version object with the VERSION provided. VERSION must be
28210 # a "strict" style version number as defined by the version module: a
28211 # positive decimal number (integer or decimal-fraction) without
28212 # exponentiation or else a dotted-decimal v-string with a leading 'v'
28213 # character and at least three components.
28214 # reference http://perldoc.perl.org/functions/package.html
28216 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
28219 my $package = undef;
28220 my $pos_beg = $$rtoken_map[$i_beg];
28221 pos($input_line) = $pos_beg;
28223 # handle non-blank line; package name, if any, must follow
28224 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
28226 $package = ( defined($1) && $1 ) ? $1 : 'main';
28227 $package =~ s/\'/::/g;
28228 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28229 $package =~ s/::$//;
28230 my $pos = pos($input_line);
28231 my $numc = $pos - $pos_beg;
28232 $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
28235 # Now we must convert back from character position
28236 # to pre_token index.
28237 # I don't think an error flag can occur here ..but ?
28240 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28241 if ($error) { warning("Possibly invalid package\n") }
28242 $current_package = $package;
28244 # we should now have package NAMESPACE
28245 # now expecting VERSION, BLOCK, or ; to follow ...
28246 # package NAMESPACE VERSION
28247 # package NAMESPACE BLOCK
28248 # package NAMESPACE VERSION BLOCK
28249 my ( $next_nonblank_token, $i_next ) =
28250 find_next_nonblank_token( $i, $rtokens, $max_token_index );
28252 # check that something recognizable follows, but do not parse.
28253 # A VERSION number will be parsed later as a number or v-string in the
28254 # normal way. What is important is to set the statement type if
28255 # everything looks okay so that the operator_expected() routine
28256 # knows that the number is in a package statement.
28257 # Examples of valid primitive tokens that might follow are:
28258 # 1235 . ; { } v3 v
28259 if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
28260 $statement_type = $tok;
28264 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
28269 # no match but line not blank --
28270 # could be a label with name package, like package: , for example.
28275 return ( $i, $tok, $type );
28278 sub scan_identifier_do {
28280 # This routine assembles tokens into identifiers. It maintains a
28281 # scan state, id_scan_state. It updates id_scan_state based upon
28282 # current id_scan_state and token, and returns an updated
28283 # id_scan_state and the next index after the identifier.
28284 # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
28285 # $last_nonblank_type
28287 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
28288 $expecting, $container_type )
28292 my $tok_begin = $$rtokens[$i_begin];
28293 if ( $tok_begin eq ':' ) { $tok_begin = '::' }
28294 my $id_scan_state_begin = $id_scan_state;
28295 my $identifier_begin = $identifier;
28296 my $tok = $tok_begin;
28299 my $in_prototype_or_signature = $container_type =~ /^sub/;
28301 # these flags will be used to help figure out the type:
28302 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
28305 # allow old package separator (') except in 'use' statement
28306 my $allow_tick = ( $last_nonblank_token ne 'use' );
28308 # get started by defining a type and a state if necessary
28309 unless ($id_scan_state) {
28310 $context = UNKNOWN_CONTEXT;
28312 # fixup for digraph
28313 if ( $tok eq '>' ) {
28317 $identifier = $tok;
28319 if ( $tok eq '$' || $tok eq '*' ) {
28320 $id_scan_state = '$';
28321 $context = SCALAR_CONTEXT;
28323 elsif ( $tok eq '%' || $tok eq '@' ) {
28324 $id_scan_state = '$';
28325 $context = LIST_CONTEXT;
28327 elsif ( $tok eq '&' ) {
28328 $id_scan_state = '&';
28330 elsif ( $tok eq 'sub' or $tok eq 'package' ) {
28331 $saw_alpha = 0; # 'sub' is considered type info here
28332 $id_scan_state = '$';
28333 $identifier .= ' '; # need a space to separate sub from sub name
28335 elsif ( $tok eq '::' ) {
28336 $id_scan_state = 'A';
28338 elsif ( $tok =~ /^[A-Za-z_]/ ) {
28339 $id_scan_state = ':';
28341 elsif ( $tok eq '->' ) {
28342 $id_scan_state = '$';
28347 my ( $a, $b, $c ) = caller;
28348 warning("Program Bug: scan_identifier given bad token = $tok \n");
28349 warning(" called from sub $a line: $c\n");
28350 report_definite_bug();
28352 $saw_type = !$saw_alpha;
28356 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
28359 # now loop to gather the identifier
28362 while ( $i < $max_token_index ) {
28363 $i_save = $i unless ( $tok =~ /^\s*$/ );
28364 $tok = $$rtokens[ ++$i ];
28366 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
28371 if ( $id_scan_state eq '$' ) { # starting variable name
28373 if ( $tok eq '$' ) {
28375 $identifier .= $tok;
28377 # we've got a punctuation variable if end of line (punct.t)
28378 if ( $i == $max_token_index ) {
28380 $id_scan_state = '';
28385 # POSTDEFREF ->@ ->% ->& ->*
28386 elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
28387 $identifier .= $tok;
28389 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
28391 $id_scan_state = ':'; # now need ::
28392 $identifier .= $tok;
28394 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28396 $id_scan_state = ':'; # now need ::
28397 $identifier .= $tok;
28399 # Perl will accept leading digits in identifiers,
28400 # although they may not always produce useful results.
28401 # Something like $main::0 is ok. But this also works:
28403 # sub howdy::123::bubba{ print "bubba $54321!\n" }
28404 # howdy::123::bubba();
28407 elsif ( $tok =~ /^[0-9]/ ) { # numeric
28409 $id_scan_state = ':'; # now need ::
28410 $identifier .= $tok;
28412 elsif ( $tok eq '::' ) {
28413 $id_scan_state = 'A';
28414 $identifier .= $tok;
28417 # $# and POSTDEFREF ->$#
28418 elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
28419 $identifier .= $tok; # keep same state, a $ could follow
28421 elsif ( $tok eq '{' ) {
28423 # check for something like ${#} or ${©}
28424 ##if ( $identifier eq '$'
28428 || $identifier eq '@'
28429 || $identifier eq '$#'
28431 && $i + 2 <= $max_token_index
28432 && $$rtokens[ $i + 2 ] eq '}'
28433 && $$rtokens[ $i + 1 ] !~ /[\s\w]/
28436 my $next2 = $$rtokens[ $i + 2 ];
28437 my $next1 = $$rtokens[ $i + 1 ];
28438 $identifier .= $tok . $next1 . $next2;
28440 $id_scan_state = '';
28444 # skip something like ${xxx} or ->{
28445 $id_scan_state = '';
28447 # if this is the first token of a line, any tokens for this
28448 # identifier have already been accumulated
28449 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
28454 # space ok after leading $ % * & @
28455 elsif ( $tok =~ /^\s*$/ ) {
28457 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
28459 if ( length($identifier) > 1 ) {
28460 $id_scan_state = '';
28462 $type = 'i'; # probably punctuation variable
28467 # spaces after $'s are common, and space after @
28468 # is harmless, so only complain about space
28469 # after other type characters. Space after $ and
28470 # @ will be removed in formatting. Report space
28471 # after % and * because they might indicate a
28472 # parsing error. In other words '% ' might be a
28473 # modulo operator. Delete this warning if it
28475 if ( $identifier !~ /^[\@\$]$/ ) {
28477 "Space in identifier, following $identifier\n";
28483 # space after '->' is ok
28485 elsif ( $tok eq '^' ) {
28487 # check for some special variables like $^W
28488 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28489 $identifier .= $tok;
28490 $id_scan_state = 'A';
28492 # Perl accepts '$^]' or '@^]', but
28493 # there must not be a space before the ']'.
28494 my $next1 = $$rtokens[ $i + 1 ];
28495 if ( $next1 eq ']' ) {
28497 $identifier .= $next1;
28498 $id_scan_state = "";
28503 $id_scan_state = '';
28506 else { # something else
28508 if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
28509 $id_scan_state = '';
28511 $type = 'i'; # probably punctuation variable
28515 # check for various punctuation variables
28516 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28517 $identifier .= $tok;
28520 # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
28521 elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
28522 $identifier .= $tok;
28525 elsif ( $identifier eq '$#' ) {
28527 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
28529 # perl seems to allow just these: $#: $#- $#+
28530 elsif ( $tok =~ /^[\:\-\+]$/ ) {
28532 $identifier .= $tok;
28536 write_logfile_entry( 'Use of $# is deprecated' . "\n" );
28539 elsif ( $identifier eq '$$' ) {
28541 # perl does not allow references to punctuation
28542 # variables without braces. For example, this
28546 # You would have to use
28550 if ( $tok eq '{' ) { $type = 't' }
28551 else { $type = 'i' }
28553 elsif ( $identifier eq '->' ) {
28558 if ( length($identifier) == 1 ) { $identifier = ''; }
28560 $id_scan_state = '';
28564 elsif ( $id_scan_state eq '&' ) { # starting sub call?
28566 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
28567 $id_scan_state = ':'; # now need ::
28569 $identifier .= $tok;
28571 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
28572 $id_scan_state = ':'; # now need ::
28574 $identifier .= $tok;
28576 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28577 $id_scan_state = ':'; # now need ::
28579 $identifier .= $tok;
28581 elsif ( $tok =~ /^\s*$/ ) { # allow space
28583 elsif ( $tok eq '::' ) { # leading ::
28584 $id_scan_state = 'A'; # accept alpha next
28585 $identifier .= $tok;
28587 elsif ( $tok eq '{' ) {
28588 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
28590 $id_scan_state = '';
28595 # punctuation variable?
28596 # testfile: cunningham4.pl
28598 # We have to be careful here. If we are in an unknown state,
28599 # we will reject the punctuation variable. In the following
28600 # example the '&' is a binary operator but we are in an unknown
28601 # state because there is no sigil on 'Prima', so we don't
28602 # know what it is. But it is a bad guess that
28603 # '&~' is a function variable.
28604 # $self->{text}->{colorMap}->[
28605 # Prima::PodView::COLOR_CODE_FOREGROUND
28606 # & ~tb::COLOR_INDEX ] =
28607 # $sec->{ColorCode}
28608 if ( $identifier eq '&' && $expecting ) {
28609 $identifier .= $tok;
28616 $id_scan_state = '';
28620 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
28622 if ( $tok =~ /^[A-Za-z_]/ ) { # found it
28623 $identifier .= $tok;
28624 $id_scan_state = ':'; # now need ::
28627 elsif ( $tok eq "'" && $allow_tick ) {
28628 $identifier .= $tok;
28629 $id_scan_state = ':'; # now need ::
28632 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28633 $identifier .= $tok;
28634 $id_scan_state = ':'; # now need ::
28637 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28638 $id_scan_state = '(';
28639 $identifier .= $tok;
28641 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28642 $id_scan_state = ')';
28643 $identifier .= $tok;
28646 $id_scan_state = '';
28651 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
28653 if ( $tok eq '::' ) { # got it
28654 $identifier .= $tok;
28655 $id_scan_state = 'A'; # now require alpha
28657 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
28658 $identifier .= $tok;
28659 $id_scan_state = ':'; # now need ::
28662 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
28663 $identifier .= $tok;
28664 $id_scan_state = ':'; # now need ::
28667 elsif ( $tok eq "'" && $allow_tick ) { # tick
28669 if ( $is_keyword{$identifier} ) {
28670 $id_scan_state = ''; # that's all
28674 $identifier .= $tok;
28677 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28678 $id_scan_state = '(';
28679 $identifier .= $tok;
28681 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28682 $id_scan_state = ')';
28683 $identifier .= $tok;
28686 $id_scan_state = ''; # that's all
28691 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
28693 if ( $tok eq '(' ) { # got it
28694 $identifier .= $tok;
28695 $id_scan_state = ')'; # now find the end of it
28697 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
28698 $identifier .= $tok;
28701 $id_scan_state = ''; # that's all - no prototype
28706 elsif ( $id_scan_state eq ')' ) { # looking for ) to end
28708 if ( $tok eq ')' ) { # got it
28709 $identifier .= $tok;
28710 $id_scan_state = ''; # all done
28713 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
28714 $identifier .= $tok;
28716 else { # probable error in script, but keep going
28717 warning("Unexpected '$tok' while seeking end of prototype\n");
28718 $identifier .= $tok;
28721 else { # can get here due to error in initialization
28722 $id_scan_state = '';
28728 if ( $id_scan_state eq ')' ) {
28729 warning("Hit end of line while seeking ) to end prototype\n");
28732 # once we enter the actual identifier, it may not extend beyond
28733 # the end of the current line
28734 if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
28735 $id_scan_state = '';
28737 if ( $i < 0 ) { $i = 0 }
28744 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
28747 else { $type = 'i' }
28749 elsif ( $identifier eq '->' ) {
28753 ( length($identifier) > 1 )
28755 # In something like '@$=' we have an identifier '@$'
28756 # In something like '$${' we have type '$$' (and only
28757 # part of an identifier)
28758 && !( $identifier =~ /\$$/ && $tok eq '{' )
28759 && ( $identifier !~ /^(sub |package )$/ )
28764 else { $type = 't' }
28766 elsif ($saw_alpha) {
28768 # type 'w' includes anything without leading type info
28769 # ($,%,@,*) including something like abc::def::ghi
28774 } # this can happen on a restart
28778 $tok = $identifier;
28779 if ($message) { write_logfile_entry($message) }
28786 TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
28787 my ( $a, $b, $c ) = caller;
28789 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
28791 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
28793 return ( $i, $tok, $type, $id_scan_state, $identifier );
28798 # saved package and subnames in case prototype is on separate line
28799 my ( $package_saved, $subname_saved );
28803 # do_scan_sub parses a sub name and prototype
28804 # it is called with $i_beg equal to the index of the first nonblank
28805 # token following a 'sub' token.
28807 # TODO: add future error checks to be sure we have a valid
28808 # sub name. For example, 'sub &doit' is wrong. Also, be sure
28809 # a name is given if and only if a non-anonymous sub is
28811 # USES GLOBAL VARS: $current_package, $last_nonblank_token,
28812 # $in_attribute_list, %saw_function_definition,
28816 $input_line, $i, $i_beg,
28817 $tok, $type, $rtokens,
28818 $rtoken_map, $id_scan_state, $max_token_index
28820 $id_scan_state = ""; # normally we get everything in one call
28821 my $subname = undef;
28822 my $package = undef;
28827 my $pos_beg = $$rtoken_map[$i_beg];
28828 pos($input_line) = $pos_beg;
28830 # Look for the sub NAME
28832 $input_line =~ m/\G\s*
28833 ((?:\w*(?:'|::))*) # package - something that ends in :: or '
28834 (\w+) # NAME - required
28841 $package = ( defined($1) && $1 ) ? $1 : $current_package;
28842 $package =~ s/\'/::/g;
28843 if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28844 $package =~ s/::$//;
28845 my $pos = pos($input_line);
28846 my $numc = $pos - $pos_beg;
28847 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
28851 # Now look for PROTO ATTRS
28852 # Look for prototype/attributes which are usually on the same
28853 # line as the sub name but which might be on a separate line.
28854 # For example, we might have an anonymous sub with attributes,
28855 # or a prototype on a separate line from its sub name
28857 # NOTE: We only want to parse PROTOTYPES here. If we see anything that
28858 # does not look like a prototype, we assume it is a SIGNATURE and we
28859 # will stop and let the the standard tokenizer handle it. In
28860 # particular, we stop if we see any nested parens, braces, or commas.
28861 my $saw_opening_paren = $input_line =~ /\G\s*\(/;
28863 $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
28864 (\s*:)? # ATTRS leading ':'
28872 # If we also found the sub name on this call then append PROTO.
28873 # This is not necessary but for compatability with previous
28874 # versions when the -csc flag is used:
28875 if ( $match && $proto ) {
28880 # Handle prototype on separate line from subname
28881 if ($subname_saved) {
28882 $package = $package_saved;
28883 $subname = $subname_saved;
28884 $tok = $last_nonblank_token;
28891 # ATTRS: if there are attributes, back up and let the ':' be
28892 # found later by the scanner.
28893 my $pos = pos($input_line);
28895 $pos -= length($attrs);
28898 my $next_nonblank_token = $tok;
28900 # catch case of line with leading ATTR ':' after anonymous sub
28901 if ( $pos == $pos_beg && $tok eq ':' ) {
28903 $in_attribute_list = 1;
28906 # Otherwise, if we found a match we must convert back from
28907 # string position to the pre_token index for continued parsing.
28910 # I don't think an error flag can occur here ..but ?
28912 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
28913 $max_token_index );
28914 if ($error) { warning("Possibly invalid sub\n") }
28916 # check for multiple definitions of a sub
28917 ( $next_nonblank_token, my $i_next ) =
28918 find_next_nonblank_token_on_this_line( $i, $rtokens,
28919 $max_token_index );
28922 if ( $next_nonblank_token =~ /^(\s*|#)$/ )
28923 { # skip blank or side comment
28924 my ( $rpre_tokens, $rpre_types ) =
28925 peek_ahead_for_n_nonblank_pre_tokens(1);
28926 if ( defined($rpre_tokens) && @$rpre_tokens ) {
28927 $next_nonblank_token = $rpre_tokens->[0];
28930 $next_nonblank_token = '}';
28933 $package_saved = "";
28934 $subname_saved = "";
28936 # See what's next...
28937 if ( $next_nonblank_token eq '{' ) {
28940 # Check for multiple definitions of a sub, but
28941 # it is ok to have multiple sub BEGIN, etc,
28942 # so we do not complain if name is all caps
28943 if ( $saw_function_definition{$package}{$subname}
28944 && $subname !~ /^[A-Z]+$/ )
28946 my $lno = $saw_function_definition{$package}{$subname};
28948 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
28951 $saw_function_definition{$package}{$subname} =
28952 $tokenizer_self->{_last_line_number};
28955 elsif ( $next_nonblank_token eq ';' ) {
28957 elsif ( $next_nonblank_token eq '}' ) {
28960 # ATTRS - if an attribute list follows, remember the name
28961 # of the sub so the next opening brace can be labeled.
28962 # Setting 'statement_type' causes any ':'s to introduce
28964 elsif ( $next_nonblank_token eq ':' ) {
28965 $statement_type = $tok;
28968 # if we stopped before an open paren ...
28969 elsif ( $next_nonblank_token eq '(' ) {
28971 # If we DID NOT see this paren above then it must be on the
28972 # next line so we will set a flag to come back here and see if
28973 # it is a PROTOTYPE
28975 # Otherwise, we assume it is a SIGNATURE rather than a
28976 # PROTOTYPE and let the normal tokenizer handle it as a list
28977 if ( !$saw_opening_paren ) {
28978 $id_scan_state = 'sub'; # we must come back to get proto
28979 $package_saved = $package;
28980 $subname_saved = $subname;
28982 $statement_type = $tok;
28984 elsif ($next_nonblank_token) { # EOF technically ok
28986 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
28989 check_prototype( $proto, $package, $subname );
28992 # no match but line not blank
28995 return ( $i, $tok, $type, $id_scan_state );
28999 #########i###############################################################
29000 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
29001 #########################################################################
29003 sub find_next_nonblank_token {
29004 my ( $i, $rtokens, $max_token_index ) = @_;
29006 if ( $i >= $max_token_index ) {
29007 if ( !peeked_ahead() ) {
29010 peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
29013 my $next_nonblank_token = $$rtokens[ ++$i ];
29015 if ( $next_nonblank_token =~ /^\s*$/ ) {
29016 $next_nonblank_token = $$rtokens[ ++$i ];
29018 return ( $next_nonblank_token, $i );
29021 sub numerator_expected {
29023 # this is a filter for a possible numerator, in support of guessing
29024 # for the / pattern delimiter token.
29029 # Note: I am using the convention that variables ending in
29030 # _expected have these 3 possible values.
29031 my ( $i, $rtokens, $max_token_index ) = @_;
29032 my $next_token = $$rtokens[ $i + 1 ];
29033 if ( $next_token eq '=' ) { $i++; } # handle /=
29034 my ( $next_nonblank_token, $i_next ) =
29035 find_next_nonblank_token( $i, $rtokens, $max_token_index );
29037 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
29042 if ( $next_nonblank_token =~ /^\s*$/ ) {
29051 sub pattern_expected {
29053 # This is the start of a filter for a possible pattern.
29054 # It looks at the token after a possible pattern and tries to
29055 # determine if that token could end a pattern.
29060 my ( $i, $rtokens, $max_token_index ) = @_;
29061 my $next_token = $$rtokens[ $i + 1 ];
29062 if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
29063 my ( $next_nonblank_token, $i_next ) =
29064 find_next_nonblank_token( $i, $rtokens, $max_token_index );
29066 # list of tokens which may follow a pattern
29067 # (can probably be expanded)
29068 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
29074 if ( $next_nonblank_token =~ /^\s*$/ ) {
29083 sub find_next_nonblank_token_on_this_line {
29084 my ( $i, $rtokens, $max_token_index ) = @_;
29085 my $next_nonblank_token;
29087 if ( $i < $max_token_index ) {
29088 $next_nonblank_token = $$rtokens[ ++$i ];
29090 if ( $next_nonblank_token =~ /^\s*$/ ) {
29092 if ( $i < $max_token_index ) {
29093 $next_nonblank_token = $$rtokens[ ++$i ];
29098 $next_nonblank_token = "";
29100 return ( $next_nonblank_token, $i );
29103 sub find_angle_operator_termination {
29105 # We are looking at a '<' and want to know if it is an angle operator.
29106 # We are to return:
29107 # $i = pretoken index of ending '>' if found, current $i otherwise
29108 # $type = 'Q' if found, '>' otherwise
29109 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
29112 pos($input_line) = 1 + $$rtoken_map[$i];
29116 # we just have to find the next '>' if a term is expected
29117 if ( $expecting == TERM ) { $filter = '[\>]' }
29119 # we have to guess if we don't know what is expected
29120 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
29122 # shouldn't happen - we shouldn't be here if operator is expected
29123 else { warning("Program Bug in find_angle_operator_termination\n") }
29125 # To illustrate what we might be looking at, in case we are
29126 # guessing, here are some examples of valid angle operators
29133 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
29134 # <${PREFIX}*img*.$IMAGE_TYPE>
29135 # <img*.$IMAGE_TYPE>
29136 # <Timg*.$IMAGE_TYPE>
29137 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
29139 # Here are some examples of lines which do not have angle operators:
29140 # return undef unless $self->[2]++ < $#{$self->[1]};
29143 # the following line from dlister.pl caused trouble:
29144 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
29146 # If the '<' starts an angle operator, it must end on this line and
29147 # it must not have certain characters like ';' and '=' in it. I use
29148 # this to limit the testing. This filter should be improved if
29151 if ( $input_line =~ /($filter)/g ) {
29155 # We MAY have found an angle operator termination if we get
29156 # here, but we need to do more to be sure we haven't been
29158 my $pos = pos($input_line);
29160 my $pos_beg = $$rtoken_map[$i];
29161 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
29163 # Reject if the closing '>' follows a '-' as in:
29164 # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
29165 if ( $expecting eq UNKNOWN ) {
29166 my $check = substr( $input_line, $pos - 2, 1 );
29167 if ( $check eq '-' ) {
29168 return ( $i, $type );
29172 ######################################debug#####
29173 #write_diagnostics( "ANGLE? :$str\n");
29174 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
29175 ######################################debug#####
29179 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
29181 # It may be possible that a quote ends midway in a pretoken.
29182 # If this happens, it may be necessary to split the pretoken.
29185 "Possible tokinization error..please check this line\n");
29186 report_possible_bug();
29189 # Now let's see where we stand....
29190 # OK if math op not possible
29191 if ( $expecting == TERM ) {
29194 # OK if there are no more than 2 pre-tokens inside
29195 # (not possible to write 2 token math between < and >)
29196 # This catches most common cases
29197 elsif ( $i <= $i_beg + 3 ) {
29198 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
29204 # Let's try a Brace Test: any braces inside must balance
29206 while ( $str =~ /\{/g ) { $br++ }
29207 while ( $str =~ /\}/g ) { $br-- }
29209 while ( $str =~ /\[/g ) { $sb++ }
29210 while ( $str =~ /\]/g ) { $sb-- }
29212 while ( $str =~ /\(/g ) { $pr++ }
29213 while ( $str =~ /\)/g ) { $pr-- }
29215 # if braces do not balance - not angle operator
29216 if ( $br || $sb || $pr ) {
29220 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
29223 # we should keep doing more checks here...to be continued
29224 # Tentatively accepting this as a valid angle operator.
29225 # There are lots more things that can be checked.
29228 "ANGLE-Guessing yes: $str expecting=$expecting\n");
29229 write_logfile_entry("Guessing angle operator here: $str\n");
29234 # didn't find ending >
29236 if ( $expecting == TERM ) {
29237 warning("No ending > for angle operator\n");
29241 return ( $i, $type );
29244 sub scan_number_do {
29246 # scan a number in any of the formats that Perl accepts
29247 # Underbars (_) are allowed in decimal numbers.
29248 # input parameters -
29249 # $input_line - the string to scan
29250 # $i - pre_token index to start scanning
29251 # $rtoken_map - reference to the pre_token map giving starting
29252 # character position in $input_line of token $i
29253 # output parameters -
29254 # $i - last pre_token index of the number just scanned
29255 # number - the number (characters); or undef if not a number
29257 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
29258 my $pos_beg = $$rtoken_map[$i];
29261 my $number = undef;
29262 my $type = $input_type;
29264 my $first_char = substr( $input_line, $pos_beg, 1 );
29266 # Look for bad starting characters; Shouldn't happen..
29267 if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
29268 warning("Program bug - scan_number given character $first_char\n");
29269 report_definite_bug();
29270 return ( $i, $type, $number );
29273 # handle v-string without leading 'v' character ('Two Dot' rule)
29275 # TODO: v-strings may contain underscores
29276 pos($input_line) = $pos_beg;
29277 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
29278 $pos = pos($input_line);
29279 my $numc = $pos - $pos_beg;
29280 $number = substr( $input_line, $pos_beg, $numc );
29282 report_v_string($number);
29285 # handle octal, hex, binary
29286 if ( !defined($number) ) {
29287 pos($input_line) = $pos_beg;
29288 if ( $input_line =~
29289 /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
29291 $pos = pos($input_line);
29292 my $numc = $pos - $pos_beg;
29293 $number = substr( $input_line, $pos_beg, $numc );
29299 if ( !defined($number) ) {
29300 pos($input_line) = $pos_beg;
29302 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
29303 $pos = pos($input_line);
29305 # watch out for things like 0..40 which would give 0. by this;
29306 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
29307 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
29311 my $numc = $pos - $pos_beg;
29312 $number = substr( $input_line, $pos_beg, $numc );
29317 # filter out non-numbers like e + - . e2 .e3 +e6
29318 # the rule: at least one digit, and any 'e' must be preceded by a digit
29320 $number !~ /\d/ # no digits
29321 || ( $number =~ /^(.*)[eE]/
29322 && $1 !~ /\d/ ) # or no digits before the 'e'
29326 $type = $input_type;
29327 return ( $i, $type, $number );
29330 # Found a number; now we must convert back from character position
29331 # to pre_token index. An error here implies user syntax error.
29332 # An example would be an invalid octal number like '009'.
29335 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
29336 if ($error) { warning("Possibly invalid number\n") }
29338 return ( $i, $type, $number );
29341 sub inverse_pretoken_map {
29343 # Starting with the current pre_token index $i, scan forward until
29344 # finding the index of the next pre_token whose position is $pos.
29345 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
29348 while ( ++$i <= $max_token_index ) {
29350 if ( $pos <= $$rtoken_map[$i] ) {
29352 # Let the calling routine handle errors in which we do not
29353 # land on a pre-token boundary. It can happen by running
29354 # perltidy on some non-perl scripts, for example.
29355 if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
29360 return ( $i, $error );
29363 sub find_here_doc {
29365 # find the target of a here document, if any
29366 # input parameters:
29367 # $i - token index of the second < of <<
29368 # ($i must be less than the last token index if this is called)
29369 # output parameters:
29370 # $found_target = 0 didn't find target; =1 found target
29371 # HERE_TARGET - the target string (may be empty string)
29372 # $i - unchanged if not here doc,
29373 # or index of the last token of the here target
29374 # $saw_error - flag noting unbalanced quote on here target
29375 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
29377 my $found_target = 0;
29378 my $here_doc_target = '';
29379 my $here_quote_character = '';
29381 my ( $next_nonblank_token, $i_next_nonblank, $next_token );
29382 $next_token = $$rtokens[ $i + 1 ];
29384 # perl allows a backslash before the target string (heredoc.t)
29386 if ( $next_token eq '\\' ) {
29388 $next_token = $$rtokens[ $i + 2 ];
29391 ( $next_nonblank_token, $i_next_nonblank ) =
29392 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
29394 if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
29397 my $quote_depth = 0;
29402 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
29405 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
29406 $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
29408 if ($in_quote) { # didn't find end of quote, so no target found
29410 if ( $expecting == TERM ) {
29412 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
29417 else { # found ending quote
29422 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
29423 $tokj = $$rtokens[$j];
29425 # we have to remove any backslash before the quote character
29426 # so that the here-doc-target exactly matches this string
29430 && $$rtokens[ $j + 1 ] eq $here_quote_character );
29431 $here_doc_target .= $tokj;
29436 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
29438 write_logfile_entry(
29439 "found blank here-target after <<; suggest using \"\"\n");
29442 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
29444 my $here_doc_expected;
29445 if ( $expecting == UNKNOWN ) {
29446 $here_doc_expected = guess_if_here_doc($next_token);
29449 $here_doc_expected = 1;
29452 if ($here_doc_expected) {
29454 $here_doc_target = $next_token;
29461 if ( $expecting == TERM ) {
29463 write_logfile_entry("Note: bare here-doc operator <<\n");
29470 # patch to neglect any prepended backslash
29471 if ( $found_target && $backslash ) { $i++ }
29473 return ( $found_target, $here_doc_target, $here_quote_character, $i,
29479 # follow (or continue following) quoted string(s)
29480 # $in_quote return code:
29481 # 0 - ok, found end
29482 # 1 - still must find end of quote whose target is $quote_character
29483 # 2 - still looking for end of first of two quotes
29485 # Returns updated strings:
29486 # $quoted_string_1 = quoted string seen while in_quote=1
29487 # $quoted_string_2 = quoted string seen while in_quote=2
29489 $i, $in_quote, $quote_character,
29490 $quote_pos, $quote_depth, $quoted_string_1,
29491 $quoted_string_2, $rtokens, $rtoken_map,
29495 my $in_quote_starting = $in_quote;
29498 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
29501 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29504 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
29505 $quote_pos, $quote_depth, $max_token_index );
29506 $quoted_string_2 .= $quoted_string;
29507 if ( $in_quote == 1 ) {
29508 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
29509 $quote_character = '';
29512 $quoted_string_2 .= "\n";
29516 if ( $in_quote == 1 ) { # one (more) quote to follow
29519 $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29522 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
29523 $quote_pos, $quote_depth, $max_token_index );
29524 $quoted_string_1 .= $quoted_string;
29525 if ( $in_quote == 1 ) {
29526 $quoted_string_1 .= "\n";
29529 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29530 $quoted_string_1, $quoted_string_2 );
29533 sub follow_quoted_string {
29535 # scan for a specific token, skipping escaped characters
29536 # if the quote character is blank, use the first non-blank character
29537 # input parameters:
29538 # $rtokens = reference to the array of tokens
29539 # $i = the token index of the first character to search
29540 # $in_quote = number of quoted strings being followed
29541 # $beginning_tok = the starting quote character
29542 # $quote_pos = index to check next for alphanumeric delimiter
29543 # output parameters:
29544 # $i = the token index of the ending quote character
29545 # $in_quote = decremented if found end, unchanged if not
29546 # $beginning_tok = the starting quote character
29547 # $quote_pos = index to check next for alphanumeric delimiter
29548 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
29549 # $quoted_string = the text of the quote (without quotation tokens)
29550 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
29553 my ( $tok, $end_tok );
29554 my $i = $i_beg - 1;
29555 my $quoted_string = "";
29557 TOKENIZER_DEBUG_FLAG_QUOTE && do {
29559 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
29562 # get the corresponding end token
29563 if ( $beginning_tok !~ /^\s*$/ ) {
29564 $end_tok = matching_end_token($beginning_tok);
29567 # a blank token means we must find and use the first non-blank one
29569 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
29571 while ( $i < $max_token_index ) {
29572 $tok = $$rtokens[ ++$i ];
29574 if ( $tok !~ /^\s*$/ ) {
29576 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
29577 $i = $max_token_index;
29581 if ( length($tok) > 1 ) {
29582 if ( $quote_pos <= 0 ) { $quote_pos = 1 }
29583 $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
29586 $beginning_tok = $tok;
29589 $end_tok = matching_end_token($beginning_tok);
29595 $allow_quote_comments = 1;
29600 # There are two different loops which search for the ending quote
29601 # character. In the rare case of an alphanumeric quote delimiter, we
29602 # have to look through alphanumeric tokens character-by-character, since
29603 # the pre-tokenization process combines multiple alphanumeric
29604 # characters, whereas for a non-alphanumeric delimiter, only tokens of
29605 # length 1 can match.
29607 ###################################################################
29608 # Case 1 (rare): loop for case of alphanumeric quote delimiter..
29609 # "quote_pos" is the position the current word to begin searching
29610 ###################################################################
29611 if ( $beginning_tok =~ /\w/ ) {
29613 # Note this because it is not recommended practice except
29614 # for obfuscated perl contests
29615 if ( $in_quote == 1 ) {
29616 write_logfile_entry(
29617 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
29620 while ( $i < $max_token_index ) {
29622 if ( $quote_pos == 0 || ( $i < 0 ) ) {
29623 $tok = $$rtokens[ ++$i ];
29625 if ( $tok eq '\\' ) {
29627 # retain backslash unless it hides the end token
29628 $quoted_string .= $tok
29629 unless $$rtokens[ $i + 1 ] eq $end_tok;
29631 last if ( $i >= $max_token_index );
29632 $tok = $$rtokens[ ++$i ];
29635 my $old_pos = $quote_pos;
29637 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
29641 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
29643 if ( $quote_pos > 0 ) {
29646 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
29650 if ( $quote_depth == 0 ) {
29656 $quoted_string .= substr( $tok, $old_pos );
29661 ########################################################################
29662 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
29663 ########################################################################
29666 while ( $i < $max_token_index ) {
29667 $tok = $$rtokens[ ++$i ];
29669 if ( $tok eq $end_tok ) {
29672 if ( $quote_depth == 0 ) {
29677 elsif ( $tok eq $beginning_tok ) {
29680 elsif ( $tok eq '\\' ) {
29682 # retain backslash unless it hides the beginning or end token
29683 $tok = $$rtokens[ ++$i ];
29684 $quoted_string .= '\\'
29685 unless ( $tok eq $end_tok || $tok eq $beginning_tok );
29687 $quoted_string .= $tok;
29690 if ( $i > $max_token_index ) { $i = $max_token_index }
29691 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
29695 sub indicate_error {
29696 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
29697 interrupt_logfile();
29699 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
29703 sub write_error_indicator_pair {
29704 my ( $line_number, $input_line, $pos, $carrat ) = @_;
29705 my ( $offset, $numbered_line, $underline ) =
29706 make_numbered_line( $line_number, $input_line, $pos );
29707 $underline = write_on_underline( $underline, $pos - $offset, $carrat );
29708 warning( $numbered_line . "\n" );
29709 $underline =~ s/\s*$//;
29710 warning( $underline . "\n" );
29713 sub make_numbered_line {
29715 # Given an input line, its line number, and a character position of
29716 # interest, create a string not longer than 80 characters of the form
29717 # $lineno: sub_string
29718 # such that the sub_string of $str contains the position of interest
29720 # Here is an example of what we want, in this case we add trailing
29721 # '...' because the line is long.
29723 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29725 # Here is another example, this time in which we used leading '...'
29726 # because of excessive length:
29728 # 2: ... er of the World Wide Web Consortium's
29730 # input parameters are:
29731 # $lineno = line number
29732 # $str = the text of the line
29733 # $pos = position of interest (the error) : 0 = first character
29736 # - $offset = an offset which corrects the position in case we only
29737 # display part of a line, such that $pos-$offset is the effective
29738 # position from the start of the displayed line.
29739 # - $numbered_line = the numbered line as above,
29740 # - $underline = a blank 'underline' which is all spaces with the same
29741 # number of characters as the numbered line.
29743 my ( $lineno, $str, $pos ) = @_;
29744 my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
29745 my $excess = length($str) - $offset - 68;
29746 my $numc = ( $excess > 0 ) ? 68 : undef;
29748 if ( defined($numc) ) {
29749 if ( $offset == 0 ) {
29750 $str = substr( $str, $offset, $numc - 4 ) . " ...";
29753 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
29758 if ( $offset == 0 ) {
29761 $str = "... " . substr( $str, $offset + 4 );
29765 my $numbered_line = sprintf( "%d: ", $lineno );
29766 $offset -= length($numbered_line);
29767 $numbered_line .= $str;
29768 my $underline = " " x length($numbered_line);
29769 return ( $offset, $numbered_line, $underline );
29772 sub write_on_underline {
29774 # The "underline" is a string that shows where an error is; it starts
29775 # out as a string of blanks with the same length as the numbered line of
29776 # code above it, and we have to add marking to show where an error is.
29777 # In the example below, we want to write the string '--^' just below
29778 # the line of bad code:
29780 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29782 # We are given the current underline string, plus a position and a
29783 # string to write on it.
29785 # In the above example, there will be 2 calls to do this:
29786 # First call: $pos=19, pos_chr=^
29787 # Second call: $pos=16, pos_chr=---
29789 # This is a trivial thing to do with substr, but there is some
29792 my ( $underline, $pos, $pos_chr ) = @_;
29794 # check for error..shouldn't happen
29795 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
29798 my $excess = length($pos_chr) + $pos - length($underline);
29799 if ( $excess > 0 ) {
29800 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
29802 substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
29803 return ($underline);
29808 # Break a string, $str, into a sequence of preliminary tokens. We
29809 # are interested in these types of tokens:
29810 # words (type='w'), example: 'max_tokens_wanted'
29811 # digits (type = 'd'), example: '0755'
29812 # whitespace (type = 'b'), example: ' '
29813 # any other single character (i.e. punct; type = the character itself).
29814 # We cannot do better than this yet because we might be in a quoted
29815 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all
29817 my ( $str, $max_tokens_wanted ) = @_;
29819 # we return references to these 3 arrays:
29820 my @tokens = (); # array of the tokens themselves
29821 my @token_map = (0); # string position of start of each token
29822 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
29827 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
29830 # note that this must come before words!
29831 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
29834 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
29836 # single-character punctuation
29837 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
29841 return ( \@tokens, \@token_map, \@type );
29845 push @token_map, pos($str);
29847 } while ( --$max_tokens_wanted != 0 );
29849 return ( \@tokens, \@token_map, \@type );
29854 # this is an old debug routine
29855 my ( $rtokens, $rtoken_map ) = @_;
29856 my $num = scalar(@$rtokens);
29859 for ( $i = 0 ; $i < $num ; $i++ ) {
29860 my $len = length( $$rtokens[$i] );
29861 print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
29865 sub matching_end_token {
29867 # find closing character for a pattern
29868 my $beginning_token = shift;
29870 if ( $beginning_token eq '{' ) {
29873 elsif ( $beginning_token eq '[' ) {
29876 elsif ( $beginning_token eq '<' ) {
29879 elsif ( $beginning_token eq '(' ) {
29887 sub dump_token_types {
29891 # This should be the latest list of token types in use
29892 # adding NEW_TOKENS: add a comment here
29893 print $fh <<'END_OF_LIST';
29895 Here is a list of the token types currently used for lines of type 'CODE'.
29896 For the following tokens, the "type" of a token is just the token itself.
29898 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29899 ( ) <= >= == =~ !~ != ++ -- /= x=
29900 ... **= <<= >>= &&= ||= //= <=>
29901 , + - / * | % ! x ~ = \ ? : . < > ^ &
29903 The following additional token types are defined:
29906 b blank (white space)
29907 { indent: opening structural curly brace or square bracket or paren
29908 (code block, anonymous hash reference, or anonymous array reference)
29909 } outdent: right structural curly brace or square bracket or paren
29910 [ left non-structural square bracket (enclosing an array index)
29911 ] right non-structural square bracket
29912 ( left non-structural paren (all but a list right of an =)
29913 ) right non-structural paren
29914 L left non-structural curly brace (enclosing a key)
29915 R right non-structural curly brace
29916 ; terminal semicolon
29917 f indicates a semicolon in a "for" statement
29918 h here_doc operator <<
29920 Q indicates a quote or pattern
29921 q indicates a qw quote block
29923 C user-defined constant or constant function (with void prototype = ())
29924 U user-defined function taking parameters
29925 G user-defined function taking block parameter (like grep/map/eval)
29926 M (unused, but reserved for subroutine definition name)
29927 P (unused, but -html uses it to label pod text)
29928 t type indicater such as %,$,@,*,&,sub
29929 w bare word (perhaps a subroutine call)
29930 i identifier of some type (with leading %, $, @, *, &, sub, -> )
29933 F a file test operator (like -e)
29935 Z identifier in indirect object slot: may be file handle, object
29936 J LABEL: code block label
29937 j LABEL after next, last, redo, goto
29940 pp pre-increment operator ++
29941 mm pre-decrement operator --
29942 A : used as attribute separator
29944 Here are the '_line_type' codes used internally:
29945 SYSTEM - system-specific code before hash-bang line
29946 CODE - line of perl code (including comments)
29947 POD_START - line starting pod, such as '=head'
29948 POD - pod documentation text
29949 POD_END - last line of pod section, '=cut'
29950 HERE - text of here-document
29951 HERE_END - last line of here-doc (target word)
29952 FORMAT - format section
29953 FORMAT_END - last line of format section, '.'
29954 DATA_START - __DATA__ line
29955 DATA - unidentified text following __DATA__
29956 END_START - __END__ line
29957 END - unidentified text following __END__
29958 ERROR - we are in big trouble, probably not a perl script
29964 # These names are used in error messages
29965 @opening_brace_names = qw# '{' '[' '(' '?' #;
29966 @closing_brace_names = qw# '}' ']' ')' ':' #;
29969 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29970 <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
29972 @is_digraph{@digraphs} = (1) x scalar(@digraphs);
29974 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.=);
29975 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
29977 # make a hash of all valid token types for self-checking the tokenizer
29978 # (adding NEW_TOKENS : select a new character and add to this list)
29979 my @valid_token_types = qw#
29980 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
29981 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
29983 push( @valid_token_types, @digraphs );
29984 push( @valid_token_types, @trigraphs );
29985 push( @valid_token_types, ( '#', ',', 'CORE::' ) );
29986 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
29988 # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
29989 my @file_test_operators =
29990 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
29991 @is_file_test_operator{@file_test_operators} =
29992 (1) x scalar(@file_test_operators);
29994 # these functions have prototypes of the form (&), so when they are
29995 # followed by a block, that block MAY BE followed by an operator.
29996 # Smartmatch operator ~~ may be followed by anonymous hash or array ref
29997 @_ = qw( do eval );
29998 @is_block_operator{@_} = (1) x scalar(@_);
30000 # these functions allow an identifier in the indirect object slot
30001 @_ = qw( print printf sort exec system say);
30002 @is_indirect_object_taker{@_} = (1) x scalar(@_);
30004 # These tokens may precede a code block
30005 # patched for SWITCH/CASE/CATCH. Actually these could be removed
30006 # now and we could let the extended-syntax coding handle them
30008 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
30009 unless do while until eval for foreach map grep sort
30010 switch case given when catch);
30011 @is_code_block_token{@_} = (1) x scalar(@_);
30013 # I'll build the list of keywords incrementally
30016 # keywords and tokens after which a value or pattern is expected,
30017 # but not an operator. In other words, these should consume terms
30018 # to their right, or at least they are not expected to be followed
30019 # immediately by operators.
30020 my @value_requestor = qw(
30241 # patched above for SWITCH/CASE given/when err say
30242 # 'err' is a fairly safe addition.
30243 # TODO: 'default' still needed if appropriate
30244 # 'use feature' seen, but perltidy works ok without it.
30245 # Concerned that 'default' could break code.
30246 push( @Keywords, @value_requestor );
30248 # These are treated the same but are not keywords:
30253 push( @value_requestor, @extra_vr );
30255 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
30257 # this list contains keywords which do not look for arguments,
30258 # so that they might be followed by an operator, or at least
30260 my @operator_requestor = qw(
30284 push( @Keywords, @operator_requestor );
30286 # These are treated the same but are not considered keywords:
30293 push( @operator_requestor, @extra_or );
30295 @expecting_operator_token{@operator_requestor} =
30296 (1) x scalar(@operator_requestor);
30298 # these token TYPES expect trailing operator but not a term
30299 # note: ++ and -- are post-increment and decrement, 'C' = constant
30300 my @operator_requestor_types = qw( ++ -- C <> q );
30301 @expecting_operator_types{@operator_requestor_types} =
30302 (1) x scalar(@operator_requestor_types);
30304 # these token TYPES consume values (terms)
30305 # note: pp and mm are pre-increment and decrement
30306 # f=semicolon in for, F=file test operator
30307 my @value_requestor_type = qw#
30308 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
30309 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
30310 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
30311 f F pp mm Y p m U J G j >> << ^ t
30312 ~. ^. |. &. ^.= |.= &.=
30314 push( @value_requestor_type, ',' )
30315 ; # (perl doesn't like a ',' in a qw block)
30316 @expecting_term_types{@value_requestor_type} =
30317 (1) x scalar(@value_requestor_type);
30319 # Note: the following valid token types are not assigned here to
30320 # hashes requesting to be followed by values or terms, but are
30321 # instead currently hard-coded into sub operator_expected:
30322 # ) -> :: Q R Z ] b h i k n v w } #
30324 # For simple syntax checking, it is nice to have a list of operators which
30325 # will really be unhappy if not followed by a term. This includes most
30327 %really_want_term = %expecting_term_types;
30329 # with these exceptions...
30330 delete $really_want_term{'U'}; # user sub, depends on prototype
30331 delete $really_want_term{'F'}; # file test works on $_ if no following term
30332 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
30335 @_ = qw(q qq qw qx qr s y tr m);
30336 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
30338 # These keywords are handled specially in the tokenizer code:
30339 my @special_keywords = qw(
30355 push( @Keywords, @special_keywords );
30357 # Keywords after which list formatting may be used
30358 # WARNING: do not include |map|grep|eval or perl may die on
30359 # syntax errors (map1.t).
30360 my @keyword_taking_list = qw(
30434 @is_keyword_taking_list{@keyword_taking_list} =
30435 (1) x scalar(@keyword_taking_list);
30437 # These are not used in any way yet
30438 # my @unused_keywords = qw(
30444 # The list of keywords was originally extracted from function 'keyword' in
30445 # perl file toke.c version 5.005.03, using this utility, plus a
30446 # little editing: (file getkwd.pl):
30447 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
30448 # Add 'get' prefix where necessary, then split into the above lists.
30449 # This list should be updated as necessary.
30450 # The list should not contain these special variables:
30451 # ARGV DATA ENV SIG STDERR STDIN STDOUT
30454 @is_keyword{@Keywords} = (1) x scalar(@Keywords);