lib/Perl/Tidy.pm

   1 ############################################################
   2 #
   3 #    perltidy - a perl script indenter and formatter
   4 #
   5 #    Copyright (c) 2000-2003 by Steve Hancock
   6 #    Distributed under the GPL license agreement; see file COPYING
   7 #
   8 #    This program is free software; you can redistribute it and/or modify
   9 #    it under the terms of the GNU General Public License as published by
  10 #    the Free Software Foundation; either version 2 of the License, or
  11 #    (at your option) any later version.
  12 #
  13 #    This program is distributed in the hope that it will be useful,
  14 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 #    GNU General Public License for more details.
  17 #
  18 #    You should have received a copy of the GNU General Public License
  19 #    along with this program; if not, write to the Free Software
  20 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 #
  22 #    For brief instructions instructions, try 'perltidy -h'.
  23 #    For more complete documentation, try 'man perltidy'
  24 #    or visit http://perltidy.sourceforge.net
  25 #
  26 #    This script is an example of the default style.  It was formatted with:
  27 #
  28 #      perltidy Tidy.pm
  29 #
  30 #    Code Contributions:
  31 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  32 #        v-strings.
  33 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  34 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  35 #      Yves Orton supplied coding to help detect Windows versions.
  36 #      Axel Rose supplied a patch for MacPerl.
  37 #      Many others have supplied key ideas, suggestions, and bug reports;
  38 #        see the CHANGES file.
  39 #
  40 ############################################################
  41
  42 package Perl::Tidy;
  43 use 5.004;    # need IO::File from 5.004 or later
  44 BEGIN { $^W = 1; }    # turn on warnings
  45
  46 use strict;
  47 use Exporter;
  48 use Carp;
  49 $|++;
  50
  51 use vars qw{
  52   $VERSION
  53   @ISA
  54   @EXPORT
  55   $missing_file_spec
  56 };
  57
  58 @ISA    = qw( Exporter );
  59 @EXPORT = qw( &perltidy );
  60
  61 use IO::File;
  62 use File::Basename;
  63
  64 BEGIN {
  65     ( $VERSION = q($Id: Tidy.pm,v 1.46 2003/10/21 14:09:29 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  66 }
  67
  68 sub streamhandle {
  69
  70     # given filename and mode (r or w), create an object which:
  71     #   has a 'getline' method if mode='r', and
  72     #   has a 'print' method if mode='w'.
  73     # The objects also need a 'close' method.
  74     #
  75     # How the object is made:
  76     #
  77     # if $filename is:     Make object using:
  78     # ----------------     -----------------
  79     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
  80     # string               IO::File
  81     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
  82     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
  83     # object               object
  84     #                      (check for 'print' method for 'w' mode)
  85     #                      (check for 'getline' method for 'r' mode)
  86     my $ref = ref( my $filename = shift );
  87     my $mode = shift;
  88     my $New;
  89     my $fh;
  90
  91     # handle a reference
  92     if ($ref) {
  93         if ( $ref eq 'ARRAY' ) {
  94             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
  95         }
  96         elsif ( $ref eq 'SCALAR' ) {
  97             $New = sub { Perl::Tidy::IOScalar->new(@_) };
  98         }
  99         else {
 100
 101             # Accept an object with a getline method for reading. Note:
 102             # IO::File is built-in and does not respond to the defined
 103             # operator.  If this causes trouble, the check can be
 104             # skipped and we can just let it crash if there is no
 105             # getline.
 106             if ( $mode =~ /[rR]/ ) {
 107                 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 108                     $New = sub { $filename };
 109                 }
 110                 else {
 111                     $New = sub { undef };
 112                     confess <<EOM;
 113 ------------------------------------------------------------------------
 114 No 'getline' method is defined for object of class $ref
 115 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 116 ------------------------------------------------------------------------
 117 EOM
 118                 }
 119             }
 120
 121             # Accept an object with a print method for writing.
 122             # See note above about IO::File
 123             if ( $mode =~ /[wW]/ ) {
 124                 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 125                     $New = sub { $filename };
 126                 }
 127                 else {
 128                     $New = sub { undef };
 129                     confess <<EOM;
 130 ------------------------------------------------------------------------
 131 No 'print' method is defined for object of class $ref
 132 Please check your call to Perl::Tidy::perltidy. Trace follows.
 133 ------------------------------------------------------------------------
 134 EOM
 135                 }
 136             }
 137         }
 138     }
 139
 140     # handle a string
 141     else {
 142         if ( $filename eq '-' ) {
 143             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 144         }
 145         else {
 146             $New = sub { IO::File->new(@_) };
 147         }
 148     }
 149     $fh = $New->( $filename, $mode )
 150       or warn "Couldn't open file:$filename in mode:$mode : $!\n";
 151     return $fh, ( $ref or $filename );
 152 }
 153
 154 sub find_input_line_ending {
 155
 156     # Peek at a file and return first line ending character.
 157     # Quietly return undef in case of any trouble.
 158     my ($input_file) = @_;
 159     my $ending;
 160
 161     # silently ignore input from object or stdin
 162     if ( ref($input_file) || $input_file eq '-' ) {
 163         return $ending;
 164     }
 165     open( INFILE, $input_file ) || return $ending;
 166
 167     binmode INFILE;
 168     my $buf;
 169     read( INFILE, $buf, 1024 );
 170     close INFILE;
 171     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 172         my $test = $1;
 173
 174         # dos
 175         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 176
 177         # mac
 178         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 179
 180         # unix
 181         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 182
 183         # unknown
 184         else { }
 185     }
 186
 187     # no ending seen
 188     else { }
 189
 190     return $ending;
 191 }
 192
 193 sub catfile {
 194
 195     # concatenate a path and file basename
 196     # returns undef in case of error
 197
 198     BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 199
 200     # use File::Spec if we can
 201     unless ($missing_file_spec) {
 202         return File::Spec->catfile(@_);
 203     }
 204
 205     # Perl 5.004 systems may not have File::Spec so we'll make
 206     # a simple try.  We assume File::Basename is available.
 207     # return undef if not successful.
 208     my $name      = pop @_;
 209     my $path      = join '/', @_;
 210     my $test_file = $path . $name;
 211     my ( $test_name, $test_path ) = fileparse($test_file);
 212     return $test_file if ( $test_name eq $name );
 213     return undef      if ( $^O        eq 'VMS' );
 214
 215     # this should work at least for Windows and Unix:
 216     $test_file = $path . '/' . $name;
 217     ( $test_name, $test_path ) = fileparse($test_file);
 218     return $test_file if ( $test_name eq $name );
 219     return undef;
 220 }
 221
 222 sub make_temporary_filename {
 223
 224     # Make a temporary filename.
 225     #
 226     # The POSIX tmpnam() function tends to be unreliable for non-unix
 227     # systems (at least for the win32 systems that I've tested), so use
 228     # a pre-defined name.  A slight disadvantage of this is that two
 229     # perltidy runs in the same working directory may conflict.
 230     # However, the chance of that is small and managable by the user.
 231     # An alternative would be to check for the file's existance and use,
 232     # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
 233     # keep it simple.
 234     my $name = "perltidy.TMP";
 235     if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
 236         return $name;
 237     }
 238     eval "use POSIX qw(tmpnam)";
 239     if ($@) { return $name }
 240     use IO::File;
 241
 242     # just make a couple of tries before giving up and using the default
 243     for ( 0 .. 1 ) {
 244         my $tmpname = tmpnam();
 245         my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
 246         if ($fh) {
 247             $fh->close();
 248             return ($tmpname);
 249             last;
 250         }
 251     }
 252     return ($name);
 253 }
 254
 255 # Here is a map of the flow of data from the input source to the output
 256 # line sink:
 257 #
 258 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 259 #       input                         groups                 output
 260 #       lines   tokens      lines       of          lines    lines
 261 #                                      lines
 262 #
 263 # The names correspond to the package names responsible for the unit processes.
 264 #
 265 # The overall process is controlled by the "main" package.
 266 #
 267 # LineSource is the stream of input lines
 268 #
 269 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 270 # if necessary.  A token is any section of the input line which should be
 271 # manipulated as a single entity during formatting.  For example, a single
 272 # ',' character is a token, and so is an entire side comment.  It handles
 273 # the complexities of Perl syntax, such as distinguishing between '<<' as
 274 # a shift operator and as a here-document, or distinguishing between '/'
 275 # as a divide symbol and as a pattern delimiter.
 276 #
 277 # Formatter inserts and deletes whitespace between tokens, and breaks
 278 # sequences of tokens at appropriate points as output lines.  It bases its
 279 # decisions on the default rules as modified by any command-line options.
 280 #
 281 # VerticalAligner collects groups of lines together and tries to line up
 282 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 283 #
 284 # FileWriter simply writes lines to the output stream.
 285 #
 286 # The Logger package, not shown, records significant events and warning
 287 # messages.  It writes a .LOG file, which may be saved with a
 288 # '-log' or a '-g' flag.
 289
 290 {
 291
 292     # variables needed by interrupt handler:
 293     my $tokenizer;
 294     my $input_file;
 295
 296     # this routine may be called to give a status report if interrupted.  If a
 297     # parameter is given, it will call exit with that parameter.  This is no
 298     # longer used because it works under Unix but not under Windows.
 299     sub interrupt_handler {
 300
 301         my $exit_flag = shift;
 302         print STDERR "perltidy interrupted";
 303         if ($tokenizer) {
 304             my $input_line_number =
 305               Perl::Tidy::Tokenizer::get_input_line_number();
 306             print STDERR " at line $input_line_number";
 307         }
 308         if ($input_file) {
 309
 310             if ( ref $input_file ) { print STDERR " of reference to:" }
 311             else { print STDERR " of file:" }
 312             print STDERR " $input_file";
 313         }
 314         print STDERR "\n";
 315         exit $exit_flag if defined($exit_flag);
 316     }
 317
 318     sub perltidy {
 319
 320         my %defaults = (
 321             argv        => undef,
 322             destination => undef,
 323             formatter   => undef,
 324             logfile     => undef,
 325             errorfile   => undef,
 326             perltidyrc  => undef,
 327             source      => undef,
 328             stderr      => undef,
 329         );
 330
 331         # don't overwrite callers ARGV
 332         local @ARGV = @ARGV;
 333
 334         my %input_hash = @_;
 335         if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 336             local $" = ')(';
 337             my @good_keys = sort keys %defaults;
 338             @bad_keys = sort @bad_keys;
 339             confess <<EOM;
 340 ------------------------------------------------------------------------
 341 Unknown perltidy parameter : (@bad_keys)
 342 perltidy only understands : (@good_keys)
 343 ------------------------------------------------------------------------
 344
 345 EOM
 346         }
 347
 348         %input_hash = ( %defaults, %input_hash );
 349         my $argv               = $input_hash{'argv'};
 350         my $destination_stream = $input_hash{'destination'};
 351         my $errorfile_stream   = $input_hash{'errorfile'};
 352         my $logfile_stream     = $input_hash{'logfile'};
 353         my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 354         my $source_stream      = $input_hash{'source'};
 355         my $stderr_stream      = $input_hash{'stderr'};
 356         my $user_formatter     = $input_hash{'formatter'};
 357
 358         if ($user_formatter) {
 359
 360             # if the user defines a formatter, there is no output stream,
 361             # but we need a null stream to keep coding simple
 362             $destination_stream = Perl::Tidy::DevNull->new();
 363         }
 364
 365         # see if ARGV is overridden
 366         if ( defined($argv) ) {
 367
 368             my $rargv = ref $argv;
 369             if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
 370
 371             # ref to ARRAY
 372             if ($rargv) {
 373                 if ( $rargv eq 'ARRAY' ) {
 374                     @ARGV = @$argv;
 375                 }
 376                 else {
 377                     croak <<EOM;
 378 ------------------------------------------------------------------------
 379 Please check value of -argv in call to perltidy;
 380 it must be a string or ref to ARRAY but is: $rargv
 381 ------------------------------------------------------------------------
 382 EOM
 383                 }
 384             }
 385
 386             # string
 387             else {
 388                 my ( $rargv, $msg ) = parse_args($argv);
 389                 if ($msg) {
 390                     die <<EOM;
 391 Error parsing this string passed to to perltidy with 'argv':
 392 $msg
 393 EOM
 394                 }
 395                 @ARGV = @{$rargv};
 396             }
 397         }
 398
 399         # redirect STDERR if requested
 400         if ($stderr_stream) {
 401             my ( $fh_stderr, $stderr_file ) =
 402               Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 403             if ($fh_stderr) { *STDERR = $fh_stderr }
 404             else {
 405                 croak <<EOM;
 406 ------------------------------------------------------------------------
 407 Unable to redirect STDERR to $stderr_stream
 408 Please check value of -stderr in call to perltidy
 409 ------------------------------------------------------------------------
 410 EOM
 411             }
 412         }
 413
 414         my $rpending_complaint;
 415         $$rpending_complaint = "";
 416         my $rpending_logfile_message;
 417         $$rpending_logfile_message = "";
 418
 419         my ( $is_Windows, $Windows_type ) =
 420           look_for_Windows($rpending_complaint);
 421
 422         # VMS file names are restricted to a 40.40 format, so we append _tdy
 423         # instead of .tdy, etc. (but see also sub check_vms_filename)
 424         my $dot;
 425         my $dot_pattern;
 426         if ( $^O eq 'VMS' ) {
 427             $dot         = '_';
 428             $dot_pattern = '_';
 429         }
 430         else {
 431             $dot         = '.';
 432             $dot_pattern = '\.';    # must escape for use in regex
 433         }
 434
 435         # handle command line options
 436         my ( $rOpts, $config_file, $rraw_options, $saw_extrude ) =
 437           process_command_line(
 438             $perltidyrc_stream, $is_Windows,
 439             $Windows_type,      $rpending_complaint
 440           );
 441
 442         if ($user_formatter) {
 443             $rOpts->{'format'} = 'user';
 444         }
 445
 446         # there must be one entry here for every possible format
 447         my %default_file_extension = (
 448             tidy => 'tdy',
 449             html => 'html',
 450             user => '',
 451         );
 452
 453         # be sure we have a valid output format
 454         unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 455             my $formats = join ' ',
 456               sort map { "'" . $_ . "'" } keys %default_file_extension;
 457             my $fmt = $rOpts->{'format'};
 458             die "-format='$fmt' but must be one of: $formats\n";
 459         }
 460
 461         my $output_extension =
 462           make_extension( $rOpts->{'output-file-extension'},
 463             $default_file_extension{ $rOpts->{'format'} }, $dot );
 464
 465         my $backup_extension =
 466           make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 467
 468         my $html_toc_extension =
 469           make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 470
 471         my $html_src_extension =
 472           make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 473
 474         # check for -b option;
 475         my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 476           && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
 477           && @ARGV > 0;    # silently ignore if standard input;
 478                            # this allows -b to be in a .perltidyrc file
 479                            # without error messages when running from an editor
 480
 481         # turn off -b with warnings in case of conflicts with other options
 482         if ($in_place_modify) {
 483             if ( $rOpts->{'standard-output'} ) {
 484                 warn "Ignoring -b; you may not use -b and -st together\n";
 485                 $in_place_modify = 0;
 486             }
 487             if ($destination_stream) {
 488                 warn
 489 "Ignoring -b; you may not specify a destination array and -b together\n";
 490                 $in_place_modify = 0;
 491             }
 492             if ($source_stream) {
 493                 warn
 494 "Ignoring -b; you may not specify a source array and -b together\n";
 495                 $in_place_modify = 0;
 496             }
 497             if ( $rOpts->{'outfile'} ) {
 498                 warn "Ignoring -b; you may not use -b and -o together\n";
 499                 $in_place_modify = 0;
 500             }
 501             if ( defined( $rOpts->{'output-path'} ) ) {
 502                 warn "Ignoring -b; you may not use -b and -opath together\n";
 503                 $in_place_modify = 0;
 504             }
 505         }
 506
 507         Perl::Tidy::Formatter::check_options($rOpts);
 508         if ( $rOpts->{'format'} eq 'html' ) {
 509             Perl::Tidy::HtmlWriter->check_options($rOpts);
 510         }
 511
 512         # make the pattern of file extensions that we shouldn't touch
 513         my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 514         if ($output_extension) {
 515             $_ = quotemeta($output_extension);
 516             $forbidden_file_extensions .= "|$_";
 517         }
 518         if ( $in_place_modify && $backup_extension ) {
 519             $_ = quotemeta($backup_extension);
 520             $forbidden_file_extensions .= "|$_";
 521         }
 522         $forbidden_file_extensions .= ')$';
 523
 524         # Create a diagnostics object if requested;
 525         # This is only useful for code development
 526         my $diagnostics_object = undef;
 527         if ( $rOpts->{'DIAGNOSTICS'} ) {
 528             $diagnostics_object = Perl::Tidy::Diagnostics->new();
 529         }
 530
 531         # no filenames should be given if input is from an array
 532         if ($source_stream) {
 533             if ( @ARGV > 0 ) {
 534                 die
 535 "You may not specify any filenames when a source array is given\n";
 536             }
 537
 538             # we'll stuff the source array into ARGV
 539             unshift( @ARGV, $source_stream );
 540
 541             # No special treatment for source stream which is a filename.
 542             # This will enable checks for binary files and other bad stuff.
 543             $source_stream = undef unless ref($source_stream);
 544         }
 545
 546         # use stdin by default if no source array and no args
 547         else {
 548             unshift( @ARGV, '-' ) unless @ARGV;
 549         }
 550
 551         # loop to process all files in argument list
 552         my $number_of_files = @ARGV;
 553         my $formatter       = undef;
 554         $tokenizer = undef;
 555         while ( $input_file = shift @ARGV ) {
 556             my $fileroot;
 557             my $input_file_permissions;
 558
 559             #---------------------------------------------------------------
 560             # determine the input file name
 561             #---------------------------------------------------------------
 562             if ($source_stream) {
 563                 $fileroot = "perltidy";
 564             }
 565             elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 566                 $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
 567                 $in_place_modify = 0;
 568             }
 569             else {
 570                 $fileroot = $input_file;
 571                 unless ( -e $input_file ) {
 572
 573                     # file doesn't exist - check for a file glob
 574                     if ( $input_file =~ /([\?\*\[\{])/ ) {
 575
 576                         # Windows shell may not remove quotes, so do it
 577                         my $input_file = $input_file;
 578                         if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 579                         if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 580                         my $pattern = fileglob_to_re($input_file);
 581                         eval "/$pattern/";
 582                         if ( !$@ && opendir( DIR, './' ) ) {
 583                             my @files =
 584                               grep { /$pattern/ && !-d $_ } readdir(DIR);
 585                             closedir(DIR);
 586                             if (@files) {
 587                                 unshift @ARGV, @files;
 588                                 next;
 589                             }
 590                         }
 591                     }
 592                     print "skipping file: '$input_file': no matches found\n";
 593                     next;
 594                 }
 595
 596                 unless ( -f $input_file ) {
 597                     print "skipping file: $input_file: not a regular file\n";
 598                     next;
 599                 }
 600
 601                 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 602                     print
 603 "skipping file: $input_file: Non-text (override with -f)\n";
 604                     next;
 605                 }
 606
 607                 # we should have a valid filename now
 608                 $fileroot               = $input_file;
 609                 $input_file_permissions = ( stat $input_file )[2] & 07777;
 610
 611                 if ( $^O eq 'VMS' ) {
 612                     ( $fileroot, $dot ) = check_vms_filename($fileroot);
 613                 }
 614
 615                 # add option to change path here
 616                 if ( defined( $rOpts->{'output-path'} ) ) {
 617
 618                     my ( $base, $old_path ) = fileparse($fileroot);
 619                     my $new_path = $rOpts->{'output-path'};
 620                     unless ( -d $new_path ) {
 621                         unless ( mkdir $new_path, 0777 ) {
 622                             die "unable to create directory $new_path: $!\n";
 623                         }
 624                     }
 625                     my $path = $new_path;
 626                     $fileroot = catfile( $path, $base );
 627                     unless ($fileroot) {
 628                         die <<EOM;
 629 ------------------------------------------------------------------------
 630 Problem combining $new_path and $base to make a filename; check -opath
 631 ------------------------------------------------------------------------
 632 EOM
 633                     }
 634                 }
 635             }
 636
 637             # Skip files with same extension as the output files because
 638             # this can lead to a messy situation with files like
 639             # script.tdy.tdy.tdy ... or worse problems ...  when you
 640             # rerun perltidy over and over with wildcard input.
 641             if (
 642                 !$source_stream
 643                 && (   $input_file =~ /$forbidden_file_extensions/o
 644                     || $input_file eq 'DIAGNOSTICS' )
 645               )
 646             {
 647                 print "skipping file: $input_file: wrong extension\n";
 648                 next;
 649             }
 650
 651             # the 'source_object' supplies a method to read the input file
 652             my $source_object =
 653               Perl::Tidy::LineSource->new( $input_file, $rOpts,
 654                 $rpending_logfile_message );
 655             next unless ($source_object);
 656
 657             # register this file name with the Diagnostics package
 658             $diagnostics_object->set_input_file($input_file)
 659               if $diagnostics_object;
 660
 661             #---------------------------------------------------------------
 662             # determine the output file name
 663             #---------------------------------------------------------------
 664             my $output_file = undef;
 665             my $actual_output_extension;
 666
 667             if ( $rOpts->{'outfile'} ) {
 668
 669                 if ( $number_of_files <= 1 ) {
 670
 671                     if ( $rOpts->{'standard-output'} ) {
 672                         die "You may not use -o and -st together\n";
 673                     }
 674                     elsif ($destination_stream) {
 675                         die
 676 "You may not specify a destination array and -o together\n";
 677                     }
 678                     elsif ( defined( $rOpts->{'output-path'} ) ) {
 679                         die "You may not specify -o and -opath together\n";
 680                     }
 681                     elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 682                         die "You may not specify -o and -oext together\n";
 683                     }
 684                     $output_file = $rOpts->{outfile};
 685
 686                     # make sure user gives a file name after -o
 687                     if ( $output_file =~ /^-/ ) {
 688                         die "You must specify a valid filename after -o\n";
 689                     }
 690
 691                     # do not overwrite input file with -o
 692                     if ( defined($input_file_permissions)
 693                         && ( $output_file eq $input_file ) )
 694                     {
 695                         die
 696                           "Use 'perltidy -b $input_file' to modify in-place\n";
 697                     }
 698                 }
 699                 else {
 700                     die "You may not use -o with more than one input file\n";
 701                 }
 702             }
 703             elsif ( $rOpts->{'standard-output'} ) {
 704                 if ($destination_stream) {
 705                     die
 706 "You may not specify a destination array and -st together\n";
 707                 }
 708                 $output_file = '-';
 709
 710                 if ( $number_of_files <= 1 ) {
 711                 }
 712                 else {
 713                     die "You may not use -st with more than one input file\n";
 714                 }
 715             }
 716             elsif ($destination_stream) {
 717                 $output_file = $destination_stream;
 718             }
 719             elsif ($source_stream) {  # source but no destination goes to stdout
 720                 $output_file = '-';
 721             }
 722             elsif ( $input_file eq '-' ) {
 723                 $output_file = '-';
 724             }
 725             else {
 726                 if ($in_place_modify) {
 727                     $output_file = IO::File->new_tmpfile()
 728                       or die "cannot open temp file for -b option: $!\n";
 729                 }
 730                 else {
 731                     $actual_output_extension = $output_extension;
 732                     $output_file             = $fileroot . $output_extension;
 733                 }
 734             }
 735
 736             # the 'sink_object' knows how to write the output file
 737             my $tee_file = $fileroot . $dot . "TEE";
 738
 739             my $line_separator = $rOpts->{'output-line-ending'};
 740             if ( $rOpts->{'preserve-line-endings'} ) {
 741                 $line_separator = find_input_line_ending($input_file);
 742             }
 743             $line_separator = "\n" unless defined($line_separator);
 744
 745             my $sink_object =
 746               Perl::Tidy::LineSink->new( $output_file, $tee_file,
 747                 $line_separator, $rOpts, $rpending_logfile_message );
 748
 749             #---------------------------------------------------------------
 750             # initialize the error logger
 751             #---------------------------------------------------------------
 752             my $warning_file = $fileroot . $dot . "ERR";
 753             if ($errorfile_stream) { $warning_file = $errorfile_stream }
 754             my $log_file = $fileroot . $dot . "LOG";
 755             if ($logfile_stream) { $log_file = $logfile_stream }
 756
 757             my $logger_object =
 758               Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 759                 $saw_extrude );
 760             write_logfile_header(
 761                 $rOpts,        $logger_object, $config_file,
 762                 $rraw_options, $Windows_type
 763             );
 764             if ($$rpending_logfile_message) {
 765                 $logger_object->write_logfile_entry($$rpending_logfile_message);
 766             }
 767             if ($$rpending_complaint) {
 768                 $logger_object->complain($$rpending_complaint);
 769             }
 770
 771             #---------------------------------------------------------------
 772             # initialize the debug object, if any
 773             #---------------------------------------------------------------
 774             my $debugger_object = undef;
 775             if ( $rOpts->{DEBUG} ) {
 776                 $debugger_object =
 777                   Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 778             }
 779
 780             #---------------------------------------------------------------
 781             # create a formatter for this file : html writer or pretty printer
 782             #---------------------------------------------------------------
 783
 784             # we have to delete any old formatter because, for safety,
 785             # the formatter will check to see that there is only one.
 786             $formatter = undef;
 787
 788             if ($user_formatter) {
 789                 $formatter = $user_formatter;
 790             }
 791             elsif ( $rOpts->{'format'} eq 'html' ) {
 792                 $formatter =
 793                   Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
 794                     $actual_output_extension, $html_toc_extension,
 795                     $html_src_extension );
 796             }
 797             elsif ( $rOpts->{'format'} eq 'tidy' ) {
 798                 $formatter = Perl::Tidy::Formatter->new(
 799                     logger_object      => $logger_object,
 800                     diagnostics_object => $diagnostics_object,
 801                     sink_object        => $sink_object,
 802                 );
 803             }
 804             else {
 805                 die "I don't know how to do -format=$rOpts->{'format'}\n";
 806             }
 807
 808             unless ($formatter) {
 809                 die "Unable to continue with $rOpts->{'format'} formatting\n";
 810             }
 811
 812             #---------------------------------------------------------------
 813             # create the tokenizer for this file
 814             #---------------------------------------------------------------
 815             $tokenizer = undef;                     # must destroy old tokenizer
 816             $tokenizer = Perl::Tidy::Tokenizer->new(
 817                 source_object       => $source_object,
 818                 logger_object       => $logger_object,
 819                 debugger_object     => $debugger_object,
 820                 diagnostics_object  => $diagnostics_object,
 821                 starting_level      => $rOpts->{'starting-indentation-level'},
 822                 tabs                => $rOpts->{'tabs'},
 823                 indent_columns      => $rOpts->{'indent-columns'},
 824                 look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
 825                 look_for_autoloader => $rOpts->{'look-for-autoloader'},
 826                 look_for_selfloader => $rOpts->{'look-for-selfloader'},
 827                 trim_qw             => $rOpts->{'trim-qw'},
 828             );
 829
 830             #---------------------------------------------------------------
 831             # now we can do it
 832             #---------------------------------------------------------------
 833             process_this_file( $tokenizer, $formatter );
 834
 835             #---------------------------------------------------------------
 836             # close the input source and report errors
 837             #---------------------------------------------------------------
 838             $source_object->close_input_file();
 839
 840             # get file names to use for syntax check
 841             my $ifname = $source_object->get_input_file_copy_name();
 842             my $ofname = $sink_object->get_output_file_copy();
 843
 844             #---------------------------------------------------------------
 845             # handle the -b option (backup and modify in-place)
 846             #---------------------------------------------------------------
 847             if ($in_place_modify) {
 848                 unless ( -f $input_file ) {
 849
 850                     # oh, oh, no real file to backup ..
 851                     # shouldn't happen because of numerous preliminary checks
 852                     die print
 853 "problem with -b backing up input file '$input_file': not a file\n";
 854                 }
 855                 my $backup_name = $input_file . $backup_extension;
 856                 if ( -f $backup_name ) {
 857                     unlink($backup_name)
 858                       or die
 859 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
 860                 }
 861                 rename( $input_file, $backup_name )
 862                   or die
 863 "problem renaming $input_file to $backup_name for -b option: $!\n";
 864                 $ifname = $backup_name;
 865
 866                 seek( $output_file, 0, 0 )
 867                   or die "unable to rewind tmp file for -b option: $!\n";
 868
 869                 my $fout = IO::File->new("> $input_file")
 870                   or die
 871 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
 872                 my $line;
 873                 while ( $line = $output_file->getline() ) {
 874                     $fout->print($line);
 875                 }
 876                 $fout->close();
 877                 $output_file = $input_file;
 878                 $ofname      = $input_file;
 879             }
 880
 881             #---------------------------------------------------------------
 882             # clean up and report errors
 883             #---------------------------------------------------------------
 884             $sink_object->close_output_file()    if $sink_object;
 885             $debugger_object->close_debug_file() if $debugger_object;
 886
 887             my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
 888             if ($output_file) {
 889
 890                 if ($input_file_permissions) {
 891
 892                     # give output script same permissions as input script, but
 893                     # make it user-writable or else we can't run perltidy again.
 894                     # Thus we retain whatever executable flags were set.
 895                     if ( $rOpts->{'format'} eq 'tidy' ) {
 896                         chmod( $input_file_permissions | 0600, $output_file );
 897                     }
 898
 899                     # else use default permissions for html and any other format
 900
 901                 }
 902                 if ( $logger_object && $rOpts->{'check-syntax'} ) {
 903                     $infile_syntax_ok =
 904                       check_syntax( $ifname, $ofname, $logger_object, $rOpts );
 905                 }
 906             }
 907
 908             $logger_object->finish( $infile_syntax_ok, $formatter )
 909               if $logger_object;
 910         }    # end of loop to process all files
 911     }    # end of main program
 912 }
 913
 914 sub fileglob_to_re {
 915
 916     # modified (corrected) from version in find2perl
 917     my $x = shift;
 918     $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
 919     $x =~ s#\*#.*#g;               # '*' -> '.*'
 920     $x =~ s#\?#.#g;                # '?' -> '.'
 921     "^$x\\z";                      # match whole word
 922 }
 923
 924 sub make_extension {
 925
 926     # Make a file extension, including any leading '.' if necessary
 927     # The '.' may actually be an '_' under VMS
 928     my ( $extension, $default, $dot ) = @_;
 929
 930     # Use the default if none specified
 931     $extension = $default unless ($extension);
 932
 933     # Only extensions with these leading characters get a '.'
 934     # This rule gives the user some freedom
 935     if ( $extension =~ /^[a-zA-Z0-9]/ ) {
 936         $extension = $dot . $extension;
 937     }
 938     return $extension;
 939 }
 940
 941 sub write_logfile_header {
 942     my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
 943       @_;
 944     $logger_object->write_logfile_entry(
 945 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
 946     );
 947     if ($Windows_type) {
 948         $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
 949     }
 950     my $options_string = join( ' ', @$rraw_options );
 951
 952     if ($config_file) {
 953         $logger_object->write_logfile_entry(
 954             "Found Configuration File >>> $config_file \n");
 955     }
 956     $logger_object->write_logfile_entry(
 957         "Configuration and command line parameters for this run:\n");
 958     $logger_object->write_logfile_entry("$options_string\n");
 959
 960     if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
 961         $rOpts->{'logfile'} = 1;    # force logfile to be saved
 962         $logger_object->write_logfile_entry(
 963             "Final parameter set for this run\n");
 964         $logger_object->write_logfile_entry(
 965             "------------------------------------\n");
 966
 967         foreach ( keys %{$rOpts} ) {
 968             $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
 969         }
 970         $logger_object->write_logfile_entry(
 971             "------------------------------------\n");
 972     }
 973     $logger_object->write_logfile_entry(
 974         "To find error messages search for 'WARNING' with your editor\n");
 975 }
 976
 977 sub process_command_line {
 978
 979     my ( $perltidyrc_stream, $is_Windows, $Windows_type, $rpending_complaint ) =
 980       @_;
 981
 982     use Getopt::Long;
 983
 984     ######################################################################
 985     # Note: a few options are not documented in the man page and usage
 986     # message. This is because these are experimental or debug options and
 987     # may or may not be retained in future versions.
 988     #
 989     # Here are the undocumented flags as far as I know.  Any of them
 990     # may disappear at any time.  They are mainly for fine-tuning
 991     # and debugging.
 992     #
 993     # fll --> fuzzy-line-length           # a trivial parameter which gets
 994     #                                       turned off for the extrude option
 995     #                                       which is mainly for debugging
 996     # chk --> check-multiline-quotes      # check for old bug; to be deleted
 997     # scl --> short-concatenation-item-length   # helps break at '.'
 998     # recombine                           # for debugging line breaks
 999     # I   --> DIAGNOSTICS                 # for debugging
1000     ######################################################################
1001
1002     # here is a summary of the Getopt codes:
1003     # <none> does not take an argument
1004     # =s takes a mandatory string
1005     # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1006     # =i takes a mandatory integer
1007     # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1008     # ! does not take an argument and may be negated
1009     #  i.e., -foo and -nofoo are allowed
1010     # a double dash signals the end of the options list
1011     #
1012     #---------------------------------------------------------------
1013     # Define the option string passed to GetOptions.
1014     #---------------------------------------------------------------
1015
1016     my @option_string = ();
1017     my %expansion     = ();
1018     my $rexpansion    = \%expansion;
1019
1020     #  These options are parsed directly by perltidy:
1021     #    help h
1022     #    version v
1023     #  However, they are included in the option set so that they will
1024     #  be seen in the options dump.
1025
1026     # These long option names have no abbreviations or are treated specially
1027     @option_string = qw(
1028       html!
1029       noprofile
1030       no-profile
1031       npro
1032       recombine!
1033     );
1034
1035     # routine to install and check options
1036     my $add_option = sub {
1037         my ( $long_name, $short_name, $flag ) = @_;
1038         push @option_string, $long_name . $flag;
1039         if ($short_name) {
1040             if ( $expansion{$short_name} ) {
1041                 my $existing_name = $expansion{$short_name}[0];
1042                 die
1043 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1044             }
1045             $expansion{$short_name} = [$long_name];
1046             if ( $flag eq '!' ) {
1047                 my $nshort_name = 'n' . $short_name;
1048                 my $nolong_name = 'no' . $long_name;
1049                 if ( $expansion{$nshort_name} ) {
1050                     my $existing_name = $expansion{$nshort_name}[0];
1051                     die
1052 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1053                 }
1054                 $expansion{$nshort_name} = [$nolong_name];
1055             }
1056         }
1057     };
1058
1059     # Install long option names which have a simple abbreviation.
1060     # Options with code '!' get standard negation ('no' for long names,
1061     # 'n' for abbreviations)
1062     $add_option->( 'DEBUG',                                     'D',     '!' );
1063     $add_option->( 'DIAGNOSTICS',                               'I',     '!' );
1064     $add_option->( 'add-newlines',                              'anl',   '!' );
1065     $add_option->( 'add-semicolons',                            'asc',   '!' );
1066     $add_option->( 'add-whitespace',                            'aws',   '!' );
1067     $add_option->( 'backup-and-modify-in-place',                'b',     '!' );
1068     $add_option->( 'backup-file-extension',                     'bext',  '=s' );
1069     $add_option->( 'blanks-before-blocks',                      'bbb',   '!' );
1070     $add_option->( 'blanks-before-comments',                    'bbc',   '!' );
1071     $add_option->( 'blanks-before-subs',                        'bbs',   '!' );
1072     $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1073     $add_option->( 'block-brace-vertical-tightness',            'bbvt',  '=i' );
1074     $add_option->( 'block-brace-vertical-tightness-list',       'bbvtl', '=s' );
1075     $add_option->( 'brace-left-and-indent',                     'bli',   '!' );
1076     $add_option->( 'brace-left-and-indent-list',                'blil',  '=s' );
1077     $add_option->( 'brace-tightness',                           'bt',    '=i' );
1078     $add_option->( 'brace-vertical-tightness',                  'bvt',   '=i' );
1079     $add_option->( 'brace-vertical-tightness-closing',          'bvtc',  '=i' );
1080     $add_option->( 'break-at-old-comma-breakpoints',            'boc',   '!' );
1081     $add_option->( 'break-at-old-keyword-breakpoints',          'bok',   '!' );
1082     $add_option->( 'break-at-old-logical-breakpoints',          'bol',   '!' );
1083     $add_option->( 'break-at-old-trinary-breakpoints',          'bot',   '!' );
1084     $add_option->( 'check-multiline-quotes',                    'chk',   '!' );
1085     $add_option->( 'check-syntax',                              'syn',   '!' );
1086     $add_option->( 'closing-side-comment-else-flag',            'csce',  '=i' );
1087     $add_option->( 'closing-side-comment-interval',             'csci',  '=i' );
1088     $add_option->( 'closing-side-comment-list',                 'cscl',  '=s' );
1089     $add_option->( 'closing-side-comment-maximum-text',         'csct',  '=i' );
1090     $add_option->( 'closing-side-comment-prefix',               'cscp',  '=s' );
1091     $add_option->( 'closing-side-comment-warnings',             'cscw',  '!' );
1092     $add_option->( 'closing-side-comments',                     'csc',   '!' );
1093     $add_option->( 'closing-token-indentation',                 'cti',   '=i' );
1094     $add_option->( 'closing-paren-indentation',                 'cpi',   '=i' );
1095     $add_option->( 'closing-brace-indentation',                 'cbi',   '=i' );
1096     $add_option->( 'closing-square-bracket-indentation',        'csbi',  '=i' );
1097     $add_option->( 'continuation-indentation',                  'ci',    '=i' );
1098     $add_option->( 'comma-arrow-breakpoints',                   'cab',   '=i' );
1099     $add_option->( 'cuddled-else',                              'ce',    '!' );
1100     $add_option->( 'delete-block-comments',                     'dbc',   '!' );
1101     $add_option->( 'delete-closing-side-comments',              'dcsc',  '!' );
1102     $add_option->( 'delete-old-newlines',                       'dnl',   '!' );
1103     $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1104     $add_option->( 'delete-pod',                                'dp',    '!' );
1105     $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1106     $add_option->( 'delete-side-comments',                      'dsc',   '!' );
1107     $add_option->( 'dump-defaults',                             'ddf',   '!' );
1108     $add_option->( 'dump-long-names',                           'dln',   '!' );
1109     $add_option->( 'dump-options',                              'dop',   '!' );
1110     $add_option->( 'dump-profile',                              'dpro',  '!' );
1111     $add_option->( 'dump-short-names',                          'dsn',   '!' );
1112     $add_option->( 'dump-token-types',                          'dtt',   '!' );
1113     $add_option->( 'dump-want-left-space',                      'dwls',  '!' );
1114     $add_option->( 'dump-want-right-space',                     'dwrs',  '!' );
1115     $add_option->( 'entab-leading-whitespace',                  'et',    '=i' );
1116     $add_option->( 'force-read-binary',                         'f',     '!' );
1117     $add_option->( 'format',                                    'fmt',   '=s' );
1118     $add_option->( 'fuzzy-line-length',                         'fll',   '!' );
1119     $add_option->( 'hanging-side-comments',                     'hsc',   '!' );
1120     $add_option->( 'help',                                      'h',     '' );
1121     $add_option->( 'ignore-old-line-breaks',                    'iob',   '!' );
1122     $add_option->( 'indent-block-comments',                     'ibc',   '!' );
1123     $add_option->( 'indent-closing-brace',                      'icb',   '!' );
1124     $add_option->( 'indent-columns',                            'i',     '=i' );
1125     $add_option->( 'indent-spaced-block-comments',              'isbc',  '!' );
1126     $add_option->( 'line-up-parentheses',                       'lp',    '!' );
1127     $add_option->( 'logfile',                                   'log',   '!' );
1128     $add_option->( 'logfile-gap',                               'g',     ':i' );
1129     $add_option->( 'long-block-line-count',                     'lbl',   '=i' );
1130     $add_option->( 'look-for-autoloader',                       'lal',   '!' );
1131     $add_option->( 'look-for-hash-bang',                        'x',     '!' );
1132     $add_option->( 'look-for-selfloader',                       'lsl',   '!' );
1133     $add_option->( 'maximum-consecutive-blank-lines',           'mbl',   '=i' );
1134     $add_option->( 'maximum-fields-per-table',                  'mft',   '=i' );
1135     $add_option->( 'maximum-line-length',                       'l',     '=i' );
1136     $add_option->( 'minimum-space-to-comment',                  'msc',   '=i' );
1137     $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1138     $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1139     $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1140     $add_option->( 'opening-brace-always-on-right',             'bar',   '' );
1141     $add_option->( 'opening-brace-on-new-line',                 'bl',    '!' );
1142     $add_option->( 'opening-sub-brace-on-new-line',             'sbl',   '!' );
1143     $add_option->( 'outdent-keyword-list',                      'okwl',  '=s' );
1144     $add_option->( 'outdent-keywords',                          'okw',   '!' );
1145     $add_option->( 'outdent-labels',                            'ola',   '!' );
1146     $add_option->( 'outdent-long-comments',                     'olc',   '!' );
1147     $add_option->( 'outdent-long-quotes',                       'olq',   '!' );
1148     $add_option->( 'outdent-static-block-comments',             'osbc',  '!' );
1149     $add_option->( 'outfile',                                   'o',     '=s' );
1150     $add_option->( 'output-file-extension',                     'oext',  '=s' );
1151     $add_option->( 'output-line-ending',                        'ole',   '=s' );
1152     $add_option->( 'output-path',                               'opath', '=s' );
1153     $add_option->( 'paren-tightness',                           'pt',    '=i' );
1154     $add_option->( 'paren-vertical-tightness',                  'pvt',   '=i' );
1155     $add_option->( 'paren-vertical-tightness-closing',          'pvtc',  '=i' );
1156     $add_option->( 'pass-version-line',                         'pvl',   '!' );
1157     $add_option->( 'perl-syntax-check-flags',                   'pscf',  '=s' );
1158     $add_option->( 'preserve-line-endings',                     'ple',   '!' );
1159     $add_option->( 'profile',                                   'pro',   '=s' );
1160     $add_option->( 'quiet',                                     'q',     '!' );
1161     $add_option->( 'short-concatenation-item-length',           'scl',   '=i' );
1162     $add_option->( 'show-options',                              'opt',   '!' );
1163     $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1164     $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1165     $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1166     $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1167     $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1168     $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1169     $add_option->( 'standard-error-output',                     'se',    '!' );
1170     $add_option->( 'standard-output',                           'st',    '!' );
1171     $add_option->( 'starting-indentation-level',                'sil',   '=i' );
1172     $add_option->( 'static-block-comment-prefix',               'sbcp',  '=s' );
1173     $add_option->( 'static-block-comments',                     'sbc',   '!' );
1174     $add_option->( 'static-side-comment-prefix',                'sscp',  '=s' );
1175     $add_option->( 'static-side-comments',                      'ssc',   '!' );
1176     $add_option->( 'swallow-optional-blank-lines',              'sob',   '!' );
1177     $add_option->( 'tabs',                                      't',     '!' );
1178     $add_option->( 'tee-block-comments',                        'tbc',   '!' );
1179     $add_option->( 'tee-pod',                                   'tp',    '!' );
1180     $add_option->( 'tee-side-comments',                         'tsc',   '!' );
1181     $add_option->( 'trim-qw',                                   'tqw',   '!' );
1182     $add_option->( 'version',                                   'v',     '' );
1183     $add_option->( 'vertical-tightness',                        'vt',    '=i' );
1184     $add_option->( 'vertical-tightness-closing',                'vtc',   '=i' );
1185     $add_option->( 'want-break-after',                          'wba',   '=s' );
1186     $add_option->( 'want-break-before',                         'wbb',   '=s' );
1187     $add_option->( 'want-left-space',                           'wls',   '=s' );
1188     $add_option->( 'want-right-space',                          'wrs',   '=s' );
1189     $add_option->( 'warning-output',                            'w',     '!' );
1190
1191     # The Perl::Tidy::HtmlWriter will add its own options to the string
1192     Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1193
1194     #---------------------------------------------------------------
1195     # Assign default values to the above options here, except
1196     # for 'outfile' and 'help'.
1197     # These settings should approximate the perlstyle(1) suggestions.
1198     #---------------------------------------------------------------
1199     my @defaults = qw(
1200       add-newlines
1201       add-semicolons
1202       add-whitespace
1203       blanks-before-blocks
1204       blanks-before-comments
1205       blanks-before-subs
1206       block-brace-tightness=0
1207       block-brace-vertical-tightness=0
1208       brace-tightness=1
1209       brace-vertical-tightness-closing=0
1210       brace-vertical-tightness=0
1211       break-at-old-logical-breakpoints
1212       break-at-old-trinary-breakpoints
1213       break-at-old-keyword-breakpoints
1214       comma-arrow-breakpoints=1
1215       nocheck-syntax
1216       closing-side-comment-interval=6
1217       closing-side-comment-maximum-text=20
1218       closing-side-comment-else-flag=0
1219       closing-paren-indentation=0
1220       closing-brace-indentation=0
1221       closing-square-bracket-indentation=0
1222       continuation-indentation=2
1223       delete-old-newlines
1224       delete-semicolons
1225       fuzzy-line-length
1226       hanging-side-comments
1227       indent-block-comments
1228       indent-columns=4
1229       long-block-line-count=8
1230       look-for-autoloader
1231       look-for-selfloader
1232       maximum-consecutive-blank-lines=1
1233       maximum-fields-per-table=0
1234       maximum-line-length=80
1235       minimum-space-to-comment=4
1236       nobrace-left-and-indent
1237       nocuddled-else
1238       nodelete-old-whitespace
1239       nohtml
1240       nologfile
1241       noquiet
1242       noshow-options
1243       nostatic-side-comments
1244       noswallow-optional-blank-lines
1245       notabs
1246       nowarning-output
1247       outdent-labels
1248       outdent-long-quotes
1249       outdent-long-comments
1250       paren-tightness=1
1251       paren-vertical-tightness-closing=0
1252       paren-vertical-tightness=0
1253       pass-version-line
1254       recombine
1255       short-concatenation-item-length=8
1256       space-for-semicolon
1257       square-bracket-tightness=1
1258       square-bracket-vertical-tightness-closing=0
1259       square-bracket-vertical-tightness=0
1260       static-block-comments
1261       trim-qw
1262       format=tidy
1263       backup-file-extension=bak
1264
1265       pod2html
1266       html-table-of-contents
1267       html-entities
1268     );
1269
1270     push @defaults, "perl-syntax-check-flags=-c -T";
1271
1272     #---------------------------------------------------------------
1273     # set the defaults by passing the above list through GetOptions
1274     #---------------------------------------------------------------
1275     my %Opts = ();
1276     {
1277         local @ARGV;
1278         my $i;
1279
1280         for $i (@defaults) { push @ARGV, "--" . $i }
1281
1282         if ( !GetOptions( \%Opts, @option_string ) ) {
1283             die "Programming Bug: error in setting default options";
1284         }
1285     }
1286
1287     #---------------------------------------------------------------
1288     # Define abbreviations which will be expanded into the above primitives.
1289     # These may be defined recursively.
1290     #---------------------------------------------------------------
1291     %expansion = (
1292         %expansion,
1293         'freeze-newlines'    => [qw(noadd-newlines nodelete-old-newlines)],
1294         'fnl'                => [qw(freeze-newlines)],
1295         'freeze-whitespace'  => [qw(noadd-whitespace nodelete-old-whitespace)],
1296         'fws'                => [qw(freeze-whitespace)],
1297         'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1298         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1299         'nooutdent-long-lines' =>
1300           [qw(nooutdent-long-quotes nooutdent-long-comments)],
1301         'noll'                => [qw(nooutdent-long-lines)],
1302         'io'                  => [qw(indent-only)],
1303         'delete-all-comments' =>
1304           [qw(delete-block-comments delete-side-comments delete-pod)],
1305         'nodelete-all-comments' =>
1306           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1307         'dac'              => [qw(delete-all-comments)],
1308         'ndac'             => [qw(nodelete-all-comments)],
1309         'gnu'              => [qw(gnu-style)],
1310         'tee-all-comments' =>
1311           [qw(tee-block-comments tee-side-comments tee-pod)],
1312         'notee-all-comments' =>
1313           [qw(notee-block-comments notee-side-comments notee-pod)],
1314         'tac'   => [qw(tee-all-comments)],
1315         'ntac'  => [qw(notee-all-comments)],
1316         'html'  => [qw(format=html)],
1317         'nhtml' => [qw(format=tidy)],
1318         'tidy'  => [qw(format=tidy)],
1319
1320         'break-after-comma-arrows'   => [qw(cab=0)],
1321         'nobreak-after-comma-arrows' => [qw(cab=1)],
1322         'baa'                        => [qw(cab=0)],
1323         'nbaa'                       => [qw(cab=1)],
1324
1325         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1326         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1327         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1328         'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1329         'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1330
1331         'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1332         'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1333         'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1334         'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1335         'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1336
1337         'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1338         'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1339         'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1340
1341         'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1342         'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1343         'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1344
1345         'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1346         'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1347         'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1348
1349         'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1350         'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1351         'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1352
1353         # 'mangle' originally deleted pod and comments, but to keep it
1354         # reversible, it no longer does.  But if you really want to
1355         # delete them, just use:
1356         #   -mangle -dac
1357
1358         # An interesting use for 'mangle' is to do this:
1359         #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1360         # which will form as many one-line blocks as possible
1361
1362         'mangle' => [
1363             qw(
1364               check-syntax
1365               delete-old-newlines
1366               delete-old-whitespace
1367               delete-semicolons
1368               indent-columns=0
1369               maximum-consecutive-blank-lines=0
1370               maximum-line-length=100000
1371               noadd-newlines
1372               noadd-semicolons
1373               noadd-whitespace
1374               noblanks-before-blocks
1375               noblanks-before-subs
1376               notabs
1377               )
1378         ],
1379
1380         # 'extrude' originally deleted pod and comments, but to keep it
1381         # reversible, it no longer does.  But if you really want to
1382         # delete them, just use
1383         #   extrude -dac
1384         #
1385         # An interesting use for 'extrude' is to do this:
1386         #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1387         # which will break up all one-line blocks.
1388
1389         'extrude' => [
1390             qw(
1391               check-syntax
1392               ci=0
1393               delete-old-newlines
1394               delete-old-whitespace
1395               delete-semicolons
1396               indent-columns=0
1397               maximum-consecutive-blank-lines=0
1398               maximum-line-length=1
1399               noadd-semicolons
1400               noadd-whitespace
1401               noblanks-before-blocks
1402               noblanks-before-subs
1403               nofuzzy-line-length
1404               notabs
1405               )
1406         ],
1407
1408         # this style tries to follow the GNU Coding Standards (which do
1409         # not really apply to perl but which are followed by some perl
1410         # programmers).
1411         'gnu-style' => [
1412             qw(
1413               lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1414               )
1415         ],
1416
1417         # Additional styles can be added here
1418     );
1419
1420     Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1421
1422     # Uncomment next line to dump all expansions for debugging:
1423     # dump_short_names(\%expansion);
1424
1425     my $word;
1426     my @raw_options        = ();
1427     my $config_file        = "";
1428     my $saw_ignore_profile = 0;
1429     my $saw_extrude        = 0;
1430     my $saw_dump_profile   = 0;
1431     my $i;
1432
1433     #---------------------------------------------------------------
1434     # Take a first look at the command-line parameters.  Do as many
1435     # immediate dumps as possible, which can avoid confusion if the
1436     # perltidyrc file has an error.
1437     #---------------------------------------------------------------
1438     foreach $i (@ARGV) {
1439
1440         $i =~ s/^--/-/;
1441         if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1442             $saw_ignore_profile = 1;
1443         }
1444
1445         # note: this must come before -pro and -profile, below:
1446         elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1447             $saw_dump_profile = 1;
1448         }
1449         elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1450             if ($config_file) {
1451                 warn
1452 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1453             }
1454             $config_file = $2;
1455             unless ( -e $config_file ) {
1456                 warn "cannot find file given with -pro=$config_file: $!\n";
1457                 $config_file = "";
1458             }
1459         }
1460         elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1461             die "usage: -pro=filename or --profile=filename, no spaces\n";
1462         }
1463         elsif ( $i =~ /^-extrude$/ ) {
1464             $saw_extrude = 1;
1465         }
1466         elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1467             usage();
1468             exit 1;
1469         }
1470         elsif ( $i =~ /^-(version|v)$/ ) {
1471             show_version();
1472             exit 1;
1473         }
1474         elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1475             dump_defaults(@defaults);
1476             exit 1;
1477         }
1478         elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1479             dump_long_names(@option_string);
1480             exit 1;
1481         }
1482         elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1483             dump_short_names( \%expansion );
1484             exit 1;
1485         }
1486         elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1487             Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1488             exit 1;
1489         }
1490     }
1491
1492     if ( $saw_dump_profile && $saw_ignore_profile ) {
1493         warn "No profile to dump because of -npro\n";
1494         exit 1;
1495     }
1496
1497     #---------------------------------------------------------------
1498     # read any .perltidyrc configuration file
1499     #---------------------------------------------------------------
1500     unless ($saw_ignore_profile) {
1501
1502         # resolve possible conflict between $perltidyrc_stream passed
1503         # as call parameter to perltidy and -pro=filename on command
1504         # line.
1505         if ($perltidyrc_stream) {
1506             if ($config_file) {
1507                 warn <<EOM;
1508  Conflict: a perltidyrc configuration file was specified both as this
1509  perltidy call parameter: $perltidyrc_stream
1510  and with this -profile=$config_file.
1511  Using -profile=$config_file.
1512 EOM
1513             }
1514             else {
1515                 $config_file = $perltidyrc_stream;
1516             }
1517         }
1518
1519         # look for a config file if we don't have one yet
1520         my $rconfig_file_chatter;
1521         $$rconfig_file_chatter = "";
1522         $config_file           =
1523           find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1524             $rpending_complaint )
1525           unless $config_file;
1526
1527         # open any config file
1528         my $fh_config;
1529         if ($config_file) {
1530             ( $fh_config, $config_file ) =
1531               Perl::Tidy::streamhandle( $config_file, 'r' );
1532             unless ($fh_config) {
1533                 $$rconfig_file_chatter .=
1534                   "# $config_file exists but cannot be opened\n";
1535             }
1536         }
1537
1538         if ($saw_dump_profile) {
1539             if ($saw_dump_profile) {
1540                 dump_config_file( $fh_config, $config_file,
1541                     $rconfig_file_chatter );
1542                 exit 1;
1543             }
1544         }
1545
1546         if ($fh_config) {
1547
1548             my $rconfig_list =
1549               read_config_file( $fh_config, $config_file, \%expansion );
1550
1551             # process any .perltidyrc parameters right now so we can
1552             # localize errors
1553             if (@$rconfig_list) {
1554                 local @ARGV = @$rconfig_list;
1555
1556                 expand_command_abbreviations( \%expansion, \@raw_options,
1557                     $config_file );
1558
1559                 if ( !GetOptions( \%Opts, @option_string ) ) {
1560                     die
1561 "Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
1562                 }
1563
1564                 # Undo any options which cause premature exit.  They are not
1565                 # appropriate for a config file, and it could be hard to
1566                 # diagnose the cause of the premature exit.
1567                 foreach (
1568                     qw{
1569                     dump-defaults
1570                     dump-long-names
1571                     dump-options
1572                     dump-profile
1573                     dump-short-names
1574                     dump-token-types
1575                     dump-want-left-space
1576                     dump-want-right-space
1577                     help
1578                     stylesheet
1579                     version
1580                     }
1581                   )
1582                 {
1583                     if ( defined( $Opts{$_} ) ) {
1584                         delete $Opts{$_};
1585                         warn "ignoring --$_ in config file: $config_file\n";
1586                     }
1587                 }
1588             }
1589         }
1590     }
1591
1592     #---------------------------------------------------------------
1593     # now process the command line parameters
1594     #---------------------------------------------------------------
1595     expand_command_abbreviations( \%expansion, \@raw_options, $config_file );
1596
1597     if ( !GetOptions( \%Opts, @option_string ) ) {
1598         die "Error on command line; for help try 'perltidy -h'\n";
1599     }
1600
1601     if ( $Opts{'dump-options'} ) {
1602         dump_options( \%Opts );
1603         exit 1;
1604     }
1605
1606     #---------------------------------------------------------------
1607     # Now we have to handle any interactions among the options..
1608     #---------------------------------------------------------------
1609
1610     # Since -vt, -vtc, and -cti are abbreviations, but under
1611     # msdos, an unquoted input parameter like vtc=1 will be
1612     # seen as 2 parameters, vtc and 1, so the abbreviations
1613     # won't be seen.  Therefore, we will catch them here if
1614     # they get through.
1615
1616     if ( defined $Opts{'vertical-tightness'} ) {
1617         my $vt = $Opts{'vertical-tightness'};
1618         $Opts{'paren-vertical-tightness'}          = $vt;
1619         $Opts{'square-bracket-vertical-tightness'} = $vt;
1620         $Opts{'brace-vertical-tightness'}          = $vt;
1621     }
1622
1623     if ( defined $Opts{'vertical-tightness-closing'} ) {
1624         my $vtc = $Opts{'vertical-tightness-closing'};
1625         $Opts{'paren-vertical-tightness-closing'}          = $vtc;
1626         $Opts{'square-bracket-vertical-tightness-closing'} = $vtc;
1627         $Opts{'brace-vertical-tightness-closing'}          = $vtc;
1628     }
1629
1630     if ( defined $Opts{'closing-token-indentation'} ) {
1631         my $cti = $Opts{'closing-token-indentation'};
1632         $Opts{'closing-square-bracket-indentation'} = $cti;
1633         $Opts{'closing-brace-indentation'}          = $cti;
1634         $Opts{'closing-paren-indentation'}          = $cti;
1635     }
1636
1637     # In quiet mode, there is no log file and hence no way to report
1638     # results of syntax check, so don't do it.
1639     if ( $Opts{'quiet'} ) {
1640         $Opts{'check-syntax'} = 0;
1641     }
1642
1643     # can't check syntax if no output
1644     if ( $Opts{'format'} ne 'tidy' ) {
1645         $Opts{'check-syntax'} = 0;
1646     }
1647
1648     # Never let Windows 9x/Me systems run syntax check -- this will prevent a
1649     # wide variety of nasty problems on these systems, because they cannot
1650     # reliably run backticks.  Don't even think about changing this!
1651     if (   $Opts{'check-syntax'}
1652         && $is_Windows
1653         && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
1654     {
1655         $Opts{'check-syntax'} = 0;
1656     }
1657
1658     # It's really a bad idea to check syntax as root unless you wrote
1659     # the script yourself.  FIXME: not sure if this works with VMS
1660     unless ($is_Windows) {
1661
1662         if ( $< == 0 && $Opts{'check-syntax'} ) {
1663             $Opts{'check-syntax'} = 0;
1664             $$rpending_complaint .=
1665 "Syntax check deactivated for safety; you shouldn't run this as root\n";
1666         }
1667     }
1668
1669     # see if user set a non-negative logfile-gap
1670     if ( defined( $Opts{'logfile-gap'} ) && $Opts{'logfile-gap'} >= 0 ) {
1671
1672         # a zero gap will be taken as a 1
1673         if ( $Opts{'logfile-gap'} == 0 ) {
1674             $Opts{'logfile-gap'} = 1;
1675         }
1676
1677         # setting a non-negative logfile gap causes logfile to be saved
1678         $Opts{'logfile'} = 1;
1679     }
1680
1681     # not setting logfile gap, or setting it negative, causes default of 50
1682     else {
1683         $Opts{'logfile-gap'} = 50;
1684     }
1685
1686     # set short-cut flag when only indentation is to be done.
1687     # Note that the user may or may not have already set the
1688     # indent-only flag.
1689     if (   !$Opts{'add-whitespace'}
1690         && !$Opts{'delete-old-whitespace'}
1691         && !$Opts{'add-newlines'}
1692         && !$Opts{'delete-old-newlines'} )
1693     {
1694         $Opts{'indent-only'} = 1;
1695     }
1696
1697     # -isbc implies -ibc
1698     if ( $Opts{'indent-spaced-block-comments'} ) {
1699         $Opts{'indent-block-comments'} = 1;
1700     }
1701
1702     # -bli flag implies -bl
1703     if ( $Opts{'brace-left-and-indent'} ) {
1704         $Opts{'opening-brace-on-new-line'} = 1;
1705     }
1706
1707     if (   $Opts{'opening-brace-always-on-right'}
1708         && $Opts{'opening-brace-on-new-line'} )
1709     {
1710         warn <<EOM;
1711  Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
1712   'opening-brace-on-new-line' (-bl).  Ignoring -bl.
1713 EOM
1714         $Opts{'opening-brace-on-new-line'} = 0;
1715     }
1716
1717     # it simplifies things if -bl is 0 rather than undefined
1718     if ( !defined( $Opts{'opening-brace-on-new-line'} ) ) {
1719         $Opts{'opening-brace-on-new-line'} = 0;
1720     }
1721
1722     # -sbl defaults to -bl if not defined
1723     if ( !defined( $Opts{'opening-sub-brace-on-new-line'} ) ) {
1724         $Opts{'opening-sub-brace-on-new-line'} =
1725           $Opts{'opening-brace-on-new-line'};
1726     }
1727
1728     # set shortcut flag if no blanks to be written
1729     unless ( $Opts{'maximum-consecutive-blank-lines'} ) {
1730         $Opts{'swallow-optional-blank-lines'} = 1;
1731     }
1732
1733     if ( $Opts{'entab-leading-whitespace'} ) {
1734         if ( $Opts{'entab-leading-whitespace'} < 0 ) {
1735             warn "-et=n must use a positive integer; ignoring -et\n";
1736             $Opts{'entab-leading-whitespace'} = undef;
1737         }
1738
1739         # entab leading whitespace has priority over the older 'tabs' option
1740         if ( $Opts{'tabs'} ) { $Opts{'tabs'} = 0; }
1741     }
1742
1743     if ( $Opts{'output-line-ending'} ) {
1744         unless ( is_unix() ) {
1745             warn "ignoring -ole; only works under unix\n";
1746             $Opts{'output-line-ending'} = undef;
1747         }
1748     }
1749     if ( $Opts{'preserve-line-endings'} ) {
1750         unless ( is_unix() ) {
1751             warn "ignoring -ple; only works under unix\n";
1752             $Opts{'preserve-line-endings'} = undef;
1753         }
1754     }
1755
1756     return ( \%Opts, $config_file, \@raw_options, $saw_extrude );
1757
1758 }    # end of process_command_line
1759
1760 sub expand_command_abbreviations {
1761
1762     # go through @ARGV and expand any abbreviations
1763
1764     my ( $rexpansion, $rraw_options, $config_file ) = @_;
1765     my ($word);
1766
1767     # set a pass limit to prevent an infinite loop;
1768     # 10 should be plenty, but it may be increased to allow deeply
1769     # nested expansions.
1770     my $max_passes = 10;
1771     my @new_argv   = ();
1772
1773     # keep looping until all expansions have been converted into actual
1774     # dash parameters..
1775     for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
1776         my @new_argv     = ();
1777         my $abbrev_count = 0;
1778
1779         # loop over each item in @ARGV..
1780         foreach $word (@ARGV) {
1781
1782             # convert any leading 'no-' to just 'no'
1783             if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
1784
1785             # if it is a dash flag (instead of a file name)..
1786             if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
1787
1788                 my $abr   = $1;
1789                 my $flags = $2;
1790
1791                 # save the raw input for debug output in case of circular refs
1792                 if ( $pass_count == 0 ) {
1793                     push( @$rraw_options, $word );
1794                 }
1795
1796                 # recombine abbreviation and flag, if necessary,
1797                 # to allow abbreviations with arguments such as '-vt=1'
1798                 if ( $rexpansion->{ $abr . $flags } ) {
1799                     $abr   = $abr . $flags;
1800                     $flags = "";
1801                 }
1802
1803                 # if we see this dash item in the expansion hash..
1804                 if ( $rexpansion->{$abr} ) {
1805                     $abbrev_count++;
1806
1807                     # stuff all of the words that it expands to into the
1808                     # new arg list for the next pass
1809                     foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
1810                         next unless $abbrev;    # for safety; shouldn't happen
1811                         push( @new_argv, '--' . $abbrev . $flags );
1812                     }
1813                 }
1814
1815                 # not in expansion hash, must be actual long name
1816                 else {
1817                     push( @new_argv, $word );
1818                 }
1819             }
1820
1821             # not a dash item, so just save it for the next pass
1822             else {
1823                 push( @new_argv, $word );
1824             }
1825         }    # end of this pass
1826
1827         # update parameter list @ARGV to the new one
1828         @ARGV = @new_argv;
1829         last unless ( $abbrev_count > 0 );
1830
1831         # make sure we are not in an infinite loop
1832         if ( $pass_count == $max_passes ) {
1833             print STDERR
1834 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
1835             print STDERR "Here are the raw options\n";
1836             local $" = ')(';
1837             print STDERR "(@$rraw_options)\n";
1838             my $num = @new_argv;
1839
1840             if ( $num < 50 ) {
1841                 print STDERR "After $max_passes passes here is ARGV\n";
1842                 print STDERR "(@new_argv)\n";
1843             }
1844             else {
1845                 print STDERR "After $max_passes passes ARGV has $num entries\n";
1846             }
1847
1848             if ($config_file) {
1849                 die <<"DIE";
1850 Please check your configuration file $config_file for circular-references.
1851 To deactivate it, use -npro.
1852 DIE
1853             }
1854             else {
1855                 die <<'DIE';
1856 Program bug - circular-references in the %expansion hash, probably due to
1857 a recent program change.
1858 DIE
1859             }
1860         }    # end of check for circular references
1861     }    # end of loop over all passes
1862 }
1863
1864 # Debug routine -- this will dump the expansion hash
1865 sub dump_short_names {
1866     my $rexpansion = shift;
1867     print STDOUT <<EOM;
1868 List of short names.  This list shows how all abbreviations are
1869 translated into other abbreviations and, eventually, into long names.
1870 New abbreviations may be defined in a .perltidyrc file.
1871 For a list of all long names, use perltidy --dump-long-names (-dln).
1872 --------------------------------------------------------------------------
1873 EOM
1874     foreach my $abbrev ( sort keys %$rexpansion ) {
1875         my @list = @{ $$rexpansion{$abbrev} };
1876         print STDOUT "$abbrev --> @list\n";
1877     }
1878 }
1879
1880 sub check_vms_filename {
1881
1882     # given a valid filename (the perltidy input file)
1883     # create a modified filename and separator character
1884     # suitable for VMS.
1885     #
1886     # Contributed by Michael Cartmell
1887     #
1888     my ( $base, $path ) = fileparse( $_[0] );
1889
1890     # remove explicit ; version
1891     $base =~ s/;-?\d*$//
1892
1893       # remove explicit . version ie two dots in filename NB ^ escapes a dot
1894       or $base =~ s/(          # begin capture $1
1895                   (?:^|[^^])\. # match a dot not preceded by a caret
1896                   (?:          # followed by nothing
1897                     |          # or
1898                     .*[^^]     # anything ending in a non caret
1899                   )
1900                 )              # end capture $1
1901                 \.-?\d*$       # match . version number
1902               /$1/x;
1903
1904     # normalise filename, if there are no unescaped dots then append one
1905     $base .= '.' unless $base =~ /(?:^|[^^])\./;
1906
1907     # if we don't already have an extension then we just append the extention
1908     my $separator = ( $base =~ /\.$/ ) ? "" : "_";
1909     return ( $path . $base, $separator );
1910 }
1911
1912 sub Win_OS_Type {
1913
1914     # Returns a string that determines what MS OS we are on.
1915     # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net
1916     # Returns nothing if not an MS system.
1917     # Contributed by: Yves Orton
1918
1919     my $rpending_complaint = shift;
1920     return unless $^O =~ /win32|dos/i;    # is it a MS box?
1921
1922     # It _should_ have Win32 unless something is really weird
1923     return unless eval('require Win32');
1924
1925     # Use the standard API call to determine the version
1926     my ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion();
1927
1928     return "win32s" unless $id;           # If id==0 then its a win32s box.
1929     my $os = {                            # Magic numbers from MSDN
1930                                           # documentation of GetOSVersion
1931         1 => {
1932             0  => "95",
1933             10 => "98",
1934             90 => "Me"
1935         },
1936         2 => {
1937             0  => "2000",
1938             1  => "XP/.Net",
1939             51 => "NT3.51"
1940         }
1941     }->{$id}->{$minor};
1942
1943     # This _really_ shouldnt happen. At least not for quite a while
1944     unless ( defined $os ) {
1945         $$rpending_complaint .= <<EOS;
1946 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
1947 We won't be able to look for a system-wide config file.
1948 EOS
1949     }
1950
1951     # Unfortunately the logic used for the various versions isnt so clever..
1952     # so we have to handle an outside case.
1953     return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
1954 }
1955
1956 sub is_unix {
1957     return ( $^O !~ /win32|dos/i )
1958       && ( $^O ne 'VMS' )
1959       && ( $^O ne 'OS2' )
1960       && ( $^O ne 'MacOS' );
1961 }
1962
1963 sub look_for_Windows {
1964
1965     # determine Windows sub-type and location of
1966     # system-wide configuration files
1967     my $rpending_complaint = shift;
1968     my $is_Windows         = ( $^O =~ /win32|dos/i );
1969     my $Windows_type       = Win_OS_Type($rpending_complaint) if $is_Windows;
1970     return ( $is_Windows, $Windows_type );
1971 }
1972
1973 sub find_config_file {
1974
1975     # look for a .perltidyrc configuration file
1976     my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
1977         $rpending_complaint ) = @_;
1978
1979     $$rconfig_file_chatter .= "# Config file search...system reported as:";
1980     if ($is_Windows) {
1981         $$rconfig_file_chatter .= "Windows $Windows_type\n";
1982     }
1983     else {
1984         $$rconfig_file_chatter .= " $^O\n";
1985     }
1986
1987     # sub to check file existance and record all tests
1988     my $exists_config_file = sub {
1989         my $config_file = shift;
1990         return 0 unless $config_file;
1991         $$rconfig_file_chatter .= "# Testing: $config_file\n";
1992         return -f $config_file;
1993     };
1994
1995     my $config_file;
1996
1997     # look in current directory first
1998     $config_file = ".perltidyrc";
1999     return $config_file if $exists_config_file->($config_file);
2000
2001     # Default environment vars.
2002     my @envs = qw(PERLTIDY HOME);
2003
2004     # Check the NT/2k/XP locations, first a local machine def, then a
2005     # network def
2006     push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2007
2008     # Now go through the enviornment ...
2009     foreach my $var (@envs) {
2010         $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2011         if ( defined( $ENV{$var} ) ) {
2012             $$rconfig_file_chatter .= " = $ENV{$var}\n";
2013
2014             # test ENV{ PERLTIDY } as file:
2015             if ( $var eq 'PERLTIDY' ) {
2016                 $config_file = "$ENV{$var}";
2017                 return $config_file if $exists_config_file->($config_file);
2018             }
2019
2020             # test ENV as directory:
2021             $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2022             return $config_file if $exists_config_file->($config_file);
2023         }
2024         else {
2025             $$rconfig_file_chatter .= "\n";
2026         }
2027     }
2028
2029     # then look for a system-wide definition
2030     # where to look varies with OS
2031     if ($is_Windows) {
2032
2033         if ($Windows_type) {
2034             my ( $os, $system, $allusers ) =
2035               Win_Config_Locs( $rpending_complaint, $Windows_type );
2036
2037             # Check All Users directory, if there is one.
2038             if ($allusers) {
2039                 $config_file = catfile( $allusers, ".perltidyrc" );
2040                 return $config_file if $exists_config_file->($config_file);
2041             }
2042
2043             # Check system directory.
2044             $config_file = catfile( $system, ".perltidyrc" );
2045             return $config_file if $exists_config_file->($config_file);
2046         }
2047     }
2048
2049     # Place to add customization code for other systems
2050     elsif ( $^O eq 'OS2' ) {
2051     }
2052     elsif ( $^O eq 'MacOS' ) {
2053     }
2054     elsif ( $^O eq 'VMS' ) {
2055     }
2056
2057     # Assume some kind of Unix
2058     else {
2059
2060         $config_file = "/usr/local/etc/perltidyrc";
2061         return $config_file if $exists_config_file->($config_file);
2062
2063         $config_file = "/etc/perltidyrc";
2064         return $config_file if $exists_config_file->($config_file);
2065     }
2066
2067     # Couldn't find a config file
2068     return;
2069 }
2070
2071 sub Win_Config_Locs {
2072
2073     # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2074     # or undef if its not a win32 OS.  In list context returns OS, System
2075     # Directory, and All Users Directory.  All Users will be empty on a
2076     # 9x/Me box.  Contributed by: Yves Orton.
2077
2078     my $rpending_complaint = shift;
2079     my $os = (@_) ? shift: Win_OS_Type();
2080     return unless $os;
2081
2082     my $system   = "";
2083     my $allusers = "";
2084
2085     if ( $os =~ /9[58]|Me/ ) {
2086         $system = "C:/Windows";
2087     }
2088     elsif ( $os =~ /NT|XP|2000/ ) {
2089         $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2090         $allusers =
2091           ( $os =~ /NT/ )
2092           ? "C:/WinNT/profiles/All Users/"
2093           : "C:/Documents and Settings/All Users/";
2094     }
2095     else {
2096
2097         # This currently would only happen on a win32s computer.
2098         # I dont have one to test So I am unsure how to proceed.
2099         # Sorry. :-)
2100         $$rpending_complaint .=
2101 "I dont know a sensible place to look for config files on an $os system.\n";
2102         return;
2103     }
2104     return wantarray ? ( $os, $system, $allusers ) : $os;
2105 }
2106
2107 sub dump_config_file {
2108     my $fh                   = shift;
2109     my $config_file          = shift;
2110     my $rconfig_file_chatter = shift;
2111     print STDOUT "$$rconfig_file_chatter";
2112     if ($fh) {
2113         print STDOUT "# Dump of file: '$config_file'\n";
2114         while ( $_ = $fh->getline() ) { print STDOUT }
2115         eval { $fh->close() };
2116     }
2117     else {
2118         print STDOUT "# ...no config file found\n";
2119     }
2120 }
2121
2122 sub read_config_file {
2123
2124     my ( $fh, $config_file, $rexpansion ) = @_;
2125     my @config_list = ();
2126
2127     my $name = undef;
2128     my $line_no;
2129     while ( $_ = $fh->getline() ) {
2130         $line_no++;
2131         chomp;
2132         next if /^\s*#/;    # skip full-line comment
2133         $_ = strip_comment( $_, $config_file, $line_no );
2134         s/^\s*(.*?)\s*$/$1/;    # trim both ends
2135         next unless $_;
2136
2137         # look for something of the general form
2138         #    newname { body }
2139         # or just
2140         #    body
2141
2142         if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2143             my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2144
2145             # handle a new alias definition
2146             if ($newname) {
2147                 if ($name) {
2148                     die
2149 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2150                 }
2151                 $name = $newname;
2152
2153                 if ( ${$rexpansion}{$name} ) {
2154                     local $" = ')(';
2155                     my @names = sort keys %$rexpansion;
2156                     print "Here is a list of all installed aliases\n(@names)\n";
2157                     die
2158 "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2159                 }
2160                 ${$rexpansion}{$name} = [];
2161             }
2162
2163             # now do the body
2164             if ($body) {
2165
2166                 my ( $rbody_parts, $msg ) = parse_args($body);
2167                 if ($msg) {
2168                     die <<EOM;
2169 Error reading file $config_file at line number $line_no.
2170 $msg
2171 Please fix this line or use -npro to avoid reading this file
2172 EOM
2173                 }
2174
2175                 if ($name) {
2176
2177                     # remove leading dashes if this is an alias
2178                     foreach (@$rbody_parts) { s/^\-+//; }
2179                     push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2180                 }
2181
2182                 else {
2183                     push( @config_list, @$rbody_parts );
2184                 }
2185             }
2186
2187             if ($curly) {
2188                 unless ($name) {
2189                     die
2190 "Unexpected '}' seen in config file $config_file line $.\n";
2191                 }
2192                 $name = undef;
2193             }
2194         }
2195     }
2196     eval { $fh->close() };
2197     return ( \@config_list );
2198 }
2199
2200 sub strip_comment {
2201
2202     my ( $instr, $config_file, $line_no ) = @_;
2203
2204     # nothing to do if no comments
2205     if ( $instr !~ /#/ ) {
2206         return $instr;
2207     }
2208
2209     # use simple method of no quotes
2210     elsif ( $instr !~ /['"]/ ) {
2211         $instr =~ s/\s*\#.*$//;    # simple trim
2212         return $instr;
2213     }
2214
2215     # handle comments and quotes
2216     my $outstr     = "";
2217     my $quote_char = "";
2218     while (1) {
2219
2220         # looking for ending quote character
2221         if ($quote_char) {
2222             if ( $instr =~ /\G($quote_char)/gc ) {
2223                 $quote_char = "";
2224                 $outstr .= $1;
2225             }
2226             elsif ( $instr =~ /\G(.)/gc ) {
2227                 $outstr .= $1;
2228             }
2229
2230             # error..we reached the end without seeing the ending quote char
2231             else {
2232                 die <<EOM;
2233 Error reading file $config_file at line number $line_no.
2234 Did not see ending quote character <$quote_char> in this text:
2235 $instr
2236 Please fix this line or use -npro to avoid reading this file
2237 EOM
2238                 last;
2239             }
2240         }
2241
2242         # accumulating characters and looking for start of a quoted string
2243         else {
2244             if ( $instr =~ /\G([\"\'])/gc ) {
2245                 $outstr .= $1;
2246                 $quote_char = $1;
2247             }
2248             elsif ( $instr =~ /\G#/gc ) {
2249                 last;
2250             }
2251             elsif ( $instr =~ /\G(.)/gc ) {
2252                 $outstr .= $1;
2253             }
2254             else {
2255                 last;
2256             }
2257         }
2258     }
2259     return $outstr;
2260 }
2261
2262 sub parse_args {
2263
2264     # Parse a command string containing multiple string with possible
2265     # quotes, into individual commands.  It might look like this, for example:
2266     #
2267     #    -wba=" + - "  -some-thing -wbb='. && ||'
2268     #
2269     # There is no need, at present, to handle escaped quote characters.
2270     # (They are not perltidy tokens, so needn't be in strings).
2271
2272     my ($body)     = @_;
2273     my @body_parts = ();
2274     my $quote_char = "";
2275     my $part       = "";
2276     my $msg        = "";
2277     while (1) {
2278
2279         # looking for ending quote character
2280         if ($quote_char) {
2281             if ( $body =~ /\G($quote_char)/gc ) {
2282                 $quote_char = "";
2283             }
2284             elsif ( $body =~ /\G(.)/gc ) {
2285                 $part .= $1;
2286             }
2287
2288             # error..we reached the end without seeing the ending quote char
2289             else {
2290                 if ($part) { push @body_parts, $part; }
2291                 $msg = <<EOM;
2292 Did not see ending quote character <$quote_char> in this text:
2293 $body
2294 EOM
2295                 last;
2296             }
2297         }
2298
2299         # accumulating characters and looking for start of a quoted string
2300         else {
2301             if ( $body =~ /\G([\"\'])/gc ) {
2302                 $quote_char = $1;
2303             }
2304             elsif ( $body =~ /\G(\s+)/gc ) {
2305                 if ($part) { push @body_parts, $part; }
2306                 $part = "";
2307             }
2308             elsif ( $body =~ /\G(.)/gc ) {
2309                 $part .= $1;
2310             }
2311             else {
2312                 if ($part) { push @body_parts, $part; }
2313                 last;
2314             }
2315         }
2316     }
2317     return ( \@body_parts, $msg );
2318 }
2319
2320 sub dump_long_names {
2321
2322     my @names = sort @_;
2323     print STDOUT <<EOM;
2324 # Command line long names (passed to GetOptions)
2325 #---------------------------------------------------------------
2326 # here is a summary of the Getopt codes:
2327 # <none> does not take an argument
2328 # =s takes a mandatory string
2329 # :s takes an optional string
2330 # =i takes a mandatory integer
2331 # :i takes an optional integer
2332 # ! does not take an argument and may be negated
2333 #  i.e., -foo and -nofoo are allowed
2334 # a double dash signals the end of the options list
2335 #
2336 #---------------------------------------------------------------
2337 EOM
2338
2339     foreach (@names) { print STDOUT "$_\n" }
2340 }
2341
2342 sub dump_defaults {
2343     my @defaults = sort @_;
2344     print STDOUT "Default command line options:\n";
2345     foreach (@_) { print STDOUT "$_\n" }
2346 }
2347
2348 sub dump_options {
2349     my ($rOpts) = @_;
2350     local $" = "\n";
2351     print STDOUT "Final parameter set for this run\n";
2352     foreach ( sort keys %{$rOpts} ) {
2353         print STDOUT "$_=$rOpts->{$_}\n";
2354     }
2355 }
2356
2357 sub show_version {
2358     print <<"EOM";
2359 This is perltidy, v$VERSION
2360
2361 Copyright 2000-2003, Steve Hancock
2362
2363 Perltidy is free software and may be copied under the terms of the GNU
2364 General Public License, which is included in the distribution files.
2365
2366 Complete documentation for perltidy can be found using 'man perltidy'
2367 or on the internet at http://perltidy.sourceforge.net.
2368 EOM
2369 }
2370
2371 sub usage {
2372
2373     print STDOUT <<EOF;
2374 This is perltidy version $VERSION, a perl script indenter.  Usage:
2375
2376     perltidy [ options ] file1 file2 file3 ...
2377             (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2378     perltidy [ options ] file1 -o outfile
2379     perltidy [ options ] file1 -st >outfile
2380     perltidy [ options ] <infile >outfile
2381
2382 Options have short and long forms. Short forms are shown; see
2383 man pages for long forms.  Note: '=s' indicates a required string,
2384 and '=n' indicates a required integer.
2385
2386 I/O control
2387  -h      show this help
2388  -o=file name of the output file (only if single input file)
2389  -oext=s change output extension from 'tdy' to s
2390  -opath=path  change path to be 'path' for output files
2391  -b      backup original to .bak and modify file in-place
2392  -bext=s change default backup extension from 'bak' to s
2393  -q      deactivate error messages (for running under editor)
2394  -w      include non-critical warning messages in the .ERR error output
2395  -syn    run perl -c to check syntax (default under unix systems)
2396  -log    save .LOG file, which has useful diagnostics
2397  -f      force perltidy to read a binary file
2398  -g      like -log but writes more detailed .LOG file, for debugging scripts
2399  -opt    write the set of options actually used to a .LOG file
2400  -npro   ignore .perltidyrc configuration command file
2401  -pro=file   read configuration commands from file instead of .perltidyrc
2402  -st     send output to standard output, STDOUT
2403  -se     send error output to standard error output, STDERR
2404  -v      display version number to standard output and quit
2405
2406 Basic Options:
2407  -i=n    use n columns per indentation level (default n=4)
2408  -t      tabs: use one tab character per indentation level, not recommeded
2409  -nt     no tabs: use n spaces per indentation level (default)
2410  -et=n   entab leading whitespace n spaces per tab; not recommended
2411  -io     "indent only": just do indentation, no other formatting.
2412  -sil=n  set starting indentation level to n;  use if auto detection fails
2413  -ole=s  specify output line ending (s=dos or win, mac, unix)
2414  -ple    keep output line endings same as input (input must be filename)
2415
2416 Whitespace Control
2417  -fws    freeze whitespace; this disables all whitespace changes
2418            and disables the following switches:
2419  -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
2420  -bbt    same as -bt but for code block braces; same as -bt if not given
2421  -bbvt   block braces vertically tight; use with -bl or -bli
2422  -bbvtl=s  make -bbvt to apply to selected list of block types
2423  -pt=n   paren tightness (n=0, 1 or 2)
2424  -sbt=n  square bracket tightness (n=0, 1, or 2)
2425  -bvt=n  brace vertical tightness,
2426          n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2427  -pvt=n  paren vertical tightness (see -bvt for n)
2428  -sbvt=n square bracket vertical tightness (see -bvt for n)
2429  -bvtc=n closing brace vertical tightness:
2430          n=(0=open, 1=sometimes close, 2=always close)
2431  -pvtc=n closing paren vertical tightness, see -bvtc for n.
2432  -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2433  -ci=n   sets continuation indentation=n,  default is n=2 spaces
2434  -lp     line up parentheses, brackets, and non-BLOCK braces
2435  -sfs    add space before semicolon in for( ; ; )
2436  -aws    allow perltidy to add whitespace (default)
2437  -dws    delete all old non-essential whitespace
2438  -icb    indent closing brace of a code block
2439  -cti=n  closing indentation of paren, square bracket, or non-block brace:
2440          n=0 none, =1 align with opening, =2 one full indentation level
2441  -icp    equivalent to -cti=2
2442  -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
2443  -wrs=s  want space right of tokens in string;
2444  -sts    put space before terminal semicolon of a statement
2445  -sak=s  put space between keywords given in s and '(';
2446  -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2447
2448 Line Break Control
2449  -fnl    freeze newlines; this disables all line break changes
2450             and disables the following switches:
2451  -anl    add newlines;  ok to introduce new line breaks
2452  -bbs    add blank line before subs and packages
2453  -bbc    add blank line before block comments
2454  -bbb    add blank line between major blocks
2455  -sob    swallow optional blank lines
2456  -ce     cuddled else; use this style: '} else {'
2457  -dnl    delete old newlines (default)
2458  -mbl=n  maximum consecutive blank lines (default=1)
2459  -l=n    maximum line length;  default n=80
2460  -bl     opening brace on new line
2461  -sbl    opening sub brace on new line.  value of -bl is used if not given.
2462  -bli    opening brace on new line and indented
2463  -bar    opening brace always on right, even for long clauses
2464  -vt=n   vertical tightness (requires -lp); n controls break after opening
2465          token: 0=never  1=no break if next line balanced   2=no break
2466  -vtc=n  vertical tightness of closing container; n controls if closing
2467          token starts new line: 0=always  1=not unless list  1=never
2468  -wba=s  want break after tokens in string; i.e. wba=': .'
2469  -wbb=s  want break before tokens in string
2470
2471 Following Old Breakpoints
2472  -boc    break at old comma breaks: turns off all automatic list formatting
2473  -bol    break at old logical breakpoints: or, and, ||, && (default)
2474  -bok    break at old list keyword breakpoints such as map, sort (default)
2475  -bot    break at old conditional (trinary ?:) operator breakpoints (default)
2476  -cab=n  break at commas after a comma-arrow (=>):
2477          n=0 break at all commas after =>
2478          n=1 stable: break unless this breaks an existing one-line container
2479          n=2 break only if a one-line container cannot be formed
2480          n=3 do not treat commas after => specially at all
2481
2482 Comment controls
2483  -ibc    indent block comments (default)
2484  -isbc   indent spaced block comments; may indent unless no leading space
2485  -msc=n  minimum desired spaces to side comment, default 4
2486  -csc    add or update closing side comments after closing BLOCK brace
2487  -dcsc   delete closing side comments created by a -csc command
2488  -cscp=s change closing side comment prefix to be other than '## end'
2489  -cscl=s change closing side comment to apply to selected list of blocks
2490  -csci=n minimum number of lines needed to apply a -csc tag, default n=6
2491  -csct=n maximum number of columns of appended text, default n=20
2492  -cscw   causes warning if old side comment is overwritten with -csc
2493
2494  -sbc    use 'static block comments' identified by leading '##' (default)
2495  -sbcp=s change static block comment identifier to be other than '##'
2496  -osbc   outdent static block comments
2497
2498  -ssc    use 'static side comments' identified by leading '##' (default)
2499  -sscp=s change static side comment identifier to be other than '##'
2500
2501 Delete selected text
2502  -dac    delete all comments AND pod
2503  -dbc    delete block comments
2504  -dsc    delete side comments
2505  -dp     delete pod
2506
2507 Send selected text to a '.TEE' file
2508  -tac    tee all comments AND pod
2509  -tbc    tee block comments
2510  -tsc    tee side comments
2511  -tp     tee pod
2512
2513 Outdenting
2514  -olq    outdent long quoted strings (default)
2515  -olc    outdent a long block comment line
2516  -ola    outdent statement labels
2517  -okw    outdent control keywords (redo, next, last, goto, return)
2518  -okwl=s specify alternative keywords for -okw command
2519
2520 Other controls
2521  -mft=n  maximum fields per table; default n=40
2522  -x      do not format lines before hash-bang line (i.e., for VMS)
2523  -asc    allows perltidy to add a ';' when missing (default)
2524  -dsm    allows perltidy to delete an unnecessary ';'  (default)
2525
2526 Combinations of other parameters
2527  -gnu     attempt to follow GNU Coding Standards as applied to perl
2528  -mangle  remove as many newlines as possible (but keep comments and pods)
2529  -extrude  insert as many newlines as possible
2530
2531 Dump and die, debugging
2532  -dop    dump options used in this run to standard output and quit
2533  -ddf    dump default options to standard output and quit
2534  -dsn    dump all option short names to standard output and quit
2535  -dln    dump option long names to standard output and quit
2536  -dpro   dump whatever configuration file is in effect to standard output
2537  -dtt    dump all token types to standard output and quit
2538
2539 HTML
2540  -html write an html file (see 'man perl2web' for many options)
2541        Note: when -html is used, no indentation or formatting are done.
2542        Hint: try perltidy -html -css=mystyle.css filename.pl
2543        and edit mystyle.css to change the appearance of filename.html.
2544        -nnn gives line numbers
2545        -pre only writes out <pre>..</pre> code section
2546        -toc places a table of contents to subs at the top (default)
2547        -pod passes pod text through pod2html (default)
2548        -frm write html as a frame (3 files)
2549        -text=s extra extension for table of contents if -frm, default='toc'
2550        -sext=s extra extension for file content if -frm, default='src'
2551
2552 A prefix of "n" negates short form toggle switches, and a prefix of "no"
2553 negates the long forms.  For example, -nasc means don't add missing
2554 semicolons.
2555
2556 If you are unable to see this entire text, try "perltidy -h | more"
2557 For more detailed information, and additional options, try "man perltidy",
2558 or go to the perltidy home page at http://perltidy.sourceforge.net
2559 EOF
2560
2561 }
2562
2563 sub process_this_file {
2564
2565     my ( $truth, $beauty ) = @_;
2566
2567     # loop to process each line of this file
2568     while ( my $line_of_tokens = $truth->get_line() ) {
2569         $beauty->write_line($line_of_tokens);
2570     }
2571
2572     # finish up
2573     eval { $beauty->finish_formatting() };
2574     $truth->report_tokenization_errors();
2575 }
2576
2577 sub check_syntax {
2578
2579     # Use 'perl -c' to make sure that we did not create bad syntax
2580     # This is a very good independent check for programming errors
2581     #
2582     # Given names of the input and output files, ($ifname, $ofname),
2583     # we do the following:
2584     # - check syntax of the input file
2585     # - if bad, all done (could be an incomplete code snippet)
2586     # - if infile syntax ok, then check syntax of the output file;
2587     #   - if outfile syntax bad, issue warning; this implies a code bug!
2588     # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
2589
2590     my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
2591     my $infile_syntax_ok = 0;
2592     my $line_of_dashes   = '-' x 42 . "\n";
2593
2594     my $flags = $rOpts->{'perl-syntax-check-flags'};
2595
2596     # be sure we invoke perl with -c
2597     # note: perl will accept repeated flags like '-c -c'.  It is safest
2598     # to append another -c than try to find an interior bundled c, as
2599     # in -Tc, because such a 'c' might be in a quoted string, for example.
2600     if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
2601
2602     # be sure we invoke perl with -x if requested
2603     # same comments about repeated parameters applies
2604     if ( $rOpts->{'look-for-hash-bang'} ) {
2605         if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
2606     }
2607
2608     # this shouldn't happen unless a termporary file couldn't be made
2609     if ( $ifname eq '-' ) {
2610         $logger_object->write_logfile_entry(
2611             "Cannot run perl -c on STDIN and STDOUT\n");
2612         return $infile_syntax_ok;
2613     }
2614
2615     $logger_object->write_logfile_entry(
2616         "checking input file syntax with perl $flags\n");
2617     $logger_object->write_logfile_entry($line_of_dashes);
2618
2619     # Not all operating systems/shells support redirection of the standard
2620     # error output.
2621     my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
2622
2623     my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
2624     $logger_object->write_logfile_entry("$perl_output\n");
2625
2626     if ( $perl_output =~ /syntax\s*OK/ ) {
2627         $infile_syntax_ok = 1;
2628         $logger_object->write_logfile_entry($line_of_dashes);
2629         $logger_object->write_logfile_entry(
2630             "checking output file syntax with perl $flags ...\n");
2631         $logger_object->write_logfile_entry($line_of_dashes);
2632
2633         my $perl_output =
2634           do_syntax_check( $ofname, $flags, $error_redirection );
2635         $logger_object->write_logfile_entry("$perl_output\n");
2636
2637         unless ( $perl_output =~ /syntax\s*OK/ ) {
2638             $logger_object->write_logfile_entry($line_of_dashes);
2639             $logger_object->warning(
2640 "The output file has a syntax error when tested with perl $flags $ofname !\n"
2641             );
2642             $logger_object->warning(
2643                 "This implies an error in perltidy; the file $ofname is bad\n");
2644             $logger_object->report_definite_bug();
2645
2646             # the perl version number will be helpful for diagnosing the problem
2647             $logger_object->write_logfile_entry(
2648                 qx/perl -v $error_redirection/ . "\n" );
2649         }
2650     }
2651     else {
2652
2653         # Only warn of perl -c syntax errors.  Other messages,
2654         # such as missing modules, are too common.  They can be
2655         # seen by running with perltidy -w
2656         $logger_object->complain("A syntax check using perl $flags gives: \n");
2657         $logger_object->complain($line_of_dashes);
2658         $logger_object->complain("$perl_output\n");
2659         $logger_object->complain($line_of_dashes);
2660         $infile_syntax_ok = -1;
2661         $logger_object->write_logfile_entry($line_of_dashes);
2662         $logger_object->write_logfile_entry(
2663 "The output file will not be checked because of input file problems\n"
2664         );
2665     }
2666     return $infile_syntax_ok;
2667 }
2668
2669 sub do_syntax_check {
2670     my ( $fname, $flags, $error_redirection ) = @_;
2671
2672     # We have to quote the filename in case it has unusual characters
2673     # or spaces.  Example: this filename #CM11.pm# gives trouble.
2674     $fname = '"' . $fname . '"';
2675
2676     # Under VMS something like -T will become -t (and an error) so we
2677     # will put quotes around the flags.  Double quotes seem to work on
2678     # Unix/Windows/VMS, but this may not work on all systems.  (Single
2679     # quotes do not work under Windows).  It could become necessary to
2680     # put double quotes around each flag, such as:  -"c"  -"T"
2681     # We may eventually need some system-dependent coding here.
2682     $flags = '"' . $flags . '"';
2683
2684     # now wish for luck...
2685     return qx/perl $flags $fname $error_redirection/;
2686 }
2687
2688 #####################################################################
2689 #
2690 # This is a stripped down version of IO::Scalar
2691 # Given a reference to a scalar, it supplies either:
2692 # a getline method which reads lines (mode='r'), or
2693 # a print method which reads lines (mode='w')
2694 #
2695 #####################################################################
2696 package Perl::Tidy::IOScalar;
2697 use Carp;
2698
2699 sub new {
2700     my ( $package, $rscalar, $mode ) = @_;
2701     my $ref = ref $rscalar;
2702     if ( $ref ne 'SCALAR' ) {
2703         confess <<EOM;
2704 ------------------------------------------------------------------------
2705 expecting ref to SCALAR but got ref to ($ref); trace follows:
2706 ------------------------------------------------------------------------
2707 EOM
2708
2709     }
2710     if ( $mode eq 'w' ) {
2711         $$rscalar = "";
2712         return bless [ $rscalar, $mode ], $package;
2713     }
2714     elsif ( $mode eq 'r' ) {
2715
2716         # Convert a scalar to an array.
2717         # This avoids looking for "\n" on each call to getline
2718         my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
2719         my $i_next = 0;
2720         return bless [ \@array, $mode, $i_next ], $package;
2721     }
2722     else {
2723         confess <<EOM;
2724 ------------------------------------------------------------------------
2725 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
2726 ------------------------------------------------------------------------
2727 EOM
2728     }
2729 }
2730
2731 sub getline {
2732     my $self = shift;
2733     my $mode = $self->[1];
2734     if ( $mode ne 'r' ) {
2735         confess <<EOM;
2736 ------------------------------------------------------------------------
2737 getline call requires mode = 'r' but mode = ($mode); trace follows:
2738 ------------------------------------------------------------------------
2739 EOM
2740     }
2741     my $i = $self->[2]++;
2742     ##my $line = $self->[0]->[$i];
2743     return $self->[0]->[$i];
2744 }
2745
2746 sub print {
2747     my $self = shift;
2748     my $mode = $self->[1];
2749     if ( $mode ne 'w' ) {
2750         confess <<EOM;
2751 ------------------------------------------------------------------------
2752 print call requires mode = 'w' but mode = ($mode); trace follows:
2753 ------------------------------------------------------------------------
2754 EOM
2755     }
2756     ${ $self->[0] } .= $_[0];
2757 }
2758 sub close { return }
2759
2760 #####################################################################
2761 #
2762 # This is a stripped down version of IO::ScalarArray
2763 # Given a reference to an array, it supplies either:
2764 # a getline method which reads lines (mode='r'), or
2765 # a print method which reads lines (mode='w')
2766 #
2767 # NOTE: this routine assumes that that there aren't any embedded
2768 # newlines within any of the array elements.  There are no checks
2769 # for that.
2770 #
2771 #####################################################################
2772 package Perl::Tidy::IOScalarArray;
2773 use Carp;
2774
2775 sub new {
2776     my ( $package, $rarray, $mode ) = @_;
2777     my $ref = ref $rarray;
2778     if ( $ref ne 'ARRAY' ) {
2779         confess <<EOM;
2780 ------------------------------------------------------------------------
2781 expecting ref to ARRAY but got ref to ($ref); trace follows:
2782 ------------------------------------------------------------------------
2783 EOM
2784
2785     }
2786     if ( $mode eq 'w' ) {
2787         @$rarray = ();
2788         return bless [ $rarray, $mode ], $package;
2789     }
2790     elsif ( $mode eq 'r' ) {
2791         my $i_next = 0;
2792         return bless [ $rarray, $mode, $i_next ], $package;
2793     }
2794     else {
2795         confess <<EOM;
2796 ------------------------------------------------------------------------
2797 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
2798 ------------------------------------------------------------------------
2799 EOM
2800     }
2801 }
2802
2803 sub getline {
2804     my $self = shift;
2805     my $mode = $self->[1];
2806     if ( $mode ne 'r' ) {
2807         confess <<EOM;
2808 ------------------------------------------------------------------------
2809 getline requires mode = 'r' but mode = ($mode); trace follows:
2810 ------------------------------------------------------------------------
2811 EOM
2812     }
2813     my $i = $self->[2]++;
2814     ##my $line = $self->[0]->[$i];
2815     return $self->[0]->[$i];
2816 }
2817
2818 sub print {
2819     my $self = shift;
2820     my $mode = $self->[1];
2821     if ( $mode ne 'w' ) {
2822         confess <<EOM;
2823 ------------------------------------------------------------------------
2824 print requires mode = 'w' but mode = ($mode); trace follows:
2825 ------------------------------------------------------------------------
2826 EOM
2827     }
2828     push @{ $self->[0] }, $_[0];
2829 }
2830 sub close { return }
2831
2832 #####################################################################
2833 #
2834 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
2835 # which returns the next line to be parsed
2836 #
2837 #####################################################################
2838
2839 package Perl::Tidy::LineSource;
2840
2841 sub new {
2842
2843     my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
2844     my $input_file_copy = undef;
2845     my $fh_copy;
2846
2847     my $input_line_ending;
2848     if ( $rOpts->{'preserve-line-endings'} ) {
2849         $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
2850     }
2851
2852     ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
2853     return undef unless $fh;
2854
2855     # in order to check output syntax when standard output is used,
2856     # or when it is an object, we have to make a copy of the file
2857     if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
2858     {
2859
2860         # Turning off syntax check when input output is used.
2861         # The reason is that temporary files cause problems on
2862         # on many systems.
2863         $rOpts->{'check-syntax'} = 0;
2864         $input_file_copy = '-';
2865
2866         $$rpending_logfile_message .= <<EOM;
2867 Note: --syntax check will be skipped because standard input is used
2868 EOM
2869
2870     }
2871
2872     return bless {
2873         _fh                => $fh,
2874         _fh_copy           => $fh_copy,
2875         _filename          => $input_file,
2876         _input_file_copy   => $input_file_copy,
2877         _input_line_ending => $input_line_ending,
2878         _rinput_buffer     => [],
2879         _started           => 0,
2880     }, $class;
2881 }
2882
2883 sub get_input_file_copy_name {
2884     my $self   = shift;
2885     my $ifname = $self->{_input_file_copy};
2886     unless ($ifname) {
2887         $ifname = $self->{_filename};
2888     }
2889     return $ifname;
2890 }
2891
2892 sub close_input_file {
2893     my $self = shift;
2894     eval { $self->{_fh}->close() };
2895     eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
2896 }
2897
2898 sub get_line {
2899     my $self          = shift;
2900     my $line          = undef;
2901     my $fh            = $self->{_fh};
2902     my $fh_copy       = $self->{_fh_copy};
2903     my $rinput_buffer = $self->{_rinput_buffer};
2904
2905     if ( scalar(@$rinput_buffer) ) {
2906         $line = shift @$rinput_buffer;
2907     }
2908     else {
2909         $line = $fh->getline();
2910
2911         # patch to read raw mac files under unix, dos
2912         # see if the first line has embedded \r's
2913         if ( $line && !$self->{_started} ) {
2914             if ( $line =~ /[\015][^\015\012]/ ) {
2915
2916                 # found one -- break the line up and store in a buffer
2917                 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
2918                 my $count = @$rinput_buffer;
2919                 $line = shift @$rinput_buffer;
2920             }
2921             $self->{_started}++;
2922         }
2923     }
2924     if ( $line && $fh_copy ) { $fh_copy->print($line); }
2925     return $line;
2926 }
2927
2928 sub old_get_line {
2929     my $self    = shift;
2930     my $line    = undef;
2931     my $fh      = $self->{_fh};
2932     my $fh_copy = $self->{_fh_copy};
2933     $line = $fh->getline();
2934     if ( $line && $fh_copy ) { $fh_copy->print($line); }
2935     return $line;
2936 }
2937
2938 #####################################################################
2939 #
2940 # the Perl::Tidy::LineSink class supplies a write_line method for
2941 # actual file writing
2942 #
2943 #####################################################################
2944
2945 package Perl::Tidy::LineSink;
2946
2947 sub new {
2948
2949     my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
2950         $rpending_logfile_message )
2951       = @_;
2952     my $fh               = undef;
2953     my $fh_copy          = undef;
2954     my $fh_tee           = undef;
2955     my $output_file_copy = "";
2956     my $output_file_open = 0;
2957
2958     if ( $rOpts->{'format'} eq 'tidy' ) {
2959         ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
2960         unless ($fh) { die "Cannot write to output stream\n"; }
2961         $output_file_open = 1;
2962     }
2963
2964     # in order to check output syntax when standard output is used,
2965     # or when it is an object, we have to make a copy of the file
2966     if ( $output_file eq '-' || ref $output_file ) {
2967         if ( $rOpts->{'check-syntax'} ) {
2968
2969             # Turning off syntax check when standard output is used.
2970             # The reason is that temporary files cause problems on
2971             # on many systems.
2972             $rOpts->{'check-syntax'} = 0;
2973             $output_file_copy = '-';
2974             $$rpending_logfile_message .= <<EOM;
2975 Note: --syntax check will be skipped because standard output is used
2976 EOM
2977
2978         }
2979     }
2980
2981     bless {
2982         _fh               => $fh,
2983         _fh_copy          => $fh_copy,
2984         _fh_tee           => $fh_tee,
2985         _output_file      => $output_file,
2986         _output_file_open => $output_file_open,
2987         _output_file_copy => $output_file_copy,
2988         _tee_flag         => 0,
2989         _tee_file         => $tee_file,
2990         _tee_file_opened  => 0,
2991         _line_separator   => $line_separator,
2992     }, $class;
2993 }
2994
2995 sub write_line {
2996
2997     my $self    = shift;
2998     my $fh      = $self->{_fh};
2999     my $fh_copy = $self->{_fh_copy};
3000
3001     my $output_file_open = $self->{_output_file_open};
3002     chomp $_[0];
3003     $_[0] .= $self->{_line_separator};
3004
3005     $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3006     print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3007
3008     if ( $self->{_tee_flag} ) {
3009         unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3010         my $fh_tee = $self->{_fh_tee};
3011         print $fh_tee $_[0];
3012     }
3013 }
3014
3015 sub get_output_file_copy {
3016     my $self   = shift;
3017     my $ofname = $self->{_output_file_copy};
3018     unless ($ofname) {
3019         $ofname = $self->{_output_file};
3020     }
3021     return $ofname;
3022 }
3023
3024 sub tee_on {
3025     my $self = shift;
3026     $self->{_tee_flag} = 1;
3027 }
3028
3029 sub tee_off {
3030     my $self = shift;
3031     $self->{_tee_flag} = 0;
3032 }
3033
3034 sub really_open_tee_file {
3035     my $self     = shift;
3036     my $tee_file = $self->{_tee_file};
3037     my $fh_tee;
3038     $fh_tee = IO::File->new(">$tee_file")
3039       or die("couldn't open TEE file $tee_file: $!\n");
3040     $self->{_tee_file_opened} = 1;
3041     $self->{_fh_tee}          = $fh_tee;
3042 }
3043
3044 sub close_output_file {
3045     my $self = shift;
3046     eval { $self->{_fh}->close() }      if $self->{_output_file_open};
3047     eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3048     $self->close_tee_file();
3049 }
3050
3051 sub close_tee_file {
3052     my $self = shift;
3053
3054     if ( $self->{_tee_file_opened} ) {
3055         eval { $self->{_fh_tee}->close() };
3056         $self->{_tee_file_opened} = 0;
3057     }
3058 }
3059
3060 #####################################################################
3061 #
3062 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3063 # useful for program development.
3064 #
3065 # Only one such file is created regardless of the number of input
3066 # files processed.  This allows the results of processing many files
3067 # to be summarized in a single file.
3068 #
3069 #####################################################################
3070
3071 package Perl::Tidy::Diagnostics;
3072
3073 sub new {
3074
3075     my $class = shift;
3076     bless {
3077         _write_diagnostics_count => 0,
3078         _last_diagnostic_file    => "",
3079         _input_file              => "",
3080         _fh                      => undef,
3081     }, $class;
3082 }
3083
3084 sub set_input_file {
3085     my $self = shift;
3086     $self->{_input_file} = $_[0];
3087 }
3088
3089 # This is a diagnostic routine which is useful for program development.
3090 # Output from debug messages go to a file named DIAGNOSTICS, where
3091 # they are labeled by file and line.  This allows many files to be
3092 # scanned at once for some particular condition of interest.
3093 sub write_diagnostics {
3094     my $self = shift;
3095
3096     unless ( $self->{_write_diagnostics_count} ) {
3097         open DIAGNOSTICS, ">DIAGNOSTICS"
3098           or death("couldn't open DIAGNOSTICS: $!\n");
3099     }
3100
3101     my $last_diagnostic_file = $self->{_last_diagnostic_file};
3102     my $input_file           = $self->{_input_file};
3103     if ( $last_diagnostic_file ne $input_file ) {
3104         print DIAGNOSTICS "\nFILE:$input_file\n";
3105     }
3106     $self->{_last_diagnostic_file} = $input_file;
3107     my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3108     print DIAGNOSTICS "$input_line_number:\t@_";
3109     $self->{_write_diagnostics_count}++;
3110 }
3111
3112 #####################################################################
3113 #
3114 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3115 #
3116 #####################################################################
3117
3118 package Perl::Tidy::Logger;
3119
3120 sub new {
3121     my $class = shift;
3122     my $fh;
3123     my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3124
3125     # remove any old error output file
3126     unless ( ref($warning_file) ) {
3127         if ( -e $warning_file ) { unlink($warning_file) }
3128     }
3129
3130     bless {
3131         _log_file                      => $log_file,
3132         _fh_warnings                   => undef,
3133         _rOpts                         => $rOpts,
3134         _fh_warnings                   => undef,
3135         _last_input_line_written       => 0,
3136         _at_end_of_file                => 0,
3137         _use_prefix                    => 1,
3138         _block_log_output              => 0,
3139         _line_of_tokens                => undef,
3140         _output_line_number            => undef,
3141         _wrote_line_information_string => 0,
3142         _wrote_column_headings         => 0,
3143         _warning_file                  => $warning_file,
3144         _warning_count                 => 0,
3145         _complaint_count               => 0,
3146         _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
3147         _saw_brace_error => 0,
3148         _saw_extrude     => $saw_extrude,
3149         _output_array    => [],
3150     }, $class;
3151 }
3152
3153 sub close_log_file {
3154
3155     my $self = shift;
3156     if ( $self->{_fh_warnings} ) {
3157         eval { $self->{_fh_warnings}->close() };
3158         $self->{_fh_warnings} = undef;
3159     }
3160 }
3161
3162 sub get_warning_count {
3163     my $self = shift;
3164     return $self->{_warning_count};
3165 }
3166
3167 sub get_use_prefix {
3168     my $self = shift;
3169     return $self->{_use_prefix};
3170 }
3171
3172 sub block_log_output {
3173     my $self = shift;
3174     $self->{_block_log_output} = 1;
3175 }
3176
3177 sub unblock_log_output {
3178     my $self = shift;
3179     $self->{_block_log_output} = 0;
3180 }
3181
3182 sub interrupt_logfile {
3183     my $self = shift;
3184     $self->{_use_prefix} = 0;
3185     $self->warning("\n");
3186     $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
3187 }
3188
3189 sub resume_logfile {
3190     my $self = shift;
3191     $self->write_logfile_entry( '#' x 60 . "\n" );
3192     $self->{_use_prefix} = 1;
3193 }
3194
3195 sub we_are_at_the_last_line {
3196     my $self = shift;
3197     unless ( $self->{_wrote_line_information_string} ) {
3198         $self->write_logfile_entry("Last line\n\n");
3199     }
3200     $self->{_at_end_of_file} = 1;
3201 }
3202
3203 # record some stuff in case we go down in flames
3204 sub black_box {
3205     my $self = shift;
3206     my ( $line_of_tokens, $output_line_number ) = @_;
3207     my $input_line        = $line_of_tokens->{_line_text};
3208     my $input_line_number = $line_of_tokens->{_line_number};
3209
3210     # save line information in case we have to write a logfile message
3211     $self->{_line_of_tokens}                = $line_of_tokens;
3212     $self->{_output_line_number}            = $output_line_number;
3213     $self->{_wrote_line_information_string} = 0;
3214
3215     my $last_input_line_written = $self->{_last_input_line_written};
3216     my $rOpts                   = $self->{_rOpts};
3217     if (
3218         (
3219             ( $input_line_number - $last_input_line_written ) >=
3220             $rOpts->{'logfile-gap'}
3221         )
3222         || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3223       )
3224     {
3225         my $rlevels                      = $line_of_tokens->{_rlevels};
3226         my $structural_indentation_level = $$rlevels[0];
3227         $self->{_last_input_line_written} = $input_line_number;
3228         ( my $out_str = $input_line ) =~ s/^\s*//;
3229         chomp $out_str;
3230
3231         $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3232
3233         if ( length($out_str) > 35 ) {
3234             $out_str = substr( $out_str, 0, 35 ) . " ....";
3235         }
3236         $self->logfile_output( "", "$out_str\n" );
3237     }
3238 }
3239
3240 sub write_logfile_entry {
3241     my $self = shift;
3242
3243     # add leading >>> to avoid confusing error mesages and code
3244     $self->logfile_output( ">>>", "@_" );
3245 }
3246
3247 sub write_column_headings {
3248     my $self = shift;
3249
3250     $self->{_wrote_column_headings} = 1;
3251     my $routput_array = $self->{_output_array};
3252     push @{$routput_array}, <<EOM;
3253 The nesting depths in the table below are at the start of the lines.
3254 The indicated output line numbers are not always exact.
3255 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3256
3257 in:out indent c b  nesting   code + messages; (messages begin with >>>)
3258 lines  levels i k            (code begins with one '.' per indent level)
3259 ------  ----- - - --------   -------------------------------------------
3260 EOM
3261 }
3262
3263 sub make_line_information_string {
3264
3265     # make columns of information when a logfile message needs to go out
3266     my $self                    = shift;
3267     my $line_of_tokens          = $self->{_line_of_tokens};
3268     my $input_line_number       = $line_of_tokens->{_line_number};
3269     my $line_information_string = "";
3270     if ($input_line_number) {
3271
3272         my $output_line_number       = $self->{_output_line_number};
3273         my $brace_depth              = $line_of_tokens->{_curly_brace_depth};
3274         my $paren_depth              = $line_of_tokens->{_paren_depth};
3275         my $square_bracket_depth     = $line_of_tokens->{_square_bracket_depth};
3276         my $python_indentation_level =
3277           $line_of_tokens->{_python_indentation_level};
3278         my $rlevels         = $line_of_tokens->{_rlevels};
3279         my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3280         my $rci_levels      = $line_of_tokens->{_rci_levels};
3281         my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3282
3283         my $structural_indentation_level = $$rlevels[0];
3284
3285         $self->write_column_headings() unless $self->{_wrote_column_headings};
3286
3287         # keep logfile columns aligned for scripts up to 999 lines;
3288         # for longer scripts it doesn't really matter
3289         my $extra_space = "";
3290         $extra_space .=
3291             ( $input_line_number < 10 ) ? "  "
3292           : ( $input_line_number < 100 ) ? " "
3293           : "";
3294         $extra_space .=
3295             ( $output_line_number < 10 ) ? "  "
3296           : ( $output_line_number < 100 ) ? " "
3297           : "";
3298
3299         # there are 2 possible nesting strings:
3300         # the original which looks like this:  (0 [1 {2
3301         # the new one, which looks like this:  {{[
3302         # the new one is easier to read, and shows the order, but
3303         # could be arbitrarily long, so we use it unless it is too long
3304         my $nesting_string =
3305           "($paren_depth [$square_bracket_depth {$brace_depth";
3306         my $nesting_string_new = $$rnesting_tokens[0];
3307
3308         my $ci_level = $$rci_levels[0];
3309         if ( $ci_level > 9 ) { $ci_level = '*' }
3310         my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3311
3312         if ( length($nesting_string_new) <= 8 ) {
3313             $nesting_string =
3314               $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3315         }
3316         if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3317         $line_information_string =
3318 "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3319     }
3320     return $line_information_string;
3321 }
3322
3323 sub logfile_output {
3324     my $self = shift;
3325     my ( $prompt, $msg ) = @_;
3326     return if ( $self->{_block_log_output} );
3327
3328     my $routput_array = $self->{_output_array};
3329     if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3330         push @{$routput_array}, "$msg";
3331     }
3332     else {
3333         my $line_information_string = $self->make_line_information_string();
3334         $self->{_wrote_line_information_string} = 1;
3335
3336         if ($line_information_string) {
3337             push @{$routput_array}, "$line_information_string   $prompt$msg";
3338         }
3339         else {
3340             push @{$routput_array}, "$msg";
3341         }
3342     }
3343 }
3344
3345 sub get_saw_brace_error {
3346     my $self = shift;
3347     return $self->{_saw_brace_error};
3348 }
3349
3350 sub increment_brace_error {
3351     my $self = shift;
3352     $self->{_saw_brace_error}++;
3353 }
3354
3355 sub brace_warning {
3356     my $self = shift;
3357     use constant BRACE_WARNING_LIMIT => 10;
3358     my $saw_brace_error = $self->{_saw_brace_error};
3359
3360     if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3361         $self->warning(@_);
3362     }
3363     $saw_brace_error++;
3364     $self->{_saw_brace_error} = $saw_brace_error;
3365
3366     if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3367         $self->warning("No further warnings of this type will be given\n");
3368     }
3369 }
3370
3371 sub complain {
3372
3373     # handle non-critical warning messages based on input flag
3374     my $self  = shift;
3375     my $rOpts = $self->{_rOpts};
3376
3377     # these appear in .ERR output only if -w flag is used
3378     if ( $rOpts->{'warning-output'} ) {
3379         $self->warning(@_);
3380     }
3381
3382     # otherwise, they go to the .LOG file
3383     else {
3384         $self->{_complaint_count}++;
3385         $self->write_logfile_entry(@_);
3386     }
3387 }
3388
3389 sub warning {
3390
3391     # report errors to .ERR file (or stdout)
3392     my $self = shift;
3393     use constant WARNING_LIMIT => 50;
3394
3395     my $rOpts = $self->{_rOpts};
3396     unless ( $rOpts->{'quiet'} ) {
3397
3398         my $warning_count = $self->{_warning_count};
3399         unless ($warning_count) {
3400             my $warning_file = $self->{_warning_file};
3401             my $fh_warnings;
3402             if ( $rOpts->{'standard-error-output'} ) {
3403                 $fh_warnings = *STDERR;
3404             }
3405             else {
3406                 ( $fh_warnings, my $filename ) =
3407                   Perl::Tidy::streamhandle( $warning_file, 'w' );
3408                 $fh_warnings or die("couldn't open $filename $!\n");
3409                 warn "## Please see file $filename\n";
3410             }
3411             $self->{_fh_warnings} = $fh_warnings;
3412         }
3413
3414         my $fh_warnings = $self->{_fh_warnings};
3415         if ( $warning_count < WARNING_LIMIT ) {
3416             if ( $self->get_use_prefix() > 0 ) {
3417                 my $input_line_number =
3418                   Perl::Tidy::Tokenizer::get_input_line_number();
3419                 print $fh_warnings "$input_line_number:\t@_";
3420                 $self->write_logfile_entry("WARNING: @_");
3421             }
3422             else {
3423                 print $fh_warnings @_;
3424                 $self->write_logfile_entry(@_);
3425             }
3426         }
3427         $warning_count++;
3428         $self->{_warning_count} = $warning_count;
3429
3430         if ( $warning_count == WARNING_LIMIT ) {
3431             print $fh_warnings "No further warnings will be given";
3432         }
3433     }
3434 }
3435
3436 # programming bug codes:
3437 #   -1 = no bug
3438 #    0 = maybe, not sure.
3439 #    1 = definitely
3440 sub report_possible_bug {
3441     my $self         = shift;
3442     my $saw_code_bug = $self->{_saw_code_bug};
3443     $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3444 }
3445
3446 sub report_definite_bug {
3447     my $self = shift;
3448     $self->{_saw_code_bug} = 1;
3449 }
3450
3451 sub ask_user_for_bug_report {
3452     my $self = shift;
3453
3454     my ( $infile_syntax_ok, $formatter ) = @_;
3455     my $saw_code_bug = $self->{_saw_code_bug};
3456     if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3457         $self->warning(<<EOM);
3458
3459 You may have encountered a code bug in perltidy.  If you think so, and
3460 the problem is not listed in the BUGS file at
3461 http://perltidy.sourceforge.net, please report it so that it can be
3462 corrected.  Include the smallest possible script which has the problem,
3463 along with the .LOG file. See the manual pages for contact information.
3464 Thank you!
3465 EOM
3466
3467     }
3468     elsif ( $saw_code_bug == 1 ) {
3469         if ( $self->{_saw_extrude} ) {
3470             $self->warning(<<EOM);
3471 You may have encountered a bug in perltidy.  However, since you are
3472 using the -extrude option, the problem may be with perl itself, which
3473 has occasional parsing problems with this type of file.  If you believe
3474 that the problem is with perltidy, and the problem is not listed in the
3475 BUGS file at http://perltidy.sourceforge.net, please report it so that
3476 it can be corrected.  Include the smallest possible script which has the
3477 problem, along with the .LOG file. See the manual pages for contact
3478 information.
3479 Thank you!
3480 EOM
3481         }
3482         else {
3483             $self->warning(<<EOM);
3484
3485 Oops, you seem to have encountered a bug in perltidy.  Please check the
3486 BUGS file at http://perltidy.sourceforge.net.  If the problem is not
3487 listed there, please report it so that it can be corrected.  Include the
3488 smallest possible script which produces this message, along with the
3489 .LOG file if appropriate.  See the manual pages for contact information.
3490 Your efforts are appreciated.
3491 Thank you!
3492 EOM
3493             my $added_semicolon_count = 0;
3494             eval {
3495                 $added_semicolon_count =
3496                   $formatter->get_added_semicolon_count();
3497             };
3498             if ( $added_semicolon_count > 0 ) {
3499                 $self->warning(<<EOM);
3500
3501 The log file shows that perltidy added $added_semicolon_count semicolons.
3502 Please rerun with -nasc to see if that is the cause of the syntax error.  Even
3503 if that is the problem, please report it so that it can be fixed.
3504 EOM
3505
3506             }
3507         }
3508     }
3509 }
3510
3511 sub finish {
3512
3513     # called after all formatting to summarize errors
3514     my $self = shift;
3515     my ( $infile_syntax_ok, $formatter ) = @_;
3516
3517     my $rOpts         = $self->{_rOpts};
3518     my $warning_count = $self->{_warning_count};
3519     my $saw_code_bug  = $self->{_saw_code_bug};
3520
3521     my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3522       || $saw_code_bug == 1
3523       || $rOpts->{'logfile'};
3524     my $log_file = $self->{_log_file};
3525     if ($warning_count) {
3526         if ($save_logfile) {
3527             $self->block_log_output();    # avoid echoing this to the logfile
3528             $self->warning(
3529                 "The logfile $log_file may contain useful information\n");
3530             $self->unblock_log_output();
3531         }
3532
3533         if ( $self->{_complaint_count} > 0 ) {
3534             $self->warning(
3535 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3536             );
3537         }
3538
3539         if ( $self->{_saw_brace_error}
3540             && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3541         {
3542             $self->warning("To save a full .LOG file rerun with -g\n");
3543         }
3544     }
3545     $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
3546
3547     if ($save_logfile) {
3548         my $log_file = $self->{_log_file};
3549         my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
3550         if ($fh) {
3551             my $routput_array = $self->{_output_array};
3552             foreach ( @{$routput_array} ) { $fh->print($_) }
3553             eval                          { $fh->close() };
3554         }
3555     }
3556 }
3557
3558 #####################################################################
3559 #
3560 # The Perl::Tidy::DevNull class supplies a dummy print method
3561 #
3562 #####################################################################
3563
3564 package Perl::Tidy::DevNull;
3565 sub new { return bless {}, $_[0] }
3566 sub print { return }
3567 sub close { return }
3568
3569 #####################################################################
3570 #
3571 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
3572 #
3573 #####################################################################
3574
3575 package Perl::Tidy::HtmlWriter;
3576
3577 use File::Basename;
3578
3579 # class variables
3580 use vars qw{
3581   %html_color
3582   %html_bold
3583   %html_italic
3584   %token_short_names
3585   %short_to_long_names
3586   $rOpts
3587   $css_filename
3588   $css_linkname
3589   $missing_html_entities
3590 };
3591
3592 # replace unsafe characters with HTML entity representation if HTML::Entities
3593 # is available
3594 { eval "use HTML::Entities"; $missing_html_entities = $@; }
3595
3596 sub new {
3597
3598     my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
3599         $html_src_extension )
3600       = @_;
3601
3602     my $html_file_opened = 0;
3603     my $html_fh;
3604     ( $html_fh, my $html_filename ) =
3605       Perl::Tidy::streamhandle( $html_file, 'w' );
3606     unless ($html_fh) {
3607         warn("can't open $html_file: $!\n");
3608         return undef;
3609     }
3610     $html_file_opened = 1;
3611
3612     if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
3613         $input_file = "NONAME";
3614     }
3615
3616     # write the table of contents to a string
3617     my $toc_string;
3618     my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
3619
3620     my $html_pre_fh;
3621     my @pre_string_stack;
3622     if ( $rOpts->{'html-pre-only'} ) {
3623
3624         # pre section goes directly to the output stream
3625         $html_pre_fh = $html_fh;
3626         $html_pre_fh->print( <<"PRE_END");
3627 <pre>
3628 PRE_END
3629     }
3630     else {
3631
3632         # pre section go out to a temporary string
3633         my $pre_string;
3634         $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
3635         push @pre_string_stack, \$pre_string;
3636     }
3637
3638     # pod text gets diverted if the 'pod2html' is used
3639     my $html_pod_fh;
3640     my $pod_string;
3641     if ( $rOpts->{'pod2html'} ) {
3642         if ( $rOpts->{'html-pre-only'} ) {
3643             undef $rOpts->{'pod2html'};
3644         }
3645         else {
3646             eval "use Pod::Html";
3647             if ($@) {
3648                 warn
3649 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
3650                 undef $rOpts->{'pod2html'};
3651             }
3652             else {
3653                 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
3654             }
3655         }
3656     }
3657
3658     my $toc_filename;
3659     my $src_filename;
3660     if ( $rOpts->{'frames'} ) {
3661         unless ($extension) {
3662             warn
3663 "cannot use frames without a specified output extension; ignoring -frm\n";
3664             undef $rOpts->{'frames'};
3665         }
3666         else {
3667             $toc_filename = $input_file . $html_toc_extension . $extension;
3668             $src_filename = $input_file . $html_src_extension . $extension;
3669         }
3670     }
3671
3672     # ----------------------------------------------------------
3673     # Output is now directed as follows:
3674     # html_toc_fh <-- table of contents items
3675     # html_pre_fh <-- the <pre> section of formatted code, except:
3676     # html_pod_fh <-- pod goes here with the pod2html option
3677     # ----------------------------------------------------------
3678
3679     my $title = $rOpts->{'title'};
3680     unless ($title) {
3681         ( $title, my $path ) = fileparse($input_file);
3682     }
3683     my $toc_item_count = 0;
3684     my $in_toc_package = "";
3685     my $last_level     = 0;
3686     bless {
3687         _input_file        => $input_file,          # name of input file
3688         _title             => $title,               # title, unescaped
3689         _html_file         => $html_file,           # name of .html output file
3690         _toc_filename      => $toc_filename,        # for frames option
3691         _src_filename      => $src_filename,        # for frames option
3692         _html_file_opened  => $html_file_opened,    # a flag
3693         _html_fh           => $html_fh,             # the output stream
3694         _html_pre_fh       => $html_pre_fh,         # pre section goes here
3695         _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
3696         _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
3697         _rpod_string       => \$pod_string,         # string holding pod
3698         _pod_cut_count     => 0,                    # how many =cut's?
3699         _html_toc_fh       => $html_toc_fh,         # fh for table of contents
3700         _rtoc_string       => \$toc_string,         # string holding toc
3701         _rtoc_item_count   => \$toc_item_count,     # how many toc items
3702         _rin_toc_package   => \$in_toc_package,     # package name
3703         _rtoc_name_count   => {},                   # hash to track unique names
3704         _rpackage_stack    => [],                   # stack to check for package
3705                                                     # name changes
3706         _rlast_level       => \$last_level,         # brace indentation level
3707     }, $class;
3708 }
3709
3710 sub add_toc_item {
3711
3712     # Add an item to the html table of contents.
3713     # This is called even if no table of contents is written,
3714     # because we still want to put the anchors in the <pre> text.
3715     # We are given an anchor name and its type; types are:
3716     #      'package', 'sub', '__END__', '__DATA__', 'EOF'
3717     # There must be an 'EOF' call at the end to wrap things up.
3718     my $self = shift;
3719     my ( $name, $type ) = @_;
3720     my $html_toc_fh     = $self->{_html_toc_fh};
3721     my $html_pre_fh     = $self->{_html_pre_fh};
3722     my $rtoc_name_count = $self->{_rtoc_name_count};
3723     my $rtoc_item_count = $self->{_rtoc_item_count};
3724     my $rlast_level     = $self->{_rlast_level};
3725     my $rin_toc_package = $self->{_rin_toc_package};
3726     my $rpackage_stack  = $self->{_rpackage_stack};
3727
3728     # packages contain sublists of subs, so to avoid errors all package
3729     # items are written and finished with the following routines
3730     my $end_package_list = sub {
3731         if ($$rin_toc_package) {
3732             $html_toc_fh->print("</ul>\n</li>\n");
3733             $$rin_toc_package = "";
3734         }
3735     };
3736
3737     my $start_package_list = sub {
3738         my ( $unique_name, $package ) = @_;
3739         if ($$rin_toc_package) { $end_package_list->() }
3740         $html_toc_fh->print(<<EOM);
3741 <li><a href=\"#$unique_name\">package $package</a>
3742 <ul>
3743 EOM
3744         $$rin_toc_package = $package;
3745     };
3746
3747     # start the table of contents on the first item
3748     unless ($$rtoc_item_count) {
3749
3750         # but just quit if we hit EOF without any other entries
3751         # in this case, there will be no toc
3752         return if ( $type eq 'EOF' );
3753         $html_toc_fh->print( <<"TOC_END");
3754 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
3755 <ul>
3756 TOC_END
3757     }
3758     $$rtoc_item_count++;
3759
3760     # make a unique anchor name for this location:
3761     #   - packages get a 'package-' prefix
3762     #   - subs use their names
3763     my $unique_name = $name;
3764     if ( $type eq 'package' ) { $unique_name = "package-$name" }
3765
3766     # append '-1', '-2', etc if necessary to make unique; this will
3767     # be unique because subs and packages cannot have a '-'
3768     if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
3769         $unique_name .= "-$count";
3770     }
3771
3772     #   - all names get terminal '-' if pod2html is used, to avoid
3773     #     conflicts with anchor names created by pod2html
3774     if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
3775
3776     # start/stop lists of subs
3777     if ( $type eq 'sub' ) {
3778         my $package = $rpackage_stack->[$$rlast_level];
3779         unless ($package) { $package = 'main' }
3780
3781         # if we're already in a package/sub list, be sure its the right
3782         # package or else close it
3783         if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
3784             $end_package_list->();
3785         }
3786
3787         # start a package/sub list if necessary
3788         unless ($$rin_toc_package) {
3789             $start_package_list->( $unique_name, $package );
3790         }
3791     }
3792
3793     # now write an entry in the toc for this item
3794     if ( $type eq 'package' ) {
3795         $start_package_list->( $unique_name, $name );
3796     }
3797     elsif ( $type eq 'sub' ) {
3798         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
3799     }
3800     else {
3801         $end_package_list->();
3802         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
3803     }
3804
3805     # write the anchor in the <pre> section
3806     $html_pre_fh->print("<a name=\"$unique_name\"></a>");
3807
3808     # end the table of contents, if any, on the end of file
3809     if ( $type eq 'EOF' ) {
3810         $html_toc_fh->print( <<"TOC_END");
3811 </ul>
3812 <!-- END CODE INDEX -->
3813 TOC_END
3814     }
3815 }
3816
3817 BEGIN {
3818
3819     # This is the official list of tokens which may be identified by the
3820     # user.  Long names are used as getopt keys.  Short names are
3821     # convenient short abbreviations for specifying input.  Short names
3822     # somewhat resemble token type characters, but are often different
3823     # because they may only be alphanumeric, to allow command line
3824     # input.  Also, note that because of case insensitivity of html,
3825     # this table must be in a single case only (I've chosen to use all
3826     # lower case).
3827     # When adding NEW_TOKENS: update this hash table
3828     # short names => long names
3829     %short_to_long_names = (
3830         'n'  => 'numeric',
3831         'p'  => 'paren',
3832         'q'  => 'quote',
3833         's'  => 'structure',
3834         'c'  => 'comment',
3835         'v'  => 'v-string',
3836         'cm' => 'comma',
3837         'w'  => 'bareword',
3838         'co' => 'colon',
3839         'pu' => 'punctuation',
3840         'i'  => 'identifier',
3841         'j'  => 'label',
3842         'h'  => 'here-doc-target',
3843         'hh' => 'here-doc-text',
3844         'k'  => 'keyword',
3845         'sc' => 'semicolon',
3846         'm'  => 'subroutine',
3847         'pd' => 'pod-text',
3848     );
3849
3850     # Now we have to map actual token types into one of the above short
3851     # names; any token types not mapped will get 'punctuation'
3852     # properties.
3853
3854     # The values of this hash table correspond to the keys of the
3855     # previous hash table.
3856     # The keys of this hash table are token types and can be seen
3857     # by running with --dump-token-types (-dtt).
3858
3859     # When adding NEW_TOKENS: update this hash table
3860     # $type => $short_name
3861     %token_short_names = (
3862         '#'  => 'c',
3863         'n'  => 'n',
3864         'v'  => 'v',
3865         'k'  => 'k',
3866         'F'  => 'k',
3867         'Q'  => 'q',
3868         'q'  => 'q',
3869         'J'  => 'j',
3870         'j'  => 'j',
3871         'h'  => 'h',
3872         'H'  => 'hh',
3873         'w'  => 'w',
3874         ','  => 'cm',
3875         '=>' => 'cm',
3876         ';'  => 'sc',
3877         ':'  => 'co',
3878         'f'  => 'sc',
3879         '('  => 'p',
3880         ')'  => 'p',
3881         'M'  => 'm',
3882         'P'  => 'pd',
3883         'A'  => 'co',
3884     );
3885
3886     # These token types will all be called identifiers for now
3887     # FIXME: need to separate user defined modules as separate type
3888     my @identifier = qw" i t U C Y Z G :: ";
3889     @token_short_names{@identifier} = ('i') x scalar(@identifier);
3890
3891     # These token types will be called 'structure'
3892     my @structure = qw" { } ";
3893     @token_short_names{@structure} = ('s') x scalar(@structure);
3894
3895     # OLD NOTES: save for reference
3896     # Any of these could be added later if it would be useful.
3897     # For now, they will by default become punctuation
3898     #    my @list = qw" L R [ ] ";
3899     #    @token_long_names{@list} = ('non-structure') x scalar(@list);
3900     #
3901     #    my @list = qw"
3902     #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
3903     #      ";
3904     #    @token_long_names{@list} = ('math') x scalar(@list);
3905     #
3906     #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
3907     #    @token_long_names{@list} = ('bit') x scalar(@list);
3908     #
3909     #    my @list = qw" == != < > <= <=> ";
3910     #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
3911     #
3912     #    my @list = qw" && || ! &&= ||= ";
3913     #    @token_long_names{@list} = ('logical') x scalar(@list);
3914     #
3915     #    my @list = qw" . .= =~ !~ x x= ";
3916     #    @token_long_names{@list} = ('string-operators') x scalar(@list);
3917     #
3918     #    # Incomplete..
3919     #    my @list = qw" .. -> <> ... \ ? ";
3920     #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
3921
3922 }
3923
3924 sub make_getopt_long_names {
3925     my $class = shift;
3926     my ($rgetopt_names) = @_;
3927     while ( my ( $short_name, $name ) = each %short_to_long_names ) {
3928         push @$rgetopt_names, "html-color-$name=s";
3929         push @$rgetopt_names, "html-italic-$name!";
3930         push @$rgetopt_names, "html-bold-$name!";
3931     }
3932     push @$rgetopt_names, "html-color-background=s";
3933     push @$rgetopt_names, "html-linked-style-sheet=s";
3934     push @$rgetopt_names, "nohtml-style-sheets";
3935     push @$rgetopt_names, "html-pre-only";
3936     push @$rgetopt_names, "html-line-numbers";
3937     push @$rgetopt_names, "html-entities!";
3938     push @$rgetopt_names, "stylesheet";
3939     push @$rgetopt_names, "html-table-of-contents!";
3940     push @$rgetopt_names, "pod2html!";
3941     push @$rgetopt_names, "frames!";
3942     push @$rgetopt_names, "html-toc-extension=s";
3943     push @$rgetopt_names, "html-src-extension=s";
3944
3945     # Pod::Html parameters:
3946     push @$rgetopt_names, "backlink=s";
3947     push @$rgetopt_names, "cachedir=s";
3948     push @$rgetopt_names, "htmlroot=s";
3949     push @$rgetopt_names, "libpods=s";
3950     push @$rgetopt_names, "podpath=s";
3951     push @$rgetopt_names, "podroot=s";
3952     push @$rgetopt_names, "title=s";
3953
3954     # Pod::Html parameters with leading 'pod' which will be removed
3955     # before the call to Pod::Html
3956     push @$rgetopt_names, "podquiet!";
3957     push @$rgetopt_names, "podverbose!";
3958     push @$rgetopt_names, "podrecurse!";
3959     push @$rgetopt_names, "podflush";
3960     push @$rgetopt_names, "podheader!";
3961     push @$rgetopt_names, "podindex!";
3962 }
3963
3964 sub make_abbreviated_names {
3965
3966     # We're appending things like this to the expansion list:
3967     #      'hcc'    => [qw(html-color-comment)],
3968     #      'hck'    => [qw(html-color-keyword)],
3969     #  etc
3970     my $class = shift;
3971     my ($rexpansion) = @_;
3972
3973     # abbreviations for color/bold/italic properties
3974     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
3975         ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
3976         ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
3977         ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
3978         ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
3979         ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
3980     }
3981
3982     # abbreviations for all other html options
3983     ${$rexpansion}{"hcbg"}  = ["html-color-background"];
3984     ${$rexpansion}{"pre"}   = ["html-pre-only"];
3985     ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
3986     ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
3987     ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
3988     ${$rexpansion}{"hent"}  = ["html-entities"];
3989     ${$rexpansion}{"nhent"} = ["nohtml-entities"];
3990     ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
3991     ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
3992     ${$rexpansion}{"ss"}    = ["stylesheet"];
3993     ${$rexpansion}{"pod"}   = ["pod2html"];
3994     ${$rexpansion}{"npod"}  = ["nopod2html"];
3995     ${$rexpansion}{"frm"}   = ["frames"];
3996     ${$rexpansion}{"nfrm"}  = ["noframes"];
3997     ${$rexpansion}{"text"}  = ["html-toc-extension"];
3998     ${$rexpansion}{"sext"}  = ["html-src-extension"];
3999 }
4000
4001 sub check_options {
4002
4003     # This will be called once after options have been parsed
4004     my $class = shift;
4005     $rOpts = shift;
4006
4007     # X11 color names for default settings that seemed to look ok
4008     # (these color names are only used for programming clarity; the hex
4009     # numbers are actually written)
4010     use constant ForestGreen   => "#228B22";
4011     use constant SaddleBrown   => "#8B4513";
4012     use constant magenta4      => "#8B008B";
4013     use constant IndianRed3    => "#CD5555";
4014     use constant DeepSkyBlue4  => "#00688B";
4015     use constant MediumOrchid3 => "#B452CD";
4016     use constant black         => "#000000";
4017     use constant white         => "#FFFFFF";
4018     use constant red           => "#FF0000";
4019
4020     # set default color, bold, italic properties
4021     # anything not listed here will be given the default (punctuation) color --
4022     # these types currently not listed and get default: ws pu s sc cm co p
4023     # When adding NEW_TOKENS: add an entry here if you don't want defaults
4024
4025     # set_default_properties( $short_name, default_color, bold?, italic? );
4026     set_default_properties( 'c',  ForestGreen,   0, 0 );
4027     set_default_properties( 'pd', ForestGreen,   0, 1 );
4028     set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
4029     set_default_properties( 'q',  IndianRed3,    0, 0 );
4030     set_default_properties( 'hh', IndianRed3,    0, 1 );
4031     set_default_properties( 'h',  IndianRed3,    1, 0 );
4032     set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
4033     set_default_properties( 'w',  black,         0, 0 );
4034     set_default_properties( 'n',  MediumOrchid3, 0, 0 );
4035     set_default_properties( 'v',  MediumOrchid3, 0, 0 );
4036     set_default_properties( 'j',  IndianRed3,    1, 0 );
4037     set_default_properties( 'm',  red,           1, 0 );
4038
4039     set_default_color( 'html-color-background',  white );
4040     set_default_color( 'html-color-punctuation', black );
4041
4042     # setup property lookup tables for tokens based on their short names
4043     # every token type has a short name, and will use these tables
4044     # to do the html markup
4045     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4046         $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
4047         $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
4048         $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4049     }
4050
4051     # write style sheet to STDOUT and die if requested
4052     if ( defined( $rOpts->{'stylesheet'} ) ) {
4053         write_style_sheet_file('-');
4054         exit 1;
4055     }
4056
4057     # make sure user gives a file name after -css
4058     if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4059         $css_linkname = $rOpts->{'html-linked-style-sheet'};
4060         if ( $css_linkname =~ /^-/ ) {
4061             die "You must specify a valid filename after -css\n";
4062         }
4063     }
4064
4065     # check for conflict
4066     if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4067         $rOpts->{'nohtml-style-sheets'} = 0;
4068         warning("You can't specify both -css and -nss; -nss ignored\n");
4069     }
4070
4071     # write a style sheet file if necessary
4072     if ($css_linkname) {
4073
4074         # if the selected filename exists, don't write, because user may
4075         # have done some work by hand to create it; use backup name instead
4076         # Also, this will avoid a potential disaster in which the user
4077         # forgets to specify the style sheet, like this:
4078         #    perltidy -html -css myfile1.pl myfile2.pl
4079         # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4080         my $css_filename = $css_linkname;
4081         unless ( -e $css_filename ) {
4082             write_style_sheet_file($css_filename);
4083         }
4084     }
4085     $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4086 }
4087
4088 sub write_style_sheet_file {
4089
4090     my $css_filename = shift;
4091     my $fh;
4092     unless ( $fh = IO::File->new("> $css_filename") ) {
4093         die "can't open $css_filename: $!\n";
4094     }
4095     write_style_sheet_data($fh);
4096     eval { $fh->close };
4097 }
4098
4099 sub write_style_sheet_data {
4100
4101     # write the style sheet data to an open file handle
4102     my $fh = shift;
4103
4104     my $bg_color   = $rOpts->{'html-color-background'};
4105     my $text_color = $rOpts->{'html-color-punctuation'};
4106
4107     # pre-bgcolor is new, and may not be defined
4108     my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4109     $pre_bg_color = $bg_color unless $pre_bg_color;
4110
4111     $fh->print(<<"EOM");
4112 /* default style sheet generated by perltidy */
4113 body {background: $bg_color; color: $text_color}
4114 pre { color: $text_color;
4115       background: $pre_bg_color;
4116       font-family: courier;
4117     }
4118
4119 EOM
4120
4121     foreach my $short_name ( sort keys %short_to_long_names ) {
4122         my $long_name = $short_to_long_names{$short_name};
4123
4124         my $abbrev = '.' . $short_name;
4125         if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
4126         my $color = $html_color{$short_name};
4127         if ( !defined($color) ) { $color = $text_color }
4128         $fh->print("$abbrev \{ color: $color;");
4129
4130         if ( $html_bold{$short_name} ) {
4131             $fh->print(" font-weight:bold;");
4132         }
4133
4134         if ( $html_italic{$short_name} ) {
4135             $fh->print(" font-style:italic;");
4136         }
4137         $fh->print("} /* $long_name */\n");
4138     }
4139 }
4140
4141 sub set_default_color {
4142
4143     # make sure that options hash $rOpts->{$key} contains a valid color
4144     my ( $key, $color ) = @_;
4145     if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4146     $rOpts->{$key} = check_RGB($color);
4147 }
4148
4149 sub check_RGB {
4150
4151     # if color is a 6 digit hex RGB value, prepend a #, otherwise
4152     # assume that it is a valid ascii color name
4153     my ($color) = @_;
4154     if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4155     return $color;
4156 }
4157
4158 sub set_default_properties {
4159     my ( $short_name, $color, $bold, $italic ) = @_;
4160
4161     set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4162     my $key;
4163     $key = "html-bold-$short_to_long_names{$short_name}";
4164     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4165     $key = "html-italic-$short_to_long_names{$short_name}";
4166     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4167 }
4168
4169 sub pod_to_html {
4170
4171     # Use Pod::Html to process the pod and make the page
4172     # then merge the perltidy code sections into it.
4173     # return 1 if success, 0 otherwise
4174     my $self = shift;
4175     my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4176     my $input_file   = $self->{_input_file};
4177     my $title        = $self->{_title};
4178     my $success_flag = 0;
4179
4180     # don't try to use pod2html if no pod
4181     unless ($pod_string) {
4182         return $success_flag;
4183     }
4184
4185     # Pod::Html requires a real temporary filename
4186     # If we are making a frame, we have a name available
4187     # Otherwise, we have to fine one
4188     my $tmpfile;
4189     if ( $rOpts->{'frames'} ) {
4190         $tmpfile = $self->{_toc_filename};
4191     }
4192     else {
4193         $tmpfile = Perl::Tidy::make_temporary_filename();
4194     }
4195     my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4196     unless ($fh_tmp) {
4197         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4198         return $success_flag;
4199     }
4200
4201     #------------------------------------------------------------------
4202     # Warning: a temporary file is open; we have to clean up if
4203     # things go bad.  From here on all returns should be by going to
4204     # RETURN so that the temporary file gets unlinked.
4205     #------------------------------------------------------------------
4206
4207     # write the pod text to the temporary file
4208     $fh_tmp->print($pod_string);
4209     $fh_tmp->close();
4210
4211     # Hand off the pod to pod2html.
4212     # Note that we can use the same temporary filename for input and output
4213     # because of the way pod2html works.
4214     {
4215
4216         my @args;
4217         push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4218         my $kw;
4219
4220         # Flags with string args:
4221         # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4222         # "podpath=s", "podroot=s"
4223         # Note: -css=s is handled by perltidy itself
4224         foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4225             if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4226         }
4227
4228         # Toggle switches; these have extra leading 'pod'
4229         # "header!", "index!", "recurse!", "quiet!", "verbose!"
4230         foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4231             my $kwd = $kw;    # allows us to strip 'pod'
4232             if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4233             elsif ( defined( $rOpts->{$kw} ) ) {
4234                 $kwd =~ s/^pod//;
4235                 push @args, "--no$kwd";
4236             }
4237         }
4238
4239         # "flush",
4240         $kw = 'podflush';
4241         if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4242
4243         # Must clean up if pod2html dies (it can);
4244         # Be careful not to overwrite callers __DIE__ routine
4245         local $SIG{__DIE__} = sub {
4246             print $_[0];
4247             unlink $tmpfile if -e $tmpfile;
4248             exit 1;
4249         };
4250
4251         pod2html(@args);
4252     }
4253     $fh_tmp = IO::File->new( $tmpfile, 'r' );
4254     unless ($fh_tmp) {
4255
4256         # this error shouldn't happen ... we just used this filename
4257         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4258         goto RETURN;
4259     }
4260
4261     my $html_fh = $self->{_html_fh};
4262     my @toc;
4263     my $in_toc;
4264     my $no_print;
4265
4266     # This routine will write the html selectively and store the toc
4267     my $html_print = sub {
4268         foreach (@_) {
4269             $html_fh->print($_) unless ($no_print);
4270             if ($in_toc) { push @toc, $_ }
4271         }
4272     };
4273
4274     # loop over lines of html output from pod2html and merge in
4275     # the necessary perltidy html sections
4276     my ( $saw_body, $saw_index, $saw_body_end );
4277     while ( my $line = $fh_tmp->getline() ) {
4278
4279         if ( $line =~ /^\s*<html>\s*$/i ) {
4280             my $date = localtime;
4281             $html_print->("<!-- Generated by perltidy on $date -->\n");
4282             $html_print->($line);
4283         }
4284
4285         # Copy the perltidy css, if any, after <body> tag
4286         elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4287             $saw_body = 1;
4288             $html_print->($css_string) if $css_string;
4289             $html_print->($line);
4290
4291             # add a top anchor and heading
4292             $html_print->("<a name=\"-top-\"></a>\n");
4293             $title = escape_html($title);
4294             $html_print->("<h1>$title</h1>\n");
4295         }
4296         elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4297             $in_toc = 1;
4298
4299             # when frames are used, an extra table of contents in the
4300             # contents panel is confusing, so don't print it
4301             $no_print = $rOpts->{'frames'}
4302               || !$rOpts->{'html-table-of-contents'};
4303             $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4304             $html_print->($line);
4305         }
4306
4307         # Copy the perltidy toc, if any, after the Pod::Html toc
4308         elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4309             $saw_index = 1;
4310             $html_print->($line);
4311             if ($toc_string) {
4312                 $html_print->("<hr />\n") if $rOpts->{'frames'};
4313                 $html_print->("<h2>Code Index:</h2>\n");
4314                 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4315                 $html_print->(@toc);
4316             }
4317             $in_toc   = 0;
4318             $no_print = 0;
4319         }
4320
4321         # Copy one perltidy section after each marker
4322         elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4323             $line = $2;
4324             $html_print->($1) if $1;
4325
4326             # Intermingle code and pod sections if we saw multiple =cut's.
4327             if ( $self->{_pod_cut_count} > 1 ) {
4328                 my $rpre_string = shift(@$rpre_string_stack);
4329                 if ($$rpre_string) {
4330                     $html_print->('<pre>');
4331                     $html_print->($$rpre_string);
4332                     $html_print->('</pre>');
4333                 }
4334                 else {
4335
4336                     # shouldn't happen: we stored a string before writing
4337                     # each marker.
4338                     warn
4339 "Problem merging html stream with pod2html; order may be wrong\n";
4340                 }
4341                 $html_print->($line);
4342             }
4343
4344             # If didn't see multiple =cut lines, we'll put the pod out first
4345             # and then the code, because it's less confusing.
4346             else {
4347
4348                 # since we are not intermixing code and pod, we don't need
4349                 # or want any <hr> lines which separated pod and code
4350                 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4351             }
4352         }
4353
4354         # Copy any remaining code section before the </body> tag
4355         elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4356             $saw_body_end = 1;
4357             if (@$rpre_string_stack) {
4358                 unless ( $self->{_pod_cut_count} > 1 ) {
4359                     $html_print->('<hr />');
4360                 }
4361                 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4362                     $html_print->('<pre>');
4363                     $html_print->($$rpre_string);
4364                     $html_print->('</pre>');
4365                 }
4366             }
4367             $html_print->($line);
4368         }
4369         else {
4370             $html_print->($line);
4371         }
4372     }
4373
4374     $success_flag = 1;
4375     unless ($saw_body) {
4376         warn "Did not see <body> in pod2html output\n";
4377         $success_flag = 0;
4378     }
4379     unless ($saw_body_end) {
4380         warn "Did not see </body> in pod2html output\n";
4381         $success_flag = 0;
4382     }
4383     unless ($saw_index) {
4384         warn "Did not find INDEX END in pod2html output\n";
4385         $success_flag = 0;
4386     }
4387
4388   RETURN:
4389     eval { $html_fh->close() };
4390
4391     # note that we have to unlink tmpfile before making frames
4392     # because the tmpfile may be one of the names used for frames
4393     unlink $tmpfile if -e $tmpfile;
4394     if ( $success_flag && $rOpts->{'frames'} ) {
4395         $self->make_frame( \@toc );
4396     }
4397     return $success_flag;
4398 }
4399
4400 sub make_frame {
4401
4402     # Make a frame with table of contents in the left panel
4403     # and the text in the right panel.
4404     # On entry:
4405     #  $html_filename contains the no-frames html output
4406     #  $rtoc is a reference to an array with the table of contents
4407     my $self          = shift;
4408     my ($rtoc)        = @_;
4409     my $input_file    = $self->{_input_file};
4410     my $html_filename = $self->{_html_file};
4411     my $toc_filename  = $self->{_toc_filename};
4412     my $src_filename  = $self->{_src_filename};
4413     my $title         = $self->{_title};
4414     $title = escape_html($title);
4415
4416     # FUTURE input parameter:
4417     my $top_basename = "";
4418
4419     # We need to produce 3 html files:
4420     # 1. - the table of contents
4421     # 2. - the contents (source code) itself
4422     # 3. - the frame which contains them
4423
4424     # get basenames for relative links
4425     my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4426     my ( $src_basename, $src_path ) = fileparse($src_filename);
4427
4428     # 1. Make the table of contents panel, with appropriate changes
4429     # to the anchor names
4430     my $src_frame_name = 'SRC';
4431     my $first_anchor   =
4432       write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4433         $src_frame_name );
4434
4435     # 2. The current .html filename is renamed to be the contents panel
4436     rename( $html_filename, $src_filename )
4437       or die "Cannot rename $html_filename to $src_filename:$!\n";
4438
4439     # 3. Then use the original html filename for the frame
4440     write_frame_html(
4441         $title,        $html_filename, $top_basename,
4442         $toc_basename, $src_basename,  $src_frame_name
4443     );
4444 }
4445
4446 sub write_toc_html {
4447
4448     # write a separate html table of contents file for frames
4449     my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4450     my $fh = IO::File->new( $toc_filename, 'w' )
4451       or die "Cannot open $toc_filename:$!\n";
4452     $fh->print(<<EOM);
4453 <html>
4454 <head>
4455 <title>$title</title>
4456 </head>
4457 <body>
4458 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4459 EOM
4460
4461     my $first_anchor =
4462       change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4463     $fh->print( join "", @$rtoc );
4464
4465     $fh->print(<<EOM);
4466 </body>
4467 </html>
4468 EOM
4469
4470 }
4471
4472 sub write_frame_html {
4473
4474     # write an html file to be the table of contents frame
4475     my (
4476         $title,        $frame_filename, $top_basename,
4477         $toc_basename, $src_basename,   $src_frame_name
4478       )
4479       = @_;
4480
4481     my $fh = IO::File->new( $frame_filename, 'w' )
4482       or die "Cannot open $toc_basename:$!\n";
4483
4484     $fh->print(<<EOM);
4485 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4486     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4487 <?xml version="1.0" encoding="iso-8859-1" ?>
4488 <html xmlns="http://www.w3.org/1999/xhtml">
4489 <head>
4490 <title>$title</title>
4491 </head>
4492 EOM
4493
4494     # two left panels, one right, if master index file
4495     if ($top_basename) {
4496         $fh->print(<<EOM);
4497 <frameset cols="20%,80%">
4498 <frameset rows="30%,70%">
4499 <frame src = "$top_basename" />
4500 <frame src = "$toc_basename" />
4501 </frameset>
4502 EOM
4503     }
4504
4505     # one left panels, one right, if no master index file
4506     else {
4507         $fh->print(<<EOM);
4508 <frameset cols="20%,*">
4509 <frame src = "$toc_basename" />
4510 EOM
4511     }
4512     $fh->print(<<EOM);
4513 <frame src = "$src_basename" name = "$src_frame_name" />
4514 <noframes>
4515 <body>
4516 <p>If you see this message, you are using a non-frame-capable web client.</p>
4517 <p>This document contains:</p>
4518 <ul>
4519 <li><a href="$toc_basename">A table of contents</a></li>
4520 <li><a href="$src_basename">The source code</a></li>
4521 </ul>
4522 </body>
4523 </noframes>
4524 </frameset>
4525 </html>
4526 EOM
4527 }
4528
4529 sub change_anchor_names {
4530
4531     # add a filename and target to anchors
4532     # also return the first anchor
4533     my ( $rlines, $filename, $target ) = @_;
4534     my $first_anchor;
4535     foreach my $line (@$rlines) {
4536
4537         #  We're looking for lines like this:
4538         #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4539         #  ----  -       --------  -----------------
4540         #  $1              $4            $5
4541         if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4542             my $pre  = $1;
4543             my $name = $4;
4544             my $post = $5;
4545             my $href = "$filename#$name";
4546             $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
4547             unless ($first_anchor) { $first_anchor = $href }
4548         }
4549     }
4550     return $first_anchor;
4551 }
4552
4553 sub close_html_file {
4554     my $self = shift;
4555     return unless $self->{_html_file_opened};
4556
4557     my $html_fh     = $self->{_html_fh};
4558     my $rtoc_string = $self->{_rtoc_string};
4559
4560     # There are 3 basic paths to html output...
4561
4562     # ---------------------------------
4563     # Path 1: finish up if in -pre mode
4564     # ---------------------------------
4565     if ( $rOpts->{'html-pre-only'} ) {
4566         $html_fh->print( <<"PRE_END");
4567 </pre>
4568 PRE_END
4569         eval { $html_fh->close() };
4570         return;
4571     }
4572
4573     # Finish the index
4574     $self->add_toc_item( 'EOF', 'EOF' );
4575
4576     my $rpre_string_stack = $self->{_rpre_string_stack};
4577
4578     # Patch to darken the <pre> background color in case of pod2html and
4579     # interleaved code/documentation.  Otherwise, the distinction
4580     # between code and documentation is blurred.
4581     if (   $rOpts->{pod2html}
4582         && $self->{_pod_cut_count} >= 1
4583         && $rOpts->{'html-color-background'} eq '#FFFFFF' )
4584     {
4585         $rOpts->{'html-pre-color-background'} = '#F0F0F0';
4586     }
4587
4588     # put the css or its link into a string, if used
4589     my $css_string;
4590     my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
4591
4592     # use css linked to another file
4593     if ( $rOpts->{'html-linked-style-sheet'} ) {
4594         $fh_css->print(
4595             qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
4596         );
4597     }
4598
4599     # use css embedded in this file
4600     elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
4601         $fh_css->print( <<'ENDCSS');
4602 <style type="text/css">
4603 <!--
4604 ENDCSS
4605         write_style_sheet_data($fh_css);
4606         $fh_css->print( <<"ENDCSS");
4607 -->
4608 </style>
4609 ENDCSS
4610     }
4611
4612     # -----------------------------------------------------------
4613     # path 2: use pod2html if requested
4614     #         If we fail for some reason, continue on to path 3
4615     # -----------------------------------------------------------
4616     if ( $rOpts->{'pod2html'} ) {
4617         my $rpod_string = $self->{_rpod_string};
4618         $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
4619             $rpre_string_stack )
4620           && return;
4621     }
4622
4623     # --------------------------------------------------
4624     # path 3: write code in html, with pod only in italics
4625     # --------------------------------------------------
4626     my $input_file = $self->{_input_file};
4627     my $title      = escape_html($input_file);
4628     my $date       = localtime;
4629     $html_fh->print( <<"HTML_START");
4630 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4631    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
4632 <!-- Generated by perltidy on $date -->
4633 <html xmlns="http://www.w3.org/1999/xhtml">
4634 <head>
4635 <title>$title</title>
4636 HTML_START
4637
4638     # output the css, if used
4639     if ($css_string) {
4640         $html_fh->print($css_string);
4641         $html_fh->print( <<"ENDCSS");
4642 </head>
4643 <body>
4644 ENDCSS
4645     }
4646     else {
4647
4648         $html_fh->print( <<"HTML_START");
4649 </head>
4650 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
4651 HTML_START
4652     }
4653
4654     $html_fh->print("<a name=\"-top-\"></a>\n");
4655     $html_fh->print( <<"EOM");
4656 <h1>$title</h1>
4657 EOM
4658
4659     # copy the table of contents
4660     if (   $$rtoc_string
4661         && !$rOpts->{'frames'}
4662         && $rOpts->{'html-table-of-contents'} )
4663     {
4664         $html_fh->print($$rtoc_string);
4665     }
4666
4667     # copy the pre section(s)
4668     my $fname_comment = $input_file;
4669     $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
4670     $html_fh->print( <<"END_PRE");
4671 <hr />
4672 <!-- contents of filename: $fname_comment -->
4673 <pre>
4674 END_PRE
4675
4676     foreach my $rpre_string (@$rpre_string_stack) {
4677         $html_fh->print($$rpre_string);
4678     }
4679
4680     # and finish the html page
4681     $html_fh->print( <<"HTML_END");
4682 </pre>
4683 </body>
4684 </html>
4685 HTML_END
4686     eval { $html_fh->close() };    # could be object without close method
4687
4688     if ( $rOpts->{'frames'} ) {
4689         my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
4690         $self->make_frame( \@toc );
4691     }
4692 }
4693
4694 sub markup_tokens {
4695     my $self = shift;
4696     my ( $rtokens, $rtoken_type, $rlevels ) = @_;
4697     my ( @colored_tokens, $j, $string, $type, $token, $level );
4698     my $rlast_level    = $self->{_rlast_level};
4699     my $rpackage_stack = $self->{_rpackage_stack};
4700
4701     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
4702         $type  = $$rtoken_type[$j];
4703         $token = $$rtokens[$j];
4704         $level = $$rlevels[$j];
4705         $level = 0 if ( $level < 0 );
4706
4707         #-------------------------------------------------------
4708         # Update the package stack.  The package stack is needed to keep
4709         # the toc correct because some packages may be declared within
4710         # blocks and go out of scope when we leave the block.
4711         #-------------------------------------------------------
4712         if ( $level > $$rlast_level ) {
4713             unless ( $rpackage_stack->[ $level - 1 ] ) {
4714                 $rpackage_stack->[ $level - 1 ] = 'main';
4715             }
4716             $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
4717         }
4718         elsif ( $level < $$rlast_level ) {
4719             my $package = $rpackage_stack->[$level];
4720             unless ($package) { $package = 'main' }
4721
4722             # if we change packages due to a nesting change, we
4723             # have to make an entry in the toc
4724             if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
4725                 $self->add_toc_item( $package, 'package' );
4726             }
4727         }
4728         $$rlast_level = $level;
4729
4730         #-------------------------------------------------------
4731         # Intercept a sub name here; split it
4732         # into keyword 'sub' and sub name; and add an
4733         # entry in the toc
4734         #-------------------------------------------------------
4735         if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
4736             $token = $self->markup_html_element( $1, 'k' );
4737             push @colored_tokens, $token;
4738             $token = $2;
4739             $type  = 'M';
4740
4741             # but don't include sub declarations in the toc;
4742             # these wlll have leading token types 'i;'
4743             my $signature = join "", @$rtoken_type;
4744             unless ( $signature =~ /^i;/ ) {
4745                 my $subname = $token;
4746                 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
4747                 $self->add_toc_item( $subname, 'sub' );
4748             }
4749         }
4750
4751         #-------------------------------------------------------
4752         # Intercept a package name here; split it
4753         # into keyword 'package' and name; add to the toc,
4754         # and update the package stack
4755         #-------------------------------------------------------
4756         if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
4757             $token = $self->markup_html_element( $1, 'k' );
4758             push @colored_tokens, $token;
4759             $token = $2;
4760             $type  = 'i';
4761             $self->add_toc_item( "$token", 'package' );
4762             $rpackage_stack->[$level] = $token;
4763         }
4764
4765         $token = $self->markup_html_element( $token, $type );
4766         push @colored_tokens, $token;
4767     }
4768     return ( \@colored_tokens );
4769 }
4770
4771 sub markup_html_element {
4772     my $self = shift;
4773     my ( $token, $type ) = @_;
4774
4775     return $token if ( $type eq 'b' );    # skip a blank token
4776     return $token if ( $token =~ /^\s*$/ );    # skip a blank line
4777     $token = escape_html($token);
4778
4779     # get the short abbreviation for this token type
4780     my $short_name = $token_short_names{$type};
4781     if ( !defined($short_name) ) {
4782         $short_name = "pu";                    # punctuation is default
4783     }
4784
4785     # handle style sheets..
4786     if ( !$rOpts->{'nohtml-style-sheets'} ) {
4787         if ( $short_name ne 'pu' ) {
4788             $token = qq(<span class="$short_name">) . $token . "</span>";
4789         }
4790     }
4791
4792     # handle no style sheets..
4793     else {
4794         my $color = $html_color{$short_name};
4795
4796         if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
4797             $token = qq(<font color="$color">) . $token . "</font>";
4798         }
4799         if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
4800         if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
4801     }
4802     return $token;
4803 }
4804
4805 sub escape_html {
4806
4807     my $token = shift;
4808     if ($missing_html_entities) {
4809         $token =~ s/\&/&amp;/g;
4810         $token =~ s/\</&lt;/g;
4811         $token =~ s/\>/&gt;/g;
4812         $token =~ s/\"/&quot;/g;
4813     }
4814     else {
4815         HTML::Entities::encode_entities($token);
4816     }
4817     return $token;
4818 }
4819
4820 sub finish_formatting {
4821
4822     # called after last line
4823     my $self = shift;
4824     $self->close_html_file();
4825     return;
4826 }
4827
4828 sub write_line {
4829
4830     my $self = shift;
4831     return unless $self->{_html_file_opened};
4832     my $html_pre_fh      = $self->{_html_pre_fh};
4833     my ($line_of_tokens) = @_;
4834     my $line_type        = $line_of_tokens->{_line_type};
4835     my $input_line       = $line_of_tokens->{_line_text};
4836     my $line_number      = $line_of_tokens->{_line_number};
4837     chomp $input_line;
4838
4839     # markup line of code..
4840     my $html_line;
4841     if ( $line_type eq 'CODE' ) {
4842         my $rtoken_type = $line_of_tokens->{_rtoken_type};
4843         my $rtokens     = $line_of_tokens->{_rtokens};
4844         my $rlevels     = $line_of_tokens->{_rlevels};
4845
4846         if ( $input_line =~ /(^\s*)/ ) {
4847             $html_line = $1;
4848         }
4849         else {
4850             $html_line = "";
4851         }
4852         my ($rcolored_tokens) =
4853           $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
4854         $html_line .= join '', @$rcolored_tokens;
4855     }
4856
4857     # markup line of non-code..
4858     else {
4859         my $line_character;
4860         if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
4861         elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
4862         elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
4863         elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
4864         elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
4865         elsif ( $line_type eq 'END_START' )  {
4866             $line_character = 'k';
4867             $self->add_toc_item( '__END__', '__END__' );
4868         }
4869         elsif ( $line_type eq 'DATA_START' ) {
4870             $line_character = 'k';
4871             $self->add_toc_item( '__DATA__', '__DATA__' );
4872         }
4873         elsif ( $line_type =~ /^POD/ ) {
4874             $line_character = 'P';
4875             if ( $rOpts->{'pod2html'} ) {
4876                 my $html_pod_fh = $self->{_html_pod_fh};
4877                 if ( $line_type eq 'POD_START' ) {
4878
4879                     my $rpre_string_stack = $self->{_rpre_string_stack};
4880                     my $rpre_string       = $rpre_string_stack->[-1];
4881
4882                     # if we have written any non-blank lines to the
4883                     # current pre section, start writing to a new output
4884                     # string
4885                     if ( $$rpre_string =~ /\S/ ) {
4886                         my $pre_string;
4887                         $html_pre_fh =
4888                           Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4889                         $self->{_html_pre_fh} = $html_pre_fh;
4890                         push @$rpre_string_stack, \$pre_string;
4891
4892                         # leave a marker in the pod stream so we know
4893                         # where to put the pre section we just
4894                         # finished.
4895                         my $for_html = '=for html';    # don't confuse pod utils
4896                         $html_pod_fh->print(<<EOM);
4897
4898 $for_html
4899 <!-- pERLTIDY sECTION -->
4900
4901 EOM
4902                     }
4903
4904                     # otherwise, just clear the current string and start
4905                     # over
4906                     else {
4907                         $$rpre_string = "";
4908                         $html_pod_fh->print("\n");
4909                     }
4910                 }
4911                 $html_pod_fh->print( $input_line . "\n" );
4912                 if ( $line_type eq 'POD_END' ) {
4913                     $self->{_pod_cut_count}++;
4914                     $html_pod_fh->print("\n");
4915                 }
4916                 return;
4917             }
4918         }
4919         else { $line_character = 'Q' }
4920         $html_line = $self->markup_html_element( $input_line, $line_character );
4921     }
4922
4923     # add the line number if requested
4924     if ( $rOpts->{'html-line-numbers'} ) {
4925         my $extra_space .=
4926             ( $line_number < 10 ) ? "   "
4927           : ( $line_number < 100 )  ? "  "
4928           : ( $line_number < 1000 ) ? " "
4929           : "";
4930         $html_line = $extra_space . $line_number . " " . $html_line;
4931     }
4932
4933     # write the line
4934     $html_pre_fh->print("$html_line\n");
4935 }
4936
4937 #####################################################################
4938 #
4939 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4940 # line breaks to the token stream
4941 #
4942 # WARNING: This is not a real class for speed reasons.  Only one
4943 # Formatter may be used.
4944 #
4945 #####################################################################
4946
4947 package Perl::Tidy::Formatter;
4948
4949 BEGIN {
4950
4951     # Caution: these debug flags produce a lot of output
4952     # They should all be 0 except when debugging small scripts
4953     use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
4954     use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
4955     use constant FORMATTER_DEBUG_FLAG_CI      => 0;
4956     use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
4957     use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
4958     use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
4959     use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
4960     use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
4961     use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
4962     use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
4963     use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
4964     use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;
4965
4966     my $debug_warning = sub {
4967         print "FORMATTER_DEBUGGING with key $_[0]\n";
4968     };
4969
4970     FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
4971     FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
4972     FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
4973     FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
4974     FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
4975     FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
4976     FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
4977     FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
4978     FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
4979     FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
4980     FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
4981     FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
4982 }
4983
4984 use Carp;
4985 use vars qw{
4986
4987   @gnu_stack
4988   $max_gnu_stack_index
4989   $gnu_position_predictor
4990   $line_start_index_to_go
4991   $last_indentation_written
4992   $last_unadjusted_indentation
4993   $last_leading_token
4994
4995   $saw_VERSION_in_this_file
4996   $saw_END_or_DATA_
4997
4998   @gnu_item_list
4999   $max_gnu_item_index
5000   $gnu_sequence_number
5001   $last_output_indentation
5002   %last_gnu_equals
5003   %gnu_comma_count
5004   %gnu_arrow_count
5005
5006   @block_type_to_go
5007   @type_sequence_to_go
5008   @container_environment_to_go
5009   @bond_strength_to_go
5010   @forced_breakpoint_to_go
5011   @lengths_to_go
5012   @levels_to_go
5013   @leading_spaces_to_go
5014   @reduced_spaces_to_go
5015   @matching_token_to_go
5016   @mate_index_to_go
5017   @nesting_blocks_to_go
5018   @ci_levels_to_go
5019   @nesting_depth_to_go
5020   @nobreak_to_go
5021   @old_breakpoint_to_go
5022   @tokens_to_go
5023   @types_to_go
5024
5025   %saved_opening_indentation
5026
5027   $max_index_to_go
5028   $comma_count_in_batch
5029   $old_line_count_in_batch
5030   $last_nonblank_index_to_go
5031   $last_nonblank_type_to_go
5032   $last_nonblank_token_to_go
5033   $last_last_nonblank_index_to_go
5034   $last_last_nonblank_type_to_go
5035   $last_last_nonblank_token_to_go
5036   @nonblank_lines_at_depth
5037   $starting_in_quote
5038
5039   $forced_breakpoint_count
5040   $forced_breakpoint_undo_count
5041   @forced_breakpoint_undo_stack
5042   %postponed_breakpoint
5043
5044   $tabbing
5045   $embedded_tab_count
5046   $first_embedded_tab_at
5047   $last_embedded_tab_at
5048   $deleted_semicolon_count
5049   $first_deleted_semicolon_at
5050   $last_deleted_semicolon_at
5051   $added_semicolon_count
5052   $first_added_semicolon_at
5053   $last_added_semicolon_at
5054   $saw_negative_indentation
5055   $first_tabbing_disagreement
5056   $last_tabbing_disagreement
5057   $in_tabbing_disagreement
5058   $tabbing_disagreement_count
5059   $input_line_tabbing
5060
5061   $last_line_type
5062   $last_line_leading_type
5063   $last_line_leading_level
5064   $last_last_line_leading_level
5065
5066   %block_leading_text
5067   %block_opening_line_number
5068   $csc_new_statement_ok
5069   $accumulating_text_for_block
5070   $leading_block_text
5071   $rleading_block_if_elsif_text
5072   $leading_block_text_level
5073   $leading_block_text_length_exceeded
5074   $leading_block_text_line_length
5075   $leading_block_text_line_number
5076   $closing_side_comment_prefix_pattern
5077   $closing_side_comment_list_pattern
5078
5079   $last_nonblank_token
5080   $last_nonblank_type
5081   $last_last_nonblank_token
5082   $last_last_nonblank_type
5083   $last_nonblank_block_type
5084   $last_output_level
5085   %is_do_follower
5086   %is_if_brace_follower
5087   %space_after_keyword
5088   $rbrace_follower
5089   $looking_for_else
5090   %is_last_next_redo_return
5091   %is_other_brace_follower
5092   %is_else_brace_follower
5093   %is_anon_sub_brace_follower
5094   %is_anon_sub_1_brace_follower
5095   %is_sort_map_grep
5096   %is_sort_map_grep_eval
5097   %is_sort_map_grep_eval_do
5098   %is_block_without_semicolon
5099   %is_if_unless
5100   %is_and_or
5101   %is_assignment
5102   %is_chain_operator
5103   %is_if_unless_and_or_last_next_redo_return
5104
5105   @has_broken_sublist
5106   @dont_align
5107   @want_comma_break
5108
5109   $index_start_one_line_block
5110   $semicolons_before_block_self_destruct
5111   $index_max_forced_break
5112   $input_line_number
5113   $diagnostics_object
5114   $vertical_aligner_object
5115   $logger_object
5116   $file_writer_object
5117   $formatter_self
5118   @ci_stack
5119   $last_line_had_side_comment
5120   %want_break_before
5121   %outdent_keyword
5122   $static_block_comment_pattern
5123   $static_side_comment_pattern
5124   %opening_vertical_tightness
5125   %closing_vertical_tightness
5126   %closing_token_indentation
5127   $block_brace_vertical_tightness_pattern
5128
5129   $rOpts_add_newlines
5130   $rOpts_add_whitespace
5131   $rOpts_block_brace_tightness
5132   $rOpts_block_brace_vertical_tightness
5133   $rOpts_brace_left_and_indent
5134   $rOpts_comma_arrow_breakpoints
5135   $rOpts_break_at_old_keyword_breakpoints
5136   $rOpts_break_at_old_comma_breakpoints
5137   $rOpts_break_at_old_logical_breakpoints
5138   $rOpts_break_at_old_trinary_breakpoints
5139   $rOpts_closing_side_comment_else_flag
5140   $rOpts_closing_side_comment_maximum_text
5141   $rOpts_continuation_indentation
5142   $rOpts_cuddled_else
5143   $rOpts_delete_old_whitespace
5144   $rOpts_fuzzy_line_length
5145   $rOpts_indent_columns
5146   $rOpts_line_up_parentheses
5147   $rOpts_maximum_fields_per_table
5148   $rOpts_maximum_line_length
5149   $rOpts_short_concatenation_item_length
5150   $rOpts_swallow_optional_blank_lines
5151   $rOpts_ignore_old_line_breaks
5152
5153   $half_maximum_line_length
5154
5155   %is_opening_type
5156   %is_closing_type
5157   %is_keyword_returning_list
5158   %tightness
5159   %matching_token
5160   $rOpts
5161   %right_bond_strength
5162   %left_bond_strength
5163   %binary_ws_rules
5164   %want_left_space
5165   %want_right_space
5166   %is_digraph
5167   %is_trigraph
5168   $bli_pattern
5169   $bli_list_string
5170   %is_closing_type
5171   %is_opening_type
5172   %is_closing_token
5173   %is_opening_token
5174 };
5175
5176 BEGIN {
5177
5178     # default list of block types for which -bli would apply
5179     $bli_list_string = 'if else elsif unless while for foreach do : sub';
5180
5181     @_ = qw(
5182       .. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
5183       <= >= == =~ !~ != ++ -- /= x=
5184     );
5185     @is_digraph{@_} = (1) x scalar(@_);
5186
5187     @_ = qw( ... **= <<= >>= &&= ||= <=> );
5188     @is_trigraph{@_} = (1) x scalar(@_);
5189
5190     @_ = qw(
5191       = **= += *= &= <<= &&=
5192       -= /= |= >>= ||=
5193       .= %= ^=
5194       x=
5195     );
5196     @is_assignment{@_} = (1) x scalar(@_);
5197
5198     @_ = qw(
5199       grep
5200       keys
5201       map
5202       reverse
5203       sort
5204       split
5205     );
5206     @is_keyword_returning_list{@_} = (1) x scalar(@_);
5207
5208     @_ = qw(is if unless and or last next redo return);
5209     @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5210
5211     @_ = qw(last next redo return);
5212     @is_last_next_redo_return{@_} = (1) x scalar(@_);
5213
5214     @_ = qw(sort map grep);
5215     @is_sort_map_grep{@_} = (1) x scalar(@_);
5216
5217     @_ = qw(sort map grep eval);
5218     @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5219
5220     @_ = qw(sort map grep eval do);
5221     @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5222
5223     @_ = qw(if unless);
5224     @is_if_unless{@_} = (1) x scalar(@_);
5225
5226     @_ = qw(and or);
5227     @is_and_or{@_} = (1) x scalar(@_);
5228
5229     # We can remove semicolons after blocks preceded by these keywords
5230     @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
5231       unless while until for foreach);
5232     @is_block_without_semicolon{@_} = (1) x scalar(@_);
5233
5234     # 'L' is token for opening { at hash key
5235     @_ = qw" L { ( [ ";
5236     @is_opening_type{@_} = (1) x scalar(@_);
5237
5238     # 'R' is token for closing } at hash key
5239     @_ = qw" R } ) ] ";
5240     @is_closing_type{@_} = (1) x scalar(@_);
5241
5242     @_ = qw" { ( [ ";
5243     @is_opening_token{@_} = (1) x scalar(@_);
5244
5245     @_ = qw" } ) ] ";
5246     @is_closing_token{@_} = (1) x scalar(@_);
5247 }
5248
5249 # whitespace codes
5250 use constant WS_YES      => 1;
5251 use constant WS_OPTIONAL => 0;
5252 use constant WS_NO       => -1;
5253
5254 # Token bond strengths.
5255 use constant NO_BREAK    => 10000;
5256 use constant VERY_STRONG => 100;
5257 use constant STRONG      => 2.1;
5258 use constant NOMINAL     => 1.1;
5259 use constant WEAK        => 0.8;
5260 use constant VERY_WEAK   => 0.55;
5261
5262 # values for testing indexes in output array
5263 use constant UNDEFINED_INDEX => -1;
5264
5265 # Maximum number of little messages; probably need not be changed.
5266 use constant MAX_NAG_MESSAGES => 6;
5267
5268 # increment between sequence numbers for each type
5269 # For example, ?: pairs might have numbers 7,11,15,...
5270 use constant TYPE_SEQUENCE_INCREMENT => 4;
5271
5272 {
5273
5274     # methods to count instances
5275     my $_count = 0;
5276     sub get_count        { $_count; }
5277     sub _increment_count { ++$_count }
5278     sub _decrement_count { --$_count }
5279 }
5280
5281 # interface to Perl::Tidy::Logger routines
5282 sub warning {
5283     if ($logger_object) {
5284         $logger_object->warning(@_);
5285     }
5286 }
5287
5288 sub complain {
5289     if ($logger_object) {
5290         $logger_object->complain(@_);
5291     }
5292 }
5293
5294 sub write_logfile_entry {
5295     if ($logger_object) {
5296         $logger_object->write_logfile_entry(@_);
5297     }
5298 }
5299
5300 sub black_box {
5301     if ($logger_object) {
5302         $logger_object->black_box(@_);
5303     }
5304 }
5305
5306 sub report_definite_bug {
5307     if ($logger_object) {
5308         $logger_object->report_definite_bug();
5309     }
5310 }
5311
5312 sub get_saw_brace_error {
5313     if ($logger_object) {
5314         $logger_object->get_saw_brace_error();
5315     }
5316 }
5317
5318 sub we_are_at_the_last_line {
5319     if ($logger_object) {
5320         $logger_object->we_are_at_the_last_line();
5321     }
5322 }
5323
5324 # interface to Perl::Tidy::Diagnostics routine
5325 sub write_diagnostics {
5326
5327     if ($diagnostics_object) {
5328         $diagnostics_object->write_diagnostics(@_);
5329     }
5330 }
5331
5332 sub get_added_semicolon_count {
5333     my $self = shift;
5334     return $added_semicolon_count;
5335 }
5336
5337 sub DESTROY {
5338     $_[0]->_decrement_count();
5339 }
5340
5341 sub new {
5342
5343     my $class = shift;
5344
5345     # we are given an object with a write_line() method to take lines
5346     my %defaults = (
5347         sink_object        => undef,
5348         diagnostics_object => undef,
5349         logger_object      => undef,
5350     );
5351     my %args = ( %defaults, @_ );
5352
5353     $logger_object      = $args{logger_object};
5354     $diagnostics_object = $args{diagnostics_object};
5355
5356     # we create another object with a get_line() and peek_ahead() method
5357     my $sink_object = $args{sink_object};
5358     $file_writer_object =
5359       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5360
5361     # initialize the leading whitespace stack to negative levels
5362     # so that we can never run off the end of the stack
5363     $gnu_position_predictor = 0;    # where the current token is predicted to be
5364     $max_gnu_stack_index    = 0;
5365     $max_gnu_item_index     = -1;
5366     $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5367     @gnu_item_list               = ();
5368     $last_output_indentation     = 0;
5369     $last_indentation_written    = 0;
5370     $last_unadjusted_indentation = 0;
5371     $last_leading_token          = "";
5372
5373     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5374     $saw_END_or_DATA_         = 0;
5375
5376     @block_type_to_go            = ();
5377     @type_sequence_to_go         = ();
5378     @container_environment_to_go = ();
5379     @bond_strength_to_go         = ();
5380     @forced_breakpoint_to_go     = ();
5381     @lengths_to_go               = ();    # line length to start of ith token
5382     @levels_to_go                = ();
5383     @matching_token_to_go        = ();
5384     @mate_index_to_go            = ();
5385     @nesting_blocks_to_go        = ();
5386     @ci_levels_to_go             = ();
5387     @nesting_depth_to_go         = (0);
5388     @nobreak_to_go               = ();
5389     @old_breakpoint_to_go        = ();
5390     @tokens_to_go                = ();
5391     @types_to_go                 = ();
5392     @leading_spaces_to_go        = ();
5393     @reduced_spaces_to_go        = ();
5394
5395     @dont_align         = ();
5396     @has_broken_sublist = ();
5397     @want_comma_break   = ();
5398
5399     @ci_stack                   = ("");
5400     $saw_negative_indentation   = 0;
5401     $first_tabbing_disagreement = 0;
5402     $last_tabbing_disagreement  = 0;
5403     $tabbing_disagreement_count = 0;
5404     $in_tabbing_disagreement    = 0;
5405     $input_line_tabbing         = undef;
5406
5407     $last_line_type               = "";
5408     $last_last_line_leading_level = 0;
5409     $last_line_leading_level      = 0;
5410     $last_line_leading_type       = '#';
5411
5412     $last_nonblank_token        = ';';
5413     $last_nonblank_type         = ';';
5414     $last_last_nonblank_token   = ';';
5415     $last_last_nonblank_type    = ';';
5416     $last_nonblank_block_type   = "";
5417     $last_output_level          = 0;
5418     $looking_for_else           = 0;
5419     $embedded_tab_count         = 0;
5420     $first_embedded_tab_at      = 0;
5421     $last_embedded_tab_at       = 0;
5422     $deleted_semicolon_count    = 0;
5423     $first_deleted_semicolon_at = 0;
5424     $last_deleted_semicolon_at  = 0;
5425     $added_semicolon_count      = 0;
5426     $first_added_semicolon_at   = 0;
5427     $last_added_semicolon_at    = 0;
5428     $last_line_had_side_comment = 0;
5429     %postponed_breakpoint       = ();
5430
5431     # variables for adding side comments
5432     %block_leading_text        = ();
5433     %block_opening_line_number = ();
5434     $csc_new_statement_ok      = 1;
5435
5436     %saved_opening_indentation = ();
5437
5438     reset_block_text_accumulator();
5439
5440     prepare_for_new_input_lines();
5441
5442     $vertical_aligner_object =
5443       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5444         $logger_object, $diagnostics_object );
5445
5446     if ( $rOpts->{'entab-leading-whitespace'} ) {
5447         write_logfile_entry(
5448 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5449         );
5450     }
5451     elsif ( $rOpts->{'tabs'} ) {
5452         write_logfile_entry("Indentation will be with a tab character\n");
5453     }
5454     else {
5455         write_logfile_entry(
5456             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5457     }
5458
5459     # This was the start of a formatter referent, but object-oriented
5460     # coding has turned out to be too slow here.
5461     $formatter_self = {};
5462
5463     bless $formatter_self, $class;
5464
5465     # Safety check..this is not a class yet
5466     if ( _increment_count() > 1 ) {
5467         confess
5468 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
5469     }
5470     return $formatter_self;
5471 }
5472
5473 sub prepare_for_new_input_lines {
5474
5475     $gnu_sequence_number++;    # increment output batch counter
5476     %last_gnu_equals                = ();
5477     %gnu_comma_count                = ();
5478     %gnu_arrow_count                = ();
5479     $line_start_index_to_go         = 0;
5480     $max_gnu_item_index             = UNDEFINED_INDEX;
5481     $index_max_forced_break         = UNDEFINED_INDEX;
5482     $max_index_to_go                = UNDEFINED_INDEX;
5483     $last_nonblank_index_to_go      = UNDEFINED_INDEX;
5484     $last_nonblank_type_to_go       = '';
5485     $last_nonblank_token_to_go      = '';
5486     $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5487     $last_last_nonblank_type_to_go  = '';
5488     $last_last_nonblank_token_to_go = '';
5489     $forced_breakpoint_count        = 0;
5490     $forced_breakpoint_undo_count   = 0;
5491     $rbrace_follower                = undef;
5492     $lengths_to_go[0]               = 0;
5493     $old_line_count_in_batch        = 1;
5494     $comma_count_in_batch           = 0;
5495     $starting_in_quote              = 0;
5496
5497     destroy_one_line_block();
5498 }
5499
5500 sub write_line {
5501
5502     my $self = shift;
5503     my ($line_of_tokens) = @_;
5504
5505     my $line_type  = $line_of_tokens->{_line_type};
5506     my $input_line = $line_of_tokens->{_line_text};
5507
5508     my $want_blank_line_next = 0;
5509
5510     # _line_type codes are:
5511     #   SYSTEM         - system-specific code before hash-bang line
5512     #   CODE           - line of perl code (including comments)
5513     #   POD_START      - line starting pod, such as '=head'
5514     #   POD            - pod documentation text
5515     #   POD_END        - last line of pod section, '=cut'
5516     #   HERE           - text of here-document
5517     #   HERE_END       - last line of here-doc (target word)
5518     #   FORMAT         - format section
5519     #   FORMAT_END     - last line of format section, '.'
5520     #   DATA_START     - __DATA__ line
5521     #   DATA           - unidentified text following __DATA__
5522     #   END_START      - __END__ line
5523     #   END            - unidentified text following __END__
5524     #   ERROR          - we are in big trouble, probably not a perl script
5525     #
5526     # handle line of code..
5527     if ( $line_type eq 'CODE' ) {
5528
5529         # let logger see all non-blank lines of code
5530         if ( $input_line !~ /^\s*$/ ) {
5531             my $output_line_number =
5532               $vertical_aligner_object->get_output_line_number();
5533             black_box( $line_of_tokens, $output_line_number );
5534         }
5535         print_line_of_tokens($line_of_tokens);
5536     }
5537
5538     # handle line of non-code..
5539     else {
5540
5541         # set special flags
5542         my $skip_line = 0;
5543         my $tee_line  = 0;
5544         if ( $line_type =~ /^POD/ ) {
5545
5546             # Pod docs should have a preceding blank line.  But be
5547             # very careful in __END__ and __DATA__ sections, because:
5548             #   1. the user may be using this section for any purpose whatsoever
5549             #   2. the blank counters are not active there
5550             # It should be safe to request a blank line between an
5551             # __END__ or __DATA__ and an immediately following '=head'
5552             # type line, (types END_START and DATA_START), but not for
5553             # any other lines of type END or DATA.
5554             if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
5555             if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
5556             if (   !$skip_line
5557                 && $line_type eq 'POD_START'
5558                 && $last_line_type !~ /^(END|DATA)$/ )
5559             {
5560                 want_blank_line();
5561             }
5562
5563             # patch to put a blank line after =cut
5564             # (required by podchecker)
5565             if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
5566                 $file_writer_object->reset_consecutive_blank_lines();
5567                 $want_blank_line_next = 1;
5568             }
5569         }
5570
5571         # leave the blank counters in a predictable state
5572         # after __END__ or __DATA__
5573         elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
5574             $file_writer_object->reset_consecutive_blank_lines();
5575             $saw_END_or_DATA_ = 1;
5576         }
5577
5578         # write unindented non-code line
5579         if ( !$skip_line ) {
5580             if ($tee_line) { $file_writer_object->tee_on() }
5581             write_unindented_line($input_line);
5582             if ($tee_line)             { $file_writer_object->tee_off() }
5583             if ($want_blank_line_next) { want_blank_line(); }
5584         }
5585     }
5586     $last_line_type = $line_type;
5587 }
5588
5589 sub create_one_line_block {
5590     $index_start_one_line_block            = $_[0];
5591     $semicolons_before_block_self_destruct = $_[1];
5592 }
5593
5594 sub destroy_one_line_block {
5595     $index_start_one_line_block            = UNDEFINED_INDEX;
5596     $semicolons_before_block_self_destruct = 0;
5597 }
5598
5599 sub leading_spaces_to_go {
5600
5601     # return the number of indentation spaces for a token in the output stream;
5602     # these were previously stored by 'set_leading_whitespace'.
5603
5604     return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
5605
5606 }
5607
5608 sub get_SPACES {
5609
5610     # return the number of leading spaces associated with an indentation
5611     # variable $indentation is either a constant number of spaces or an object
5612     # with a get_SPACES method.
5613     my $indentation = shift;
5614     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
5615 }
5616
5617 sub get_RECOVERABLE_SPACES {
5618
5619     # return the number of spaces (+ means shift right, - means shift left)
5620     # that we would like to shift a group of lines with the same indentation
5621     # to get them to line up with their opening parens
5622     my $indentation = shift;
5623     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
5624 }
5625
5626 sub get_AVAILABLE_SPACES_to_go {
5627
5628     my $item = $leading_spaces_to_go[ $_[0] ];
5629
5630     # return the number of available leading spaces associated with an
5631     # indentation variable.  $indentation is either a constant number of
5632     # spaces or an object with a get_AVAILABLE_SPACES method.
5633     return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
5634 }
5635
5636 sub new_lp_indentation_item {
5637
5638     # this is an interface to the IndentationItem class
5639     my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
5640
5641     # A negative level implies not to store the item in the item_list
5642     my $index = 0;
5643     if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
5644
5645     my $item = Perl::Tidy::IndentationItem->new(
5646         $spaces,      $level,
5647         $ci_level,    $available_spaces,
5648         $index,       $gnu_sequence_number,
5649         $align_paren, $max_gnu_stack_index,
5650         $line_start_index_to_go,
5651     );
5652
5653     if ( $level >= 0 ) {
5654         $gnu_item_list[$max_gnu_item_index] = $item;
5655     }
5656
5657     return $item;
5658 }
5659
5660 sub set_leading_whitespace {
5661
5662     # This routine defines leading whitespace
5663     # given: the level and continuation_level of a token,
5664     # define: space count of leading string which would apply if it
5665     # were the first token of a new line.
5666
5667     my ( $level, $ci_level, $in_continued_quote ) = @_;
5668
5669     # modify for -bli, which adds one continuation indentation for
5670     # opening braces
5671     if (   $rOpts_brace_left_and_indent
5672         && $max_index_to_go == 0
5673         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
5674     {
5675         $ci_level++;
5676     }
5677
5678     # patch to avoid trouble when input file has negative indentation.
5679     # other logic should catch this error.
5680     if ( $level < 0 ) { $level = 0 }
5681
5682     #-------------------------------------------
5683     # handle the standard indentation scheme
5684     #-------------------------------------------
5685     unless ($rOpts_line_up_parentheses) {
5686         my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
5687           $rOpts_indent_columns;
5688         my $ci_spaces =
5689           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
5690
5691         if ($in_continued_quote) {
5692             $space_count = 0;
5693             $ci_spaces   = 0;
5694         }
5695         $leading_spaces_to_go[$max_index_to_go] = $space_count;
5696         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
5697         return;
5698     }
5699
5700     #-------------------------------------------------------------
5701     # handle case of -lp indentation..
5702     #-------------------------------------------------------------
5703
5704     # The continued_quote flag means that this is the first token of a
5705     # line, and it is the continuation of some kind of multi-line quote
5706     # or pattern.  It requires special treatment because it must have no
5707     # added leading whitespace. So we create a special indentation item
5708     # which is not in the stack.
5709     if ($in_continued_quote) {
5710         my $space_count     = 0;
5711         my $available_space = 0;
5712         $level = -1;    # flag to prevent storing in item_list
5713         $leading_spaces_to_go[$max_index_to_go]   =
5714           $reduced_spaces_to_go[$max_index_to_go] =
5715           new_lp_indentation_item( $space_count, $level, $ci_level,
5716             $available_space, 0 );
5717         return;
5718     }
5719
5720     # get the top state from the stack
5721     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
5722     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
5723     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
5724
5725     my $type        = $types_to_go[$max_index_to_go];
5726     my $token       = $tokens_to_go[$max_index_to_go];
5727     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
5728
5729     if ( $type eq '{' || $type eq '(' ) {
5730
5731         $gnu_comma_count{ $total_depth + 1 } = 0;
5732         $gnu_arrow_count{ $total_depth + 1 } = 0;
5733
5734         # If we come to an opening token after an '=' token of some type,
5735         # see if it would be helpful to 'break' after the '=' to save space
5736         my $last_equals = $last_gnu_equals{$total_depth};
5737         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
5738
5739             # find the position if we break at the '='
5740             my $i_test = $last_equals;
5741             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
5742             my $test_position = total_line_length( $i_test, $max_index_to_go );
5743
5744             if (
5745
5746                 # if we are beyond the midpoint
5747                 $gnu_position_predictor > $half_maximum_line_length
5748
5749                 # or if we can save some space by breaking at the '='
5750                 # without obscuring the second line by the first
5751                 || ( $test_position > 1 +
5752                     total_line_length( $line_start_index_to_go, $last_equals ) )
5753               )
5754             {
5755
5756                 # then make the switch -- note that we do not set a real
5757                 # breakpoint here because we may not really need one; sub
5758                 # scan_list will do that if necessary
5759                 $line_start_index_to_go = $i_test + 1;
5760                 $gnu_position_predictor = $test_position;
5761             }
5762         }
5763     }
5764
5765     # Check for decreasing depth ..
5766     # Note that one token may have both decreasing and then increasing
5767     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
5768     # in this example we would first go back to (1,0) then up to (2,0)
5769     # in a single call.
5770     if ( $level < $current_level || $ci_level < $current_ci_level ) {
5771
5772         # loop to find the first entry at or completely below this level
5773         my ( $lev, $ci_lev );
5774         while (1) {
5775             if ($max_gnu_stack_index) {
5776
5777                 # save index of token which closes this level
5778                 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
5779
5780                 # Undo any extra indentation if we saw no commas
5781                 my $available_spaces =
5782                   $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
5783
5784                 my $comma_count = 0;
5785                 my $arrow_count = 0;
5786                 if ( $type eq '}' || $type eq ')' ) {
5787                     $comma_count = $gnu_comma_count{$total_depth};
5788                     $arrow_count = $gnu_arrow_count{$total_depth};
5789                     $comma_count = 0 unless $comma_count;
5790                     $arrow_count = 0 unless $arrow_count;
5791                 }
5792                 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
5793                 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
5794
5795                 if ( $available_spaces > 0 ) {
5796
5797                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
5798
5799                         my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
5800                         my $seqno =
5801                           $gnu_stack[$max_gnu_stack_index]
5802                           ->get_SEQUENCE_NUMBER();
5803
5804                         # Be sure this item was created in this batch.  This
5805                         # should be true because we delete any available
5806                         # space from open items at the end of each batch.
5807                         if (   $gnu_sequence_number != $seqno
5808                             || $i > $max_gnu_item_index )
5809                         {
5810                             warning(
5811 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
5812                             );
5813                             report_definite_bug();
5814                         }
5815
5816                         else {
5817                             if ( $arrow_count == 0 ) {
5818                                 $gnu_item_list[$i]
5819                                   ->permanently_decrease_AVAILABLE_SPACES(
5820                                     $available_spaces);
5821                             }
5822                             else {
5823                                 $gnu_item_list[$i]
5824                                   ->tentatively_decrease_AVAILABLE_SPACES(
5825                                     $available_spaces);
5826                             }
5827
5828                             my $j;
5829                             for (
5830                                 $j = $i + 1 ;
5831                                 $j <= $max_gnu_item_index ;
5832                                 $j++
5833                               )
5834                             {
5835                                 $gnu_item_list[$j]
5836                                   ->decrease_SPACES($available_spaces);
5837                             }
5838                         }
5839                     }
5840                 }
5841
5842                 # go down one level
5843                 --$max_gnu_stack_index;
5844                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
5845                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
5846
5847                 # stop when we reach a level at or below the current level
5848                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
5849                     $space_count =
5850                       $gnu_stack[$max_gnu_stack_index]->get_SPACES();
5851                     $current_level    = $lev;
5852                     $current_ci_level = $ci_lev;
5853                     last;
5854                 }
5855             }
5856
5857             # reached bottom of stack .. should never happen because
5858             # only negative levels can get here, and $level was forced
5859             # to be positive above.
5860             else {
5861                 warning(
5862 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
5863                 );
5864                 report_definite_bug();
5865                 last;
5866             }
5867         }
5868     }
5869
5870     # handle increasing depth
5871     if ( $level > $current_level || $ci_level > $current_ci_level ) {
5872
5873         # Compute the standard incremental whitespace.  This will be
5874         # the minimum incremental whitespace that will be used.  This
5875         # choice results in a smooth transition between the gnu-style
5876         # and the standard style.
5877         my $standard_increment =
5878           ( $level - $current_level ) * $rOpts_indent_columns +
5879           ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
5880
5881         # Now we have to define how much extra incremental space
5882         # ("$available_space") we want.  This extra space will be
5883         # reduced as necessary when long lines are encountered or when
5884         # it becomes clear that we do not have a good list.
5885         my $available_space = 0;
5886         my $align_paren     = 0;
5887         my $excess          = 0;
5888
5889         # initialization on empty stack..
5890         if ( $max_gnu_stack_index == 0 ) {
5891             $space_count = $level * $rOpts_indent_columns;
5892         }
5893
5894         # if this is a BLOCK, add the standard increment
5895         elsif ($last_nonblank_block_type) {
5896             $space_count += $standard_increment;
5897         }
5898
5899         # if last nonblank token was not structural indentation,
5900         # just use standard increment
5901         elsif ( $last_nonblank_type ne '{' ) {
5902             $space_count += $standard_increment;
5903         }
5904
5905         # otherwise use the space to the first non-blank level change token
5906         else {
5907
5908             $space_count = $gnu_position_predictor;
5909
5910             my $min_gnu_indentation =
5911               $gnu_stack[$max_gnu_stack_index]->get_SPACES();
5912
5913             $available_space = $space_count - $min_gnu_indentation;
5914             if ( $available_space >= $standard_increment ) {
5915                 $min_gnu_indentation += $standard_increment;
5916             }
5917             elsif ( $available_space > 1 ) {
5918                 $min_gnu_indentation += $available_space + 1;
5919             }
5920             elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
5921                 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
5922                     $min_gnu_indentation += 2;
5923                 }
5924                 else {
5925                     $min_gnu_indentation += 1;
5926                 }
5927             }
5928             else {
5929                 $min_gnu_indentation += $standard_increment;
5930             }
5931             $available_space = $space_count - $min_gnu_indentation;
5932
5933             if ( $available_space < 0 ) {
5934                 $space_count     = $min_gnu_indentation;
5935                 $available_space = 0;
5936             }
5937             $align_paren = 1;
5938         }
5939
5940         # update state, but not on a blank token
5941         if ( $types_to_go[$max_index_to_go] ne 'b' ) {
5942
5943             $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
5944
5945             ++$max_gnu_stack_index;
5946             $gnu_stack[$max_gnu_stack_index] =
5947               new_lp_indentation_item( $space_count, $level, $ci_level,
5948                 $available_space, $align_paren );
5949
5950             # If the opening paren is beyond the half-line length, then
5951             # we will use the minimum (standard) indentation.  This will
5952             # help avoid problems associated with running out of space
5953             # near the end of a line.  As a result, in deeply nested
5954             # lists, there will be some indentations which are limited
5955             # to this minimum standard indentation. But the most deeply
5956             # nested container will still probably be able to shift its
5957             # parameters to the right for proper alignment, so in most
5958             # cases this will not be noticable.
5959             if (   $available_space > 0
5960                 && $space_count > $half_maximum_line_length )
5961             {
5962                 $gnu_stack[$max_gnu_stack_index]
5963                   ->tentatively_decrease_AVAILABLE_SPACES($available_space);
5964             }
5965         }
5966     }
5967
5968     # Count commas and look for non-list characters.  Once we see a
5969     # non-list character, we give up and don't look for any more commas.
5970     if ( $type eq '=>' ) {
5971         $gnu_arrow_count{$total_depth}++;
5972
5973         # tentatively treating '=>' like '=' for estimating breaks
5974         # TODO: this could use some experimentation
5975         $last_gnu_equals{$total_depth} = $max_index_to_go;
5976     }
5977
5978     elsif ( $type eq ',' ) {
5979         $gnu_comma_count{$total_depth}++;
5980     }
5981
5982     elsif ( $is_assignment{$type} ) {
5983         $last_gnu_equals{$total_depth} = $max_index_to_go;
5984     }
5985
5986     # this token might start a new line
5987     # if this is a non-blank..
5988     if ( $type ne 'b' ) {
5989
5990         # and if ..
5991         if (
5992
5993             # this is the first nonblank token of the line
5994             $max_index_to_go == 1 && $types_to_go[0] eq 'b'
5995
5996             # or previous character was one of these:
5997             || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
5998
5999             # or previous character was opening and this does not close it
6000             || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6001             || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6002
6003             # or this token is one of these:
6004             || $type =~ /^([\.]|\|\||\&\&)$/
6005
6006             # or this is a closing structure
6007             || (   $last_nonblank_type_to_go eq '}'
6008                 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6009
6010             # or previous token was keyword 'return'
6011             || ( $last_nonblank_type_to_go eq 'k'
6012                 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6013
6014             # or starting a new line at certain keywords is fine
6015             || (   $type eq 'k'
6016                 && $is_if_unless_and_or_last_next_redo_return{$token} )
6017
6018             # or this is after an assignment after a closing structure
6019             || (
6020                 $is_assignment{$last_nonblank_type_to_go}
6021                 && (
6022                     $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6023
6024                     # and it is significantly to the right
6025                     || $gnu_position_predictor > $half_maximum_line_length
6026                 )
6027             )
6028           )
6029         {
6030             check_for_long_gnu_style_lines();
6031             $line_start_index_to_go = $max_index_to_go;
6032
6033             # back up 1 token if we want to break before that type
6034             # otherwise, we may strand tokens like '?' or ':' on a line
6035             if ( $line_start_index_to_go > 0 ) {
6036                 if ( $last_nonblank_type_to_go eq 'k' ) {
6037
6038                     if ( $want_break_before{$last_nonblank_token_to_go} ) {
6039                         $line_start_index_to_go--;
6040                     }
6041                 }
6042                 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6043                     $line_start_index_to_go--;
6044                 }
6045             }
6046         }
6047     }
6048
6049     # remember the predicted position of this token on the output line
6050     if ( $max_index_to_go > $line_start_index_to_go ) {
6051         $gnu_position_predictor =
6052           total_line_length( $line_start_index_to_go, $max_index_to_go );
6053     }
6054     else {
6055         $gnu_position_predictor = $space_count +
6056           token_sequence_length( $max_index_to_go, $max_index_to_go );
6057     }
6058
6059     # store the indentation object for this token
6060     # this allows us to manipulate the leading whitespace
6061     # (in case we have to reduce indentation to fit a line) without
6062     # having to change any token values
6063     $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6064     $reduced_spaces_to_go[$max_index_to_go] =
6065       ( $max_gnu_stack_index > 0 && $ci_level )
6066       ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6067       : $gnu_stack[$max_gnu_stack_index];
6068     return;
6069 }
6070
6071 sub check_for_long_gnu_style_lines {
6072
6073     # look at the current estimated maximum line length, and
6074     # remove some whitespace if it exceeds the desired maximum
6075
6076     # this is only for the '-lp' style
6077     return unless ($rOpts_line_up_parentheses);
6078
6079     # nothing can be done if no stack items defined for this line
6080     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6081
6082     # see if we have exceeded the maximum desired line length
6083     # keep 2 extra free because they are needed in some cases
6084     # (result of trial-and-error testing)
6085     my $spaces_needed =
6086       $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6087
6088     return if ( $spaces_needed < 0 );
6089
6090     # We are over the limit, so try to remove a requested number of
6091     # spaces from leading whitespace.  We are only allowed to remove
6092     # from whitespace items created on this batch, since others have
6093     # already been used and cannot be undone.
6094     my @candidates = ();
6095     my $i;
6096
6097     # loop over all whitespace items created for the current batch
6098     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6099         my $item = $gnu_item_list[$i];
6100
6101         # item must still be open to be a candidate (otherwise it
6102         # cannot influence the current token)
6103         next if ( $item->get_CLOSED() >= 0 );
6104
6105         my $available_spaces = $item->get_AVAILABLE_SPACES();
6106
6107         if ( $available_spaces > 0 ) {
6108             push( @candidates, [ $i, $available_spaces ] );
6109         }
6110     }
6111
6112     return unless (@candidates);
6113
6114     # sort by available whitespace so that we can remove whitespace
6115     # from the maximum available first
6116     @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6117
6118     # keep removing whitespace until we are done or have no more
6119     my $candidate;
6120     foreach $candidate (@candidates) {
6121         my ( $i, $available_spaces ) = @{$candidate};
6122         my $deleted_spaces =
6123           ( $available_spaces > $spaces_needed )
6124           ? $spaces_needed
6125           : $available_spaces;
6126
6127         # remove the incremental space from this item
6128         $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6129
6130         my $i_debug = $i;
6131
6132         # update the leading whitespace of this item and all items
6133         # that came after it
6134         for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6135
6136             my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6137             if ( $old_spaces > $deleted_spaces ) {
6138                 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6139             }
6140
6141             # shouldn't happen except for code bug:
6142             else {
6143                 my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
6144                 my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6145                 my $old_level    = $gnu_item_list[$i]->get_LEVEL();
6146                 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6147                 warning(
6148 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
6149                 );
6150                 report_definite_bug();
6151             }
6152         }
6153         $gnu_position_predictor -= $deleted_spaces;
6154         $spaces_needed          -= $deleted_spaces;
6155         last unless ( $spaces_needed > 0 );
6156     }
6157 }
6158
6159 sub finish_lp_batch {
6160
6161     # This routine is called once after each each output stream batch is
6162     # finished to undo indentation for all incomplete -lp
6163     # indentation levels.  It is too risky to leave a level open,
6164     # because then we can't backtrack in case of a long line to follow.
6165     # This means that comments and blank lines will disrupt this
6166     # indentation style.  But the vertical aligner may be able to
6167     # get the space back if there are side comments.
6168
6169     # this is only for the 'lp' style
6170     return unless ($rOpts_line_up_parentheses);
6171
6172     # nothing can be done if no stack items defined for this line
6173     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6174
6175     # loop over all whitespace items created for the current batch
6176     my $i;
6177     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6178         my $item = $gnu_item_list[$i];
6179
6180         # only look for open items
6181         next if ( $item->get_CLOSED() >= 0 );
6182
6183         # Tentatively remove all of the available space
6184         # (The vertical aligner will try to get it back later)
6185         my $available_spaces = $item->get_AVAILABLE_SPACES();
6186         if ( $available_spaces > 0 ) {
6187
6188             # delete incremental space for this item
6189             $gnu_item_list[$i]
6190               ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6191
6192             # Reduce the total indentation space of any nodes that follow
6193             # Note that any such nodes must necessarily be dependents
6194             # of this node.
6195             foreach ( $i + 1 .. $max_gnu_item_index ) {
6196                 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6197             }
6198         }
6199     }
6200     return;
6201 }
6202
6203 sub reduce_lp_indentation {
6204
6205     # reduce the leading whitespace at token $i if possible by $spaces_needed
6206     # (a large value of $spaces_needed will remove all excess space)
6207     # NOTE: to be called from scan_list only for a sequence of tokens
6208     # contained between opening and closing parens/braces/brackets
6209
6210     my ( $i, $spaces_wanted ) = @_;
6211     my $deleted_spaces = 0;
6212
6213     my $item             = $leading_spaces_to_go[$i];
6214     my $available_spaces = $item->get_AVAILABLE_SPACES();
6215
6216     if (
6217         $available_spaces > 0
6218         && ( ( $spaces_wanted <= $available_spaces )
6219             || !$item->get_HAVE_CHILD() )
6220       )
6221     {
6222
6223         # we'll remove these spaces, but mark them as recoverable
6224         $deleted_spaces =
6225           $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6226     }
6227
6228     return $deleted_spaces;
6229 }
6230
6231 sub token_sequence_length {
6232
6233     # return length of tokens ($ifirst .. $ilast) including first & last
6234     # returns 0 if $ifirst > $ilast
6235     my $ifirst = shift;
6236     my $ilast  = shift;
6237     return 0 if ( $ilast < 0 || $ifirst > $ilast );
6238     return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6239     return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6240 }
6241
6242 sub total_line_length {
6243
6244     # return length of a line of tokens ($ifirst .. $ilast)
6245     my $ifirst = shift;
6246     my $ilast  = shift;
6247     if ( $ifirst < 0 ) { $ifirst = 0 }
6248
6249     return leading_spaces_to_go($ifirst) +
6250       token_sequence_length( $ifirst, $ilast );
6251 }
6252
6253 sub excess_line_length {
6254
6255     # return number of characters by which a line of tokens ($ifirst..$ilast)
6256     # exceeds the allowable line length.
6257     my $ifirst = shift;
6258     my $ilast  = shift;
6259     if ( $ifirst < 0 ) { $ifirst = 0 }
6260     return leading_spaces_to_go($ifirst) +
6261       token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6262 }
6263
6264 sub finish_formatting {
6265
6266     # flush buffer and write any informative messages
6267     my $self = shift;
6268
6269     flush();
6270     $file_writer_object->decrement_output_line_number()
6271       ;    # fix up line number since it was incremented
6272     we_are_at_the_last_line();
6273     if ( $added_semicolon_count > 0 ) {
6274         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6275         my $what =
6276           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6277         write_logfile_entry("$added_semicolon_count $what added:\n");
6278         write_logfile_entry(
6279             "  $first at input line $first_added_semicolon_at\n");
6280
6281         if ( $added_semicolon_count > 1 ) {
6282             write_logfile_entry(
6283                 "   Last at input line $last_added_semicolon_at\n");
6284         }
6285         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
6286         write_logfile_entry("\n");
6287     }
6288
6289     if ( $deleted_semicolon_count > 0 ) {
6290         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6291         my $what =
6292           ( $deleted_semicolon_count > 1 )
6293           ? "semicolons were"
6294           : "semicolon was";
6295         write_logfile_entry(
6296             "$deleted_semicolon_count unnecessary $what deleted:\n");
6297         write_logfile_entry(
6298             "  $first at input line $first_deleted_semicolon_at\n");
6299
6300         if ( $deleted_semicolon_count > 1 ) {
6301             write_logfile_entry(
6302                 "   Last at input line $last_deleted_semicolon_at\n");
6303         }
6304         write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
6305         write_logfile_entry("\n");
6306     }
6307
6308     if ( $embedded_tab_count > 0 ) {
6309         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6310         my $what =
6311           ( $embedded_tab_count > 1 )
6312           ? "quotes or patterns"
6313           : "quote or pattern";
6314         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6315         write_logfile_entry(
6316 "This means the display of this script could vary with device or software\n"
6317         );
6318         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
6319
6320         if ( $embedded_tab_count > 1 ) {
6321             write_logfile_entry(
6322                 "   Last at input line $last_embedded_tab_at\n");
6323         }
6324         write_logfile_entry("\n");
6325     }
6326
6327     if ($first_tabbing_disagreement) {
6328         write_logfile_entry(
6329 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
6330         );
6331     }
6332
6333     if ($in_tabbing_disagreement) {
6334         write_logfile_entry(
6335 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
6336         );
6337     }
6338     else {
6339
6340         if ($last_tabbing_disagreement) {
6341
6342             write_logfile_entry(
6343 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
6344             );
6345         }
6346         else {
6347             write_logfile_entry("No indentation disagreement seen\n");
6348         }
6349     }
6350     write_logfile_entry("\n");
6351
6352     $vertical_aligner_object->report_anything_unusual();
6353
6354     $file_writer_object->report_line_length_errors();
6355 }
6356
6357 sub check_options {
6358
6359     # This routine is called to check the Opts hash after it is defined
6360
6361     ($rOpts) = @_;
6362     my ( $tabbing_string, $tab_msg );
6363
6364     make_static_block_comment_pattern();
6365     make_static_side_comment_pattern();
6366     make_closing_side_comment_prefix();
6367     make_closing_side_comment_list_pattern();
6368
6369     # If closing side comments ARE selected, then we can safely
6370     # delete old closing side comments unless closing side comment
6371     # warnings are requested.  This is a good idea because it will
6372     # eliminate any old csc's which fall below the line count threshold.
6373     # We cannot do this if warnings are turned on, though, because we
6374     # might delete some text which has been added.  So that must
6375     # be handled when comments are created.
6376     if ( $rOpts->{'closing-side-comments'} ) {
6377         if ( !$rOpts->{'closing-side-comment-warnings'} ) {
6378             $rOpts->{'delete-closing-side-comments'} = 1;
6379         }
6380     }
6381
6382     # If closing side comments ARE NOT selected, but warnings ARE
6383     # selected and we ARE DELETING csc's, then we will pretend to be
6384     # adding with a huge interval.  This will force the comments to be
6385     # generated for comparison with the old comments, but not added.
6386     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
6387         if ( $rOpts->{'delete-closing-side-comments'} ) {
6388             $rOpts->{'delete-closing-side-comments'}  = 0;
6389             $rOpts->{'closing-side-comments'}         = 1;
6390             $rOpts->{'closing-side-comment-interval'} = 100000000;
6391         }
6392     }
6393
6394     make_bli_pattern();
6395     make_block_brace_vertical_tightness_pattern();
6396
6397     if ( $rOpts->{'line-up-parentheses'} ) {
6398
6399         if (   $rOpts->{'indent-only'}
6400             || !$rOpts->{'add-newlines'}
6401             || !$rOpts->{'delete-old-newlines'} )
6402         {
6403             warn <<EOM;
6404 -----------------------------------------------------------------------
6405 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
6406
6407 The -lp indentation logic requires that perltidy be able to coordinate
6408 arbitrarily large numbers of line breakpoints.  This isn't possible
6409 with these flags. Sometimes an acceptable workaround is to use -wocb=3
6410 -----------------------------------------------------------------------
6411 EOM
6412             $rOpts->{'line-up-parentheses'} = 0;
6413         }
6414     }
6415
6416     # At present, tabs are not compatable with the line-up-parentheses style
6417     # (it would be possible to entab the total leading whitespace
6418     # just prior to writing the line, if desired).
6419     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
6420         warn <<EOM;
6421 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
6422 EOM
6423         $rOpts->{'tabs'} = 0;
6424     }
6425
6426     # Likewise, tabs are not compatable with outdenting..
6427     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
6428         warn <<EOM;
6429 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
6430 EOM
6431         $rOpts->{'tabs'} = 0;
6432     }
6433
6434     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
6435         warn <<EOM;
6436 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
6437 EOM
6438         $rOpts->{'tabs'} = 0;
6439     }
6440
6441     if ( !$rOpts->{'space-for-semicolon'} ) {
6442         $want_left_space{'f'} = -1;
6443     }
6444
6445     if ( $rOpts->{'space-terminal-semicolon'} ) {
6446         $want_left_space{';'} = 1;
6447     }
6448
6449     # implement outdenting preferences for keywords
6450     %outdent_keyword = ();
6451
6452     # load defaults
6453     @_ = qw(next last redo goto return);
6454
6455     # override defaults if requested
6456     if ( $_ = $rOpts->{'outdent-keyword-list'} ) {
6457         s/^\s+//;
6458         s/\s+$//;
6459         @_ = split /\s+/;
6460     }
6461
6462     # FUTURE: if not a keyword, assume that it is an identifier
6463     foreach (@_) {
6464         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
6465             $outdent_keyword{$_} = 1;
6466         }
6467         else {
6468             warn "ignoring '$_' in -okwl list; not a perl keyword";
6469         }
6470     }
6471
6472     # implement user whitespace preferences
6473     if ( $_ = $rOpts->{'want-left-space'} ) {
6474         s/^\s+//;
6475         s/\s+$//;
6476         @_ = split /\s+/;
6477         @want_left_space{@_} = (1) x scalar(@_);
6478     }
6479
6480     if ( $_ = $rOpts->{'want-right-space'} ) {
6481         s/^\s+//;
6482         s/\s+$//;
6483         @_ = split /\s+/;
6484         @want_right_space{@_} = (1) x scalar(@_);
6485     }
6486     if ( $_ = $rOpts->{'nowant-left-space'} ) {
6487         s/^\s+//;
6488         s/\s+$//;
6489         @_ = split /\s+/;
6490         @want_left_space{@_} = (-1) x scalar(@_);
6491     }
6492
6493     if ( $_ = $rOpts->{'nowant-right-space'} ) {
6494         s/^\s+//;
6495         s/\s+$//;
6496         @want_right_space{@_} = (-1) x scalar(@_);
6497     }
6498     if ( $rOpts->{'dump-want-left-space'} ) {
6499         dump_want_left_space(*STDOUT);
6500         exit 1;
6501     }
6502
6503     if ( $rOpts->{'dump-want-right-space'} ) {
6504         dump_want_right_space(*STDOUT);
6505         exit 1;
6506     }
6507
6508     # default keywords for which space is introduced before an opening paren
6509     # (at present, including them messes up vertical alignment)
6510     @_ = qw(my local our and or eq ne if else elsif until
6511       unless while for foreach return switch case given when);
6512     @space_after_keyword{@_} = (1) x scalar(@_);
6513
6514     # allow user to modify these defaults
6515     if ( $_ = $rOpts->{'space-after-keyword'} ) {
6516         s/^\s+//;
6517         s/\s+$//;
6518         @_ = split /\s+/;
6519         @space_after_keyword{@_} = (1) x scalar(@_);
6520     }
6521
6522     if ( $_ = $rOpts->{'nospace-after-keyword'} ) {
6523         s/^\s+//;
6524         s/\s+$//;
6525         @_ = split /\s+/;
6526         @space_after_keyword{@_} = (0) x scalar(@_);
6527     }
6528
6529     # implement user break preferences
6530     if ( $_ = $rOpts->{'want-break-after'} ) {
6531         @_ = split /\s+/;
6532         foreach my $tok (@_) {
6533             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
6534             my $lbs = $left_bond_strength{$tok};
6535             my $rbs = $right_bond_strength{$tok};
6536             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
6537                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6538                   ( $lbs, $rbs );
6539             }
6540         }
6541     }
6542
6543     if ( $_ = $rOpts->{'want-break-before'} ) {
6544         s/^\s+//;
6545         s/\s+$//;
6546         @_ = split /\s+/;
6547         foreach my $tok (@_) {
6548             my $lbs = $left_bond_strength{$tok};
6549             my $rbs = $right_bond_strength{$tok};
6550             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
6551                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6552                   ( $lbs, $rbs );
6553             }
6554         }
6555     }
6556
6557     # make note if breaks are before certain key types
6558     %want_break_before = ();
6559
6560     foreach my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'xor' ) {
6561         $want_break_before{$tok} =
6562           $left_bond_strength{$tok} < $right_bond_strength{$tok};
6563     }
6564
6565     # Coordinate ?/: breaks, which must be similar
6566     if ( !$want_break_before{':'} ) {
6567         $want_break_before{'?'}   = $want_break_before{':'};
6568         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
6569         $left_bond_strength{'?'}  = NO_BREAK;
6570     }
6571
6572     # Define here tokens which may follow the closing brace of a do statement
6573     # on the same line, as in:
6574     #   } while ( $something);
6575     @_ = qw(until while unless if ; );
6576     push @_, ',';
6577     @is_do_follower{@_} = (1) x scalar(@_);
6578
6579     # These tokens may follow the closing brace of an if or elsif block.
6580     # In other words, for cuddled else we want code to look like:
6581     #   } elsif ( $something) {
6582     #   } else {
6583     if ( $rOpts->{'cuddled-else'} ) {
6584         @_ = qw(else elsif);
6585         @is_if_brace_follower{@_} = (1) x scalar(@_);
6586     }
6587     else {
6588         %is_if_brace_follower = ();
6589     }
6590
6591     # nothing can follow the closing curly of an else { } block:
6592     %is_else_brace_follower = ();
6593
6594     # what can follow a multi-line anonymous sub definition closing curly:
6595     @_ = qw# ; : => or and  && || ) #;
6596     push @_, ',';
6597     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
6598
6599     # what can follow a one-line anonynomous sub closing curly:
6600     # one-line anonumous subs also have ']' here...
6601     # see tk3.t and PP.pm
6602     @_ = qw#  ; : => or and  && || ) ] #;
6603     push @_, ',';
6604     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
6605
6606     # What can follow a closing curly of a block
6607     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
6608     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
6609     @_ = qw#  ; : => or and  && || ) #;
6610     push @_, ',';
6611
6612     # allow cuddled continue if cuddled else is specified
6613     if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
6614
6615     @is_other_brace_follower{@_} = (1) x scalar(@_);
6616
6617     $right_bond_strength{'{'} = WEAK;
6618     $left_bond_strength{'{'}  = VERY_STRONG;
6619
6620     # make -l=0  equal to -l=infinite
6621     if ( !$rOpts->{'maximum-line-length'} ) {
6622         $rOpts->{'maximum-line-length'} = 1000000;
6623     }
6624
6625     # make -lbl=0  equal to -lbl=infinite
6626     if ( !$rOpts->{'long-block-line-count'} ) {
6627         $rOpts->{'long-block-line-count'} = 1000000;
6628     }
6629
6630     my $ole = $rOpts->{'output-line-ending'};
6631     ##if ($^O =~ /^(VMS|
6632     if ($ole) {
6633         my %endings = (
6634             dos  => "\015\012",
6635             win  => "\015\012",
6636             mac  => "\015",
6637             unix => "\012",
6638         );
6639         $ole = lc $ole;
6640         unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
6641             my $str = join " ", keys %endings;
6642             die <<EOM;
6643 Unrecognized line ending '$ole'; expecting one of: $str
6644 EOM
6645         }
6646         if ( $rOpts->{'preserve-line-endings'} ) {
6647             warn "Ignoring -ple; conflicts with -ole\n";
6648             $rOpts->{'preserve-line-endings'} = undef;
6649         }
6650     }
6651
6652     # hashes used to simplify setting whitespace
6653     %tightness = (
6654         '{' => $rOpts->{'brace-tightness'},
6655         '}' => $rOpts->{'brace-tightness'},
6656         '(' => $rOpts->{'paren-tightness'},
6657         ')' => $rOpts->{'paren-tightness'},
6658         '[' => $rOpts->{'square-bracket-tightness'},
6659         ']' => $rOpts->{'square-bracket-tightness'},
6660     );
6661     %matching_token = (
6662         '{' => '}',
6663         '(' => ')',
6664         '[' => ']',
6665         '?' => ':',
6666     );
6667
6668     # frequently used parameters
6669     $rOpts_add_newlines                   = $rOpts->{'add-newlines'};
6670     $rOpts_add_whitespace                 = $rOpts->{'add-whitespace'};
6671     $rOpts_block_brace_tightness          = $rOpts->{'block-brace-tightness'};
6672     $rOpts_block_brace_vertical_tightness =
6673       $rOpts->{'block-brace-vertical-tightness'};
6674     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
6675     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
6676     $rOpts_break_at_old_trinary_breakpoints =
6677       $rOpts->{'break-at-old-trinary-breakpoints'};
6678     $rOpts_break_at_old_comma_breakpoints =
6679       $rOpts->{'break-at-old-comma-breakpoints'};
6680     $rOpts_break_at_old_keyword_breakpoints =
6681       $rOpts->{'break-at-old-keyword-breakpoints'};
6682     $rOpts_break_at_old_logical_breakpoints =
6683       $rOpts->{'break-at-old-logical-breakpoints'};
6684     $rOpts_closing_side_comment_else_flag =
6685       $rOpts->{'closing-side-comment-else-flag'};
6686     $rOpts_closing_side_comment_maximum_text =
6687       $rOpts->{'closing-side-comment-maximum-text'};
6688     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
6689     $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
6690     $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
6691     $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
6692     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
6693     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
6694     $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
6695     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
6696     $rOpts_short_concatenation_item_length =
6697       $rOpts->{'short-concatenation-item-length'};
6698     $rOpts_swallow_optional_blank_lines =
6699       $rOpts->{'swallow-optional-blank-lines'};
6700     $rOpts_ignore_old_line_breaks = $rOpts->{'ignore-old-line-breaks'};
6701     $half_maximum_line_length     = $rOpts_maximum_line_length / 2;
6702
6703     # Note that both opening and closing tokens can access the opening
6704     # and closing flags of their container types.
6705     %opening_vertical_tightness = (
6706         '(' => $rOpts->{'paren-vertical-tightness'},
6707         '{' => $rOpts->{'brace-vertical-tightness'},
6708         '[' => $rOpts->{'square-bracket-vertical-tightness'},
6709         ')' => $rOpts->{'paren-vertical-tightness'},
6710         '}' => $rOpts->{'brace-vertical-tightness'},
6711         ']' => $rOpts->{'square-bracket-vertical-tightness'},
6712     );
6713
6714     %closing_vertical_tightness = (
6715         '(' => $rOpts->{'paren-vertical-tightness-closing'},
6716         '{' => $rOpts->{'brace-vertical-tightness-closing'},
6717         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
6718         ')' => $rOpts->{'paren-vertical-tightness-closing'},
6719         '}' => $rOpts->{'brace-vertical-tightness-closing'},
6720         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
6721     );
6722
6723     # assume flag for '>' same as ')' for closing qw quotes
6724     %closing_token_indentation = (
6725         ')' => $rOpts->{'closing-paren-indentation'},
6726         '}' => $rOpts->{'closing-brace-indentation'},
6727         ']' => $rOpts->{'closing-square-bracket-indentation'},
6728         '>' => $rOpts->{'closing-paren-indentation'},
6729     );
6730 }
6731
6732 sub make_static_block_comment_pattern {
6733
6734     # create the pattern used to identify static block comments
6735     $static_block_comment_pattern = '^(\s*)##';
6736
6737     # allow the user to change it
6738     if ( $rOpts->{'static-block-comment-prefix'} ) {
6739         my $prefix = $rOpts->{'static-block-comment-prefix'};
6740         $prefix =~ s/^\s*//;
6741         if ( $prefix !~ /^#/ ) {
6742             die "ERROR: the -sbcp prefix '$prefix' must begin with '#'\n";
6743
6744         }
6745         my $pattern = '^(\s*)' . $prefix;
6746         eval "'##'=~/$pattern/";
6747         if ($@) {
6748             die
6749 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
6750         }
6751         $static_block_comment_pattern = $pattern;
6752     }
6753 }
6754
6755 sub make_closing_side_comment_list_pattern {
6756
6757     # turn any input list into a regex for recognizing selected block types
6758     $closing_side_comment_list_pattern = '^\w+';
6759     if ( defined( $rOpts->{'closing-side-comment-list'} )
6760         && $rOpts->{'closing-side-comment-list'} )
6761     {
6762         $closing_side_comment_list_pattern =
6763           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
6764     }
6765 }
6766
6767 sub make_bli_pattern {
6768
6769     if (
6770         defined(
6771                  $rOpts->{'brace-left-and-indent-list'}
6772               && $rOpts->{'brace-left-and-indent-list'}
6773         )
6774       )
6775     {
6776         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
6777     }
6778
6779     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
6780 }
6781
6782 sub make_block_brace_vertical_tightness_pattern {
6783
6784     # turn any input list into a regex for recognizing selected block types
6785     $block_brace_vertical_tightness_pattern =
6786       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6787
6788     if (
6789         defined(
6790                  $rOpts->{'block-brace-vertical-tightness-list'}
6791               && $rOpts->{'block-brace-vertical-tightness-list'}
6792         )
6793       )
6794     {
6795         $block_brace_vertical_tightness_pattern =
6796           make_block_pattern( '-bbvtl',
6797             $rOpts->{'block-brace-vertical-tightness-list'} );
6798     }
6799 }
6800
6801 sub make_block_pattern {
6802
6803     #  given a string of block-type keywords, return a regex to match them
6804     #  The only tricky part is that labels are indicated with a single ':'
6805     #  and the 'sub' token text may have additional text after it (name of
6806     #  sub).
6807     #
6808     #  Example:
6809     #
6810     #   input string: "if else elsif unless while for foreach do : sub";
6811     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
6812
6813     my ( $abbrev, $string ) = @_;
6814     $string =~ s/^\s+//;
6815     $string =~ s/\s+$//;
6816     my @list = split /\s+/, $string;
6817     my @words = ();
6818     my %seen;
6819     for my $i (@list) {
6820         next if $seen{$i};
6821         $seen{$i} = 1;
6822         if ( $i eq 'sub' ) {
6823         }
6824         elsif ( $i eq ':' ) {
6825             push @words, '\w+:';
6826         }
6827         elsif ( $i =~ /^\w/ ) {
6828             push @words, $i;
6829         }
6830         else {
6831             warn "unrecognized block type $i after $abbrev, ignoring\n";
6832         }
6833     }
6834     my $pattern = '(' . join( '|', @words ) . ')$';
6835     if ( $seen{'sub'} ) {
6836         $pattern = '(' . $pattern . '|sub)';
6837     }
6838     $pattern = '^' . $pattern;
6839     return $pattern;
6840 }
6841
6842 sub make_static_side_comment_pattern {
6843
6844     # create the pattern used to identify static side comments
6845     $static_side_comment_pattern = '^##';
6846
6847     # allow the user to change it
6848     if ( $rOpts->{'static-side-comment-prefix'} ) {
6849         my $prefix = $rOpts->{'static-side-comment-prefix'};
6850         $prefix =~ s/^\s*//;
6851         my $pattern = '^' . $prefix;
6852         eval "'##'=~/$pattern/";
6853         if ($@) {
6854             die
6855 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
6856         }
6857         $static_side_comment_pattern = $pattern;
6858     }
6859 }
6860
6861 sub make_closing_side_comment_prefix {
6862
6863     # Be sure we have a valid closing side comment prefix
6864     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
6865     my $csc_prefix_pattern;
6866     if ( !defined($csc_prefix) ) {
6867         $csc_prefix         = '## end';
6868         $csc_prefix_pattern = '^##\s+end';
6869     }
6870     else {
6871         my $test_csc_prefix = $csc_prefix;
6872         if ( $test_csc_prefix !~ /^#/ ) {
6873             $test_csc_prefix = '#' . $test_csc_prefix;
6874         }
6875
6876         # make a regex to recognize the prefix
6877         my $test_csc_prefix_pattern = $test_csc_prefix;
6878
6879         # escape any special characters
6880         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
6881
6882         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
6883
6884         # allow exact number of intermediate spaces to vary
6885         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
6886
6887         # make sure we have a good pattern
6888         # if we fail this we probably have an error in escaping
6889         # characters.
6890         eval "'##'=~/$test_csc_prefix_pattern/";
6891         if ($@) {
6892
6893             # shouldn't happen..must have screwed up escaping, above
6894             report_definite_bug();
6895             warn
6896 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
6897
6898             # just warn and keep going with defaults
6899             warn "Please consider using a simpler -cscp prefix\n";
6900             warn "Using default -cscp instead; please check output\n";
6901         }
6902         else {
6903             $csc_prefix         = $test_csc_prefix;
6904             $csc_prefix_pattern = $test_csc_prefix_pattern;
6905         }
6906     }
6907     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
6908     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
6909 }
6910
6911 sub dump_want_left_space {
6912     my $fh = shift;
6913     local $" = "\n";
6914     print $fh <<EOM;
6915 These values are the main control of whitespace to the left of a token type;
6916 They may be altered with the -wls parameter.
6917 For a list of token types, use perltidy --dump-token-types (-dtt)
6918  1 means the token wants a space to its left
6919 -1 means the token does not want a space to its left
6920 ------------------------------------------------------------------------
6921 EOM
6922     foreach ( sort keys %want_left_space ) {
6923         print $fh "$_\t$want_left_space{$_}\n";
6924     }
6925 }
6926
6927 sub dump_want_right_space {
6928     my $fh = shift;
6929     local $" = "\n";
6930     print $fh <<EOM;
6931 These values are the main control of whitespace to the right of a token type;
6932 They may be altered with the -wrs parameter.
6933 For a list of token types, use perltidy --dump-token-types (-dtt)
6934  1 means the token wants a space to its right
6935 -1 means the token does not want a space to its right
6936 ------------------------------------------------------------------------
6937 EOM
6938     foreach ( sort keys %want_right_space ) {
6939         print $fh "$_\t$want_right_space{$_}\n";
6940     }
6941 }
6942
6943 {    # begin is_essential_whitespace
6944
6945     my %is_sort_grep_map;
6946     my %is_for_foreach;
6947
6948     BEGIN {
6949
6950         @_ = qw(sort grep map);
6951         @is_sort_grep_map{@_} = (1) x scalar(@_);
6952
6953         @_ = qw(for foreach);
6954         @is_for_foreach{@_} = (1) x scalar(@_);
6955
6956     }
6957
6958     sub is_essential_whitespace {
6959
6960         # Essential whitespace means whitespace which cannot be safely deleted.
6961         # We are given three tokens and their types:
6962         # ($tokenl, $typel) is the token to the left of the space in question
6963         # ($tokenr, $typer) is the token to the right of the space in question
6964         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
6965         #
6966         # This is a slow routine but is not needed too often except when -mangle
6967         # is used.
6968         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
6969
6970         # never combine two bare words or numbers
6971         my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )
6972
6973           # do not combine a number with a concatination dot
6974           # example: pom.caputo:
6975           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
6976           || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
6977           || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
6978
6979           # do not join a minus with a bare word, because you might form
6980           # a file test operator.  Example from Complex.pm:
6981           # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
6982           || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
6983
6984           # and something like this could become ambiguous without space
6985           # after the '-':
6986           #   use constant III=>1;
6987           #   $a = $b - III;
6988           # and even this:
6989           #   $a = - III;
6990           || ( ( $tokenl eq '-' )
6991             && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
6992
6993           # '= -' should not become =- or you will get a warning
6994           # about reversed -=
6995           # || ($tokenr eq '-')
6996
6997           # keep a space between a quote and a bareword to prevent the
6998           # bareword from becomming a quote modifier.
6999           || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7000
7001           # keep a space between a token ending in '$' and any word;
7002           # this caused trouble:  "die @$ if $@"
7003           || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7004             && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7005
7006           # perl is very fussy about spaces before <<
7007           || ( $tokenr =~ /^\<\</ )
7008
7009           # avoid combining tokens to create new meanings. Example:
7010           #     $a+ +$b must not become $a++$b
7011           || ( $is_digraph{ $tokenl . $tokenr } )
7012           || ( $is_trigraph{ $tokenl . $tokenr } )
7013
7014           # another example: do not combine these two &'s:
7015           #     allow_options & &OPT_EXECCGI
7016           || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7017
7018           # don't combine $$ or $# with any alphanumeric
7019           # (testfile mangle.t with --mangle)
7020           || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7021
7022           # retain any space after possible filehandle
7023           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7024           || ( $typel eq 'Z' || $typell eq 'Z' )
7025
7026           # keep paren separate in 'use Foo::Bar ()'
7027           || ( $tokenr eq '('
7028             && $typel   eq 'w'
7029             && $typell  eq 'k'
7030             && $tokenll eq 'use' )
7031
7032           # keep any space between filehandle and paren:
7033           # file mangle.t with --mangle:
7034           || ( $typel eq 'Y' && $tokenr eq '(' )
7035
7036           # retain any space after here doc operator ( hereerr.t)
7037           || ( $typel eq 'h' )
7038
7039           # FIXME: this needs some further work; extrude.t has test cases
7040           # it is safest to retain any space after start of ? : operator
7041           # because of perl's quirky parser.
7042           # ie, this line will fail if you remove the space after the '?':
7043           #    $b=join $comma ? ',' : ':', @_;   # ok
7044           #    $b=join $comma ?',' : ':', @_;   # error!
7045           # but this is ok :)
7046           #    $b=join $comma?',' : ':', @_;   # not a problem!
7047           ## || ($typel eq '?')
7048
7049           # be careful with a space around ++ and --, to avoid ambiguity as to
7050           # which token it applies
7051           || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7052           || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7053
7054           # need space after foreach my; for example, this will fail in
7055           # older versions of Perl:
7056           # foreach my$ft(@filetypes)...
7057           || (
7058             $tokenl eq 'my'
7059
7060             #  /^(for|foreach)$/
7061             && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/
7062           )
7063
7064           # must have space between grep and left paren; "grep(" will fail
7065           || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7066
7067           # don't stick numbers next to left parens, as in:
7068           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7069           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7070
7071           # don't join something like: for bla::bla:: abc
7072           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7073           || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
7074           ;    # the value of this long logic sequence is the result we want
7075         return $result;
7076     }
7077 }
7078
7079 sub set_white_space_flag {
7080
7081     #    This routine examines each pair of nonblank tokens and
7082     #    sets values for array @white_space_flag.
7083     #
7084     #    $white_space_flag[$j] is a flag indicating whether a white space
7085     #    BEFORE token $j is needed, with the following values:
7086     #
7087     #            -1 do not want a space before token $j
7088     #             0 optional space or $j is a whitespace
7089     #             1 want a space before token $j
7090     #
7091     #
7092     #   The values for the first token will be defined based
7093     #   upon the contents of the "to_go" output array.
7094     #
7095     #   Note: retain debug print statements because they are usually
7096     #   required after adding new token types.
7097
7098     BEGIN {
7099
7100         # initialize these global hashes, which control the use of
7101         # whitespace around tokens:
7102         #
7103         # %binary_ws_rules
7104         # %want_left_space
7105         # %want_right_space
7106         # %space_after_keyword
7107         #
7108         # Many token types are identical to the tokens themselves.
7109         # See the tokenizer for a complete list. Here are some special types:
7110         #   k = perl keyword
7111         #   f = semicolon in for statement
7112         #   m = unary minus
7113         #   p = unary plus
7114         # Note that :: is excluded since it should be contained in an identifier
7115         # Note that '->' is excluded because it never gets space
7116         # parentheses and brackets are excluded since they are handled specially
7117         # curly braces are included but may be overridden by logic, such as
7118         # newline logic.
7119
7120         # NEW_TOKENS: create a whitespace rule here.  This can be as
7121         # simple as adding your new letter to @spaces_both_sides, for
7122         # example.
7123
7124         @_ = qw" L { ( [ ";
7125         @is_opening_type{@_} = (1) x scalar(@_);
7126
7127         @_ = qw" R } ) ] ";
7128         @is_closing_type{@_} = (1) x scalar(@_);
7129
7130         my @spaces_both_sides = qw"
7131           + - * / % ? = . : x < > | & ^ .. << >> ** && .. ||  => += -=
7132           .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>=
7133           &&= ||= <=> A k f w F n C Y U G v
7134           ";
7135
7136         my @spaces_left_side = qw"
7137           t ! ~ m p { \ h pp mm Z j
7138           ";
7139         push( @spaces_left_side, '#' );    # avoids warning message
7140
7141         my @spaces_right_side = qw"
7142           ; } ) ] R J ++ -- **=
7143           ";
7144         push( @spaces_right_side, ',' );    # avoids warning message
7145         @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7146         @want_right_space{@spaces_both_sides} =
7147           (1) x scalar(@spaces_both_sides);
7148         @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
7149         @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7150         @want_left_space{@spaces_right_side} =
7151           (-1) x scalar(@spaces_right_side);
7152         @want_right_space{@spaces_right_side} =
7153           (1) x scalar(@spaces_right_side);
7154         $want_left_space{'L'}   = WS_NO;
7155         $want_left_space{'->'}  = WS_NO;
7156         $want_right_space{'->'} = WS_NO;
7157         $want_left_space{'**'}  = WS_NO;
7158         $want_right_space{'**'} = WS_NO;
7159
7160         # hash type information must stay tightly bound
7161         # as in :  ${xxxx}
7162         $binary_ws_rules{'i'}{'L'} = WS_NO;
7163         $binary_ws_rules{'i'}{'{'} = WS_YES;
7164         $binary_ws_rules{'k'}{'{'} = WS_YES;
7165         $binary_ws_rules{'U'}{'{'} = WS_YES;
7166         $binary_ws_rules{'i'}{'['} = WS_NO;
7167         $binary_ws_rules{'R'}{'L'} = WS_NO;
7168         $binary_ws_rules{'R'}{'{'} = WS_NO;
7169         $binary_ws_rules{'t'}{'L'} = WS_NO;
7170         $binary_ws_rules{'t'}{'{'} = WS_NO;
7171         $binary_ws_rules{'}'}{'L'} = WS_NO;
7172         $binary_ws_rules{'}'}{'{'} = WS_NO;
7173         $binary_ws_rules{'$'}{'L'} = WS_NO;
7174         $binary_ws_rules{'$'}{'{'} = WS_NO;
7175         $binary_ws_rules{'@'}{'L'} = WS_NO;
7176         $binary_ws_rules{'@'}{'{'} = WS_NO;
7177         $binary_ws_rules{'='}{'L'} = WS_YES;
7178
7179         # the following includes ') {'
7180         # as in :    if ( xxx ) { yyy }
7181         $binary_ws_rules{']'}{'L'} = WS_NO;
7182         $binary_ws_rules{']'}{'{'} = WS_NO;
7183         $binary_ws_rules{')'}{'{'} = WS_YES;
7184         $binary_ws_rules{')'}{'['} = WS_NO;
7185         $binary_ws_rules{']'}{'['} = WS_NO;
7186         $binary_ws_rules{']'}{'{'} = WS_NO;
7187         $binary_ws_rules{'}'}{'['} = WS_NO;
7188         $binary_ws_rules{'R'}{'['} = WS_NO;
7189
7190         $binary_ws_rules{']'}{'++'} = WS_NO;
7191         $binary_ws_rules{']'}{'--'} = WS_NO;
7192         $binary_ws_rules{')'}{'++'} = WS_NO;
7193         $binary_ws_rules{')'}{'--'} = WS_NO;
7194
7195         $binary_ws_rules{'R'}{'++'} = WS_NO;
7196         $binary_ws_rules{'R'}{'--'} = WS_NO;
7197
7198         $binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
7199         $binary_ws_rules{'w'}{':'} = WS_NO;
7200         $binary_ws_rules{'i'}{'Q'} = WS_YES;
7201         $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7202
7203         # FIXME: we need to split 'i' into variables and functions
7204         # and have no space for functions but space for variables.  For now,
7205         # I have a special patch in the special rules below
7206         $binary_ws_rules{'i'}{'('} = WS_NO;
7207
7208         $binary_ws_rules{'w'}{'('} = WS_NO;
7209         $binary_ws_rules{'w'}{'{'} = WS_YES;
7210     }
7211     my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7212     my ( $last_token, $last_type, $last_block_type, $token, $type,
7213         $block_type );
7214     my (@white_space_flag);
7215     my $j_tight_closing_paren = -1;
7216
7217     if ( $max_index_to_go >= 0 ) {
7218         $token      = $tokens_to_go[$max_index_to_go];
7219         $type       = $types_to_go[$max_index_to_go];
7220         $block_type = $block_type_to_go[$max_index_to_go];
7221     }
7222     else {
7223         $token      = ' ';
7224         $type       = 'b';
7225         $block_type = '';
7226     }
7227
7228     # loop over all tokens
7229     my ( $j, $ws );
7230
7231     for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7232
7233         if ( $$rtoken_type[$j] eq 'b' ) {
7234             $white_space_flag[$j] = WS_OPTIONAL;
7235             next;
7236         }
7237
7238         # set a default value, to be changed as needed
7239         $ws              = undef;
7240         $last_token      = $token;
7241         $last_type       = $type;
7242         $last_block_type = $block_type;
7243         $token           = $$rtokens[$j];
7244         $type            = $$rtoken_type[$j];
7245         $block_type      = $$rblock_type[$j];
7246
7247         #---------------------------------------------------------------
7248         # section 1:
7249         # handle space on the inside of opening braces
7250         #---------------------------------------------------------------
7251
7252         #    /^[L\{\(\[]$/
7253         if ( $is_opening_type{$last_type} ) {
7254
7255             $j_tight_closing_paren = -1;
7256
7257             # let's keep empty matched braces together: () {} []
7258             # except for BLOCKS
7259             if ( $token eq $matching_token{$last_token} ) {
7260                 if ($block_type) {
7261                     $ws = WS_YES;
7262                 }
7263                 else {
7264                     $ws = WS_NO;
7265                 }
7266             }
7267             else {
7268
7269                 # we're considering the right of an opening brace
7270                 # tightness = 0 means always pad inside with space
7271                 # tightness = 1 means pad inside if "complex"
7272                 # tightness = 2 means never pad inside with space
7273
7274                 my $tightness;
7275                 if (   $last_type eq '{'
7276                     && $last_token eq '{'
7277                     && $last_block_type )
7278                 {
7279                     $tightness = $rOpts_block_brace_tightness;
7280                 }
7281                 else { $tightness = $tightness{$last_token} }
7282
7283                 if ( $tightness <= 0 ) {
7284                     $ws = WS_YES;
7285                 }
7286                 elsif ( $tightness > 1 ) {
7287                     $ws = WS_NO;
7288                 }
7289                 else {
7290
7291                     # Patch to count '-foo' as single token so that
7292                     # each of  $a{-foo} and $a{foo} and $a{'foo'} do
7293                     # not get spaces with default formatting.
7294                     my $j_here = $j;
7295                     ++$j_here
7296                       if ( $token eq '-'
7297                         && $last_token             eq '{'
7298                         && $$rtoken_type[ $j + 1 ] eq 'w' );
7299
7300                     # $j_next is where a closing token should be if
7301                     # the container has a single token
7302                     my $j_next =
7303                       ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7304                       ? $j_here + 2
7305                       : $j_here + 1;
7306                     my $tok_next  = $$rtokens[$j_next];
7307                     my $type_next = $$rtoken_type[$j_next];
7308
7309                     # for tightness = 1, if there is just one token
7310                     # within the matching pair, we will keep it tight
7311                     if (
7312                         $tok_next eq $matching_token{$last_token}
7313
7314                         # but watch out for this: [ [ ]    (misc.t)
7315                         && $last_token ne $token
7316                       )
7317                     {
7318
7319                         # remember where to put the space for the closing paren
7320                         $j_tight_closing_paren = $j_next;
7321                         $ws                    = WS_NO;
7322                     }
7323                     else {
7324                         $ws = WS_YES;
7325                     }
7326                 }
7327             }
7328         }    # done with opening braces and brackets
7329         my $ws_1 = $ws
7330           if FORMATTER_DEBUG_FLAG_WHITE;
7331
7332         #---------------------------------------------------------------
7333         # section 2:
7334         # handle space on inside of closing brace pairs
7335         #---------------------------------------------------------------
7336
7337         #   /[\}\)\]R]/
7338         if ( $is_closing_type{$type} ) {
7339
7340             if ( $j == $j_tight_closing_paren ) {
7341
7342                 $j_tight_closing_paren = -1;
7343                 $ws                    = WS_NO;
7344             }
7345             else {
7346
7347                 if ( !defined($ws) ) {
7348
7349                     my $tightness;
7350                     if ( $type eq '}' && $token eq '}' && $block_type ) {
7351                         $tightness = $rOpts_block_brace_tightness;
7352                     }
7353                     else { $tightness = $tightness{$token} }
7354
7355                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7356                 }
7357             }
7358         }
7359
7360         my $ws_2 = $ws
7361           if FORMATTER_DEBUG_FLAG_WHITE;
7362
7363         #---------------------------------------------------------------
7364         # section 3:
7365         # use the binary table
7366         #---------------------------------------------------------------
7367         if ( !defined($ws) ) {
7368             $ws = $binary_ws_rules{$last_type}{$type};
7369         }
7370         my $ws_3 = $ws
7371           if FORMATTER_DEBUG_FLAG_WHITE;
7372
7373         #---------------------------------------------------------------
7374         # section 4:
7375         # some special cases
7376         #---------------------------------------------------------------
7377         if ( $token eq '(' ) {
7378
7379             # This will have to be tweaked as tokenization changes.
7380             # We want a space after certain block types:
7381             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7382             #
7383             # But not others:
7384             #     &{ $_->[1] } ( delete $_[$#_]{ $_->[0] } );
7385             # At present, the & block is not marked as a code block, so
7386             # this works:
7387             if ( $last_type eq '}' ) {
7388
7389                 if ( $is_sort_map_grep{$last_block_type} ) {
7390                     $ws = WS_YES;
7391                 }
7392                 else {
7393                     $ws = WS_NO;
7394                 }
7395             }
7396
7397             # -----------------------------------------------------
7398             # 'w' and 'i' checks for something like:
7399             #   myfun(    &myfun(   ->myfun(
7400             # -----------------------------------------------------
7401             if (   ( $last_type =~ /^[wkU]$/ )
7402                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7403             {
7404
7405                 # Do not introduce new space between keyword or function
7406                 # ( except in special cases) because this can
7407                 # introduce errors in some cases ( prnterr1.t )
7408                 unless ( $last_type eq 'k'
7409                     && $space_after_keyword{$last_token} )
7410                 {
7411                     $ws = WS_NO;
7412                 }
7413             }
7414
7415             # space between something like $i and ( in
7416             # for $i ( 0 .. 20 ) {
7417             # FIXME: eventually, type 'i' needs to be split into multiple
7418             # token types so this can be a hardwired rule.
7419             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7420                 $ws = WS_YES;
7421             }
7422
7423             # allow constant function followed by '()' to retain no space
7424             elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7425                 ;
7426                 $ws = WS_NO;
7427             }
7428         }
7429
7430         # patch for SWITCH/CASE: make space at ']{' optional
7431         # since the '{' might begin a case or when block
7432         elsif ( $token eq '{' && $last_token eq ']' ) {
7433             $ws = WS_OPTIONAL;
7434         }
7435
7436         # keep space between 'sub' and '{' for anonymous sub definition
7437         if ( $type eq '{' ) {
7438             if ( $last_token eq 'sub' ) {
7439                 $ws = WS_YES;
7440             }
7441
7442             # this is needed to avoid no space in '){'
7443             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7444
7445             # avoid any space before the brace or bracket in something like
7446             #  @opts{'a','b',...}
7447             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7448                 $ws = WS_NO;
7449             }
7450         }
7451
7452         elsif ( $type eq 'i' ) {
7453
7454             # never a space before ->
7455             if ( $token =~ /^\-\>/ ) {
7456                 $ws = WS_NO;
7457             }
7458         }
7459
7460         # retain any space between '-' and bare word
7461         elsif ( $type eq 'w' || $type eq 'C' ) {
7462             $ws = WS_OPTIONAL if $last_type eq '-';
7463
7464             # never a space before ->
7465             if ( $token =~ /^\-\>/ ) {
7466                 $ws = WS_NO;
7467             }
7468         }
7469
7470         # retain any space between '-' and bare word
7471         # example: avoid space between 'USER' and '-' here:
7472         #   $myhash{USER-NAME}='steve';
7473         elsif ( $type eq 'm' || $type eq '-' ) {
7474             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
7475         }
7476
7477         # always space before side comment
7478         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
7479
7480         # always preserver whatever space was used after a possible
7481         # filehandle or here doc operator
7482         if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) {
7483             $ws = WS_OPTIONAL;
7484         }
7485
7486         my $ws_4 = $ws
7487           if FORMATTER_DEBUG_FLAG_WHITE;
7488
7489         #---------------------------------------------------------------
7490         # section 5:
7491         # default rules not covered above
7492         #---------------------------------------------------------------
7493         # if we fall through to here,
7494         # look at the pre-defined hash tables for the two tokens, and
7495         # if (they are equal) use the common value
7496         # if (either is zero or undef) use the other
7497         # if (either is -1) use it
7498         # That is,
7499         # left  vs right
7500         #  1    vs    1     -->  1
7501         #  0    vs    0     -->  0
7502         # -1    vs   -1     --> -1
7503         #
7504         #  0    vs   -1     --> -1
7505         #  0    vs    1     -->  1
7506         #  1    vs    0     -->  1
7507         # -1    vs    0     --> -1
7508         #
7509         # -1    vs    1     --> -1
7510         #  1    vs   -1     --> -1
7511         if ( !defined($ws) ) {
7512             my $wl = $want_left_space{$type};
7513             my $wr = $want_right_space{$last_type};
7514             if ( !defined($wl) ) { $wl = 0 }
7515             if ( !defined($wr) ) { $wr = 0 }
7516             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
7517         }
7518
7519         if ( !defined($ws) ) {
7520             $ws = 0;
7521             write_diagnostics(
7522                 "WS flag is undefined for tokens $last_token $token\n");
7523         }
7524
7525         # Treat newline as a whitespace. Otherwise, we might combine
7526         # 'Send' and '-recipients' here according to the above rules:
7527         #    my $msg = new Fax::Send
7528         #      -recipients => $to,
7529         #      -data => $data;
7530         if ( $ws == 0 && $j == 0 ) { $ws = 1 }
7531
7532         if (   ( $ws == 0 )
7533             && $j > 0
7534             && $j < $jmax
7535             && ( $last_type !~ /^[Zh]$/ ) )
7536         {
7537
7538             # If this happens, we have a non-fatal but undesirable
7539             # hole in the above rules which should be patched.
7540             write_diagnostics(
7541                 "WS flag is zero for tokens $last_token $token\n");
7542         }
7543         $white_space_flag[$j] = $ws;
7544
7545         FORMATTER_DEBUG_FLAG_WHITE && do {
7546             my $str = substr( $last_token, 0, 15 );
7547             $str .= ' ' x ( 16 - length($str) );
7548             if ( !defined($ws_1) ) { $ws_1 = "*" }
7549             if ( !defined($ws_2) ) { $ws_2 = "*" }
7550             if ( !defined($ws_3) ) { $ws_3 = "*" }
7551             if ( !defined($ws_4) ) { $ws_4 = "*" }
7552             print
7553 "WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
7554         };
7555     }
7556     return \@white_space_flag;
7557 }
7558
7559 {    # begin print_line_of_tokens
7560
7561     my $rtoken_type;
7562     my $rtokens;
7563     my $rlevels;
7564     my $rslevels;
7565     my $rblock_type;
7566     my $rcontainer_type;
7567     my $rcontainer_environment;
7568     my $rtype_sequence;
7569     my $input_line;
7570     my $rnesting_tokens;
7571     my $rci_levels;
7572     my $rnesting_blocks;
7573
7574     my $in_quote;
7575     my $python_indentation_level;
7576
7577     # These local token variables are stored by store_token_to_go:
7578     my $block_type;
7579     my $ci_level;
7580     my $container_environment;
7581     my $container_type;
7582     my $in_continued_quote;
7583     my $level;
7584     my $nesting_blocks;
7585     my $no_internal_newlines;
7586     my $slevel;
7587     my $token;
7588     my $type;
7589     my $type_sequence;
7590
7591     # routine to pull the jth token from the line of tokens
7592     sub extract_token {
7593         my $j = shift;
7594         $token                 = $$rtokens[$j];
7595         $type                  = $$rtoken_type[$j];
7596         $block_type            = $$rblock_type[$j];
7597         $container_type        = $$rcontainer_type[$j];
7598         $container_environment = $$rcontainer_environment[$j];
7599         $type_sequence         = $$rtype_sequence[$j];
7600         $level                 = $$rlevels[$j];
7601         $slevel                = $$rslevels[$j];
7602         $nesting_blocks        = $$rnesting_blocks[$j];
7603         $ci_level              = $$rci_levels[$j];
7604     }
7605
7606     {
7607         my @saved_token;
7608
7609         sub save_current_token {
7610
7611             @saved_token = (
7612                 $block_type,            $ci_level,
7613                 $container_environment, $container_type,
7614                 $in_continued_quote,    $level,
7615                 $nesting_blocks,        $no_internal_newlines,
7616                 $slevel,                $token,
7617                 $type,                  $type_sequence,
7618             );
7619         }
7620
7621         sub restore_current_token {
7622             (
7623                 $block_type,            $ci_level,
7624                 $container_environment, $container_type,
7625                 $in_continued_quote,    $level,
7626                 $nesting_blocks,        $no_internal_newlines,
7627                 $slevel,                $token,
7628                 $type,                  $type_sequence,
7629               )
7630               = @saved_token;
7631         }
7632     }
7633
7634     # Routine to place the current token into the output stream.
7635     # Called once per output token.
7636     sub store_token_to_go {
7637
7638         my $flag = $no_internal_newlines;
7639         if ( $_[0] ) { $flag = 1 }
7640
7641         $tokens_to_go[ ++$max_index_to_go ]            = $token;
7642         $types_to_go[$max_index_to_go]                 = $type;
7643         $nobreak_to_go[$max_index_to_go]               = $flag;
7644         $old_breakpoint_to_go[$max_index_to_go]        = 0;
7645         $forced_breakpoint_to_go[$max_index_to_go]     = 0;
7646         $block_type_to_go[$max_index_to_go]            = $block_type;
7647         $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
7648         $container_environment_to_go[$max_index_to_go] = $container_environment;
7649         $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
7650         $ci_levels_to_go[$max_index_to_go]             = $ci_level;
7651         $mate_index_to_go[$max_index_to_go]            = -1;
7652         $matching_token_to_go[$max_index_to_go]        = '';
7653
7654         # Note: negative levels are currently retained as a diagnostic so that
7655         # the 'final indentation level' is correctly reported for bad scripts.
7656         # But this means that every use of $level as an index must be checked.
7657         # If this becomes too much of a problem, we might give up and just clip
7658         # them at zero.
7659         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
7660         $levels_to_go[$max_index_to_go]        = $level;
7661         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
7662         $lengths_to_go[ $max_index_to_go + 1 ] =
7663           $lengths_to_go[$max_index_to_go] + length($token);
7664
7665         # Define the indentation that this token would have if it started
7666         # a new line.  We have to do this now because we need to know this
7667         # when considering one-line blocks.
7668         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
7669
7670         if ( $type ne 'b' ) {
7671             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
7672             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
7673             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
7674             $last_nonblank_index_to_go      = $max_index_to_go;
7675             $last_nonblank_type_to_go       = $type;
7676             $last_nonblank_token_to_go      = $token;
7677             if ( $type eq ',' ) {
7678                 $comma_count_in_batch++;
7679             }
7680         }
7681
7682         FORMATTER_DEBUG_FLAG_STORE && do {
7683             my ( $a, $b, $c ) = caller();
7684             print
7685 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
7686         };
7687     }
7688
7689     sub insert_new_token_to_go {
7690
7691         # insert a new token into the output stream.  use same level as
7692         # previous token; assumes a character at max_index_to_go.
7693         save_current_token();
7694         ( $token, $type, $slevel, $no_internal_newlines ) = @_;
7695
7696         if ( $max_index_to_go == UNDEFINED_INDEX ) {
7697             warning("code bug: bad call to insert_new_token_to_go\n");
7698         }
7699         $level = $levels_to_go[$max_index_to_go];
7700
7701         # FIXME: it seems to be necessary to use the next, rather than
7702         # previous, value of this variable when creating a new blank (align.t)
7703         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
7704         $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
7705         $ci_level              = $ci_levels_to_go[$max_index_to_go];
7706         $container_environment = $container_environment_to_go[$max_index_to_go];
7707         $in_continued_quote    = 0;
7708         $block_type            = "";
7709         $type_sequence         = "";
7710         store_token_to_go();
7711         restore_current_token();
7712         return;
7713     }
7714
7715     my %is_until_while_for_if_elsif_else;
7716
7717     BEGIN {
7718
7719         # always break after a closing curly of these block types:
7720         @_ = qw(until while for if elsif else);
7721         @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
7722
7723     }
7724
7725     sub print_line_of_tokens {
7726
7727         my $line_of_tokens = shift;
7728
7729         # This routine is called once per input line to process all of
7730         # the tokens on that line.  This is the first stage of
7731         # beautification.
7732         #
7733         # Full-line comments and blank lines may be processed immediately.
7734         #
7735         # For normal lines of code, the tokens are stored one-by-one,
7736         # via calls to 'sub store_token_to_go', until a known line break
7737         # point is reached.  Then, the batch of collected tokens is
7738         # passed along to 'sub output_line_to_go' for further
7739         # processing.  This routine decides if there should be
7740         # whitespace between each pair of non-white tokens, so later
7741         # routines only need to decide on any additional line breaks.
7742         # Any whitespace is initally a single space character.  Later,
7743         # the vertical aligner may expand that to be multiple space
7744         # characters if necessary for alignment.
7745
7746         # extract input line number for error messages
7747         $input_line_number = $line_of_tokens->{_line_number};
7748
7749         $rtoken_type            = $line_of_tokens->{_rtoken_type};
7750         $rtokens                = $line_of_tokens->{_rtokens};
7751         $rlevels                = $line_of_tokens->{_rlevels};
7752         $rslevels               = $line_of_tokens->{_rslevels};
7753         $rblock_type            = $line_of_tokens->{_rblock_type};
7754         $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
7755         $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
7756         $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
7757         $input_line             = $line_of_tokens->{_line_text};
7758         $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
7759         $rci_levels             = $line_of_tokens->{_rci_levels};
7760         $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
7761
7762         $in_continued_quote = $starting_in_quote =
7763           $line_of_tokens->{_starting_in_quote};
7764         $in_quote                 = $line_of_tokens->{_ending_in_quote};
7765         $python_indentation_level =
7766           $line_of_tokens->{_python_indentation_level};
7767
7768         my $j;
7769         my $j_next;
7770         my $jmax;
7771         my $next_nonblank_token;
7772         my $next_nonblank_token_type;
7773         my $rwhite_space_flag;
7774
7775         $jmax                  = @$rtokens - 1;
7776         $block_type            = "";
7777         $container_type        = "";
7778         $container_environment = "";
7779         $type_sequence         = "";
7780         $no_internal_newlines  = 1 - $rOpts_add_newlines;
7781
7782         # Handle a continued quote..
7783         if ($in_continued_quote) {
7784
7785             # A line which is entirely a quote or pattern must go out
7786             # verbatim.  Note: the \n is contained in $input_line.
7787             if ( $jmax <= 0 ) {
7788                 if ( ( $input_line =~ "\t" ) ) {
7789                     note_embedded_tab();
7790                 }
7791                 write_unindented_line("$input_line");
7792                 $last_line_had_side_comment = 0;
7793                 return;
7794             }
7795
7796             # prior to version 20010406, perltidy had a bug which placed
7797             # continuation indentation before the last line of some multiline
7798             # quotes and patterns -- exactly the lines passing this way.
7799             # To help find affected lines in scripts run with these
7800             # versions, run with '-chk', and it will warn of any quotes or
7801             # patterns which might have been modified by these early
7802             # versions.
7803             if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
7804                 warning(
7805 "-chk: please check this line for extra leading whitespace\n"
7806                 );
7807             }
7808         }
7809
7810         # delete trailing blank tokens
7811         if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
7812
7813         # Handle a blank line..
7814         if ( $jmax < 0 ) {
7815
7816             # For the 'swallow-optional-blank-lines' option, we delete all
7817             # old blank lines and let the blank line rules generate any
7818             # needed blanks.
7819             if ( !$rOpts_swallow_optional_blank_lines ) {
7820                 flush();
7821                 $file_writer_object->write_blank_code_line();
7822                 $last_line_leading_type = 'b';
7823             }
7824             $last_line_had_side_comment = 0;
7825             return;
7826         }
7827
7828         # see if this is a static block comment (starts with ##)
7829         my $is_static_block_comment                       = 0;
7830         my $is_static_block_comment_without_leading_space = 0;
7831         if (   $jmax == 0
7832             && $$rtoken_type[0] eq '#'
7833             && $rOpts->{'static-block-comments'}
7834             && $input_line =~ /$static_block_comment_pattern/o )
7835         {
7836             $is_static_block_comment                       = 1;
7837             $is_static_block_comment_without_leading_space =
7838               ( length($1) <= 0 );
7839         }
7840
7841         # create a hanging side comment if appropriate
7842         if (
7843                $jmax == 0
7844             && $$rtoken_type[0] eq '#'    # only token is a comment
7845             && $last_line_had_side_comment    # last line had side comment
7846             && $input_line =~ /^\s/           # there is some leading space
7847             && !$is_static_block_comment    # do not make static comment hanging
7848             && $rOpts->{'hanging-side-comments'}    # user is allowing this
7849           )
7850         {
7851
7852             # We will insert an empty qw string at the start of the token list
7853             # to force this comment to be a side comment. The vertical aligner
7854             # should then line it up with the previous side comment.
7855             unshift @$rtoken_type,            'q';
7856             unshift @$rtokens,                '';
7857             unshift @$rlevels,                $$rlevels[0];
7858             unshift @$rslevels,               $$rslevels[0];
7859             unshift @$rblock_type,            '';
7860             unshift @$rcontainer_type,        '';
7861             unshift @$rcontainer_environment, '';
7862             unshift @$rtype_sequence,         '';
7863             unshift @$rnesting_tokens,        $$rnesting_tokens[0];
7864             unshift @$rci_levels,             $$rci_levels[0];
7865             unshift @$rnesting_blocks,        $$rnesting_blocks[0];
7866             $jmax = 1;
7867         }
7868
7869         # remember if this line has a side comment
7870         $last_line_had_side_comment =
7871           ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
7872
7873         # Handle a block (full-line) comment..
7874         if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
7875
7876             if ( $rOpts->{'delete-block-comments'} ) { return }
7877
7878             if ( $rOpts->{'tee-block-comments'} ) {
7879                 $file_writer_object->tee_on();
7880             }
7881
7882             destroy_one_line_block();
7883             output_line_to_go();
7884
7885             # output a blank line before block comments
7886             if (
7887                    $last_line_leading_type !~ /^[#b]$/
7888                 && $rOpts->{'blanks-before-comments'}    # only if allowed
7889                 && !
7890                 $is_static_block_comment    # never before static block comments
7891               )
7892             {
7893                 flush();                    # switching to new output stream
7894                 $file_writer_object->write_blank_code_line();
7895                 $last_line_leading_type = 'b';
7896             }
7897
7898             # TRIM COMMENTS -- This could be turned off as a option
7899             $$rtokens[0] =~ s/\s*$//;       # trim right end
7900
7901             if (
7902                 $rOpts->{'indent-block-comments'}
7903                 && ( !$rOpts->{'indent-spaced-block-comments'}
7904                     || $input_line =~ /^\s+/ )
7905                 && !$is_static_block_comment_without_leading_space
7906               )
7907             {
7908                 extract_token(0);
7909                 store_token_to_go();
7910                 output_line_to_go();
7911             }
7912             else {
7913                 flush();    # switching to new output stream
7914                 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
7915                 $last_line_leading_type = '#';
7916             }
7917             if ( $rOpts->{'tee-block-comments'} ) {
7918                 $file_writer_object->tee_off();
7919             }
7920             return;
7921         }
7922
7923         # compare input/output indentation except for continuation lines
7924         # (because they have an unknown amount of initial blank space)
7925         # and lines which are quotes (because they may have been outdented)
7926         # Note: this test is placed here because we know the continuation flag
7927         # at this point, which allows us to avoid non-meaningful checks.
7928         my $structural_indentation_level = $$rlevels[0];
7929         compare_indentation_levels( $python_indentation_level,
7930             $structural_indentation_level )
7931           unless ( $python_indentation_level < 0
7932             || ( $$rci_levels[0] > 0 )
7933             || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
7934           );
7935
7936         #   Patch needed for MakeMaker.  Do not break a statement
7937         #   in which $VERSION may be calculated.  See MakeMaker.pm;
7938         #   this is based on the coding in it.
7939         #   The first line of a file that matches this will be eval'd:
7940         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
7941         #   Examples:
7942         #     *VERSION = \'1.01';
7943         #     ( $VERSION ) = '$Revision: 1.46 $ ' =~ /\$Revision:\s+([^\s]+)/;
7944         #   We will pass such a line straight through without breaking
7945         #   it unless -npvl is used
7946
7947         my $is_VERSION_statement = 0;
7948
7949         if (
7950             !$saw_VERSION_in_this_file
7951             && $input_line =~ /VERSION/    # quick check to reject most lines
7952             && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
7953           )
7954         {
7955             $saw_VERSION_in_this_file = 1;
7956             $is_VERSION_statement     = 1;
7957             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
7958             $no_internal_newlines = 1;
7959         }
7960
7961         # take care of indentation-only
7962         # also write a line which is entirely a 'qw' list
7963         if ( $rOpts->{'indent-only'}
7964             || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
7965         {
7966             flush();
7967             $input_line =~ s/^\s*//;    # trim left end
7968             $input_line =~ s/\s*$//;    # trim right end
7969
7970             extract_token(0);
7971             $token                 = $input_line;
7972             $type                  = 'q';
7973             $block_type            = "";
7974             $container_type        = "";
7975             $container_environment = "";
7976             $type_sequence         = "";
7977             store_token_to_go();
7978             output_line_to_go();
7979             return;
7980         }
7981
7982         push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
7983         push( @$rtoken_type, 'b', 'b' );
7984         ($rwhite_space_flag) =
7985           set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
7986
7987         # find input tabbing to allow checks for tabbing disagreement
7988         ## not used for now
7989         ##$input_line_tabbing = "";
7990         ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
7991
7992         # if the buffer hasn't been flushed, add a leading space if
7993         # necessary to keep essential whitespace. This is really only
7994         # necessary if we are squeezing out all ws.
7995         if ( $max_index_to_go >= 0 ) {
7996
7997             $old_line_count_in_batch++;
7998
7999             if (
8000                 is_essential_whitespace(
8001                     $last_last_nonblank_token,
8002                     $last_last_nonblank_type,
8003                     $tokens_to_go[$max_index_to_go],
8004                     $types_to_go[$max_index_to_go],
8005                     $$rtokens[0],
8006                     $$rtoken_type[0]
8007                 )
8008               )
8009             {
8010                 my $slevel = $$rslevels[0];
8011                 insert_new_token_to_go( ' ', 'b', $slevel,
8012                     $no_internal_newlines );
8013             }
8014         }
8015
8016         # If we just saw the end of an elsif block, write nag message
8017         # if we do not see another elseif or an else.
8018         if ($looking_for_else) {
8019
8020             unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8021                 write_logfile_entry("(No else block)\n");
8022             }
8023             $looking_for_else = 0;
8024         }
8025
8026         # This is a good place to kill incomplete one-line blocks
8027         if (   ( $semicolons_before_block_self_destruct == 0 )
8028             && ( $max_index_to_go >= 0 )
8029             && ( $types_to_go[$max_index_to_go] eq ';' )
8030             && ( $$rtokens[0] ne '}' ) )
8031         {
8032             destroy_one_line_block();
8033             output_line_to_go();
8034         }
8035
8036         # loop to process the tokens one-by-one
8037         $type  = 'b';
8038         $token = "";
8039
8040         foreach $j ( 0 .. $jmax ) {
8041
8042             # pull out the local values for this token
8043             extract_token($j);
8044
8045             if ( $type eq '#' ) {
8046
8047                 # trim trailing whitespace
8048                 # (there is no option at present to prevent this)
8049                 $token =~ s/\s*$//;
8050
8051                 if (
8052                     $rOpts->{'delete-side-comments'}
8053
8054                     # delete closing side comments if necessary
8055                     || (   $rOpts->{'delete-closing-side-comments'}
8056                         && $token =~ /$closing_side_comment_prefix_pattern/o
8057                         && $last_nonblank_block_type =~
8058                         /$closing_side_comment_list_pattern/o )
8059                   )
8060                 {
8061                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8062                         unstore_token_to_go();
8063                     }
8064                     last;
8065                 }
8066             }
8067
8068             # If we are continuing after seeing a right curly brace, flush
8069             # buffer unless we see what we are looking for, as in
8070             #   } else ...
8071             if ( $rbrace_follower && $type ne 'b' ) {
8072
8073                 unless ( $rbrace_follower->{$token} ) {
8074                     output_line_to_go();
8075                 }
8076                 $rbrace_follower = undef;
8077             }
8078
8079             $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8080             $next_nonblank_token      = $$rtokens[$j_next];
8081             $next_nonblank_token_type = $$rtoken_type[$j_next];
8082
8083             #--------------------------------------------------------
8084             # Start of section to patch token text
8085             #--------------------------------------------------------
8086
8087             # Modify certain tokens here for whitespace
8088             # The following is not yet done, but could be:
8089             #   sub (x x x)
8090             if ( $type =~ /^[wit]$/ ) {
8091
8092                 # Examples:
8093                 # change '$  var'  to '$var' etc
8094                 #        '-> new'  to '->new'
8095                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8096                     $token =~ s/\s*//g;
8097                 }
8098
8099                 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8100             }
8101
8102             # change 'LABEL   :'   to 'LABEL:'
8103             elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8104
8105             # patch to add space to something like "x10"
8106             # This avoids having to split this token in the pre-tokenizer
8107             elsif ( $type eq 'n' ) {
8108                 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8109             }
8110
8111             elsif ( $type eq 'Q' ) {
8112                 note_embedded_tab() if ( $token =~ "\t" );
8113
8114                 # make note of something like '$var = s/xxx/yyy/;'
8115                 # in case it should have been '$var =~ s/xxx/yyy/;'
8116                 if (
8117                        $token               =~ /^(s|tr|y|m|\/)/
8118                     && $last_nonblank_token =~ /^(=|==|!=)$/
8119
8120                     # precededed by simple scalar
8121                     && $last_last_nonblank_type eq 'i'
8122                     && $last_last_nonblank_token =~ /^\$/
8123
8124                     # followed by some kind of termination
8125                     # (but give complaint if we can's see far enough ahead)
8126                     && $next_nonblank_token =~ /^[; \)\}]$/
8127
8128                     # scalar is not decleared
8129                     && !(
8130                            $types_to_go[0] eq 'k'
8131                         && $tokens_to_go[0] =~ /^(my|our|local)$/
8132                     )
8133                   )
8134                 {
8135                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8136                     complain(
8137 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8138                     );
8139                 }
8140             }
8141
8142            # trim blanks from right of qw quotes
8143            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8144             elsif ( $type eq 'q' ) {
8145                 $token =~ s/\s*$//;
8146                 note_embedded_tab() if ( $token =~ "\t" );
8147             }
8148
8149             #--------------------------------------------------------
8150             # End of section to patch token text
8151             #--------------------------------------------------------
8152
8153             # insert any needed whitespace
8154             if (   ( $type ne 'b' )
8155                 && ( $max_index_to_go >= 0 )
8156                 && ( $types_to_go[$max_index_to_go] ne 'b' )
8157                 && $rOpts_add_whitespace )
8158             {
8159                 my $ws = $$rwhite_space_flag[$j];
8160
8161                 if ( $ws == 1 ) {
8162                     insert_new_token_to_go( ' ', 'b', $slevel,
8163                         $no_internal_newlines );
8164                 }
8165             }
8166
8167             # Do not allow breaks which would promote a side comment to a
8168             # block comment.  In order to allow a break before an opening
8169             # or closing BLOCK, followed by a side comment, those sections
8170             # of code will handle this flag separately.
8171             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8172             my $is_opening_BLOCK =
8173               (      $type eq '{'
8174                   && $token eq '{'
8175                   && $block_type
8176                   && $block_type ne 't' );
8177             my $is_closing_BLOCK =
8178               (      $type eq '}'
8179                   && $token eq '}'
8180                   && $block_type
8181                   && $block_type ne 't' );
8182
8183             if (   $side_comment_follows
8184                 && !$is_opening_BLOCK
8185                 && !$is_closing_BLOCK )
8186             {
8187                 $no_internal_newlines = 1;
8188             }
8189
8190             # We're only going to handle breaking for code BLOCKS at this
8191             # (top) level.  Other indentation breaks will be handled by
8192             # sub scan_list, which is better suited to dealing with them.
8193             if ($is_opening_BLOCK) {
8194
8195                 # Tentatively output this token.  This is required before
8196                 # calling starting_one_line_block.  We may have to unstore
8197                 # it, though, if we have to break before it.
8198                 store_token_to_go($side_comment_follows);
8199
8200                 # Look ahead to see if we might form a one-line block
8201                 my $too_long =
8202                   starting_one_line_block( $j, $jmax, $level, $slevel,
8203                     $ci_level, $rtokens, $rtoken_type, $rblock_type );
8204                 clear_breakpoint_undo_stack();
8205
8206                 # to simplify the logic below, set a flag to indicate if
8207                 # this opening brace is far from the keyword which introduces it
8208                 my $keyword_on_same_line = 1;
8209                 if (   ( $max_index_to_go >= 0 )
8210                     && ( $last_nonblank_type eq ')' ) )
8211                 {
8212                     if (   $block_type =~ /^(if|else|elsif)$/
8213                         && ( $tokens_to_go[0] eq '}' )
8214                         && $rOpts_cuddled_else )
8215                     {
8216                         $keyword_on_same_line = 1;
8217                     }
8218                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8219                     {
8220                         $keyword_on_same_line = 0;
8221                     }
8222                 }
8223
8224                 # decide if user requested break before '{'
8225                 my $want_break =
8226
8227                   # use -bl flag if not a sub block of any type
8228                   $block_type !~ /^sub/
8229                   ? $rOpts->{'opening-brace-on-new-line'}
8230
8231                   # use -sbl flag unless this is an anonymous sub block
8232                   : $block_type !~ /^sub\W*$/
8233                   ? $rOpts->{'opening-sub-brace-on-new-line'}
8234
8235                   # do not break for anonymous subs
8236                   : 0;
8237
8238                 # Break before an opening '{' ...
8239                 if (
8240
8241                     # if requested
8242                     $want_break
8243
8244                     # and we were unable to start looking for a block,
8245                     && $index_start_one_line_block == UNDEFINED_INDEX
8246
8247                     # or if it will not be on same line as its keyword, so that
8248                     # it will be outdented (eval.t, overload.t), and the user
8249                     # has not insisted on keeping it on the right
8250                     || (   !$keyword_on_same_line
8251                         && !$rOpts->{'opening-brace-always-on-right'} )
8252
8253                   )
8254                 {
8255
8256                     # but only if allowed
8257                     unless ($no_internal_newlines) {
8258
8259                         # since we already stored this token, we must unstore it
8260                         unstore_token_to_go();
8261
8262                         # then output the line
8263                         output_line_to_go();
8264
8265                         # and now store this token at the start of a new line
8266                         store_token_to_go($side_comment_follows);
8267                     }
8268                 }
8269
8270                 # Now update for side comment
8271                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8272
8273                 # now output this line
8274                 unless ($no_internal_newlines) {
8275                     output_line_to_go();
8276                 }
8277             }
8278
8279             elsif ($is_closing_BLOCK) {
8280
8281                 # If there is a pending one-line block ..
8282                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8283
8284                     # we have to terminate it if..
8285                     if (
8286
8287                     # it is too long (final length may be different from
8288                     # initial estimate). note: must allow 1 space for this token
8289                         excess_line_length( $index_start_one_line_block,
8290                             $max_index_to_go ) >= 0
8291
8292                         # or if it has too many semicolons
8293                         || (   $semicolons_before_block_self_destruct == 0
8294                             && $last_nonblank_type ne ';' )
8295                       )
8296                     {
8297                         destroy_one_line_block();
8298                     }
8299                 }
8300
8301                 # put a break before this closing curly brace if appropriate
8302                 unless ( $no_internal_newlines
8303                     || $index_start_one_line_block != UNDEFINED_INDEX )
8304                 {
8305
8306                     # add missing semicolon if ...
8307                     # there are some tokens
8308                     if (
8309                         ( $max_index_to_go > 0 )
8310
8311                         # and we don't have one
8312                         && ( $last_nonblank_type ne ';' )
8313
8314                         # patch until some block type issues are fixed:
8315                         # Do not add semi-colon for block types '{',
8316                         # '}', and ';' because we cannot be sure yet
8317                         # that this is a block and not an anonomyous
8318                         # hash (blktype.t, blktype1.t)
8319                         && ( $block_type !~ /^[\{\};]$/ )
8320
8321                         # it seems best not to add semicolons in these
8322                         # special block types: sort|map|grep
8323                         && ( !$is_sort_map_grep{$block_type} )
8324
8325                         # and we are allowed to do so.
8326                         && $rOpts->{'add-semicolons'}
8327                       )
8328                     {
8329
8330                         save_current_token();
8331                         $token  = ';';
8332                         $type   = ';';
8333                         $level  = $levels_to_go[$max_index_to_go];
8334                         $slevel = $nesting_depth_to_go[$max_index_to_go];
8335                         $nesting_blocks =
8336                           $nesting_blocks_to_go[$max_index_to_go];
8337                         $ci_level       = $ci_levels_to_go[$max_index_to_go];
8338                         $block_type     = "";
8339                         $container_type = "";
8340                         $container_environment = "";
8341                         $type_sequence         = "";
8342
8343                         # Note - we remove any blank AFTER extracting its
8344                         # parameters such as level, etc, above
8345                         if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8346                             unstore_token_to_go();
8347                         }
8348                         store_token_to_go();
8349
8350                         note_added_semicolon();
8351                         restore_current_token();
8352                     }
8353
8354                     # then write out everything before this closing curly brace
8355                     output_line_to_go();
8356
8357                 }
8358
8359                 # Now update for side comment
8360                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8361
8362                 # store the closing curly brace
8363                 store_token_to_go();
8364
8365                 # ok, we just stored a closing curly brace.  Often, but
8366                 # not always, we want to end the line immediately.
8367                 # So now we have to check for special cases.
8368
8369                 # if this '}' successfully ends a one-line block..
8370                 my $is_one_line_block = 0;
8371                 my $keep_going        = 0;
8372                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8373
8374                     # Remember the type of token just before the
8375                     # opening brace.  It would be more general to use
8376                     # a stack, but this will work for one-line blocks.
8377                     $is_one_line_block =
8378                       $types_to_go[$index_start_one_line_block];
8379
8380                     # we have to actually make it by removing tentative
8381                     # breaks that were set within it
8382                     undo_forced_breakpoint_stack(0);
8383                     set_nobreaks( $index_start_one_line_block,
8384                         $max_index_to_go - 1 );
8385
8386                     # then re-initialize for the next one-line block
8387                     destroy_one_line_block();
8388
8389                     # then decide if we want to break after the '}' ..
8390                     # We will keep going to allow certain brace followers as in:
8391                     #   do { $ifclosed = 1; last } unless $losing;
8392                     #
8393                     # But make a line break if the curly ends a
8394                     # significant block:
8395                     if ( $is_until_while_for_if_elsif_else{$block_type} ) {
8396                         output_line_to_go() unless ($no_internal_newlines);
8397                     }
8398                 }
8399
8400                 # set string indicating what we need to look for brace follower
8401                 # tokens
8402                 if ( $block_type eq 'do' ) {
8403                     $rbrace_follower = \%is_do_follower;
8404                 }
8405                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
8406                     $rbrace_follower = \%is_if_brace_follower;
8407                 }
8408                 elsif ( $block_type eq 'else' ) {
8409                     $rbrace_follower = \%is_else_brace_follower;
8410                 }
8411
8412                 # added eval for borris.t
8413                 elsif ($is_sort_map_grep_eval{$block_type}
8414                     || $is_one_line_block eq 'G' )
8415                 {
8416                     $rbrace_follower = undef;
8417                     $keep_going      = 1;
8418                 }
8419
8420                 # anonymous sub
8421                 elsif ( $block_type =~ /^sub\W*$/ ) {
8422
8423                     if ($is_one_line_block) {
8424                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
8425                     }
8426                     else {
8427                         $rbrace_follower = \%is_anon_sub_brace_follower;
8428                     }
8429                 }
8430
8431                 # TESTING ONLY for SWITCH/CASE - this is where to start
8432                 # recoding to retain else's on the same line as a case,
8433                 # but there is a lot more that would need to be done.
8434                 ##elsif ($block_type eq 'case') {$rbrace_follower = {else=>1};}
8435
8436                 # None of the above: specify what can follow a closing
8437                 # brace of a block which is not an
8438                 # if/elsif/else/do/sort/map/grep/eval
8439                 # Testfiles:
8440                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
8441                 else {
8442                     $rbrace_follower = \%is_other_brace_follower;
8443                 }
8444
8445                 # See if an elsif block is followed by another elsif or else;
8446                 # complain if not.
8447                 if ( $block_type eq 'elsif' ) {
8448
8449                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
8450                         $looking_for_else = 1;    # ok, check on next line
8451                     }
8452                     else {
8453
8454                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
8455                             write_logfile_entry("No else block :(\n");
8456                         }
8457                     }
8458                 }
8459
8460                 # keep going after certain block types (map,sort,grep,eval)
8461                 # added eval for borris.t
8462                 if ($keep_going) {
8463
8464                     # keep going
8465                 }
8466
8467                 # if no more tokens, postpone decision until re-entring
8468                 elsif ( ( $next_nonblank_token_type eq 'b' )
8469                     && $rOpts_add_newlines )
8470                 {
8471                     unless ($rbrace_follower) {
8472                         output_line_to_go() unless ($no_internal_newlines);
8473                     }
8474                 }
8475
8476                 elsif ($rbrace_follower) {
8477
8478                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
8479                         output_line_to_go() unless ($no_internal_newlines);
8480                     }
8481                     $rbrace_follower = undef;
8482                 }
8483
8484                 else {
8485                     output_line_to_go() unless ($no_internal_newlines);
8486                 }
8487
8488             }    # end treatment of closing block token
8489
8490             # handle semicolon
8491             elsif ( $type eq ';' ) {
8492
8493                 # kill one-line blocks with too many semicolons
8494                 $semicolons_before_block_self_destruct--;
8495                 if (
8496                     ( $semicolons_before_block_self_destruct < 0 )
8497                     || (   $semicolons_before_block_self_destruct == 0
8498                         && $next_nonblank_token_type !~ /^[b\}]$/ )
8499                   )
8500                 {
8501                     destroy_one_line_block();
8502                 }
8503
8504                 # Remove unnecessary semicolons, but not after bare
8505                 # blocks, where it could be unsafe if the brace is
8506                 # mistokenized.
8507                 if (
8508                     (
8509                         $last_nonblank_token eq '}'
8510                         && (
8511                             $is_block_without_semicolon{
8512                                 $last_nonblank_block_type}
8513                             || $last_nonblank_block_type =~ /^sub\s+\w/
8514                             || $last_nonblank_block_type =~ /^\w+:$/ )
8515                     )
8516                     || $last_nonblank_type eq ';'
8517                   )
8518                 {
8519
8520                     if (
8521                         $rOpts->{'delete-semicolons'}
8522
8523                         # don't delete ; before a # because it would promote it
8524                         # to a block comment
8525                         && ( $next_nonblank_token_type ne '#' )
8526                       )
8527                     {
8528                         note_deleted_semicolon();
8529                         output_line_to_go()
8530                           unless ( $no_internal_newlines
8531                             || $index_start_one_line_block != UNDEFINED_INDEX );
8532                         next;
8533                     }
8534                     else {
8535                         write_logfile_entry("Extra ';'\n");
8536                     }
8537                 }
8538                 store_token_to_go();
8539
8540                 output_line_to_go()
8541                   unless ( $no_internal_newlines
8542                     || ( $next_nonblank_token eq '}' ) );
8543
8544             }
8545
8546             # handle here_doc target string
8547             elsif ( $type eq 'h' ) {
8548                 $no_internal_newlines =
8549                   1;    # no newlines after seeing here-target
8550                 destroy_one_line_block();
8551                 store_token_to_go();
8552             }
8553
8554             # handle all other token types
8555             else {
8556
8557                 # if this is a blank...
8558                 if ( $type eq 'b' ) {
8559
8560                     # make it just one character
8561                     $token = ' ' if $rOpts_add_whitespace;
8562
8563                     # delete it if unwanted by whitespace rules
8564                     # or we are deleting all whitespace
8565                     my $ws = $$rwhite_space_flag[ $j + 1 ];
8566                     if ( ( defined($ws) && $ws == -1 )
8567                         || $rOpts_delete_old_whitespace )
8568                     {
8569
8570                         # unless it might make a syntax error
8571                         next
8572                           unless is_essential_whitespace(
8573                             $last_last_nonblank_token,
8574                             $last_last_nonblank_type,
8575                             $tokens_to_go[$max_index_to_go],
8576                             $types_to_go[$max_index_to_go],
8577                             $$rtokens[ $j + 1 ],
8578                             $$rtoken_type[ $j + 1 ]
8579                           );
8580                     }
8581                 }
8582                 store_token_to_go();
8583             }
8584
8585             # remember two previous nonblank OUTPUT tokens
8586             if ( $type ne '#' && $type ne 'b' ) {
8587                 $last_last_nonblank_token = $last_nonblank_token;
8588                 $last_last_nonblank_type  = $last_nonblank_type;
8589                 $last_nonblank_token      = $token;
8590                 $last_nonblank_type       = $type;
8591                 $last_nonblank_block_type = $block_type;
8592             }
8593
8594             # unset the continued-quote flag since it only applies to the
8595             # first token, and we want to resume normal formatting if
8596             # there are additional tokens on the line
8597             $in_continued_quote = 0;
8598
8599         }    # end of loop over all tokens in this 'line_of_tokens'
8600
8601         # we have to flush ..
8602         if (
8603
8604             # if there is a side comment
8605             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
8606
8607             # if this line which ends in a quote
8608             || $in_quote
8609
8610             # if this is a VERSION statement
8611             || $is_VERSION_statement
8612
8613             # to keep a label on one line if that is how it is now
8614             || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
8615
8616             # if we are instructed to keep all old line breaks
8617             || !$rOpts->{'delete-old-newlines'}
8618           )
8619         {
8620             destroy_one_line_block();
8621             output_line_to_go();
8622         }
8623
8624         # mark old line breakpoints in current output stream
8625         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_line_breaks ) {
8626             $old_breakpoint_to_go[$max_index_to_go] = 1;
8627         }
8628     }
8629 }    # end print_line_of_tokens
8630
8631 sub note_added_semicolon {
8632     $last_added_semicolon_at = $input_line_number;
8633     if ( $added_semicolon_count == 0 ) {
8634         $first_added_semicolon_at = $last_added_semicolon_at;
8635     }
8636     $added_semicolon_count++;
8637     write_logfile_entry("Added ';' here\n");
8638 }
8639
8640 sub note_deleted_semicolon {
8641     $last_deleted_semicolon_at = $input_line_number;
8642     if ( $deleted_semicolon_count == 0 ) {
8643         $first_deleted_semicolon_at = $last_deleted_semicolon_at;
8644     }
8645     $deleted_semicolon_count++;
8646     write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
8647 }
8648
8649 sub note_embedded_tab {
8650     $embedded_tab_count++;
8651     $last_embedded_tab_at = $input_line_number;
8652     if ( !$first_embedded_tab_at ) {
8653         $first_embedded_tab_at = $last_embedded_tab_at;
8654     }
8655
8656     if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
8657         write_logfile_entry("Embedded tabs in quote or pattern\n");
8658     }
8659 }
8660
8661 sub starting_one_line_block {
8662
8663     # after seeing an opening curly brace, look for the closing brace
8664     # and see if the entire block will fit on a line.  This routine is
8665     # not always right because it uses the old whitespace, so a check
8666     # is made later (at the closing brace) to make sure we really
8667     # have a one-line block.  We have to do this preliminary check,
8668     # though, because otherwise we would always break at a semicolon
8669     # within a one-line block if the block contains multiple statements.
8670
8671     my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
8672         $rblock_type )
8673       = @_;
8674
8675     # kill any current block - we can only go 1 deep
8676     destroy_one_line_block();
8677
8678     # return value:
8679     #  1=distance from start of block to opening brace exceeds line length
8680     #  0=otherwise
8681
8682     my $i_start = 0;
8683
8684     # shouldn't happen: there must have been a prior call to
8685     # store_token_to_go to put the opening brace in the output stream
8686     if ( $max_index_to_go < 0 ) {
8687         warning("program bug: store_token_to_go called incorrectly\n");
8688         report_definite_bug();
8689     }
8690     else {
8691
8692         # cannot use one-line blocks with cuddled else else/elsif lines
8693         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
8694             return 0;
8695         }
8696     }
8697
8698     my $block_type = $$rblock_type[$j];
8699
8700     # find the starting keyword for this block (such as 'if', 'else', ...)
8701
8702     if ( $block_type =~ /^[\{\}\;\:]$/ ) {
8703         $i_start = $max_index_to_go;
8704     }
8705
8706     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
8707
8708         # For something like "if (xxx) {", the keyword "if" will be
8709         # just after the most recent break. This will be 0 unless
8710         # we have just killed a one-line block and are starting another.
8711         # (doif.t)
8712         $i_start = $index_max_forced_break + 1;
8713         if ( $types_to_go[$i_start] eq 'b' ) {
8714             $i_start++;
8715         }
8716
8717         unless ( $tokens_to_go[$i_start] eq $block_type ) {
8718             return 0;
8719         }
8720     }
8721
8722     # the previous nonblank token should start these block types
8723     elsif (
8724         ( $last_last_nonblank_token_to_go eq $block_type )
8725         || (   $block_type =~ /^sub/
8726             && $last_last_nonblank_token_to_go =~ /^sub/ )
8727       )
8728     {
8729         $i_start = $last_last_nonblank_index_to_go;
8730     }
8731
8732     # patch for SWITCH/CASE to retain one-line case/when blocks
8733     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
8734         $i_start = $index_max_forced_break + 1;
8735         if ( $types_to_go[$i_start] eq 'b' ) {
8736             $i_start++;
8737         }
8738         unless ( $tokens_to_go[$i_start] eq $block_type ) {
8739             return 0;
8740         }
8741     }
8742
8743     else {
8744         return 1;
8745     }
8746
8747     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
8748
8749     my $i;
8750
8751     # see if length is too long to even start
8752     if ( $pos > $rOpts_maximum_line_length ) {
8753         return 1;
8754     }
8755
8756     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
8757
8758         # old whitespace could be arbitrarily large, so don't use it
8759         if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
8760         else { $pos += length( $$rtokens[$i] ) }
8761
8762         # Return false result if we exceed the maximum line length,
8763         if ( $pos > $rOpts_maximum_line_length ) {
8764             return 0;
8765         }
8766
8767         # or encounter another opening brace before finding the closing brace.
8768         elsif ($$rtokens[$i] eq '{'
8769             && $$rtoken_type[$i] eq '{'
8770             && $$rblock_type[$i] )
8771         {
8772             return 0;
8773         }
8774
8775         # if we find our closing brace..
8776         elsif ($$rtokens[$i] eq '}'
8777             && $$rtoken_type[$i] eq '}'
8778             && $$rblock_type[$i] )
8779         {
8780
8781             # be sure any trailing comment also fits on the line
8782             my $i_nonblank =
8783               ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
8784
8785             if ( $$rtoken_type[$i_nonblank] eq '#' ) {
8786                 $pos += length( $$rtokens[$i_nonblank] );
8787
8788                 if ( $i_nonblank > $i + 1 ) {
8789                     $pos += length( $$rtokens[ $i + 1 ] );
8790                 }
8791
8792                 if ( $pos > $rOpts_maximum_line_length ) {
8793                     return 0;
8794                 }
8795             }
8796
8797             # ok, it's a one-line block
8798             create_one_line_block( $i_start, 20 );
8799             return 0;
8800         }
8801
8802         # just keep going for other characters
8803         else {
8804         }
8805     }
8806
8807     # Allow certain types of new one-line blocks to form by joining
8808     # input lines.  These can be safely done, but for other block types,
8809     # we keep old one-line blocks but do not form new ones. It is not
8810     # always a good idea to make as many one-line blocks as possible,
8811     # so other types are not done.  The user can always use -mangle.
8812     if ( $is_sort_map_grep_eval{$block_type} ) {
8813         create_one_line_block( $i_start, 1 );
8814     }
8815
8816     return 0;
8817 }
8818
8819 sub unstore_token_to_go {
8820
8821     # remove most recent token from output stream
8822     if ( $max_index_to_go > 0 ) {
8823         $max_index_to_go--;
8824     }
8825     else {
8826         $max_index_to_go = UNDEFINED_INDEX;
8827     }
8828
8829 }
8830
8831 sub want_blank_line {
8832     flush();
8833     $file_writer_object->want_blank_line();
8834 }
8835
8836 sub write_unindented_line {
8837     flush();
8838     $file_writer_object->write_line( $_[0] );
8839 }
8840
8841 sub undo_lp_ci {
8842
8843     # If there is a single, long parameter within parens, like this:
8844     #
8845     #  $self->command( "/msg "
8846     #        . $infoline->chan
8847     #        . " You said $1, but did you know that it's square was "
8848     #        . $1 * $1 . " ?" );
8849     #
8850     # we can remove the continuation indentation of the 2nd and higher lines
8851     # to achieve this effect, which is more pleasing:
8852     #
8853     #  $self->command("/msg "
8854     #                 . $infoline->chan
8855     #                 . " You said $1, but did you know that it's square was "
8856     #                 . $1 * $1 . " ?");
8857
8858     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
8859     my $max_line = @$ri_first - 1;
8860
8861     # must be multiple lines
8862     return unless $max_line > $line_open;
8863
8864     my $lev_start     = $levels_to_go[$i_start];
8865     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
8866
8867     # see if all additional lines in this container have continuation
8868     # indentation
8869     my $n;
8870     my $line_1 = 1 + $line_open;
8871     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
8872         my $ibeg = $$ri_first[$n];
8873         my $iend = $$ri_last[$n];
8874         if ( $ibeg eq $closing_index ) { $n--; last }
8875         return if ( $lev_start != $levels_to_go[$ibeg] );
8876         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
8877         last   if ( $closing_index <= $iend );
8878     }
8879
8880     # we can reduce the indentation of all continuation lines
8881     my $continuation_line_count = $n - $line_open;
8882     @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
8883       (0) x ($continuation_line_count);
8884     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
8885       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
8886 }
8887
8888 {
8889
8890     # Identify certain operators which often occur in chains.
8891     # We will try to improve alignment when these lead a line.
8892     my %is_chain_operator;
8893
8894     BEGIN {
8895         @_ = qw(&& || and or : ? .);
8896         @is_chain_operator{@_} = (1) x scalar(@_);
8897     }
8898
8899     sub set_logical_padding {
8900
8901         # Look at a batch of lines and see if extra padding can improve the
8902         # alignment when there are certain leading operators. Here is an
8903         # example, in which some extra space is introduced before
8904         # '( $year' to make it line up with the subsequent lines:
8905         #
8906         #       if (   ( $Year < 1601 )
8907         #           || ( $Year > 2899 )
8908         #           || ( $EndYear < 1601 )
8909         #           || ( $EndYear > 2899 ) )
8910         #       {
8911         #           &Error_OutOfRange;
8912         #       }
8913         #
8914         my ( $ri_first, $ri_last ) = @_;
8915         my $max_line = @$ri_first - 1;
8916
8917         my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
8918             $pad_spaces, $tok_next, $has_leading_op_next, $has_leading_op );
8919
8920         # looking at each line of this batch..
8921         foreach $line ( 0 .. $max_line - 1 ) {
8922
8923             # see if the next line begins with a logical operator
8924             $ibeg                = $$ri_first[$line];
8925             $iend                = $$ri_last[$line];
8926             $ibeg_next           = $$ri_first[ $line + 1 ];
8927             $tok_next            = $tokens_to_go[$ibeg_next];
8928             $has_leading_op_next = $is_chain_operator{$tok_next};
8929             next unless ($has_leading_op_next);
8930
8931             # next line must not be at lesser depth
8932             next
8933               if ( $nesting_depth_to_go[$ibeg] >
8934                 $nesting_depth_to_go[$ibeg_next] );
8935
8936             # identify the token in this line to be padded on the left
8937             $ipad = undef;
8938
8939             # handle lines at same depth...
8940             if ( $nesting_depth_to_go[$ibeg] ==
8941                 $nesting_depth_to_go[$ibeg_next] )
8942             {
8943
8944                 # if this is not first line of the batch ...
8945                 if ( $line > 0 ) {
8946
8947                     # and we have leading operator
8948                     next if $has_leading_op;
8949
8950                     # and ..
8951                     # 1. the previous line is at lesser depth, or
8952                     # 2. the previous line ends in an assignment
8953                     #
8954                     # Example 1: previous line at lesser depth
8955                     #       if (   ( $Year < 1601 )      # <- we are here but
8956                     #           || ( $Year > 2899 )      #  list has not yet
8957                     #           || ( $EndYear < 1601 )   # collapsed vertically
8958                     #           || ( $EndYear > 2899 ) )
8959                     #       {
8960                     #
8961                     # Example 2: previous line ending in assignment:
8962                     #    $leapyear =
8963                     #        $year % 4   ? 0     # <- We are here
8964                     #      : $year % 100 ? 1
8965                     #      : $year % 400 ? 0
8966                     #      : 1;
8967                     next
8968                       unless (
8969                         $is_assignment{ $types_to_go[$iendm] }
8970                         || ( $nesting_depth_to_go[$ibegm] <
8971                             $nesting_depth_to_go[$ibeg] )
8972                       );
8973
8974                     # we will add padding before the first token
8975                     $ipad = $ibeg;
8976                 }
8977
8978                 # for first line of the batch..
8979                 else {
8980
8981                     # WARNING: Never indent if first line is starting in a
8982                     # continued quote, which would change the quote.
8983                     next if $starting_in_quote;
8984
8985                     # if this is text after closing '}'
8986                     # then look for an interior token to pad
8987                     if ( $types_to_go[$ibeg] eq '}' ) {
8988
8989                     }
8990
8991                     # otherwise, we might pad if it looks really good
8992                     else {
8993
8994                         # we might pad token $ibeg, so be sure that it
8995                         # is at the same depth as the next line.
8996                         next
8997                           if ( $nesting_depth_to_go[ $ibeg + 1 ] !=
8998                             $nesting_depth_to_go[$ibeg_next] );
8999
9000                         # We can pad on line 1 of a statement if at least 3
9001                         # lines will be aligned. Otherwise, it
9002                         # can look very confusing.
9003                         if ( $max_line > 2 ) {
9004                             my $leading_token = $tokens_to_go[$ibeg_next];
9005
9006                             # never indent line 1 of a '.' series because
9007                             # previous line is most likely at same level.
9008                             # TODO: we should also look at the leasing_spaces
9009                             # of the last output line and skip if it is same
9010                             # as this line.
9011                             next if ( $leading_token eq '.' );
9012
9013                             my $count = 1;
9014                             foreach my $l ( 2 .. 3 ) {
9015                                 my $ibeg_next_next = $$ri_first[ $line + $l ];
9016                                 next
9017                                   unless $tokens_to_go[$ibeg_next_next] eq
9018                                   $leading_token;
9019                                 $count++;
9020                             }
9021                             next unless $count == 3;
9022                             $ipad = $ibeg;
9023                         }
9024                         else {
9025                             next;
9026                         }
9027                     }
9028                 }
9029             }
9030
9031             # find interior token to pad if necessary
9032             if ( !defined($ipad) ) {
9033
9034                 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
9035
9036                     # find any unclosed container
9037                     next
9038                       unless ( $type_sequence_to_go[$i]
9039                         && $mate_index_to_go[$i] > $iend );
9040
9041                     # find next nonblank token to pad
9042                     $ipad = $i + 1;
9043                     if ( $types_to_go[$ipad] eq 'b' ) {
9044                         $ipad++;
9045                         last if ( $ipad > $iend );
9046                     }
9047                 }
9048                 last unless $ipad;
9049             }
9050
9051             # next line must not be at greater depth
9052             my $iend_next = $$ri_last[ $line + 1 ];
9053             next
9054               if ( $nesting_depth_to_go[ $iend_next + 1 ] >
9055                 $nesting_depth_to_go[$ipad] );
9056
9057             # lines must be somewhat similar to be padded..
9058             my $inext_next = $ibeg_next + 1;
9059             if ( $types_to_go[$inext_next] eq 'b' ) {
9060                 $inext_next++;
9061             }
9062             my $type = $types_to_go[$ipad];
9063
9064             # see if there are multiple continuation lines
9065             my $logical_continuation_lines = 1;
9066             if ( $line + 2 <= $max_line ) {
9067                 my $leading_token  = $tokens_to_go[$ibeg_next];
9068                 my $ibeg_next_next = $$ri_first[ $line + 2 ];
9069                 if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
9070                     && $nesting_depth_to_go[$ibeg_next] eq
9071                     $nesting_depth_to_go[$ibeg_next_next] )
9072                 {
9073                     $logical_continuation_lines++;
9074                 }
9075             }
9076             if (
9077
9078                 # either we have multiple continuation lines to follow
9079                 # and we are not padding the first token
9080                 ( $logical_continuation_lines > 1 && $ipad > 0 )
9081
9082                 # or..
9083                 || (
9084
9085                     # types must match
9086                     $types_to_go[$inext_next] eq $type
9087
9088                     # and keywords must match if keyword
9089                     && !(
9090                            $type eq 'k'
9091                         && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
9092                     )
9093                 )
9094               )
9095             {
9096
9097                 #----------------------begin special check---------------
9098                 #
9099                 # One more check is needed before we can make the pad.
9100                 # If we are in a list with some long items, we want each
9101                 # item to stand out.  So in the following example, the
9102                 # first line begining with '$casefold->' would look good
9103                 # padded to align with the next line, but then it
9104                 # would be indented more than the last line, so we
9105                 # won't do it.
9106                 #
9107                 #  ok(
9108                 #      $casefold->{code}         eq '0041'
9109                 #        && $casefold->{status}  eq 'C'
9110                 #        && $casefold->{mapping} eq '0061',
9111                 #      'casefold 0x41'
9112                 #  );
9113                 #
9114                 # Note:
9115                 # It would be faster, and almost as good, to use a comma
9116                 # count, and not pad if comma_count > 1 and the previous
9117                 # line did not end with a comma.
9118                 #
9119                 my $ok_to_pad = 1;
9120
9121                 my $ibg   = $$ri_first[ $line + 1 ];
9122                 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
9123
9124                 # just use simplified formula for leading spaces to avoid
9125                 # needless sub calls
9126                 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
9127
9128                 # look at each line beyond the next ..
9129                 my $l = $line + 1;
9130                 foreach $l ( $line + 2 .. $max_line ) {
9131                     my $ibg = $$ri_first[$l];
9132
9133                     # quit looking at the end of this container
9134                     last
9135                       if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
9136                       || ( $nesting_depth_to_go[$ibg] < $depth );
9137
9138                     # cannot do the pad if a later line would be
9139                     # outdented more
9140                     if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
9141                         $ok_to_pad = 0;
9142                         last;
9143                     }
9144                 }
9145
9146                 # don't pad if we end in a broken list
9147                 if ( $l == $max_line ) {
9148                     my $i2 = $$ri_last[$l];
9149                     if ( $types_to_go[$i2] eq '#' ) {
9150                         my $i1 = $$ri_first[$l];
9151                         next
9152                           if (
9153                             terminal_type( \@types_to_go, \@block_type_to_go,
9154                                 $i1, $i2 ) eq ','
9155                           );
9156                     }
9157                 }
9158                 next unless $ok_to_pad;
9159
9160                 #----------------------end special check---------------
9161
9162                 my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
9163                 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
9164                 $pad_spaces = $length_2 - $length_1;
9165
9166                 # make sure this won't change if -lp is used
9167                 my $indentation_1 = $leading_spaces_to_go[$ibeg];
9168                 if ( ref($indentation_1) ) {
9169                     if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
9170                         my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
9171                         unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 )
9172                         {
9173                             $pad_spaces = 0;
9174                         }
9175                     }
9176                 }
9177
9178                 # we might be able to handle a pad of -1 by removing a blank
9179                 # token
9180                 if ( $pad_spaces < 0 ) {
9181                     if ( $pad_spaces == -1 ) {
9182                         if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
9183                         {
9184                             $tokens_to_go[ $ipad - 1 ] = '';
9185                         }
9186                     }
9187                     $pad_spaces = 0;
9188                 }
9189
9190                 # now apply any padding for alignment
9191                 if ( $ipad >= 0 && $pad_spaces ) {
9192                     my $length_t = total_line_length( $ibeg, $iend );
9193                     if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length )
9194                     {
9195                         $tokens_to_go[$ipad] =
9196                           ' ' x $pad_spaces . $tokens_to_go[$ipad];
9197                     }
9198                 }
9199             }
9200         }
9201         continue {
9202             $iendm          = $iend;
9203             $ibegm          = $ibeg;
9204             $has_leading_op = $has_leading_op_next;
9205         }    # end of loop over lines
9206         return;
9207     }
9208 }
9209
9210 sub correct_lp_indentation {
9211
9212     # When the -lp option is used, we need to make a last pass through
9213     # each line to correct the indentation positions in case they differ
9214     # from the predictions.  This is necessary because perltidy uses a
9215     # predictor/corrector method for aligning with opening parens.  The
9216     # predictor is usually good, but sometimes stumbles.  The corrector
9217     # tries to patch things up once the actual opening paren locations
9218     # are known.
9219     my ( $ri_first, $ri_last ) = @_;
9220     my $do_not_pad = 0;
9221
9222     #  Note on flag '$do_not_pad':
9223     #  We want to avoid a situation like this, where the aligner inserts
9224     #  whitespace before the '=' to align it with a previous '=', because
9225     #  otherwise the parens might become mis-aligned in a situation like
9226     #  this, where the '=' has become aligned with the previous line,
9227     #  pushing the opening '(' forward beyond where we want it.
9228     #
9229     #  $mkFloor::currentRoom = '';
9230     #  $mkFloor::c_entry     = $c->Entry(
9231     #                                 -width        => '10',
9232     #                                 -relief       => 'sunken',
9233     #                                 ...
9234     #                                 );
9235     #
9236     #  We leave it to the aligner to decide how to do this.
9237
9238     # first remove continuation indentation if appropriate
9239     my $max_line = @$ri_first - 1;
9240
9241     # looking at each line of this batch..
9242     my ( $ibeg, $iend );
9243     my $line;
9244     foreach $line ( 0 .. $max_line ) {
9245         $ibeg = $$ri_first[$line];
9246         $iend = $$ri_last[$line];
9247
9248         # looking at each token in this output line..
9249         my $i;
9250         foreach $i ( $ibeg .. $iend ) {
9251
9252             # How many space characters to place before this token
9253             # for special alignment.  Actual padding is done in the
9254             # continue block.
9255
9256             # looking for next unvisited indentation item
9257             my $indentation = $leading_spaces_to_go[$i];
9258             if ( !$indentation->get_MARKED() ) {
9259                 $indentation->set_MARKED(1);
9260
9261                 # looking for indentation item for which we are aligning
9262                 # with parens, braces, and brackets
9263                 next unless ( $indentation->get_ALIGN_PAREN() );
9264
9265                 # skip closed container on this line
9266                 if ( $i > $ibeg ) {
9267                     my $im = $i - 1;
9268                     if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
9269                     if (   $type_sequence_to_go[$im]
9270                         && $mate_index_to_go[$im] <= $iend )
9271                     {
9272                         next;
9273                     }
9274                 }
9275
9276                 if ( $line == 1 && $i == $ibeg ) {
9277                     $do_not_pad = 1;
9278                 }
9279
9280                 # Ok, let's see what the error is and try to fix it
9281                 my $actual_pos;
9282                 my $predicted_pos = $indentation->get_SPACES();
9283                 if ( $i > $ibeg ) {
9284
9285                     # token is mid-line - use length to previous token
9286                     $actual_pos = total_line_length( $ibeg, $i - 1 );
9287
9288                     # for mid-line token, we must check to see if all
9289                     # additional lines have continuation indentation,
9290                     # and remove it if so.  Otherwise, we do not get
9291                     # good alignment.
9292                     my $closing_index = $indentation->get_CLOSED();
9293                     if ( $closing_index > $iend ) {
9294                         my $ibeg_next = $$ri_first[ $line + 1 ];
9295                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
9296                             undo_lp_ci( $line, $i, $closing_index, $ri_first,
9297                                 $ri_last );
9298                         }
9299                     }
9300                 }
9301                 elsif ( $line > 0 ) {
9302
9303                     # handle case where token starts a new line;
9304                     # use length of previous line
9305                     my $ibegm = $$ri_first[ $line - 1 ];
9306                     my $iendm = $$ri_last[ $line - 1 ];
9307                     $actual_pos = total_line_length( $ibegm, $iendm );
9308
9309                     # follow -pt style
9310                     ++$actual_pos
9311                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
9312                 }
9313                 else {
9314
9315                     # token is first character of first line of batch
9316                     $actual_pos = $predicted_pos;
9317                 }
9318
9319                 my $move_right = $actual_pos - $predicted_pos;
9320
9321                 # done if no error to correct (gnu2.t)
9322                 if ( $move_right == 0 ) {
9323                     $indentation->set_RECOVERABLE_SPACES($move_right);
9324                     next;
9325                 }
9326
9327                 # if we have not seen closure for this indentation in
9328                 # this batch, we can only pass on a request to the
9329                 # vertical aligner
9330                 my $closing_index = $indentation->get_CLOSED();
9331
9332                 if ( $closing_index < 0 ) {
9333                     $indentation->set_RECOVERABLE_SPACES($move_right);
9334                     next;
9335                 }
9336
9337                 # If necessary, look ahead to see if there is really any
9338                 # leading whitespace dependent on this whitespace, and
9339                 # also find the longest line using this whitespace.
9340                 # Since it is always safe to move left if there are no
9341                 # dependents, we only need to do this if we may have
9342                 # dependent nodes or need to move right.
9343
9344                 my $right_margin = 0;
9345                 my $have_child   = $indentation->get_HAVE_CHILD();
9346
9347                 my %saw_indentation;
9348                 my $line_count = 1;
9349                 $saw_indentation{$indentation} = $indentation;
9350
9351                 if ( $have_child || $move_right > 0 ) {
9352                     $have_child = 0;
9353                     my $max_length = 0;
9354                     if ( $i == $ibeg ) {
9355                         $max_length = total_line_length( $ibeg, $iend );
9356                     }
9357
9358                     # look ahead at the rest of the lines of this batch..
9359                     my $line_t;
9360                     foreach $line_t ( $line + 1 .. $max_line ) {
9361                         my $ibeg_t = $$ri_first[$line_t];
9362                         my $iend_t = $$ri_last[$line_t];
9363                         last if ( $closing_index <= $ibeg_t );
9364
9365                         # remember all different indentation objects
9366                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
9367                         $saw_indentation{$indentation_t} = $indentation_t;
9368                         $line_count++;
9369
9370                         # remember longest line in the group
9371                         my $length_t = total_line_length( $ibeg_t, $iend_t );
9372                         if ( $length_t > $max_length ) {
9373                             $max_length = $length_t;
9374                         }
9375                     }
9376                     $right_margin = $rOpts_maximum_line_length - $max_length;
9377                     if ( $right_margin < 0 ) { $right_margin = 0 }
9378                 }
9379
9380                 my $first_line_comma_count =
9381                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
9382                 my $comma_count = $indentation->get_COMMA_COUNT();
9383                 my $arrow_count = $indentation->get_ARROW_COUNT();
9384
9385                 # This is a simple approximate test for vertical alignment:
9386                 # if we broke just after an opening paren, brace, bracket,
9387                 # and there are 2 or more commas in the first line,
9388                 # and there are no '=>'s,
9389                 # then we are probably vertically aligned.  We could set
9390                 # an exact flag in sub scan_list, but this is good
9391                 # enough.
9392                 my $indentation_count     = keys %saw_indentation;
9393                 my $is_vertically_aligned =
9394                   (      $i == $ibeg
9395                       && $first_line_comma_count > 1
9396                       && $indentation_count == 1
9397                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
9398
9399                 # Make the move if possible ..
9400                 if (
9401
9402                     # we can always move left
9403                     $move_right < 0
9404
9405                     # but we should only move right if we are sure it will
9406                     # not spoil vertical alignment
9407                     || ( $comma_count == 0 )
9408                     || ( $comma_count > 0 && !$is_vertically_aligned )
9409                   )
9410                 {
9411                     my $move =
9412                       ( $move_right <= $right_margin )
9413                       ? $move_right
9414                       : $right_margin;
9415
9416                     foreach ( keys %saw_indentation ) {
9417                         $saw_indentation{$_}
9418                           ->permanently_decrease_AVAILABLE_SPACES( -$move );
9419                     }
9420                 }
9421
9422                 # Otherwise, record what we want and the vertical aligner
9423                 # will try to recover it.
9424                 else {
9425                     $indentation->set_RECOVERABLE_SPACES($move_right);
9426                 }
9427             }
9428         }
9429     }
9430     return $do_not_pad;
9431 }
9432
9433 # flush is called to output any tokens in the pipeline, so that
9434 # an alternate source of lines can be written in the correct order
9435
9436 sub flush {
9437     destroy_one_line_block();
9438     output_line_to_go();
9439     Perl::Tidy::VerticalAligner::flush();
9440 }
9441
9442 # output_line_to_go sends one logical line of tokens on down the
9443 # pipeline to the VerticalAligner package, breaking the line into continuation
9444 # lines as necessary.  The line of tokens is ready to go in the "to_go"
9445 # arrays.
9446
9447 sub output_line_to_go {
9448
9449     # debug stuff; this routine can be called from many points
9450     FORMATTER_DEBUG_FLAG_OUTPUT && do {
9451         my ( $a, $b, $c ) = caller;
9452         write_diagnostics(
9453 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9454         );
9455         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9456         write_diagnostics("$output_str\n");
9457     };
9458
9459     # just set a tentative breakpoint if we might be in a one-line block
9460     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9461         set_forced_breakpoint($max_index_to_go);
9462         return;
9463     }
9464
9465     my $cscw_block_comment;
9466     $cscw_block_comment = add_closing_side_comment()
9467       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9468
9469     match_opening_and_closing_tokens();
9470
9471     # tell the -lp option we are outputting a batch so it can close
9472     # any unfinished items in its stack
9473     finish_lp_batch();
9474
9475     my $imin = 0;
9476     my $imax = $max_index_to_go;
9477
9478     # trim any blank tokens
9479     if ( $max_index_to_go >= 0 ) {
9480         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9481         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9482     }
9483
9484     # anything left to write?
9485     if ( $imin <= $imax ) {
9486
9487         # add a blank line before certain key types
9488         if ( $last_line_leading_type !~ /^[#b]/ ) {
9489             my $want_blank    = 0;
9490             my $leading_token = $tokens_to_go[$imin];
9491             my $leading_type  = $types_to_go[$imin];
9492
9493             # blank lines before subs except declarations and one-liners
9494             # MCONVERSION LOCATION - for sub tokenization change
9495             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9496                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9497                   && (
9498                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9499                         $imax ) !~ /^[\;\}]$/
9500                   );
9501             }
9502
9503             # break before all package declarations
9504             # MCONVERSION LOCATION - for tokenizaton change
9505             elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) {
9506                 $want_blank = ( $rOpts->{'blanks-before-subs'} );
9507             }
9508
9509             # break before certain key blocks except one-liners
9510             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9511                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9512                   && (
9513                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9514                         $imax ) ne '}'
9515                   );
9516             }
9517
9518             # Break before certain block types if we haven't had a break at this
9519             # level for a while.  This is the difficult decision..
9520             elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9521                 && $leading_type eq 'k' )
9522             {
9523                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9524                 if ( !defined($lc) ) { $lc = 0 }
9525
9526                 $want_blank = $rOpts->{'blanks-before-blocks'}
9527                   && $lc >= $rOpts->{'long-block-line-count'}
9528                   && $file_writer_object->get_consecutive_nonblank_lines() >=
9529                   $rOpts->{'long-block-line-count'}
9530                   && (
9531                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9532                         $imax ) ne '}'
9533                   );
9534             }
9535
9536             if ($want_blank) {
9537
9538                 # future: send blank line down normal path to VerticalAligner
9539                 Perl::Tidy::VerticalAligner::flush();
9540                 $file_writer_object->write_blank_code_line();
9541             }
9542         }
9543
9544         # update blank line variables and count number of consecutive
9545         # non-blank, non-comment lines at this level
9546         $last_last_line_leading_level = $last_line_leading_level;
9547         $last_line_leading_level      = $levels_to_go[$imin];
9548         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9549         $last_line_leading_type = $types_to_go[$imin];
9550         if (   $last_line_leading_level == $last_last_line_leading_level
9551             && $last_line_leading_type ne 'b'
9552             && $last_line_leading_type ne '#'
9553             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9554         {
9555             $nonblank_lines_at_depth[$last_line_leading_level]++;
9556         }
9557         else {
9558             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9559         }
9560
9561         FORMATTER_DEBUG_FLAG_FLUSH && do {
9562             my ( $package, $file, $line ) = caller;
9563             print
9564 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9565         };
9566
9567         # add a couple of extra terminal blank tokens
9568         pad_array_to_go();
9569
9570         # set all forced breakpoints for good list formatting
9571         my $saw_good_break = 0;
9572         my $is_long_line   = excess_line_length( $imin, $max_index_to_go ) > 0;
9573
9574         if (
9575             $max_index_to_go > 0
9576             && (
9577                    $is_long_line
9578                 || $old_line_count_in_batch > 1
9579                 || is_unbalanced_batch()
9580                 || (
9581                     $comma_count_in_batch
9582                     && (   $rOpts_maximum_fields_per_table > 0
9583                         || $rOpts_comma_arrow_breakpoints == 0 )
9584                 )
9585             )
9586           )
9587         {
9588             $saw_good_break = scan_list();
9589         }
9590
9591         # let $ri_first and $ri_last be references to lists of
9592         # first and last tokens of line fragments to output..
9593         my ( $ri_first, $ri_last );
9594
9595         # write a single line if..
9596         if (
9597
9598             # we aren't allowed to add any newlines
9599             !$rOpts_add_newlines
9600
9601             # or, we don't already have an interior breakpoint
9602             # and we didn't see a good breakpoint
9603             || (
9604                    !$forced_breakpoint_count
9605                 && !$saw_good_break
9606
9607                 # and this line is 'short'
9608                 && !$is_long_line
9609             )
9610           )
9611         {
9612             @$ri_first = ($imin);
9613             @$ri_last  = ($imax);
9614         }
9615
9616         # otherwise use multiple lines
9617         else {
9618
9619             ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
9620
9621             # now we do a correction step to clean this up a bit
9622             # (The only time we would not do this is for debugging)
9623             if ( $rOpts->{'recombine'} ) {
9624                 ( $ri_first, $ri_last ) =
9625                   recombine_breakpoints( $ri_first, $ri_last );
9626             }
9627         }
9628
9629         # do corrector step if -lp option is used
9630         my $do_not_pad = 0;
9631         if ($rOpts_line_up_parentheses) {
9632             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9633         }
9634         send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9635     }
9636     prepare_for_new_input_lines();
9637
9638     # output any new -cscw block comment
9639     if ($cscw_block_comment) {
9640         flush();
9641         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9642     }
9643 }
9644
9645 sub reset_block_text_accumulator {
9646
9647     # save text after 'if' and 'elsif' to append after 'else'
9648     if ($accumulating_text_for_block) {
9649
9650         if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
9651             push @{$rleading_block_if_elsif_text}, $leading_block_text;
9652         }
9653     }
9654     $accumulating_text_for_block        = "";
9655     $leading_block_text                 = "";
9656     $leading_block_text_level           = 0;
9657     $leading_block_text_length_exceeded = 0;
9658     $leading_block_text_line_number     = 0;
9659     $leading_block_text_line_length     = 0;
9660 }
9661
9662 sub set_block_text_accumulator {
9663     my $i = shift;
9664     $accumulating_text_for_block = $tokens_to_go[$i];
9665     if ( $accumulating_text_for_block !~ /^els/ ) {
9666         $rleading_block_if_elsif_text = [];
9667     }
9668     $leading_block_text             = "";
9669     $leading_block_text_level       = $levels_to_go[$i];
9670     $leading_block_text_line_number =
9671       $vertical_aligner_object->get_output_line_number();
9672     $leading_block_text_length_exceeded = 0;
9673
9674     # this will contain the column number of the last character
9675     # of the closing side comment
9676     $leading_block_text_line_length =
9677       length($accumulating_text_for_block) +
9678       length( $rOpts->{'closing-side-comment-prefix'} ) +
9679       $leading_block_text_level * $rOpts_indent_columns + 3;
9680 }
9681
9682 sub accumulate_block_text {
9683     my $i = shift;
9684
9685     # accumulate leading text for -csc, ignoring any side comments
9686     if (   $accumulating_text_for_block
9687         && !$leading_block_text_length_exceeded
9688         && $types_to_go[$i] ne '#' )
9689     {
9690
9691         my $added_length = length( $tokens_to_go[$i] );
9692         $added_length += 1 if $i == 0;
9693         my $new_line_length = $leading_block_text_line_length + $added_length;
9694
9695         # we can add this text if we don't exceed some limits..
9696         if (
9697
9698             # we must not have already exceeded the text length limit
9699             length($leading_block_text) <
9700             $rOpts_closing_side_comment_maximum_text
9701
9702             # and either:
9703             # the new total line length must be below the line length limit
9704             # or the new length must be below the text length limit
9705             # (ie, we may allow one token to exceed the text length limit)
9706             && ( $new_line_length < $rOpts_maximum_line_length
9707                 || length($leading_block_text) + $added_length <
9708                 $rOpts_closing_side_comment_maximum_text )
9709
9710             # UNLESS: we are adding a closing paren before the brace we seek.
9711             # This is an attempt to avoid situations where the ... to be
9712             # added are longer than the omitted right paren, as in:
9713
9714             #   foreach my $item (@a_rather_long_variable_name_here) {
9715             #      &whatever;
9716             #   } ## end foreach my $item (@a_rather_long_variable_name_here...
9717
9718             || (
9719                 $tokens_to_go[$i] eq ')'
9720                 && (
9721                     (
9722                            $i + 1 <= $max_index_to_go
9723                         && $block_type_to_go[ $i + 1 ] eq
9724                         $accumulating_text_for_block
9725                     )
9726                     || (   $i + 2 <= $max_index_to_go
9727                         && $block_type_to_go[ $i + 2 ] eq
9728                         $accumulating_text_for_block )
9729                 )
9730             )
9731           )
9732         {
9733
9734             # add an extra space at each newline
9735             if ( $i == 0 ) { $leading_block_text .= ' ' }
9736
9737             # add the token text
9738             $leading_block_text .= $tokens_to_go[$i];
9739             $leading_block_text_line_length = $new_line_length;
9740         }
9741
9742         # show that text was truncated if necessary
9743         elsif ( $types_to_go[$i] ne 'b' ) {
9744             $leading_block_text_length_exceeded = 1;
9745             $leading_block_text .= '...';
9746         }
9747     }
9748 }
9749
9750 {
9751     my %is_if_elsif_else_unless_while_until_for_foreach;
9752
9753     BEGIN {
9754
9755         # These block types may have text between the keyword and opening
9756         # curly.  Note: 'else' does not, but must be included to allow trailing
9757         # if/elsif text to be appended.
9758         # patch for SWITCH/CASE: added 'case' and 'when'
9759         @_ = qw(if elsif else unless while until for foreach case when);
9760         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
9761     }
9762
9763     sub accumulate_csc_text {
9764
9765         # called once per output buffer when -csc is used. Accumulates
9766         # the text placed after certain closing block braces.
9767         # Defines and returns the following for this buffer:
9768
9769         my $block_leading_text = "";    # the leading text of the last '}'
9770         my $rblock_leading_if_elsif_text;
9771         my $i_block_leading_text =
9772           -1;    # index of token owning block_leading_text
9773         my $block_line_count    = 100;    # how many lines the block spans
9774         my $terminal_type       = 'b';    # type of last nonblank token
9775         my $i_terminal          = 0;      # index of last nonblank token
9776         my $terminal_block_type = "";
9777
9778         for my $i ( 0 .. $max_index_to_go ) {
9779             my $type       = $types_to_go[$i];
9780             my $block_type = $block_type_to_go[$i];
9781             my $token      = $tokens_to_go[$i];
9782
9783             # remember last nonblank token type
9784             if ( $type ne '#' && $type ne 'b' ) {
9785                 $terminal_type       = $type;
9786                 $terminal_block_type = $block_type;
9787                 $i_terminal          = $i;
9788             }
9789
9790             my $type_sequence = $type_sequence_to_go[$i];
9791             if ( $block_type && $type_sequence ) {
9792
9793                 if ( $token eq '}' ) {
9794
9795                     # restore any leading text saved when we entered this block
9796                     if ( defined( $block_leading_text{$type_sequence} ) ) {
9797                         ( $block_leading_text, $rblock_leading_if_elsif_text ) =
9798                           @{ $block_leading_text{$type_sequence} };
9799                         $i_block_leading_text = $i;
9800                         delete $block_leading_text{$type_sequence};
9801                         $rleading_block_if_elsif_text =
9802                           $rblock_leading_if_elsif_text;
9803                     }
9804
9805                     # if we run into a '}' then we probably started accumulating
9806                     # at something like a trailing 'if' clause..no harm done.
9807                     if (   $accumulating_text_for_block
9808                         && $levels_to_go[$i] <= $leading_block_text_level )
9809                     {
9810                         my $lev = $levels_to_go[$i];
9811                         reset_block_text_accumulator();
9812                     }
9813
9814                     if ( defined( $block_opening_line_number{$type_sequence} ) )
9815                     {
9816                         my $output_line_number =
9817                           $vertical_aligner_object->get_output_line_number();
9818                         $block_line_count = $output_line_number -
9819                           $block_opening_line_number{$type_sequence} + 1;
9820                         delete $block_opening_line_number{$type_sequence};
9821                     }
9822                     else {
9823
9824                         # Error: block opening line undefined for this line..
9825                         # This shouldn't be possible, but it is not a
9826                         # significant problem.
9827                     }
9828                 }
9829
9830                 elsif ( $token eq '{' ) {
9831
9832                     my $line_number =
9833                       $vertical_aligner_object->get_output_line_number();
9834                     $block_opening_line_number{$type_sequence} = $line_number;
9835
9836                     if (   $accumulating_text_for_block
9837                         && $levels_to_go[$i] == $leading_block_text_level )
9838                     {
9839
9840                         if ( $accumulating_text_for_block eq $block_type ) {
9841
9842                             # save any leading text before we enter this block
9843                             $block_leading_text{$type_sequence} = [
9844                                 $leading_block_text,
9845                                 $rleading_block_if_elsif_text
9846                             ];
9847                             $block_opening_line_number{$type_sequence} =
9848                               $leading_block_text_line_number;
9849                             reset_block_text_accumulator();
9850                         }
9851                         else {
9852
9853                             # shouldn't happen, but not a serious error.
9854                             # We were accumulating -csc text for block type
9855                             # $accumulating_text_for_block and unexpectedly
9856                             # encountered a '{' for block type $block_type.
9857                         }
9858                     }
9859                 }
9860             }
9861
9862             if (   $type eq 'k'
9863                 && $csc_new_statement_ok
9864                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
9865                 && $token =~ /$closing_side_comment_list_pattern/o )
9866             {
9867                 set_block_text_accumulator($i);
9868             }
9869             else {
9870
9871                 # note: ignoring type 'q' because of tricks being played
9872                 # with 'q' for hanging side comments
9873                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
9874                     $csc_new_statement_ok =
9875                       ( $block_type || $type eq 'J' || $type eq ';' );
9876                 }
9877                 if (   $type eq ';'
9878                     && $accumulating_text_for_block
9879                     && $levels_to_go[$i] == $leading_block_text_level )
9880                 {
9881                     reset_block_text_accumulator();
9882                 }
9883                 else {
9884                     accumulate_block_text($i);
9885                 }
9886             }
9887         }
9888
9889         # Treat an 'else' block specially by adding preceding 'if' and
9890         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
9891         # especially for cuddled-else formatting.
9892         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
9893             $block_leading_text =
9894               make_else_csc_text( $i_terminal, $terminal_block_type,
9895                 $block_leading_text, $rblock_leading_if_elsif_text );
9896         }
9897
9898         return ( $terminal_type, $i_terminal, $i_block_leading_text,
9899             $block_leading_text, $block_line_count );
9900     }
9901 }
9902
9903 sub make_else_csc_text {
9904
9905     # create additional -csc text for an 'else' and optionally 'elsif',
9906     # depending on the value of switch
9907     # $rOpts_closing_side_comment_else_flag:
9908     #
9909     #  = 0 add 'if' text to trailing else
9910     #  = 1 same as 0 plus:
9911     #      add 'if' to 'elsif's if can fit in line length
9912     #      add last 'elsif' to trailing else if can fit in one line
9913     #  = 2 same as 1 but do not check if exceed line length
9914     #
9915     # $rif_elsif_text = a reference to a list of all previous closing
9916     # side comments created for this if block
9917     #
9918     my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
9919     my $csc_text = $block_leading_text;
9920
9921     if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
9922     {
9923         return $csc_text;
9924     }
9925
9926     my $count = @{$rif_elsif_text};
9927     return $csc_text unless ($count);
9928
9929     my $if_text = '[ if' . $rif_elsif_text->[0];
9930
9931     # always show the leading 'if' text on 'else'
9932     if ( $block_type eq 'else' ) {
9933         $csc_text .= $if_text;
9934     }
9935
9936     # see if that's all
9937     if ( $rOpts_closing_side_comment_else_flag == 0 ) {
9938         return $csc_text;
9939     }
9940
9941     my $last_elsif_text = "";
9942     if ( $count > 1 ) {
9943         $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
9944         if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
9945     }
9946
9947     # tentatively append one more item
9948     my $saved_text = $csc_text;
9949     if ( $block_type eq 'else' ) {
9950         $csc_text .= $last_elsif_text;
9951     }
9952     else {
9953         $csc_text .= ' ' . $if_text;
9954     }
9955
9956     # all done if no length checks requested
9957     if ( $rOpts_closing_side_comment_else_flag == 2 ) {
9958         return $csc_text;
9959     }
9960
9961     # undo it if line length exceeded
9962     my $length =
9963       length($csc_text) + length($block_type) +
9964       length( $rOpts->{'closing-side-comment-prefix'} ) +
9965       $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
9966     if ( $length > $rOpts_maximum_line_length ) {
9967         $csc_text = $saved_text;
9968     }
9969     return $csc_text;
9970 }
9971
9972 sub add_closing_side_comment {
9973
9974     # add closing side comments after closing block braces if -csc used
9975     my $cscw_block_comment;
9976
9977     #---------------------------------------------------------------
9978     # Step 1: loop through all tokens of this line to accumulate
9979     # the text needed to create the closing side comments. Also see
9980     # how the line ends.
9981     #---------------------------------------------------------------
9982
9983     my ( $terminal_type, $i_terminal, $i_block_leading_text,
9984         $block_leading_text, $block_line_count )
9985       = accumulate_csc_text();
9986
9987     #---------------------------------------------------------------
9988     # Step 2: make the closing side comment if this ends a block
9989     #---------------------------------------------------------------
9990     my $have_side_comment = $i_terminal != $max_index_to_go;
9991
9992     # if this line might end in a block closure..
9993     if (
9994         $terminal_type eq '}'
9995
9996         # ..and either
9997         && (
9998
9999             # the block is long enough
10000             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10001
10002             # or there is an existing comment to check
10003             || (   $have_side_comment
10004                 && $rOpts->{'closing-side-comment-warnings'} )
10005         )
10006
10007         # .. and if this is one of the types of interest
10008         && $block_type_to_go[$i_terminal] =~
10009         /$closing_side_comment_list_pattern/o
10010
10011         # ..and the corresponding opening brace must is not in this batch
10012         # (because we do not need to tag one-line blocks, although this
10013         # should also be caught with a positive -csci value)
10014         && $mate_index_to_go[$i_terminal] < 0
10015
10016         # ..and either
10017         && (
10018
10019             # this is the last token (line doesnt have a side comment)
10020             !$have_side_comment
10021
10022             # or the old side comment is a closing side comment
10023             || $tokens_to_go[$max_index_to_go] =~
10024             /$closing_side_comment_prefix_pattern/o
10025         )
10026       )
10027     {
10028
10029         # then make the closing side comment text
10030         my $token =
10031 "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10032
10033         # append any extra descriptive text collected above
10034         if ( $i_block_leading_text == $i_terminal ) {
10035             $token .= $block_leading_text;
10036         }
10037         $token =~ s/\s*$//;    # trim any trailing whitespace
10038
10039         # handle case of existing closing side comment
10040         if ($have_side_comment) {
10041
10042             # warn if requested and tokens differ significantly
10043             if ( $rOpts->{'closing-side-comment-warnings'} ) {
10044                 my $old_csc = $tokens_to_go[$max_index_to_go];
10045                 my $new_csc = $token;
10046                 $new_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
10047                 my $new_trailing_dots = $1;
10048                 $old_csc =~ s/\.\.\.\s*$//;
10049                 $new_csc =~ s/\s+//g;            # trim all whitespace
10050                 $old_csc =~ s/\s+//g;
10051
10052                 # Patch to handle multiple closing side comments at
10053                 # else and elsif's.  These have become too complicated
10054                 # to check, so if we see an indication of
10055                 # '[ if' or '[ # elsif', then assume they were made
10056                 # by perltidy.
10057                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10058                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10059                 }
10060                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10061                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10062                 }
10063
10064                 # if old comment is contained in new comment,
10065                 # only compare the common part.
10066                 if ( length($new_csc) > length($old_csc) ) {
10067                     $new_csc = substr( $new_csc, 0, length($old_csc) );
10068                 }
10069
10070                 # if the new comment is shorter and has been limited,
10071                 # only compare the common part.
10072                 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10073                 {
10074                     $old_csc = substr( $old_csc, 0, length($new_csc) );
10075                 }
10076
10077                 # any remaining difference?
10078                 if ( $new_csc ne $old_csc ) {
10079
10080                     # just leave the old comment if we are below the threshold
10081                     # for creating side comments
10082                     if ( $block_line_count <
10083                         $rOpts->{'closing-side-comment-interval'} )
10084                     {
10085                         $token = undef;
10086                     }
10087
10088                     # otherwise we'll make a note of it
10089                     else {
10090
10091                         warning(
10092 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
10093                         );
10094
10095                      # save the old side comment in a new trailing block comment
10096                         my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
10097                         $year  += 1900;
10098                         $month += 1;
10099                         $cscw_block_comment =
10100 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
10101                     }
10102                 }
10103                 else {
10104
10105                     # No differences.. we can safely delete old comment if we
10106                     # are below the threshold
10107                     if ( $block_line_count <
10108                         $rOpts->{'closing-side-comment-interval'} )
10109                     {
10110                         $token = undef;
10111                         unstore_token_to_go()
10112                           if ( $types_to_go[$max_index_to_go] eq '#' );
10113                         unstore_token_to_go()
10114                           if ( $types_to_go[$max_index_to_go] eq 'b' );
10115                     }
10116                 }
10117             }
10118
10119             # switch to the new csc (unless we deleted it!)
10120             $tokens_to_go[$max_index_to_go] = $token if $token;
10121         }
10122
10123         # handle case of NO existing closing side comment
10124         else {
10125
10126             # insert the new side comment into the output token stream
10127             my $type                  = '#';
10128             my $block_type            = '';
10129             my $type_sequence         = '';
10130             my $container_environment =
10131               $container_environment_to_go[$max_index_to_go];
10132             my $level                = $levels_to_go[$max_index_to_go];
10133             my $slevel               = $nesting_depth_to_go[$max_index_to_go];
10134             my $no_internal_newlines = 0;
10135
10136             my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
10137             my $ci_level           = $ci_levels_to_go[$max_index_to_go];
10138             my $in_continued_quote = 0;
10139
10140             # first insert a blank token
10141             insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
10142
10143             # then the side comment
10144             insert_new_token_to_go( $token, $type, $slevel,
10145                 $no_internal_newlines );
10146         }
10147     }
10148     return $cscw_block_comment;
10149 }
10150
10151 sub previous_nonblank_token {
10152     my ($i) = @_;
10153     if ( $i <= 0 ) {
10154         return "";
10155     }
10156     elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
10157         return $tokens_to_go[ $i - 1 ];
10158     }
10159     elsif ( $i > 1 ) {
10160         return $tokens_to_go[ $i - 2 ];
10161     }
10162     else {
10163         return "";
10164     }
10165 }
10166
10167 sub send_lines_to_vertical_aligner {
10168
10169     my ( $ri_first, $ri_last, $do_not_pad ) = @_;
10170
10171     my $rindentation_list = [0];    # ref to indentations for each line
10172
10173     set_vertical_alignment_markers( $ri_first, $ri_last );
10174
10175     # flush if necessary to avoid unwanted alignment
10176     my $must_flush = 0;
10177     if ( @$ri_first > 1 ) {
10178
10179         # flush before a long if statement
10180         if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
10181             $must_flush = 1;
10182         }
10183     }
10184     if ($must_flush) {
10185         Perl::Tidy::VerticalAligner::flush();
10186     }
10187
10188     set_logical_padding( $ri_first, $ri_last );
10189
10190     # loop to prepare each line for shipment
10191     my $n_last_line = @$ri_first - 1;
10192     my $in_comma_list;
10193     for my $n ( 0 .. $n_last_line ) {
10194         my $ibeg = $$ri_first[$n];
10195         my $iend = $$ri_last[$n];
10196
10197         my @patterns = ();
10198         my @tokens   = ();
10199         my @fields   = ();
10200         my $i_start  = $ibeg;
10201         my $i;
10202
10203         my $depth                 = 0;
10204         my @container_name        = ("");
10205         my @multiple_comma_arrows = (undef);
10206
10207         my $j = 0;    # field index
10208
10209         $patterns[0] = "";
10210         for $i ( $ibeg .. $iend ) {
10211
10212             # Keep track of containers balanced on this line only.
10213             # These are used below to prevent unwanted cross-line alignments.
10214             # Unbalanced containers already avoid aligning across
10215             # container boundaries.
10216             if ( $tokens_to_go[$i] eq '(' ) {
10217                 my $i_mate = $mate_index_to_go[$i];
10218                 if ( $i_mate > $i && $i_mate <= $iend ) {
10219                     $depth++;
10220                     my $seqno = $type_sequence_to_go[$i];
10221                     my $count = comma_arrow_count($seqno);
10222                     $multiple_comma_arrows[$depth] = $count && $count > 1;
10223                     my $name = previous_nonblank_token($i);
10224                     $name =~ s/^->//;
10225                     $container_name[$depth] = "+" . $name;
10226                 }
10227             }
10228             elsif ( $tokens_to_go[$i] eq ')' ) {
10229                 $depth-- if $depth > 0;
10230             }
10231
10232             # if we find a new synchronization token, we are done with
10233             # a field
10234             if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
10235
10236                 my $tok = my $raw_tok = $matching_token_to_go[$i];
10237
10238                 # make separators in different nesting depths unique
10239                 # by appending the nesting depth digit.
10240                 if ( $raw_tok ne '#' ) {
10241                     $tok .= "$nesting_depth_to_go[$i]";
10242                 }
10243
10244                 # do any special decorations for commas to avoid unwanted
10245                 # cross-line alignments.
10246                 if ( $raw_tok eq ',' ) {
10247                     if ( $container_name[$depth] ) {
10248                         $tok .= $container_name[$depth];
10249                     }
10250                 }
10251
10252                 # decorate '=>' with:
10253                 # - Nothing if this container is unbalanced on this line.
10254                 # - The previous token if it is balanced and multiple '=>'s
10255                 # - The container name if it is bananced and no other '=>'s
10256                 elsif ( $raw_tok eq '=>' ) {
10257                     if ( $container_name[$depth] ) {
10258                         if ( $multiple_comma_arrows[$depth] ) {
10259                             $tok .= "+" . previous_nonblank_token($i);
10260                         }
10261                         else {
10262                             $tok .= $container_name[$depth];
10263                         }
10264                     }
10265                 }
10266
10267                 # concatenate the text of the consecutive tokens to form
10268                 # the field
10269                 push( @fields,
10270                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10271
10272                 # store the alignment token for this field
10273                 push( @tokens, $tok );
10274
10275                 # get ready for the next batch
10276                 $i_start = $i;
10277                 $j++;
10278                 $patterns[$j] = "";
10279             }
10280
10281             # continue accumulating tokens
10282             # handle non-keywords..
10283             if ( $types_to_go[$i] ne 'k' ) {
10284                 my $type = $types_to_go[$i];
10285
10286                 # Mark most things before arrows as a quote to
10287                 # get them to line up. Testfile: mixed.pl.
10288                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10289                     my $next_type       = $types_to_go[ $i + 1 ];
10290                     my $i_next_nonblank =
10291                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10292
10293                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10294                         $type = 'Q';
10295                     }
10296                 }
10297
10298                 # minor patch to make numbers and quotes align
10299                 if ( $type eq 'n' ) { $type = 'Q' }
10300
10301                 $patterns[$j] .= $type;
10302             }
10303
10304             # for keywords we have to use the actual text
10305             else {
10306
10307                 # map certain keywords to the same 'if' class to align
10308                 # long if/elsif sequences. my testfile: elsif.pl
10309                 my $tok = $tokens_to_go[$i];
10310                 if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
10311                     $tok = 'if';
10312                 }
10313                 $patterns[$j] .= $tok;
10314             }
10315         }
10316
10317         # done with this line .. join text of tokens to make the last field
10318         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10319
10320         my ( $indentation, $lev, $level_end, $is_semicolon_terminated,
10321             $is_outdented_line )
10322           = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
10323             $ri_first, $ri_last, $rindentation_list );
10324
10325         # we will allow outdenting of long lines..
10326         my $outdent_long_lines = (
10327
10328             # which are long quotes, if allowed
10329             ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
10330
10331             # which are long block comments, if allowed
10332               || (
10333                    $types_to_go[$ibeg] eq '#'
10334                 && $rOpts->{'outdent-long-comments'}
10335
10336                 # but not if this is a static block comment
10337                 && !(
10338                        $rOpts->{'static-block-comments'}
10339                     && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o
10340                 )
10341               )
10342         );
10343
10344         my $level_jump =
10345           $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
10346
10347         my $rvertical_tightness_flags =
10348           set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
10349             $ri_first, $ri_last );
10350
10351         # flush an outdented line to avoid any unwanted vertical alignment
10352         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10353
10354         # send this new line down the pipe
10355         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
10356         Perl::Tidy::VerticalAligner::append_line(
10357             $lev,
10358             $level_end,
10359             $indentation,
10360             \@fields,
10361             \@tokens,
10362             \@patterns,
10363             $forced_breakpoint_to_go[$iend] || $in_comma_list,
10364             $outdent_long_lines,
10365             $is_semicolon_terminated,
10366             $do_not_pad,
10367             $rvertical_tightness_flags,
10368             $level_jump,
10369         );
10370         $in_comma_list =
10371           $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
10372
10373         # flush an outdented line to avoid any unwanted vertical alignment
10374         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10375
10376         $do_not_pad = 0;
10377
10378     }    # end of loop to output each line
10379
10380     # remember indentation of lines containing opening containers for
10381     # later use by sub set_adjusted_indentation
10382     save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
10383 }
10384
10385 {        # begin unmatched_indexes
10386
10387     # closure to keep track of unbalanced containers.
10388     # arrays shared by the routines in this block:
10389     my @unmatched_opening_indexes_in_this_batch;
10390     my @unmatched_closing_indexes_in_this_batch;
10391     my %comma_arrow_count;
10392
10393     sub is_unbalanced_batch {
10394         @unmatched_opening_indexes_in_this_batch +
10395           @unmatched_closing_indexes_in_this_batch;
10396     }
10397
10398     sub comma_arrow_count {
10399         my $seqno = $_[0];
10400         return $comma_arrow_count{$seqno};
10401     }
10402
10403     sub match_opening_and_closing_tokens {
10404
10405         # Match up indexes of opening and closing braces, etc, in this batch.
10406         # This has to be done after all tokens are stored because unstoring
10407         # of tokens would otherwise cause trouble.
10408
10409         @unmatched_opening_indexes_in_this_batch = ();
10410         @unmatched_closing_indexes_in_this_batch = ();
10411         %comma_arrow_count                       = ();
10412
10413         my ( $i, $i_mate, $token );
10414         foreach $i ( 0 .. $max_index_to_go ) {
10415             if ( $type_sequence_to_go[$i] ) {
10416                 $token = $tokens_to_go[$i];
10417                 if ( $token =~ /^[\(\[\{\?]$/ ) {
10418                     push @unmatched_opening_indexes_in_this_batch, $i;
10419                 }
10420                 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
10421
10422                     $i_mate = pop @unmatched_opening_indexes_in_this_batch;
10423                     if ( defined($i_mate) && $i_mate >= 0 ) {
10424                         if ( $type_sequence_to_go[$i_mate] ==
10425                             $type_sequence_to_go[$i] )
10426                         {
10427                             $mate_index_to_go[$i]      = $i_mate;
10428                             $mate_index_to_go[$i_mate] = $i;
10429                         }
10430                         else {
10431                             push @unmatched_opening_indexes_in_this_batch,
10432                               $i_mate;
10433                             push @unmatched_closing_indexes_in_this_batch, $i;
10434                         }
10435                     }
10436                     else {
10437                         push @unmatched_closing_indexes_in_this_batch, $i;
10438                     }
10439                 }
10440             }
10441             elsif ( $tokens_to_go[$i] eq '=>' ) {
10442                 if (@unmatched_opening_indexes_in_this_batch) {
10443                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
10444                     my $seqno = $type_sequence_to_go[$j];
10445                     $comma_arrow_count{$seqno}++;
10446                 }
10447             }
10448         }
10449     }
10450
10451     sub save_opening_indentation {
10452
10453         # This should be called after each batch of tokens is output. It
10454         # saves indentations of lines of all unmatched opening tokens.
10455         # These will be used by sub get_opening_indentation.
10456
10457         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
10458
10459         # we no longer need indentations of any saved indentations which
10460         # are unmatched closing tokens in this batch, because we will
10461         # never encounter them again.  So we can delete them to keep
10462         # the hash size down.
10463         foreach (@unmatched_closing_indexes_in_this_batch) {
10464             my $seqno = $type_sequence_to_go[$_];
10465             delete $saved_opening_indentation{$seqno};
10466         }
10467
10468         # we need to save indentations of any unmatched opening tokens
10469         # in this batch because we may need them in a subsequent batch.
10470         foreach (@unmatched_opening_indexes_in_this_batch) {
10471             my $seqno = $type_sequence_to_go[$_];
10472             $saved_opening_indentation{$seqno} = [
10473                 lookup_opening_indentation(
10474                     $_, $ri_first, $ri_last, $rindentation_list
10475                 )
10476             ];
10477         }
10478     }
10479 }    # end unmatched_indexes
10480
10481 sub get_opening_indentation {
10482
10483     # get the indentation of the line which output the opening token
10484     # corresponding to a given closing token in the current output batch.
10485     #
10486     # given:
10487     # $i_closing - index in this line of a closing token ')' '}' or ']'
10488     #
10489     # $ri_first - reference to list of the first index $i for each output
10490     #               line in this batch
10491     # $ri_last - reference to list of the last index $i for each output line
10492     #              in this batch
10493     # $rindentation_list - reference to a list containing the indentation
10494     #            used for each line.
10495     #
10496     # return:
10497     #   -the indentation of the line which contained the opening token
10498     #    which matches the token at index $i_opening
10499     #   -and its offset (number of columns) from the start of the line
10500     #
10501     my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
10502
10503     # first, see if the opening token is in the current batch
10504     my $i_opening = $mate_index_to_go[$i_closing];
10505     my ( $indent, $offset );
10506     if ( $i_opening >= 0 ) {
10507
10508         # it is..look up the indentation
10509         ( $indent, $offset ) =
10510           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
10511             $rindentation_list );
10512     }
10513
10514     # if not, it should have been stored in the hash by a previous batch
10515     else {
10516         my $seqno = $type_sequence_to_go[$i_closing];
10517         if ($seqno) {
10518             if ( $saved_opening_indentation{$seqno} ) {
10519                 ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
10520             }
10521         }
10522
10523         # if no sequence number it must be an unbalanced container
10524         else {
10525             $indent = 0;
10526             $offset = 0;
10527         }
10528     }
10529     return ( $indent, $offset );
10530 }
10531
10532 sub lookup_opening_indentation {
10533
10534     # get the indentation of the line in the current output batch
10535     # which output a selected opening token
10536     #
10537     # given:
10538     #   $i_opening - index of an opening token in the current output batch
10539     #                whose line indentation we need
10540     #   $ri_first - reference to list of the first index $i for each output
10541     #               line in this batch
10542     #   $ri_last - reference to list of the last index $i for each output line
10543     #              in this batch
10544     #   $rindentation_list - reference to a list containing the indentation
10545     #            used for each line.  (NOTE: the first slot in
10546     #            this list is the last returned line number, and this is
10547     #            followed by the list of indentations).
10548     #
10549     # return
10550     #   -the indentation of the line which contained token $i_opening
10551     #   -and its offset (number of columns) from the start of the line
10552
10553     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
10554
10555     my $nline = $rindentation_list->[0];    # line number of previous lookup
10556
10557     # reset line location if necessary
10558     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
10559
10560     # find the correct line
10561     unless ( $i_opening > $ri_last->[-1] ) {
10562         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
10563     }
10564
10565     # error - token index is out of bounds - shouldn't happen
10566     else {
10567         warning(
10568 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
10569         );
10570         report_definite_bug();
10571         $nline = $#{$ri_last};
10572     }
10573
10574     $rindentation_list->[0] =
10575       $nline;    # save line number to start looking next call
10576     my $ibeg = $ri_start->[$nline];
10577     my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
10578     return ( $rindentation_list->[ $nline + 1 ], $offset );
10579 }
10580
10581 sub set_adjusted_indentation {
10582
10583     # This routine has the final say regarding the actual indentation of
10584     # a line.  It starts with the basic indentation which has been
10585     # defined for the leading token, and then takes into account any
10586     # options that the user has set regarding special indenting and
10587     # outdenting.
10588
10589     my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
10590         $rindentation_list )
10591       = @_;
10592
10593     # we need to know the last token of this line
10594     my ( $terminal_type, $i_terminal ) =
10595       terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
10596
10597     my $is_outdented_line = 0;
10598
10599     my $is_semicolon_terminated = $terminal_type eq ';'
10600       && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
10601
10602     # Most lines are indented according to the initial token.
10603     # But it is common to outdent to the level just after the
10604     # terminal token in certain cases...
10605     # adjust_indentation flag:
10606     #       0 - do not adjust
10607     #       1 - outdent
10608     #       2 - vertically align with opening token
10609     #       3 - indent
10610     my $adjust_indentation         = 0;
10611     my $default_adjust_indentation = $adjust_indentation;
10612
10613     my ( $opening_indentation, $opening_offset );
10614
10615     # if we are at a closing token of some type..
10616     if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
10617
10618         # get the indentation of the line containing the corresponding
10619         # opening token
10620         ( $opening_indentation, $opening_offset ) =
10621           get_opening_indentation( $ibeg, $ri_first, $ri_last,
10622             $rindentation_list );
10623
10624         # First set the default behavior:
10625         # default behavior is to outdent closing lines
10626         # of the form:   ");  };  ];  )->xxx;"
10627         if (
10628             $is_semicolon_terminated
10629
10630             # and 'cuddled parens' of the form:   ")->pack("
10631             || (
10632                    $terminal_type      eq '('
10633                 && $types_to_go[$ibeg] eq ')'
10634                 && ( $nesting_depth_to_go[$iend] + 1 ==
10635                     $nesting_depth_to_go[$ibeg] )
10636             )
10637           )
10638         {
10639             $adjust_indentation = 1;
10640         }
10641
10642         # TESTING: outdent something like '),'
10643         if (
10644             $terminal_type eq ','
10645
10646             # allow just one character before the comma
10647             && $i_terminal == $ibeg + 1
10648
10649             # requre LIST environment; otherwise, we may outdent too much --
10650             # this can happen in calls without parentheses (overload.t);
10651             && $container_environment_to_go[$i_terminal] eq 'LIST'
10652           )
10653         {
10654             $adjust_indentation = 1;
10655         }
10656
10657         # undo continuation indentation of a terminal closing token if
10658         # it is the last token before a level decrease.  This will allow
10659         # a closing token to line up with its opening counterpart, and
10660         # avoids a indentation jump larger than 1 level.
10661         if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
10662             && $i_terminal == $ibeg )
10663         {
10664             my $ci              = $ci_levels_to_go[$ibeg];
10665             my $lev             = $levels_to_go[$ibeg];
10666             my $next_type       = $types_to_go[ $ibeg + 1 ];
10667             my $i_next_nonblank =
10668               ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
10669             if (   $i_next_nonblank <= $max_index_to_go
10670                 && $levels_to_go[$i_next_nonblank] < $lev )
10671             {
10672                 $adjust_indentation = 1;
10673             }
10674         }
10675
10676         $default_adjust_indentation = $adjust_indentation;
10677
10678         # Now modify default behavior according to user request:
10679         # handle option to indent non-blocks of the form );  };  ];
10680         # But don't do special indentation to something like ')->pack('
10681         if ( !$block_type_to_go[$ibeg] ) {
10682             my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
10683             if (
10684                 $cti == 1
10685                 && (   $i_terminal <= $ibeg + 1
10686                     || $is_semicolon_terminated )
10687               )
10688             {
10689                 $adjust_indentation = 2;
10690             }
10691             elsif ($cti == 2
10692                 && $is_semicolon_terminated
10693                 && $i_terminal == $ibeg + 1 )
10694             {
10695                 $adjust_indentation = 3;
10696             }
10697         }
10698
10699         # handle option to indent blocks
10700         else {
10701             if (
10702                 $rOpts->{'indent-closing-brace'}
10703                 && (
10704                     $i_terminal == $ibeg    #  isolated terminal '}'
10705                     || $is_semicolon_terminated
10706                 )
10707               )                             #  } xxxx ;
10708             {
10709                 $adjust_indentation = 3;
10710             }
10711         }
10712     }
10713
10714     # if at ');', '};', '>;', and '];' of a terminal qw quote
10715     elsif ( $$rpatterns[0] =~ /^qb*;$/ && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) {
10716         if ( $closing_token_indentation{$1} == 0 ) {
10717             $adjust_indentation = 1;
10718         }
10719         else {
10720             $adjust_indentation = 3;
10721         }
10722     }
10723
10724     # Handle variation in indentation styles...
10725     # Select the indentation object to define leading
10726     # whitespace.  If we are outdenting something like '} } );'
10727     # then we want to use one level below the last token
10728     # ($i_terminal) in order to get it to fully outdent through
10729     # all levels.
10730     my $indentation;
10731     my $lev;
10732     my $level_end = $levels_to_go[$iend];
10733
10734     if ( $adjust_indentation == 0 ) {
10735         $indentation = $leading_spaces_to_go[$ibeg];
10736         $lev         = $levels_to_go[$ibeg];
10737     }
10738     elsif ( $adjust_indentation == 1 ) {
10739         $indentation = $reduced_spaces_to_go[$i_terminal];
10740         $lev         = $levels_to_go[$i_terminal];
10741     }
10742
10743     # handle indented closing token which aligns with opening token
10744     elsif ( $adjust_indentation == 2 ) {
10745
10746         # handle option to align closing token with opening token
10747         $lev = $levels_to_go[$ibeg];
10748
10749         # calculate spaces needed to align with opening token
10750         my $space_count = get_SPACES($opening_indentation) + $opening_offset;
10751
10752         # Indent less than the previous line.
10753         #
10754         # Problem: For -lp we don't exactly know what it was if there were
10755         # recoverable spaces sent to the aligner.  A good solution would be to
10756         # force a flush of the vertical alignment buffer, so that we would
10757         # know.  For now, this rule is used for -lp:
10758         #
10759         # When the last line did not start with a closing token we will be
10760         # optimistic that the aligner will recover everything wanted.
10761         #
10762         # This rule will prevent us from breaking a hierarchy of closing
10763         # tokens, and in a worst case will leave a closing paren too far
10764         # indented, but this is better than frequently leaving it not indented
10765         # enough.
10766         my $last_spaces = get_SPACES($last_indentation_written);
10767         if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
10768             $last_spaces += get_RECOVERABLE_SPACES($last_indentation_written);
10769         }
10770
10771         # reset the indentation to the new space count if it works
10772         # only options are all or none: nothing in-between looks good
10773         $lev = $levels_to_go[$ibeg];
10774         if ( $space_count < $last_spaces ) {
10775             if ($rOpts_line_up_parentheses) {
10776                 my $lev = $levels_to_go[$ibeg];
10777                 $indentation =
10778                   new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10779             }
10780             else {
10781                 $indentation = $space_count;
10782             }
10783         }
10784
10785         # revert to default if it doesnt work
10786         else {
10787             $space_count = leading_spaces_to_go($ibeg);
10788             if ( $default_adjust_indentation == 0 ) {
10789                 $indentation = $leading_spaces_to_go[$ibeg];
10790             }
10791             elsif ( $default_adjust_indentation == 1 ) {
10792                 $indentation = $reduced_spaces_to_go[$i_terminal];
10793                 $lev         = $levels_to_go[$i_terminal];
10794             }
10795         }
10796     }
10797
10798     # Full indentaion of closing tokens (-icb and -icp or -cti=2)
10799     else {
10800
10801         # There are two ways to handle -icb and -icp...
10802         # One way is to use the indentation of the previous line:
10803         # $indentation = $last_indentation_written;
10804
10805         # The other way is to use the indentation that the previous line
10806         # would have had if it hadn't been adjusted:
10807         $indentation = $last_unadjusted_indentation;
10808
10809         # Current method: use the minimum of the two. This avoids inconsistent
10810         # indentation.
10811         if ( get_SPACES($last_indentation_written) < get_SPACES($indentation) )
10812         {
10813             $indentation = $last_indentation_written;
10814         }
10815
10816         # use previous indentation but use own level
10817         # to cause list to be flushed properly
10818         $lev = $levels_to_go[$ibeg];
10819     }
10820
10821     # remember indentation except for multi-line quotes, which get
10822     # no indentation
10823     unless ( $types_to_go[$ibeg] eq 'Q' && $lev == 0 ) {
10824         $last_indentation_written    = $indentation;
10825         $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
10826         $last_leading_token          = $tokens_to_go[$ibeg];
10827     }
10828
10829     # be sure lines with leading closing tokens are not outdented more
10830     # than the line which contained the corresponding opening token.
10831     my $is_isolated_block_brace =
10832       ( $iend == $ibeg ) && $block_type_to_go[$ibeg];
10833     if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
10834         if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
10835             $indentation = $opening_indentation;
10836         }
10837     }
10838
10839     # remember the indentation of each line of this batch
10840     push @{$rindentation_list}, $indentation;
10841
10842     # outdent lines with certain leading tokens...
10843     if (
10844
10845         # must be first word of this batch
10846         $ibeg == 0
10847
10848         # and ...
10849         && (
10850
10851             # certain leading keywords if requested
10852             (
10853                    $rOpts->{'outdent-keywords'}
10854                 && $types_to_go[$ibeg] eq 'k'
10855                 && $outdent_keyword{ $tokens_to_go[$ibeg] }
10856             )
10857
10858             # or labels if requested
10859             || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
10860
10861             # or static block comments if requested
10862             || (   $types_to_go[$ibeg] eq '#'
10863                 && $rOpts->{'outdent-static-block-comments'}
10864                 && $tokens_to_go[$ibeg] =~ /$static_block_comment_pattern/o
10865                 && $rOpts->{'static-block-comments'} )
10866         )
10867       )
10868
10869     {
10870         my $space_count = leading_spaces_to_go($ibeg);
10871         if ( $space_count > 0 ) {
10872             $space_count -= $rOpts_continuation_indentation;
10873             $is_outdented_line = 1;
10874             if ( $space_count < 0 ) { $space_count = 0 }
10875
10876             # do not promote a spaced static block comment to non-spaced;
10877             # this is not normally necessary but could be for some
10878             # unusual user inputs (such as -ci = -i)
10879             if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
10880                 $space_count = 1;
10881             }
10882
10883             if ($rOpts_line_up_parentheses) {
10884                 $indentation =
10885                   new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10886             }
10887             else {
10888                 $indentation = $space_count;
10889             }
10890         }
10891     }
10892
10893     return ( $indentation, $lev, $level_end, $is_semicolon_terminated,
10894         $is_outdented_line );
10895 }
10896
10897 sub set_vertical_tightness_flags {
10898
10899     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
10900
10901     # Define vertical tightness controls for the nth line of a batch.
10902     # We create an array of parameters which tell the vertical aligner
10903     # if we should combine this line with the next line to achieve the
10904     # desired vertical tightness.  The array of parameters contains:
10905     #
10906     #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
10907     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
10908     #             if closing: spaces of padding to use
10909     #   [2] sequence number of container
10910     #   [3] valid flag: do not append if this flag is false. Will be
10911     #       true if appropriate -vt flag is set.  Otherwise, Will be
10912     #       made true only for 2 line container in parens with -lp
10913     #
10914     # These flags are used by sub set_leading_whitespace in
10915     # the vertical aligner
10916
10917     my $rvertical_tightness_flags;
10918
10919     # For non-BLOCK tokens, we will need to examine the next line
10920     # too, so we won't consider the last line.
10921     if ( $n < $n_last_line ) {
10922
10923         # see if last token is an opening token...not a BLOCK...
10924         my $ibeg_next = $$ri_first[ $n + 1 ];
10925         my $token_end = $tokens_to_go[$iend];
10926         my $iend_next = $$ri_last[ $n + 1 ];
10927         if (
10928                $type_sequence_to_go[$iend]
10929             && !$block_type_to_go[$iend]
10930             && $is_opening_token{$token_end}
10931             && (
10932                 $opening_vertical_tightness{$token_end} > 0
10933
10934                 # allow 2-line method call to be closed up
10935                 || (   $rOpts_line_up_parentheses
10936                     && $token_end eq '('
10937                     && $iend > $ibeg
10938                     && $types_to_go[ $iend - 1 ] ne 'b' )
10939             )
10940           )
10941         {
10942
10943             # avoid multiple jumps in nesting depth in one line if
10944             # requested
10945             my $ovt       = $opening_vertical_tightness{$token_end};
10946             my $iend_next = $$ri_last[ $n + 1 ];
10947             unless (
10948                 $ovt < 2
10949                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
10950                     $nesting_depth_to_go[$ibeg_next] )
10951               )
10952             {
10953
10954                 # If -vt flag has not been set, mark this as invalid
10955                 # and aligner will validate it if it sees the closing paren
10956                 # within 2 lines.
10957                 my $valid_flag = $ovt;
10958                 @{$rvertical_tightness_flags} =
10959                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
10960             }
10961         }
10962
10963         # see if first token of next line is a closing token...
10964         # ..and be sure this line does not have a side comment
10965         my $token_next = $tokens_to_go[$ibeg_next];
10966         if (   $type_sequence_to_go[$ibeg_next]
10967             && !$block_type_to_go[$ibeg_next]
10968             && $is_closing_token{$token_next}
10969             && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
10970         {
10971             my $ovt = $opening_vertical_tightness{$token_next};
10972             my $cvt = $closing_vertical_tightness{$token_next};
10973             if (
10974
10975                 # never append a trailing line like   )->pack(
10976                 # because it will throw off later alignment
10977                 (
10978                     $nesting_depth_to_go[$ibeg_next] ==
10979                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
10980                 )
10981                 && (
10982                     $cvt == 2
10983                     || (
10984                         $container_environment_to_go[$ibeg_next] ne 'LIST'
10985                         && (
10986                             $cvt == 1
10987
10988                             # allow closing up 2-line method calls
10989                             || (   $rOpts_line_up_parentheses
10990                                 && $token_next eq ')' )
10991                         )
10992                     )
10993                 )
10994               )
10995             {
10996
10997                 # decide which trailing closing tokens to append..
10998                 my $ok = 0;
10999                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11000                 else {
11001                     my $str = join( '',
11002                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11003
11004                     # append closing token if followed by comment or ';'
11005                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11006                 }
11007
11008                 if ($ok) {
11009                     my $valid_flag = $cvt;
11010                     @{$rvertical_tightness_flags} = (
11011                         2,
11012                         $tightness{$token_next} == 2 ? 0 : 1,
11013                         $type_sequence_to_go[$ibeg_next], $valid_flag,
11014                     );
11015                 }
11016             }
11017         }
11018     }
11019
11020     # Check for a last line with isolated opening BLOCK curly
11021     elsif ($rOpts_block_brace_vertical_tightness
11022         && $ibeg               eq $iend
11023         && $types_to_go[$iend] eq '{'
11024         && $block_type_to_go[$iend] =~
11025         /$block_brace_vertical_tightness_pattern/o )
11026     {
11027         @{$rvertical_tightness_flags} =
11028           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11029     }
11030
11031     return $rvertical_tightness_flags;
11032 }
11033
11034 {
11035     my %is_vertical_alignment_type;
11036     my %is_vertical_alignment_keyword;
11037
11038     BEGIN {
11039
11040         @_ = qw#
11041           = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=
11042           { ? : => =~ && ||
11043           #;
11044         @is_vertical_alignment_type{@_} = (1) x scalar(@_);
11045
11046         @_ = qw(if unless and or eq ne for foreach while until);
11047         @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
11048     }
11049
11050     sub set_vertical_alignment_markers {
11051
11052         # Look at the tokens in this output batch and define the array
11053         # 'matching_token_to_go' which marks tokens at which we would
11054         # accept vertical alignment.
11055
11056         # nothing to do if we aren't allowed to change whitespace
11057         if ( !$rOpts_add_whitespace ) {
11058             for my $i ( 0 .. $max_index_to_go ) {
11059                 $matching_token_to_go[$i] = '';
11060             }
11061             return;
11062         }
11063
11064         my ( $ri_first, $ri_last ) = @_;
11065
11066         # look at each line of this batch..
11067         my $last_vertical_alignment_before_index;
11068         my $vert_last_nonblank_type;
11069         my $vert_last_nonblank_token;
11070         my $vert_last_nonblank_block_type;
11071         my $max_line = @$ri_first - 1;
11072         my ( $i, $type, $token, $block_type, $alignment_type );
11073         my ( $ibeg, $iend, $line );
11074         foreach $line ( 0 .. $max_line ) {
11075             $ibeg                                 = $$ri_first[$line];
11076             $iend                                 = $$ri_last[$line];
11077             $last_vertical_alignment_before_index = -1;
11078             $vert_last_nonblank_type              = '';
11079             $vert_last_nonblank_token             = '';
11080             $vert_last_nonblank_block_type        = '';
11081
11082             # look at each token in this output line..
11083             foreach $i ( $ibeg .. $iend ) {
11084                 $alignment_type = '';
11085                 $type           = $types_to_go[$i];
11086                 $block_type     = $block_type_to_go[$i];
11087                 $token          = $tokens_to_go[$i];
11088
11089                 # check for flag indicating that we should not align
11090                 # this token
11091                 if ( $matching_token_to_go[$i] ) {
11092                     $matching_token_to_go[$i] = '';
11093                     next;
11094                 }
11095
11096                 #--------------------------------------------------------
11097                 # First see if we want to align BEFORE this token
11098                 #--------------------------------------------------------
11099
11100                 # The first possible token that we can align before
11101                 # is index 2 because: 1) it doesn't normally make sense to
11102                 # align before the first token and 2) the second
11103                 # token must be a blank if we are to align before
11104                 # the third
11105                 if ( $i < $ibeg + 2 ) {
11106                 }
11107
11108                 # must follow a blank token
11109                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
11110                 }
11111
11112                 # align a side comment --
11113                 elsif ( $type eq '#' ) {
11114
11115                     unless (
11116
11117                         # it is a static side comment
11118                         (
11119                                $rOpts->{'static-side-comments'}
11120                             && $token =~ /$static_side_comment_pattern/o
11121                         )
11122
11123                         # or a closing side comment
11124                         || (   $vert_last_nonblank_block_type
11125                             && $token =~
11126                             /$closing_side_comment_prefix_pattern/o )
11127                       )
11128                     {
11129                         $alignment_type = $type;
11130                     }    ## Example of a static side comment
11131                 }
11132
11133                 # otherwise, do not align two in a row to create a
11134                 # blank field
11135                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) {
11136                 }
11137
11138                 # align before one of these keywords
11139                 # (within a line, since $i>1)
11140                 elsif ( $type eq 'k' ) {
11141
11142                     #  /^(if|unless|and|or|eq|ne)$/
11143                     if ( $is_vertical_alignment_keyword{$token} ) {
11144                         $alignment_type = $token;
11145                     }
11146                 }
11147
11148                 # align before one of these types..
11149                 # Note: add '.' after new vertical aligner is operational
11150                 elsif ( $is_vertical_alignment_type{$type} ) {
11151                     $alignment_type = $token;
11152
11153                     # For a paren after keyword, only align something like this:
11154                     #    if    ( $a ) { &a }
11155                     #    elsif ( $b ) { &b }
11156                     if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
11157                         $alignment_type = ""
11158                           unless $vert_last_nonblank_token =~
11159                           /^(if|unless|elsif)$/;
11160                     }
11161
11162                     # be sure the alignment tokens are unique
11163                     # This didn't work well: reason not determined
11164                     # if ($token ne $type) {$alignment_type .= $type}
11165                 }
11166
11167               # NOTE: This is deactivated until the new vertical aligner
11168               # is finished because it causes the previous if/elsif alignment
11169               # to fail
11170               #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) {
11171               #    $alignment_type = $type;
11172               #}
11173
11174                 if ($alignment_type) {
11175                     $last_vertical_alignment_before_index = $i;
11176                 }
11177
11178                 #--------------------------------------------------------
11179                 # Next see if we want to align AFTER the previous nonblank
11180                 #--------------------------------------------------------
11181
11182                 # We want to line up ',' and interior ';' tokens, with the added
11183                 # space AFTER these tokens.  (Note: interior ';' is included
11184                 # because it may occur in short blocks).
11185                 if (
11186
11187                     # we haven't already set it
11188                     !$alignment_type
11189
11190                     # and its not the first token of the line
11191                     && ( $i > $ibeg )
11192
11193                     # and it follows a blank
11194                     && $types_to_go[ $i - 1 ] eq 'b'
11195
11196                     # and previous token IS one of these:
11197                     && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
11198
11199                     # and it's NOT one of these
11200                     && ( $type !~ /^[b\#\)\]\}]$/ )
11201
11202                     # then go ahead and align
11203                   )
11204
11205                 {
11206                     $alignment_type = $vert_last_nonblank_type;
11207                 }
11208
11209                 #--------------------------------------------------------
11210                 # then store the value
11211                 #--------------------------------------------------------
11212                 $matching_token_to_go[$i] = $alignment_type;
11213                 if ( $type ne 'b' ) {
11214                     $vert_last_nonblank_type       = $type;
11215                     $vert_last_nonblank_token      = $token;
11216                     $vert_last_nonblank_block_type = $block_type;
11217                 }
11218             }
11219         }
11220     }
11221 }
11222
11223 sub terminal_type {
11224
11225     #    returns type of last token on this line (terminal token), as follows:
11226     #    returns # for a full-line comment
11227     #    returns ' ' for a blank line
11228     #    otherwise returns final token type
11229
11230     my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
11231
11232     # check for full-line comment..
11233     if ( $$rtype[$ibeg] eq '#' ) {
11234         return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
11235     }
11236     else {
11237
11238         # start at end and walk bakwards..
11239         for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
11240
11241             # skip past any side comment and blanks
11242             next if ( $$rtype[$i] eq 'b' );
11243             next if ( $$rtype[$i] eq '#' );
11244
11245             # found it..make sure it is a BLOCK termination,
11246             # but hide a terminal } after sort/grep/map because it is not
11247             # necessarily the end of the line.  (terminal.t)
11248             my $terminal_type = $$rtype[$i];
11249             if (
11250                 $terminal_type eq '}'
11251                 && ( !$$rblock_type[$i]
11252                     || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
11253               )
11254             {
11255                 $terminal_type = 'b';
11256             }
11257             return wantarray ? ( $terminal_type, $i ) : $terminal_type;
11258         }
11259
11260         # empty line
11261         return wantarray ? ( ' ', $ibeg ) : ' ';
11262     }
11263 }
11264
11265 {
11266     my %is_good_keyword_breakpoint;
11267     my %is_lt_gt_le_ge;
11268
11269     sub set_bond_strengths {
11270
11271         BEGIN {
11272
11273             @_ = qw(if unless while until for foreach);
11274             @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
11275
11276             @_ = qw(lt gt le ge);
11277             @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
11278
11279             ###############################################################
11280             # NOTE: NO_BREAK's set here are HINTS which may not be honored;
11281             # essential NO_BREAKS's must be enforced in section 2, below.
11282             ###############################################################
11283
11284             # adding NEW_TOKENS: add a left and right bond strength by
11285             # mimmicking what is done for an existing token type.  You
11286             # can skip this step at first and take the default, then
11287             # tweak later to get desired results.
11288
11289             # The bond strengths should roughly follow precenence order where
11290             # possible.  If you make changes, please check the results very
11291             # carefully on a variety of scripts.
11292
11293             # no break around possible filehandle
11294             $left_bond_strength{'Z'}  = NO_BREAK;
11295             $right_bond_strength{'Z'} = NO_BREAK;
11296
11297             # never put a bare word on a new line:
11298             # example print (STDERR, "bla"); will fail with break after (
11299             $left_bond_strength{'w'} = NO_BREAK;
11300
11301         # blanks always have infinite strength to force breaks after real tokens
11302             $right_bond_strength{'b'} = NO_BREAK;
11303
11304             # try not to break on exponentation
11305             @_                       = qw" ** .. ... <=> ";
11306             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
11307             @right_bond_strength{@_} = (STRONG) x scalar(@_);
11308
11309             # The comma-arrow has very low precedence but not a good break point
11310             $left_bond_strength{'=>'}  = NO_BREAK;
11311             $right_bond_strength{'=>'} = NOMINAL;
11312
11313             # ok to break after label
11314             $left_bond_strength{'J'}  = NO_BREAK;
11315             $right_bond_strength{'J'} = NOMINAL;
11316             $left_bond_strength{'j'}  = STRONG;
11317             $right_bond_strength{'j'} = STRONG;
11318             $left_bond_strength{'A'}  = STRONG;
11319             $right_bond_strength{'A'} = STRONG;
11320
11321             $left_bond_strength{'->'}  = STRONG;
11322             $right_bond_strength{'->'} = VERY_STRONG;
11323
11324             # breaking AFTER these is just ok:
11325             @_                       = qw" % + - * / x  ";
11326             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
11327             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
11328
11329             # breaking BEFORE these is just ok:
11330             @_                       = qw" >> << ";
11331             @right_bond_strength{@_} = (STRONG) x scalar(@_);
11332             @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
11333
11334             # I prefer breaking before the string concatenation operator
11335             # because it can be hard to see at the end of a line
11336             # swap these to break after a '.'
11337             # this could be a future option
11338             $right_bond_strength{'.'} = STRONG;
11339             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
11340
11341             @_                       = qw"} ] ) ";
11342             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
11343             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
11344
11345             # make these a little weaker than nominal so that they get
11346             # favored for end-of-line characters
11347             @_                       = qw"!= == =~ !~";
11348             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
11349             @right_bond_strength{@_} =
11350               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
11351
11352             # break AFTER these
11353             @_                       = qw" < >  | & >= <=";
11354             @left_bond_strength{@_}  = (VERY_STRONG) x scalar(@_);
11355             @right_bond_strength{@_} =
11356               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
11357
11358             # breaking either before or after a quote is ok
11359             # but bias for breaking before a quote
11360             $left_bond_strength{'Q'}  = NOMINAL;
11361             $right_bond_strength{'Q'} = NOMINAL + 0.02;
11362             $left_bond_strength{'q'}  = NOMINAL;
11363             $right_bond_strength{'q'} = NOMINAL;
11364
11365             # starting a line with a keyword is usually ok
11366             $left_bond_strength{'k'} = NOMINAL;
11367
11368             # we usually want to bond a keyword strongly to what immediately
11369             # follows, rather than leaving it stranded at the end of a line
11370             $right_bond_strength{'k'} = STRONG;
11371
11372             $left_bond_strength{'G'}  = NOMINAL;
11373             $right_bond_strength{'G'} = STRONG;
11374
11375             # it is very good to break AFTER various assignment operators
11376             @_ = qw(
11377               = **= += *= &= <<= &&=
11378               -= /= |= >>= ||=
11379               .= %= ^=
11380               x=
11381             );
11382             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
11383             @right_bond_strength{@_} =
11384               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
11385
11386             # break BEFORE '&&' and '||'
11387             # set strength of '||' to same as '=' so that chains like
11388             # $a = $b || $c || $d   will break before the first '||'
11389             $right_bond_strength{'||'} = NOMINAL;
11390             $left_bond_strength{'||'}  = $right_bond_strength{'='};
11391
11392             # set strength of && a little higher than ||
11393             $right_bond_strength{'&&'} = NOMINAL;
11394             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
11395
11396             $left_bond_strength{';'}  = VERY_STRONG;
11397             $right_bond_strength{';'} = VERY_WEAK;
11398             $left_bond_strength{'f'}  = VERY_STRONG;
11399
11400             # make right strength of for ';' a little less than '='
11401             # to make for contents break after the ';' to avoid this:
11402             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
11403             #     $number_of_fields )
11404             # and make it weaker than ',' and 'and' too
11405             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
11406
11407             # The strengths of ?/: should be somewhere between
11408             # an '=' and a quote (NOMINAL),
11409             # make strength of ':' slightly less than '?' to help
11410             # break long chains of ? : after the colons
11411             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
11412             $right_bond_strength{':'} = NO_BREAK;
11413             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
11414             $right_bond_strength{'?'} = NO_BREAK;
11415
11416             $left_bond_strength{','}  = VERY_STRONG;
11417             $right_bond_strength{','} = VERY_WEAK;
11418
11419             # Set bond strengths of certain keywords
11420             # make 'or', 'and' slightly weaker than a ','
11421             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
11422             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
11423             $left_bond_strength{'xor'}  = NOMINAL;
11424             $right_bond_strength{'and'} = NOMINAL;
11425             $right_bond_strength{'or'}  = NOMINAL;
11426             $right_bond_strength{'xor'} = STRONG;
11427         }
11428
11429         # patch-its always ok to break at end of line
11430         $nobreak_to_go[$max_index_to_go] = 0;
11431
11432         # adding a small 'bias' to strengths is a simple way to make a line
11433         # break at the first of a sequence of identical terms.  For example,
11434         # to force long string of conditional operators to break with
11435         # each line ending in a ':', we can add a small number to the bond
11436         # strength of each ':'
11437         my $colon_bias = 0;
11438         my $amp_bias   = 0;
11439         my $bar_bias   = 0;
11440         my $and_bias   = 0;
11441         my $or_bias    = 0;
11442         my $dot_bias   = 0;
11443         my $f_bias     = 0;
11444         my $code_bias  = -.01;
11445         my $type       = 'b';
11446         my $token      = ' ';
11447         my $last_type;
11448         my $last_nonblank_type  = $type;
11449         my $last_nonblank_token = $token;
11450         my $delta_bias          = 0.0001;
11451         my $list_str            = $left_bond_strength{'?'};
11452
11453         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
11454             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
11455         );
11456
11457         # preliminary loop to compute bond strengths
11458         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
11459             $last_type = $type;
11460             if ( $type ne 'b' ) {
11461                 $last_nonblank_type  = $type;
11462                 $last_nonblank_token = $token;
11463             }
11464             $type = $types_to_go[$i];
11465
11466             # strength on both sides of a blank is the same
11467             if ( $type eq 'b' && $last_type ne 'b' ) {
11468                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
11469                 next;
11470             }
11471
11472             $token               = $tokens_to_go[$i];
11473             $block_type          = $block_type_to_go[$i];
11474             $i_next              = $i + 1;
11475             $next_type           = $types_to_go[$i_next];
11476             $next_token          = $tokens_to_go[$i_next];
11477             $total_nesting_depth = $nesting_depth_to_go[$i_next];
11478             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
11479             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
11480             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
11481
11482             # Some token chemistry...  The decision about where to break a
11483             # line depends upon a "bond strength" between tokens.  The LOWER
11484             # the bond strength, the MORE likely a break.  The strength
11485             # values are based on trial-and-error, and need to be tweaked
11486             # occasionally to get desired results.  Things to keep in mind
11487             # are:
11488             #   1. relative strengths are important.  small differences
11489             #      in strengths can make big formatting differences.
11490             #   2. each indentation level adds one unit of bond strength
11491             #   3. a value of NO_BREAK makes an unbreakable bond
11492             #   4. a value of VERY_WEAK is the strength of a ','
11493             #   5. values below NOMINAL are considered ok break points
11494             #   6. values above NOMINAL are considered poor break points
11495             # We are computing the strength of the bond between the current
11496             # token and the NEXT token.
11497             my $bond_str = VERY_STRONG;    # a default, high strength
11498
11499             #---------------------------------------------------------------
11500             # section 1:
11501             # use minimum of left and right bond strengths if defined;
11502             # digraphs and trigraphs like to break on their left
11503             #---------------------------------------------------------------
11504             my $bsr = $right_bond_strength{$type};
11505
11506             if ( !defined($bsr) ) {
11507
11508                 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
11509                     $bsr = STRONG;
11510                 }
11511                 else {
11512                     $bsr = VERY_STRONG;
11513                 }
11514             }
11515
11516             # define right bond strengths of certain keywords
11517             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
11518                 $bsr = $right_bond_strength{$token};
11519             }
11520             elsif ( $token eq 'ne' or $token eq 'eq' ) {
11521                 $bsr = NOMINAL;
11522             }
11523             my $bsl = $left_bond_strength{$next_nonblank_type};
11524
11525             # set terminal bond strength to the nominal value
11526             # this will cause good preceding breaks to be retained
11527             if ( $i_next_nonblank > $max_index_to_go ) {
11528                 $bsl = NOMINAL;
11529             }
11530
11531             if ( !defined($bsl) ) {
11532
11533                 if (   $is_digraph{$next_nonblank_type}
11534                     || $is_trigraph{$next_nonblank_type} )
11535                 {
11536                     $bsl = WEAK;
11537                 }
11538                 else {
11539                     $bsl = VERY_STRONG;
11540                 }
11541             }
11542
11543             # define right bond strengths of certain keywords
11544             if ( $next_nonblank_type eq 'k'
11545                 && defined( $left_bond_strength{$next_nonblank_token} ) )
11546             {
11547                 $bsl = $left_bond_strength{$next_nonblank_token};
11548             }
11549             elsif ($next_nonblank_token eq 'ne'
11550                 or $next_nonblank_token eq 'eq' )
11551             {
11552                 $bsl = NOMINAL;
11553             }
11554             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
11555                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
11556             }
11557
11558             # Note: it might seem that we would want to keep a NO_BREAK if
11559             # either token has this value.  This didn't work, because in an
11560             # arrow list, it prevents the comma from separating from the
11561             # following bare word (which is probably quoted by its arrow).
11562             # So necessary NO_BREAK's have to be handled as special cases
11563             # in the final section.
11564             $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
11565             my $bond_str_1 = $bond_str;
11566
11567             #---------------------------------------------------------------
11568             # section 2:
11569             # special cases
11570             #---------------------------------------------------------------
11571
11572             # allow long lines before final { in an if statement, as in:
11573             #    if (..........
11574             #      ..........)
11575             #    {
11576             #
11577             # Otherwise, the line before the { tends to be too short.
11578             if ( $type eq ')' ) {
11579                 if ( $next_nonblank_type eq '{' ) {
11580                     $bond_str = VERY_WEAK + 0.03;
11581                 }
11582             }
11583
11584             elsif ( $type eq '(' ) {
11585                 if ( $next_nonblank_type eq '{' ) {
11586                     $bond_str = NOMINAL;
11587                 }
11588             }
11589
11590             # break on something like '} (', but keep this stronger than a ','
11591             # example is in 'howe.pl'
11592             elsif ( $type eq 'R' or $type eq '}' ) {
11593                 if ( $next_nonblank_type eq '(' ) {
11594                     $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
11595                 }
11596             }
11597
11598             #-----------------------------------------------------------------
11599             # adjust bond strength bias
11600             #-----------------------------------------------------------------
11601
11602             elsif ( $type eq 'f' ) {
11603                 $bond_str += $f_bias;
11604                 $f_bias   += $delta_bias;
11605             }
11606
11607           # in long ?: conditionals, bias toward just one set per line (colon.t)
11608             elsif ( $type eq ':' ) {
11609                 if ( !$want_break_before{$type} ) {
11610                     $bond_str   += $colon_bias;
11611                     $colon_bias += $delta_bias;
11612                 }
11613             }
11614
11615             if (   $next_nonblank_type eq ':'
11616                 && $want_break_before{$next_nonblank_type} )
11617             {
11618                 $bond_str   += $colon_bias;
11619                 $colon_bias += $delta_bias;
11620             }
11621
11622             # if leading '.' is used, align all but 'short' quotes;
11623             # the idea is to not place something like "\n" on a single line.
11624             elsif ( $next_nonblank_type eq '.' ) {
11625                 if ( $want_break_before{'.'} ) {
11626                     unless (
11627                         $last_nonblank_type eq '.'
11628                         && (
11629                             length($token) <=
11630                             $rOpts_short_concatenation_item_length )
11631                         && ( $token !~ /^[\)\]\}]$/ )
11632                       )
11633                     {
11634                         $dot_bias += $delta_bias;
11635                     }
11636                     $bond_str += $dot_bias;
11637                 }
11638             }
11639             elsif ($next_nonblank_type eq '&&'
11640                 && $want_break_before{$next_nonblank_type} )
11641             {
11642                 $bond_str += $amp_bias;
11643                 $amp_bias += $delta_bias;
11644             }
11645             elsif ($next_nonblank_type eq '||'
11646                 && $want_break_before{$next_nonblank_type} )
11647             {
11648                 $bond_str += $bar_bias;
11649                 $bar_bias += $delta_bias;
11650             }
11651             elsif ( $next_nonblank_type eq 'k' ) {
11652
11653                 if (   $next_nonblank_token eq 'and'
11654                     && $want_break_before{$next_nonblank_token} )
11655                 {
11656                     $bond_str += $and_bias;
11657                     $and_bias += $delta_bias;
11658                 }
11659                 elsif ($next_nonblank_token eq 'or'
11660                     && $want_break_before{$next_nonblank_token} )
11661                 {
11662                     $bond_str += $or_bias;
11663                     $or_bias  += $delta_bias;
11664                 }
11665
11666                 # FIXME: needs more testing
11667                 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
11668                     $bond_str = $list_str if ( $bond_str > $list_str );
11669                 }
11670             }
11671
11672             if ( $type eq ':'
11673                 && !$want_break_before{$type} )
11674             {
11675                 $bond_str   += $colon_bias;
11676                 $colon_bias += $delta_bias;
11677             }
11678             elsif ( $type eq '&&'
11679                 && !$want_break_before{$type} )
11680             {
11681                 $bond_str += $amp_bias;
11682                 $amp_bias += $delta_bias;
11683             }
11684             elsif ( $type eq '||'
11685                 && !$want_break_before{$type} )
11686             {
11687                 $bond_str += $bar_bias;
11688                 $bar_bias += $delta_bias;
11689             }
11690             elsif ( $type eq 'k' ) {
11691
11692                 if ( $token eq 'and'
11693                     && !$want_break_before{$token} )
11694                 {
11695                     $bond_str += $and_bias;
11696                     $and_bias += $delta_bias;
11697                 }
11698                 elsif ( $token eq 'or'
11699                     && !$want_break_before{$token} )
11700                 {
11701                     $bond_str += $or_bias;
11702                     $or_bias  += $delta_bias;
11703                 }
11704             }
11705
11706             # keep matrix and hash indices together
11707             # but make them a little below STRONG to allow breaking open
11708             # something like {'some-word'}{'some-very-long-word'} at the }{
11709             # (bracebrk.t)
11710             if (   ( $type eq ']' or $type eq 'R' )
11711                 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
11712               )
11713             {
11714                 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
11715             }
11716
11717             if ( $next_nonblank_token =~ /^->/ ) {
11718
11719                 # increase strength to the point where a break in the following
11720                 # will be after the opening paren rather than at the arrow:
11721                 #    $a->$b($c);
11722                 if ( $type eq 'i' ) {
11723                     $bond_str = 1.45 * STRONG;
11724                 }
11725
11726                 elsif ( $type =~ /^[\)\]\}R]$/ ) {
11727                     $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
11728                 }
11729
11730                 # otherwise make strength before an '->' a little over a '+'
11731                 else {
11732                     if ( $bond_str <= NOMINAL ) {
11733                         $bond_str = NOMINAL + 0.01;
11734                     }
11735                 }
11736             }
11737
11738             if ( $token eq ')' && $next_nonblank_token eq '[' ) {
11739                 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
11740             }
11741
11742             # map1.t -- correct for a quirk in perl
11743             if (   $token eq '('
11744                 && $next_nonblank_type eq 'i'
11745                 && $last_nonblank_type eq 'k'
11746                 && $is_sort_map_grep{$last_nonblank_token} )
11747
11748               #     /^(sort|map|grep)$/ )
11749             {
11750                 $bond_str = NO_BREAK;
11751             }
11752
11753             # extrude.t: do not break before paren at:
11754             #    -l pid_filename(
11755             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
11756                 $bond_str = NO_BREAK;
11757             }
11758
11759             # good to break after end of code blocks
11760             if ( $type eq '}' && $block_type ) {
11761
11762                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
11763                 $code_bias += $delta_bias;
11764             }
11765
11766             if ( $type eq 'k' ) {
11767
11768                 # allow certain control keywords to stand out
11769                 if (   $next_nonblank_type eq 'k'
11770                     && $is_last_next_redo_return{$token} )
11771                 {
11772                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
11773                 }
11774
11775 # Don't break after keyword my.  This is a quick fix for a
11776 # rare problem with perl. An example is this line from file
11777 # Container.pm:
11778 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
11779
11780                 if ( $token eq 'my' ) {
11781                     $bond_str = NO_BREAK;
11782                 }
11783
11784             }
11785
11786             # good to break before 'if', 'unless', etc
11787             if ( $is_if_brace_follower{$next_nonblank_token} ) {
11788                 $bond_str = VERY_WEAK;
11789             }
11790
11791             if ( $next_nonblank_type eq 'k' ) {
11792
11793                 # keywords like 'unless', 'if', etc, within statements
11794                 # make good breaks
11795                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
11796                     $bond_str = VERY_WEAK / 1.05;
11797                 }
11798             }
11799
11800             # try not to break before a comma-arrow
11801             elsif ( $next_nonblank_type eq '=>' ) {
11802                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
11803             }
11804
11805          #----------------------------------------------------------------------
11806          # only set NO_BREAK's from here on
11807          #----------------------------------------------------------------------
11808             if ( $type eq 'C' or $type eq 'U' ) {
11809
11810                 # use strict requires that bare word and => not be separated
11811                 if ( $next_nonblank_type eq '=>' ) {
11812                     $bond_str = NO_BREAK;
11813                 }
11814
11815             }
11816
11817            # use strict requires that bare word within braces not start new line
11818             elsif ( $type eq 'L' ) {
11819
11820                 if ( $next_nonblank_type eq 'w' ) {
11821                     $bond_str = NO_BREAK;
11822                 }
11823             }
11824
11825             # in older version of perl, use strict can cause problems with
11826             # breaks before bare words following opening parens.  For example,
11827             # this will fail under older versions if a break is made between
11828             # '(' and 'MAIL':
11829             #  use strict;
11830             #  open( MAIL, "a long filename or command");
11831             #  close MAIL;
11832             elsif ( $type eq '{' ) {
11833
11834                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
11835
11836                     # but it's fine to break if the word is followed by a '=>'
11837                     # or if it is obviously a sub call
11838                     my $i_next_next_nonblank = $i_next_nonblank + 1;
11839                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
11840                     if (   $next_next_type eq 'b'
11841                         && $i_next_nonblank < $max_index_to_go )
11842                     {
11843                         $i_next_next_nonblank++;
11844                         $next_next_type = $types_to_go[$i_next_next_nonblank];
11845                     }
11846
11847                     ##if ( $next_next_type ne '=>' ) {
11848                     # these are ok: '->xxx', '=>', '('
11849
11850                   # We'll check for an old breakpoint and keep a leading
11851                   # bareword if it was that way in the input file.  Presumably
11852                   # it was ok that way.  For example, the following would remain
11853                   # unchanged:
11854                   #
11855                   # @months = (
11856                   #   January,   February, March,    April,
11857                   #   May,       June,     July,     August,
11858                   #   September, October,  November, December,
11859                   # );
11860                   #
11861                   # This should be sufficient:
11862                     if ( !$old_breakpoint_to_go[$i]
11863                         && ( $next_next_type eq ',' || $next_next_type eq '}' )
11864                       )
11865                     {
11866                         $bond_str = NO_BREAK;
11867                     }
11868                 }
11869             }
11870
11871             elsif ( $type eq 'w' ) {
11872
11873                 if ( $next_nonblank_type eq 'R' ) {
11874                     $bond_str = NO_BREAK;
11875                 }
11876
11877                 # use strict requires that bare word and => not be separated
11878                 if ( $next_nonblank_type eq '=>' ) {
11879                     $bond_str = NO_BREAK;
11880                 }
11881             }
11882
11883           # in fact, use strict hates bare words on any new line.  For example,
11884           # a break before the underscore here provokes the wrath of use strict:
11885           #    if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
11886             elsif ( $type eq 'F' ) {
11887                 $bond_str = NO_BREAK;
11888             }
11889
11890             # use strict does not allow separating type info from trailing { }
11891             # testfile is readmail.pl
11892             elsif ( $type eq 't' or $type eq 'i' ) {
11893
11894                 if ( $next_nonblank_type eq 'L' ) {
11895                     $bond_str = NO_BREAK;
11896                 }
11897             }
11898
11899         # Do not break between a possible filehandle and a ? or /
11900         # and do not introduce a break after it if there is no blank (extrude.t)
11901             elsif ( $type eq 'Z' ) {
11902
11903                 # dont break..
11904                 if (
11905
11906                     # if there is no blank and we do not want one. Examples:
11907                     #    print $x++    # do not break after $x
11908                     #    print HTML"HELLO"   # break ok after HTML
11909                     (
11910                            $next_type ne 'b'
11911                         && defined( $want_left_space{$next_type} )
11912                         && $want_left_space{$next_type} == WS_NO
11913                     )
11914
11915                     # or we might be followed by the start of a quote
11916                     || $next_nonblank_type =~ /^[\/\?]$/
11917                   )
11918                 {
11919                     $bond_str = NO_BREAK;
11920                 }
11921             }
11922
11923             # Do not break before a possible file handle
11924             if ( $next_nonblank_type eq 'Z' ) {
11925                 $bond_str = NO_BREAK;
11926             }
11927
11928             # As a defensive measure, do not break between a '(' and a
11929             # filehandle.  In some cases, this can cause an error.  For
11930             # example, the following program works:
11931             #    my $msg="hi!\n";
11932             #    print
11933             #    ( STDOUT
11934             #    $msg
11935             #    );
11936             #
11937             # But this program fails:
11938             #    my $msg="hi!\n";
11939             #    print
11940             #    (
11941             #    STDOUT
11942             #    $msg
11943             #    );
11944             #
11945             # This is normally only a problem with the 'extrude' option
11946             if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
11947                 $bond_str = NO_BREAK;
11948             }
11949
11950             # patch to put cuddled elses back together when on multiple
11951             # lines, as in: } \n else \n { \n
11952             if ($rOpts_cuddled_else) {
11953
11954                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
11955                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
11956                 {
11957                     $bond_str = NO_BREAK;
11958                 }
11959             }
11960
11961             # keep '}' together with ';'
11962             if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
11963                 $bond_str = NO_BREAK;
11964             }
11965
11966             # never break between sub name and opening paren
11967             if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
11968                 $bond_str = NO_BREAK;
11969             }
11970
11971             #---------------------------------------------------------------
11972             # section 3:
11973             # now take nesting depth into account
11974             #---------------------------------------------------------------
11975             # final strength incorporates the bond strength and nesting depth
11976             my $strength;
11977
11978             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
11979                 if ( $total_nesting_depth > 0 ) {
11980                     $strength = $bond_str + $total_nesting_depth;
11981                 }
11982                 else {
11983                     $strength = $bond_str;
11984                 }
11985             }
11986             else {
11987                 $strength = NO_BREAK;
11988             }
11989
11990             # always break after side comment
11991             if ( $type eq '#' ) { $strength = 0 }
11992
11993             $bond_strength_to_go[$i] = $strength;
11994
11995             FORMATTER_DEBUG_FLAG_BOND && do {
11996                 my $str = substr( $token, 0, 15 );
11997                 $str .= ' ' x ( 16 - length($str) );
11998                 print
11999 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
12000             };
12001         }
12002     }
12003
12004 }
12005
12006 sub pad_array_to_go {
12007
12008     # to simplify coding in scan_list and set_bond_strengths, it helps
12009     # to create some extra blank tokens at the end of the arrays
12010     $tokens_to_go[ $max_index_to_go + 1 ]        = '';
12011     $tokens_to_go[ $max_index_to_go + 2 ]        = '';
12012     $types_to_go[ $max_index_to_go + 1 ]         = 'b';
12013     $types_to_go[ $max_index_to_go + 2 ]         = 'b';
12014     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12015       $nesting_depth_to_go[$max_index_to_go];
12016
12017     #    /^[R\}\)\]]$/
12018     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12019         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12020
12021             # shouldn't happen:
12022             unless ( get_saw_brace_error() ) {
12023                 warning(
12024 "Program bug in scan_list: hit nesting error which should have been caught\n"
12025                 );
12026                 report_definite_bug();
12027             }
12028         }
12029         else {
12030             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12031         }
12032     }
12033
12034     #       /^[L\{\(\[]$/
12035     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12036         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12037     }
12038 }
12039
12040 {    # begin scan_list
12041
12042     my (
12043         $block_type,                $current_depth,
12044         $depth,                     $i,
12045         $i_last_nonblank_token,     $last_colon_sequence_number,
12046         $last_nonblank_token,       $last_nonblank_type,
12047         $last_old_breakpoint_count, $minimum_depth,
12048         $next_nonblank_block_type,  $next_nonblank_token,
12049         $next_nonblank_type,        $old_breakpoint_count,
12050         $starting_breakpoint_count, $starting_depth,
12051         $token,                     $type,
12052         $type_sequence,
12053     );
12054
12055     my (
12056         @breakpoint_stack,              @breakpoint_undo_stack,
12057         @comma_index,                   @container_type,
12058         @identifier_count_stack,        @index_before_arrow,
12059         @interrupted_list,              @item_count_stack,
12060         @last_comma_index,              @last_dot_index,
12061         @last_nonblank_type,            @old_breakpoint_count_stack,
12062         @opening_structure_index_stack, @rfor_semicolon_list,
12063         @has_old_logical_breakpoints,   @rand_or_list,
12064         @i_equals,
12065     );
12066
12067     # routine to define essential variables when we go 'up' to
12068     # a new depth
12069     sub check_for_new_minimum_depth {
12070         my $depth = shift;
12071         if ( $depth < $minimum_depth ) {
12072
12073             $minimum_depth = $depth;
12074
12075             # these arrays need not retain values between calls
12076             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
12077             $container_type[$depth]                = "";
12078             $identifier_count_stack[$depth]        = 0;
12079             $index_before_arrow[$depth]            = -1;
12080             $interrupted_list[$depth]              = 1;
12081             $item_count_stack[$depth]              = 0;
12082             $last_nonblank_type[$depth]            = "";
12083             $opening_structure_index_stack[$depth] = -1;
12084
12085             $breakpoint_undo_stack[$depth]       = undef;
12086             $comma_index[$depth]                 = undef;
12087             $last_comma_index[$depth]            = undef;
12088             $last_dot_index[$depth]              = undef;
12089             $old_breakpoint_count_stack[$depth]  = undef;
12090             $has_old_logical_breakpoints[$depth] = 0;
12091             $rand_or_list[$depth]                = [];
12092             $rfor_semicolon_list[$depth]         = [];
12093             $i_equals[$depth]                    = -1;
12094
12095             # these arrays must retain values between calls
12096             if ( !defined( $has_broken_sublist[$depth] ) ) {
12097                 $dont_align[$depth]         = 0;
12098                 $has_broken_sublist[$depth] = 0;
12099                 $want_comma_break[$depth]   = 0;
12100             }
12101         }
12102     }
12103
12104     # routine to decide which commas to break at within a container;
12105     # returns:
12106     #   $bp_count = number of comma breakpoints set
12107     #   $do_not_break_apart = a flag indicating if container need not
12108     #     be broken open
12109     sub set_comma_breakpoints {
12110
12111         my $dd                 = shift;
12112         my $bp_count           = 0;
12113         my $do_not_break_apart = 0;
12114         if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
12115
12116             my $fbc = $forced_breakpoint_count;
12117
12118             # always open comma lists not preceded by keywords,
12119             # barewords, identifiers (that is, anything that doesn't
12120             # look like a function call)
12121             my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
12122
12123             set_comma_breakpoints_do(
12124                 $dd,
12125                 $opening_structure_index_stack[$dd],
12126                 $i,
12127                 $item_count_stack[$dd],
12128                 $identifier_count_stack[$dd],
12129                 $comma_index[$dd],
12130                 $next_nonblank_type,
12131                 $container_type[$dd],
12132                 $interrupted_list[$dd],
12133                 \$do_not_break_apart,
12134                 $must_break_open,
12135             );
12136             $bp_count = $forced_breakpoint_count - $fbc;
12137             $do_not_break_apart = 0 if $must_break_open;
12138         }
12139         return ( $bp_count, $do_not_break_apart );
12140     }
12141
12142     my %is_logical_container;
12143
12144     BEGIN {
12145         @_ = qw# if elsif unless while and or not && | || ? : ! #;
12146         @is_logical_container{@_} = (1) x scalar(@_);
12147     }
12148
12149     sub set_for_semicolon_breakpoints {
12150         my $dd = shift;
12151         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
12152             set_forced_breakpoint($_);
12153         }
12154     }
12155
12156     sub set_logical_breakpoints {
12157         my $dd = shift;
12158         if (
12159                $item_count_stack[$dd] == 0
12160             && $is_logical_container{ $container_type[$dd] }
12161
12162             # TESTING:
12163             || $has_old_logical_breakpoints[$dd]
12164           )
12165         {
12166
12167             # Look for breaks in this order:
12168             # 0   1    2   3
12169             # or  and  ||  &&
12170             foreach my $i ( 0 .. 3 ) {
12171                 if ( $rand_or_list[$dd][$i] ) {
12172                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
12173                         set_forced_breakpoint($_);
12174                     }
12175
12176                     # break at any 'if' and 'unless' too
12177                     foreach ( @{ $rand_or_list[$dd][4] } ) {
12178                         set_forced_breakpoint($_);
12179                     }
12180                     $rand_or_list[$dd] = [];
12181                     last;
12182                 }
12183             }
12184         }
12185     }
12186
12187     sub is_unbreakable_container {
12188
12189         # never break a container of one of these types
12190         # because bad things can happen (map1.t)
12191         my $dd = shift;
12192         $is_sort_map_grep{ $container_type[$dd] };
12193     }
12194
12195     sub scan_list {
12196
12197         # This routine is responsible for setting line breaks for all lists,
12198         # so that hierarchical structure can be displayed and so that list
12199         # items can be vertically aligned.  The output of this routine is
12200         # stored in the array @forced_breakpoint_to_go, which is used to set
12201         # final breakpoints.
12202
12203         $starting_depth = $nesting_depth_to_go[0];
12204
12205         $block_type                 = ' ';
12206         $current_depth              = $starting_depth;
12207         $i                          = -1;
12208         $last_colon_sequence_number = -1;
12209         $last_nonblank_token        = ';';
12210         $last_nonblank_type         = ';';
12211         $last_old_breakpoint_count  = 0;
12212         $minimum_depth = $current_depth + 1;    # forces update in check below
12213         $old_breakpoint_count      = 0;
12214         $starting_breakpoint_count = $forced_breakpoint_count;
12215         $token                     = ';';
12216         $type                      = ';';
12217         $type_sequence             = '';
12218
12219         check_for_new_minimum_depth($current_depth);
12220
12221         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
12222         my $want_previous_breakpoint = -1;
12223
12224         my $saw_good_breakpoint;
12225         my $i_line_end   = -1;
12226         my $i_line_start = -1;
12227
12228         # loop over all tokens in this batch
12229         while ( ++$i <= $max_index_to_go ) {
12230             if ( $type ne 'b' ) {
12231                 $i_last_nonblank_token = $i - 1;
12232                 $last_nonblank_type    = $type;
12233                 $last_nonblank_token   = $token;
12234             }
12235             $type          = $types_to_go[$i];
12236             $block_type    = $block_type_to_go[$i];
12237             $token         = $tokens_to_go[$i];
12238             $type_sequence = $type_sequence_to_go[$i];
12239             my $next_type       = $types_to_go[ $i + 1 ];
12240             my $next_token      = $tokens_to_go[ $i + 1 ];
12241             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12242             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
12243             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
12244             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
12245
12246             # set break if flag was set
12247             if ( $want_previous_breakpoint >= 0 ) {
12248                 set_forced_breakpoint($want_previous_breakpoint);
12249                 $want_previous_breakpoint = -1;
12250             }
12251
12252             $last_old_breakpoint_count = $old_breakpoint_count;
12253             if ( $old_breakpoint_to_go[$i] ) {
12254                 $i_line_end   = $i;
12255                 $i_line_start = $i_next_nonblank;
12256
12257                 $old_breakpoint_count++;
12258
12259                 # Break before certain keywords if user broke there and
12260                 # this is a 'safe' break point. The idea is to retain
12261                 # any preferred breaks for sequential list operations,
12262                 # like a schwartzian transform.
12263                 if ($rOpts_break_at_old_keyword_breakpoints) {
12264                     if (
12265                            $next_nonblank_type eq 'k'
12266                         && $is_keyword_returning_list{$next_nonblank_token}
12267                         && (   $type =~ /^[=\)\]\}Riw]$/
12268                             || $type eq 'k'
12269                             && $is_keyword_returning_list{$token} )
12270                       )
12271                     {
12272
12273                         # we actually have to set this break next time through
12274                         # the loop because if we are at a closing token (such
12275                         # as '}') which forms a one-line block, this break might
12276                         # get undone.
12277                         $want_previous_breakpoint = $i;
12278                     }
12279                 }
12280             }
12281             next if ( $type eq 'b' );
12282             $depth = $nesting_depth_to_go[ $i + 1 ];
12283
12284             # safety check - be sure we always break after a comment
12285             # Shouldn't happen .. an error here probably means that the
12286             # nobreak flag did not get turned off correctly during
12287             # formatting.
12288             if ( $type eq '#' ) {
12289                 if ( $i != $max_index_to_go ) {
12290                     warning(
12291 "Non-fatal program bug: backup logic needed to break after a comment\n"
12292                     );
12293                     report_definite_bug();
12294                     $nobreak_to_go[$i] = 0;
12295                     set_forced_breakpoint($i);
12296                 }
12297             }
12298
12299             # Force breakpoints at certain tokens in long lines.
12300             # Note that such breakpoints will be undone later if these tokens
12301             # are fully contained within parens on a line.
12302             if (
12303                    $type eq 'k'
12304                 && $i > 0
12305                 && $token =~ /^(if|unless)$/
12306                 && (
12307                     $is_long_line
12308
12309                     # or container is broken (by side-comment, etc)
12310                     || (   $next_nonblank_token eq '('
12311                         && $mate_index_to_go[$i_next_nonblank] < $i )
12312                 )
12313               )
12314             {
12315                 set_forced_breakpoint( $i - 1 );
12316             }
12317
12318             # remember locations of '||'  and '&&' for possible breaks if we
12319             # decide this is a long logical expression.
12320             if ( $type eq '||' ) {
12321                 push @{ $rand_or_list[$depth][2] }, $i;
12322                 ++$has_old_logical_breakpoints[$depth]
12323                   if ( ( $i == $i_line_start || $i == $i_line_end )
12324                     && $rOpts_break_at_old_logical_breakpoints );
12325             }
12326             elsif ( $type eq '&&' ) {
12327                 push @{ $rand_or_list[$depth][3] }, $i;
12328                 ++$has_old_logical_breakpoints[$depth]
12329                   if ( ( $i == $i_line_start || $i == $i_line_end )
12330                     && $rOpts_break_at_old_logical_breakpoints );
12331             }
12332             elsif ( $type eq 'f' ) {
12333                 push @{ $rfor_semicolon_list[$depth] }, $i;
12334             }
12335             elsif ( $type eq 'k' ) {
12336                 if ( $token eq 'and' ) {
12337                     push @{ $rand_or_list[$depth][1] }, $i;
12338                     ++$has_old_logical_breakpoints[$depth]
12339                       if ( ( $i == $i_line_start || $i == $i_line_end )
12340                         && $rOpts_break_at_old_logical_breakpoints );
12341                 }
12342
12343                 # break immediately at 'or's which are probably not in a logical
12344                 # block -- but we will break in logical breaks below so that
12345                 # they do not add to the forced_breakpoint_count
12346                 elsif ( $token eq 'or' ) {
12347                     push @{ $rand_or_list[$depth][0] }, $i;
12348                     ++$has_old_logical_breakpoints[$depth]
12349                       if ( ( $i == $i_line_start || $i == $i_line_end )
12350                         && $rOpts_break_at_old_logical_breakpoints );
12351                     if ( $is_logical_container{ $container_type[$depth] } ) {
12352                     }
12353                     else {
12354                         if ($is_long_line) { set_forced_breakpoint($i) }
12355                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
12356                             && $rOpts_break_at_old_logical_breakpoints )
12357                         {
12358                             $saw_good_breakpoint = 1;
12359                         }
12360                     }
12361                 }
12362                 elsif ( $token eq 'if' || $token eq 'unless' ) {
12363                     push @{ $rand_or_list[$depth][4] }, $i;
12364                     if ( ( $i == $i_line_start || $i == $i_line_end )
12365                         && $rOpts_break_at_old_logical_breakpoints )
12366                     {
12367                         set_forced_breakpoint($i);
12368                     }
12369                 }
12370             }
12371             elsif ( $is_assignment{$type} ) {
12372                 $i_equals[$depth] = $i;
12373             }
12374
12375             if ($type_sequence) {
12376
12377                 # handle any postponed closing breakpoints
12378                 if ( $token =~ /^[\)\]\}\:]$/ ) {
12379                     if ( $type eq ':' ) {
12380                         $last_colon_sequence_number = $type_sequence;
12381
12382                         # TESTING: retain break at a ':' line break
12383                         if ( ( $i == $i_line_start || $i == $i_line_end )
12384                             && $rOpts_break_at_old_trinary_breakpoints )
12385                         {
12386
12387                             # TESTING:
12388                             set_forced_breakpoint($i);
12389
12390                             # break at previous '='
12391                             if ( $i_equals[$depth] > 0 ) {
12392                                 set_forced_breakpoint( $i_equals[$depth] );
12393                                 $i_equals[$depth] = -1;
12394                             }
12395                         }
12396                     }
12397                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
12398                         my $inc = ( $type eq ':' ) ? 0 : 1;
12399                         set_forced_breakpoint( $i - $inc );
12400                         delete $postponed_breakpoint{$type_sequence};
12401                     }
12402                 }
12403
12404                 # set breaks at ?/: if they will get separated (and are
12405                 # not a ?/: chain), or if the '?' is at the end of the
12406                 # line
12407                 elsif ( $token eq '?' ) {
12408                     my $i_colon = $mate_index_to_go[$i];
12409                     if (
12410                         $i_colon <= 0  # the ':' is not in this batch
12411                         || $i == 0     # this '?' is the first token of the line
12412                         || $i ==
12413                         $max_index_to_go    # or this '?' is the last token
12414                       )
12415                     {
12416
12417                         # don't break at a '?' if preceded by ':' on
12418                         # this line of previous ?/: pair on this line.
12419                         # This is an attempt to preserve a chain of ?/:
12420                         # expressions (elsif2.t).  And don't break if
12421                         # this has a side comment.
12422                         set_forced_breakpoint($i)
12423                           unless (
12424                             $type_sequence == (
12425                                 $last_colon_sequence_number +
12426                                   TYPE_SEQUENCE_INCREMENT
12427                             )
12428                             || $tokens_to_go[$max_index_to_go] eq '#'
12429                           );
12430                         set_closing_breakpoint($i);
12431                     }
12432                 }
12433             }
12434
12435 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
12436
12437             #------------------------------------------------------------
12438             # Handle Increasing Depth..
12439             #
12440             # prepare for a new list when depth increases
12441             # token $i is a '(','{', or '['
12442             #------------------------------------------------------------
12443             if ( $depth > $current_depth ) {
12444
12445                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
12446                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
12447                 $has_broken_sublist[$depth]     = 0;
12448                 $identifier_count_stack[$depth] = 0;
12449                 $index_before_arrow[$depth]     = -1;
12450                 $interrupted_list[$depth]       = 0;
12451                 $item_count_stack[$depth]       = 0;
12452                 $last_comma_index[$depth]       = undef;
12453                 $last_dot_index[$depth]         = undef;
12454                 $last_nonblank_type[$depth]     = $last_nonblank_type;
12455                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
12456                 $opening_structure_index_stack[$depth] = $i;
12457                 $rand_or_list[$depth]                  = [];
12458                 $rfor_semicolon_list[$depth]           = [];
12459                 $i_equals[$depth]                      = -1;
12460                 $want_comma_break[$depth]              = 0;
12461                 $container_type[$depth]                =
12462                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
12463                   ? $last_nonblank_token
12464                   : "";
12465                 $has_old_logical_breakpoints[$depth] = 0;
12466
12467                 # if line ends here then signal closing token to break
12468                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
12469                 {
12470                     set_closing_breakpoint($i);
12471                 }
12472
12473                 # Not all lists of values should be vertically aligned..
12474                 $dont_align[$depth] =
12475
12476                   # code BLOCKS are handled at a higher level
12477                   ( $block_type ne "" )
12478
12479                   # certain paren lists
12480                   || ( $type eq '(' ) && (
12481
12482                     # it does not usually look good to align a list of
12483                     # identifiers in a parameter list, as in:
12484                     #    my($var1, $var2, ...)
12485                     # (This test should probably be refined, for now I'm just
12486                     # testing for any keyword)
12487                     ( $last_nonblank_type eq 'k' )
12488
12489                     # a trailing '(' usually indicates a non-list
12490                     || ( $next_nonblank_type eq '(' )
12491                   );
12492
12493                 # patch to outdent opening brace of long if/for/..
12494                 # statements (like this one).  See similar coding in
12495                 # set_continuation breaks.  We have also catch it here for
12496                 # short line fragments which otherwise will not go through
12497                 # set_continuation_breaks.
12498                 if (
12499                     $block_type
12500
12501                     # if we have the ')' but not its '(' in this batch..
12502                     && ( $last_nonblank_token eq ')' )
12503                     && $mate_index_to_go[$i_last_nonblank_token] < 0
12504
12505                     # and user wants brace to left
12506                     && !$rOpts->{'opening-brace-always-on-right'}
12507
12508                     && ( $type  eq '{' )    # should be true
12509                     && ( $token eq '{' )    # should be true
12510                   )
12511                 {
12512                     set_forced_breakpoint( $i - 1 );
12513                 }
12514             }
12515
12516             #------------------------------------------------------------
12517             # Handle Decreasing Depth..
12518             #
12519             # finish off any old list when depth decreases
12520             # token $i is a ')','}', or ']'
12521             #------------------------------------------------------------
12522             elsif ( $depth < $current_depth ) {
12523
12524                 check_for_new_minimum_depth($depth);
12525
12526                 # force all outer logical containers to break after we see on
12527                 # old breakpoint
12528                 $has_old_logical_breakpoints[$depth] ||=
12529                   $has_old_logical_breakpoints[$current_depth];
12530
12531                 # Patch to break between ') {' if the paren list is broken.
12532                 # There is similar logic in set_continuation_breaks for
12533                 # non-broken lists.
12534                 if (   $token eq ')'
12535                     && $next_nonblank_block_type
12536                     && $interrupted_list[$current_depth]
12537                     && $next_nonblank_type eq '{'
12538                     && !$rOpts->{'opening-brace-always-on-right'} )
12539                 {
12540                     set_forced_breakpoint($i);
12541                 }
12542
12543 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
12544
12545                 # set breaks at commas if necessary
12546                 my ( $bp_count, $do_not_break_apart ) =
12547                   set_comma_breakpoints($current_depth);
12548
12549                 my $i_opening = $opening_structure_index_stack[$current_depth];
12550                 my $saw_opening_structure = ( $i_opening >= 0 );
12551
12552                 # this term is long if we had to break at interior commas..
12553                 my $is_long_term = $bp_count > 0;
12554
12555                 # ..or if the length between opening and closing parens exceeds
12556                 # allowed line length
12557                 if ( !$is_long_term && $saw_opening_structure ) {
12558                     my $i_opening_minus = find_token_starting_list($i_opening);
12559
12560                     # Note: we have to allow for one extra space after a
12561                     # closing token so that we do not strand a comma or
12562                     # semicolon, hence the '>=' here (oneline.t)
12563                     $is_long_term =
12564                       excess_line_length( $i_opening_minus, $i ) >= 0;
12565                 }
12566
12567                 # We've set breaks after all comma-arrows.  Now we have to
12568                 # undo them if this can be a one-line block
12569                 # (the only breakpoints set will be due to comma-arrows)
12570                 if (
12571
12572                     # user doesn't require breaking after all comma-arrows
12573                     ( $rOpts_comma_arrow_breakpoints != 0 )
12574
12575                     # and if the opening structure is in this batch
12576                     && $saw_opening_structure
12577
12578                     # and either on the same old line
12579                     && (
12580                         $old_breakpoint_count_stack[$current_depth] ==
12581                         $last_old_breakpoint_count
12582
12583                         # or user wants to form long blocks with arrows
12584                         || $rOpts_comma_arrow_breakpoints == 2
12585                     )
12586
12587                   # and we made some breakpoints between the opening and closing
12588                     && ( $breakpoint_undo_stack[$current_depth] <
12589                         $forced_breakpoint_undo_count )
12590
12591                     # and this block is short enough to fit on one line
12592                     # Note: use < because need 1 more space for possible comma
12593                     && !$is_long_term
12594
12595                   )
12596                 {
12597                     undo_forced_breakpoint_stack(
12598                         $breakpoint_undo_stack[$current_depth] );
12599                 }
12600
12601                 # now see if we have any comma breakpoints left
12602                 my $has_comma_breakpoints =
12603                   ( $breakpoint_stack[$current_depth] !=
12604                       $forced_breakpoint_count );
12605
12606                 # update broken-sublist flag of the outer container
12607                      $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
12608                   || $has_broken_sublist[$current_depth]
12609                   || $is_long_term
12610                   || $has_comma_breakpoints;
12611
12612 # Having come to the closing ')', '}', or ']', now we have to decide if we
12613 # should 'open up' the structure by placing breaks at the opening and
12614 # closing containers.  This is a tricky decision.  Here are some of the
12615 # basic considerations:
12616 #
12617 # -If this is a BLOCK container, then any breakpoints will have already
12618 # been set (and according to user preferences), so we need do nothing here.
12619 #
12620 # -If we have a comma-separated list for which we can align the list items,
12621 # then we need to do so because otherwise the vertical aligner cannot
12622 # currently do the alignment.
12623 #
12624 # -If this container does itself contain a container which has been broken
12625 # open, then it should be broken open to properly show the structure.
12626 #
12627 # -If there is nothing to align, and no other reason to break apart,
12628 # then do not do it.
12629 #
12630 # We will not break open the parens of a long but 'simple' logical expression.
12631 # For example:
12632 #
12633 # This is an example of a simple logical expression and its formatting:
12634 #
12635 #     if ( $bigwasteofspace1 && $bigwasteofspace2
12636 #         || $bigwasteofspace3 && $bigwasteofspace4 )
12637 #
12638 # Most people would prefer this than the 'spacey' version:
12639 #
12640 #     if (
12641 #         $bigwasteofspace1 && $bigwasteofspace2
12642 #         || $bigwasteofspace3 && $bigwasteofspace4
12643 #     )
12644 #
12645 # To illustrate the rules for breaking logical expressions, consider:
12646 #
12647 #             FULLY DENSE:
12648 #             if ( $opt_excl
12649 #                 and ( exists $ids_excl_uc{$id_uc}
12650 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
12651 #
12652 # This is on the verge of being difficult to read.  The current default is to
12653 # open it up like this:
12654 #
12655 #             DEFAULT:
12656 #             if (
12657 #                 $opt_excl
12658 #                 and ( exists $ids_excl_uc{$id_uc}
12659 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
12660 #               )
12661 #
12662 # This is a compromise which tries to avoid being too dense and to spacey.
12663 # A more spaced version would be:
12664 #
12665 #             SPACEY:
12666 #             if (
12667 #                 $opt_excl
12668 #                 and (
12669 #                     exists $ids_excl_uc{$id_uc}
12670 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
12671 #                 )
12672 #               )
12673 #
12674 # Some people might prefer the spacey version -- an option could be added.  The
12675 # innermost expression contains a long block '( exists $ids_...  ')'.
12676 #
12677 # Here is how the logic goes: We will force a break at the 'or' that the
12678 # innermost expression contains, but we will not break apart its opening and
12679 # closing containers because (1) it contains no multi-line sub-containers itself,
12680 # and (2) there is no alignment to be gained by breaking it open like this
12681 #
12682 #             and (
12683 #                 exists $ids_excl_uc{$id_uc}
12684 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
12685 #             )
12686 #
12687 # (although this looks perfectly ok and might be good for long expressions).  The
12688 # outer 'if' container, though, contains a broken sub-container, so it will be
12689 # broken open to avoid too much density.  Also, since it contains no 'or's, there
12690 # will be a forced break at its 'and'.
12691
12692                 # set some flags telling something about this container..
12693                 my $is_simple_logical_expression = 0;
12694                 if (   $item_count_stack[$current_depth] == 0
12695                     && $saw_opening_structure
12696                     && $tokens_to_go[$i_opening] eq '('
12697                     && $is_logical_container{ $container_type[$current_depth] }
12698                   )
12699                 {
12700
12701                     # This seems to be a simple logical expression with
12702                     # no existing breakpoints.  Set a flag to prevent
12703                     # opening it up.
12704                     if ( !$has_comma_breakpoints ) {
12705                         $is_simple_logical_expression = 1;
12706                     }
12707
12708                     # This seems to be a simple logical expression with
12709                     # breakpoints (broken sublists, for example).  Break
12710                     # at all 'or's and '||'s.
12711                     else {
12712                         set_logical_breakpoints($current_depth);
12713                     }
12714                 }
12715
12716                 if ( $is_long_term
12717                     && @{ $rfor_semicolon_list[$current_depth] } )
12718                 {
12719                     set_for_semicolon_breakpoints($current_depth);
12720
12721                     # open up a long 'for' or 'foreach' container to allow
12722                     # leading term alignment unless -lp is used.
12723                     $has_comma_breakpoints = 1
12724                       unless $rOpts_line_up_parentheses;
12725                 }
12726
12727                 if (
12728
12729                     # breaks for code BLOCKS are handled at a higher level
12730                     !$block_type
12731
12732                     # we do not need to break at the top level of an 'if'
12733                     # type expression
12734                     && !$is_simple_logical_expression
12735
12736                     ## modification to keep ': (' containers vertically tight;
12737                     ## but probably better to let user set -vt=1 to avoid
12738                     ## inconsistency with other paren types
12739                     ## && ($container_type[$current_depth] ne ':')
12740
12741                     # otherwise, we require one of these reasons for breaking:
12742                     && (
12743
12744                         # - this term has forced line breaks
12745                         $has_comma_breakpoints
12746
12747                        # - the opening container is separated from this batch
12748                        #   for some reason (comment, blank line, code block)
12749                        # - this is a non-paren container spanning multiple lines
12750                         || !$saw_opening_structure
12751
12752                         # - this is a long block contained in another breakable
12753                         #   container
12754                         || (   $is_long_term
12755                             && $container_environment_to_go[$i_opening] ne
12756                             'BLOCK' )
12757                     )
12758                   )
12759                 {
12760
12761                     # For -lp option, we must put a breakpoint before
12762                     # the token which has been identified as starting
12763                     # this indentation level.  This is necessary for
12764                     # proper alignment.
12765                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
12766                     {
12767                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
12768                         if ( defined($item) ) {
12769                             my $i_start_2 = $item->get_STARTING_INDEX();
12770                             if (
12771                                 defined($i_start_2)
12772
12773                                 # we are breaking after an opening brace, paren,
12774                                 # so don't break before it too
12775                                 && $i_start_2 ne $i_opening
12776                               )
12777                             {
12778
12779                                 # Only break for breakpoints at the same
12780                                 # indentation level as the opening paren
12781                                 my $test1 = $nesting_depth_to_go[$i_opening];
12782                                 my $test2 = $nesting_depth_to_go[$i_start_2];
12783                                 if ( $test2 == $test1 ) {
12784                                     set_forced_breakpoint( $i_start_2 - 1 );
12785                                 }
12786                             }
12787                         }
12788                     }
12789
12790                     # break after opening structure.
12791                     # note: break before closing structure will be automatic
12792                     if ( $minimum_depth <= $current_depth ) {
12793
12794                         set_forced_breakpoint($i_opening)
12795                           unless ( $do_not_break_apart
12796                             || is_unbreakable_container($current_depth) );
12797
12798                         # break at '.' of lower depth level before opening token
12799                         if ( $last_dot_index[$depth] ) {
12800                             set_forced_breakpoint( $last_dot_index[$depth] );
12801                         }
12802
12803                         # break before opening structure if preeced by another
12804                         # closing structure and a comma.  This is normally
12805                         # done by the previous closing brace, but not
12806                         # if it was a one-line block.
12807                         if ( $i_opening > 2 ) {
12808                             my $i_prev =
12809                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
12810                               ? $i_opening - 2
12811                               : $i_opening - 1;
12812
12813                             if (   $types_to_go[$i_prev] eq ','
12814                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
12815                             {
12816                                 set_forced_breakpoint($i_prev);
12817                             }
12818
12819                             # also break before something like ':('  or '?('
12820                             # if appropriate.
12821                             elsif (
12822                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
12823                             {
12824                                 my $token_prev = $tokens_to_go[$i_prev];
12825                                 if ( $want_break_before{$token_prev} ) {
12826                                     set_forced_breakpoint($i_prev);
12827                                 }
12828                             }
12829                         }
12830                     }
12831
12832                     # break after comma following closing structure
12833                     if ( $next_type eq ',' ) {
12834                         set_forced_breakpoint( $i + 1 );
12835                     }
12836
12837                     # break before an '=' following closing structure
12838                     if (
12839                         $is_assignment{$next_nonblank_type}
12840                         && ( $breakpoint_stack[$current_depth] !=
12841                             $forced_breakpoint_count )
12842                       )
12843                     {
12844                         set_forced_breakpoint($i);
12845                     }
12846
12847                     # break at any comma before the opening structure Added
12848                     # for -lp, but seems to be good in general.  It isn't
12849                     # obvious how far back to look; the '5' below seems to
12850                     # work well and will catch the comma in something like
12851                     #  push @list, myfunc( $param, $param, ..
12852
12853                     my $icomma = $last_comma_index[$depth];
12854                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
12855                         unless ( $forced_breakpoint_to_go[$icomma] ) {
12856                             set_forced_breakpoint($icomma);
12857                         }
12858                     }
12859                 }    # end logic to open up a container
12860
12861                 # Break open a logical container open if it was already open
12862                 elsif ($is_simple_logical_expression
12863                     && $has_old_logical_breakpoints[$current_depth] )
12864                 {
12865                     set_logical_breakpoints($current_depth);
12866                 }
12867
12868                 # Handle long container which does not get opened up
12869                 elsif ($is_long_term) {
12870
12871                     # must set fake breakpoint to alert outer containers that
12872                     # they are complex
12873                     set_fake_breakpoint();
12874                 }
12875             }
12876
12877             #------------------------------------------------------------
12878             # Handle this token
12879             #------------------------------------------------------------
12880
12881             $current_depth = $depth;
12882
12883             # handle comma-arrow
12884             if ( $type eq '=>' ) {
12885                 next if ( $last_nonblank_type eq '=>' );
12886                 next if $rOpts_break_at_old_comma_breakpoints;
12887                 next if $rOpts_comma_arrow_breakpoints == 3;
12888                 $want_comma_break[$depth]   = 1;
12889                 $index_before_arrow[$depth] = $i_last_nonblank_token;
12890                 next;
12891             }
12892
12893             elsif ( $type eq '.' ) {
12894                 $last_dot_index[$depth] = $i;
12895             }
12896
12897             # Turn off alignment if we are sure that this is not a list
12898             # environment.  To be safe, we will do this if we see certain
12899             # non-list tokens, such as ';', and also the environment is
12900             # not a list.  Note that '=' could be in any of the = operators
12901             # (lextest.t). We can't just use the reported environment
12902             # because it can be incorrect in some cases.
12903             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
12904                 && $container_environment_to_go[$i] ne 'LIST' )
12905             {
12906                 $dont_align[$depth]         = 1;
12907                 $want_comma_break[$depth]   = 0;
12908                 $index_before_arrow[$depth] = -1;
12909             }
12910
12911             # now just handle any commas
12912             next unless ( $type eq ',' );
12913
12914             $last_dot_index[$depth]   = undef;
12915             $last_comma_index[$depth] = $i;
12916
12917             # break here if this comma follows a '=>'
12918             # but not if there is a side comment after the comma
12919             if ( $want_comma_break[$depth] ) {
12920
12921                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
12922                     $want_comma_break[$depth]   = 0;
12923                     $index_before_arrow[$depth] = -1;
12924                     next;
12925                 }
12926
12927                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
12928
12929                 # break before the previous token if it looks safe
12930                 # Example of something that we will not try to break before:
12931                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
12932                 my $ibreak = $index_before_arrow[$depth] - 1;
12933                 if (   $ibreak > 0
12934                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
12935                 {
12936                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
12937                     if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
12938                         set_forced_breakpoint($ibreak);
12939                     }
12940                 }
12941
12942                 $want_comma_break[$depth]   = 0;
12943                 $index_before_arrow[$depth] = -1;
12944
12945                 # handle list which mixes '=>'s and ','s:
12946                 # treat any list items so far as an interrupted list
12947                 $interrupted_list[$depth] = 1;
12948                 next;
12949             }
12950
12951             # skip past these commas if we are not supposed to format them
12952             next if ( $dont_align[$depth] );
12953
12954             # break after all commas above starting depth
12955             if ( $depth < $starting_depth ) {
12956                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
12957                 next;
12958             }
12959
12960             # add this comma to the list..
12961             my $item_count = $item_count_stack[$depth];
12962             if ( $item_count == 0 ) {
12963
12964                 # but do not form a list with no opening structure
12965                 # for example:
12966
12967                 #            open INFILE_COPY, ">$input_file_copy"
12968                 #              or die ("very long message");
12969
12970                 if ( ( $opening_structure_index_stack[$depth] < 0 )
12971                     && $container_environment_to_go[$i] eq 'BLOCK' )
12972                 {
12973                     $dont_align[$depth] = 1;
12974                     next;
12975                 }
12976             }
12977
12978             $comma_index[$depth][$item_count] = $i;
12979             ++$item_count_stack[$depth];
12980             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
12981                 $identifier_count_stack[$depth]++;
12982             }
12983         }
12984
12985         #-------------------------------------------
12986         # end of loop over all tokens in this batch
12987         #-------------------------------------------
12988
12989         # set breaks for any unfinished lists ..
12990         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
12991
12992             $interrupted_list[$dd] = 1;
12993             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
12994             set_comma_breakpoints($dd);
12995             set_logical_breakpoints($dd)
12996               if ( $has_old_logical_breakpoints[$dd] );
12997             set_for_semicolon_breakpoints($dd);
12998
12999             # break open container...
13000             my $i_opening = $opening_structure_index_stack[$dd];
13001             set_forced_breakpoint($i_opening)
13002               unless (
13003                 is_unbreakable_container($dd)
13004
13005                 # Avoid a break which would place an isolated ' or "
13006                 # on a line
13007                 || (   $type eq 'Q'
13008                     && $i_opening >= $max_index_to_go - 2
13009                     && $token =~ /^['"]$/ )
13010               );
13011         }
13012
13013         # Return a flag indicating if the input file had some good breakpoints.
13014         # This flag will be used to force a break in a line shorter than the
13015         # allowed line length.
13016         if ( $has_old_logical_breakpoints[$current_depth] ) {
13017             $saw_good_breakpoint = 1;
13018         }
13019         return $saw_good_breakpoint;
13020     }
13021 }    # end scan_list
13022
13023 sub find_token_starting_list {
13024
13025     # When testing to see if a block will fit on one line, some
13026     # previous token(s) may also need to be on the line; particularly
13027     # if this is a sub call.  So we will look back at least one
13028     # token. NOTE: This isn't perfect, but not critical, because
13029     # if we mis-identify a block, it will be wrapped and therefore
13030     # fixed the next time it is formatted.
13031     my $i_opening_paren = shift;
13032     my $i_opening_minus = $i_opening_paren;
13033     my $im1             = $i_opening_paren - 1;
13034     my $im2             = $i_opening_paren - 2;
13035     my $im3             = $i_opening_paren - 3;
13036     my $typem1          = $types_to_go[$im1];
13037     my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
13038     if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
13039         $i_opening_minus = $i_opening_paren;
13040     }
13041     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
13042         $i_opening_minus = $im1 if $im1 >= 0;
13043
13044         # walk back to improve length estimate
13045         for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
13046             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
13047             $i_opening_minus = $j;
13048         }
13049         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
13050     }
13051     elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
13052     elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
13053         $i_opening_minus = $im2;
13054     }
13055     return $i_opening_minus;
13056 }
13057
13058 {    # begin set_comma_breakpoints_do
13059
13060     my %is_keyword_with_special_leading_term;
13061
13062     BEGIN {
13063
13064         # These keywords have prototypes which allow a special leading item
13065         # followed by a list
13066         @_ =
13067           qw(formline grep kill map printf sprintf push chmod join pack unshift);
13068         @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
13069     }
13070
13071     sub set_comma_breakpoints_do {
13072
13073         # Given a list with some commas, set breakpoints at some of the
13074         # commas, if necessary, to make it easy to read.  This list is
13075         # an example:
13076         my (
13077             $depth,               $i_opening_paren,  $i_closing_paren,
13078             $item_count,          $identifier_count, $rcomma_index,
13079             $next_nonblank_type,  $list_type,        $interrupted,
13080             $rdo_not_break_apart, $must_break_open,
13081           )
13082           = @_;
13083
13084         # nothing to do if no commas seen
13085         return if ( $item_count < 1 );
13086         my $i_first_comma     = $$rcomma_index[0];
13087         my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
13088         my $i_last_comma      = $i_true_last_comma;
13089         if ( $i_last_comma >= $max_index_to_go ) {
13090             $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
13091             return if ( $item_count < 1 );
13092         }
13093
13094         #---------------------------------------------------------------
13095         # find lengths of all items in the list to calculate page layout
13096         #---------------------------------------------------------------
13097         my $comma_count = $item_count;
13098         my @item_lengths;
13099         my @i_term_begin;
13100         my @i_term_end;
13101         my @i_term_comma;
13102         my $i_prev_plus;
13103         my @max_length = ( 0, 0 );
13104         my $first_term_length;
13105         my $i      = $i_opening_paren;
13106         my $is_odd = 1;
13107
13108         for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
13109             $is_odd      = 1 - $is_odd;
13110             $i_prev_plus = $i + 1;
13111             $i           = $$rcomma_index[$j];
13112
13113             my $i_term_end =
13114               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
13115             my $i_term_begin =
13116               ( $types_to_go[$i_prev_plus] eq 'b' )
13117               ? $i_prev_plus + 1
13118               : $i_prev_plus;
13119             push @i_term_begin, $i_term_begin;
13120             push @i_term_end,   $i_term_end;
13121             push @i_term_comma, $i;
13122
13123             # note: currently adding 2 to all lengths (for comma and space)
13124             my $length =
13125               2 + token_sequence_length( $i_term_begin, $i_term_end );
13126             push @item_lengths, $length;
13127
13128             if ( $j == 0 ) {
13129                 $first_term_length = $length;
13130             }
13131             else {
13132
13133                 if ( $length > $max_length[$is_odd] ) {
13134                     $max_length[$is_odd] = $length;
13135                 }
13136             }
13137         }
13138
13139         # now we have to make a distinction between the comma count and item
13140         # count, because the item count will be one greater than the comma
13141         # count if the last item is not terminated with a comma
13142         my $i_b =
13143           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
13144           ? $i_last_comma + 1
13145           : $i_last_comma;
13146         my $i_e =
13147           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
13148           ? $i_closing_paren - 2
13149           : $i_closing_paren - 1;
13150         my $i_effective_last_comma = $i_last_comma;
13151
13152         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
13153
13154         if ( $last_item_length > 0 ) {
13155
13156             # add 2 to length because other lengths include a comma and a blank
13157             $last_item_length += 2;
13158             push @item_lengths, $last_item_length;
13159             push @i_term_begin, $i_b + 1;
13160             push @i_term_end,   $i_e;
13161             push @i_term_comma, undef;
13162
13163             my $i_odd = $item_count % 2;
13164
13165             if ( $last_item_length > $max_length[$i_odd] ) {
13166                 $max_length[$i_odd] = $last_item_length;
13167             }
13168
13169             $item_count++;
13170             $i_effective_last_comma = $i_e + 1;
13171
13172             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
13173                 $identifier_count++;
13174             }
13175         }
13176
13177         #---------------------------------------------------------------
13178         # End of length calculations
13179         #---------------------------------------------------------------
13180
13181         #---------------------------------------------------------------
13182         # Compound List Rule 1:
13183         # Break at (almost) every comma for a list containing a broken
13184         # sublist.  This has higher priority than the Interrupted List
13185         # Rule.
13186         #---------------------------------------------------------------
13187         if ( $has_broken_sublist[$depth] ) {
13188
13189             # Break at every comma except for a comma between two
13190             # simple, small terms.  This prevents long vertical
13191             # columns of, say, just 0's.
13192             my $small_length = 10;    # 2 + actual maximum length wanted
13193
13194             # We'll insert a break in long runs of small terms to
13195             # allow alignment in uniform tables.
13196             my $skipped_count = 0;
13197             my $columns       = table_columns_available($i_first_comma);
13198             my $fields        = int( $columns / $small_length );
13199             if (   $rOpts_maximum_fields_per_table
13200                 && $fields > $rOpts_maximum_fields_per_table )
13201             {
13202                 $fields = $rOpts_maximum_fields_per_table;
13203             }
13204             my $max_skipped_count = $fields - 1;
13205
13206             my $is_simple_last_term = 0;
13207             my $is_simple_next_term = 0;
13208             foreach my $j ( 0 .. $item_count ) {
13209                 $is_simple_last_term = $is_simple_next_term;
13210                 $is_simple_next_term = 0;
13211                 if (   $j < $item_count
13212                     && $i_term_end[$j] == $i_term_begin[$j]
13213                     && $item_lengths[$j] <= $small_length )
13214                 {
13215                     $is_simple_next_term = 1;
13216                 }
13217                 next if $j == 0;
13218                 if (   $is_simple_last_term
13219                     && $is_simple_next_term
13220                     && $skipped_count < $max_skipped_count )
13221                 {
13222                     $skipped_count++;
13223                 }
13224                 else {
13225                     $skipped_count = 0;
13226                     my $i = $i_term_comma[ $j - 1 ];
13227                     last unless defined $i;
13228                     set_forced_breakpoint($i);
13229                 }
13230             }
13231
13232             # always break at the last comma if this list is
13233             # interrupted; we wouldn't want to leave a terminal '{', for
13234             # example.
13235             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
13236             return;
13237         }
13238
13239 #my ( $a, $b, $c ) = caller();
13240 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
13241 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
13242 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
13243
13244         #---------------------------------------------------------------
13245         # Interrupted List Rule:
13246         # A list is is forced to use old breakpoints if it was interrupted
13247         # by side comments or blank lines, or requested by user.
13248         #---------------------------------------------------------------
13249         if (   $rOpts_break_at_old_comma_breakpoints
13250             || $interrupted
13251             || $i_opening_paren < 0 )
13252         {
13253             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
13254             return;
13255         }
13256
13257         #---------------------------------------------------------------
13258         # Looks like a list of items.  We have to look at it and size it up.
13259         #---------------------------------------------------------------
13260
13261         my $opening_token       = $tokens_to_go[$i_opening_paren];
13262         my $opening_environment =
13263           $container_environment_to_go[$i_opening_paren];
13264
13265         #-------------------------------------------------------------------
13266         # Return if this will fit on one line
13267         #-------------------------------------------------------------------
13268
13269         my $i_opening_minus = find_token_starting_list($i_opening_paren);
13270         return
13271           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
13272
13273         #-------------------------------------------------------------------
13274         # Now we know that this block spans multiple lines; we have to set
13275         # at least one breakpoint -- real or fake -- as a signal to break
13276         # open any outer containers.
13277         #-------------------------------------------------------------------
13278         set_fake_breakpoint();
13279
13280         # be sure we do not extend beyond the current list length
13281         if ( $i_effective_last_comma >= $max_index_to_go ) {
13282             $i_effective_last_comma = $max_index_to_go - 1;
13283         }
13284
13285         # Set a flag indicating if we need to break open to keep -lp
13286         # items aligned.  This is necessary if any of the list terms
13287         # exceeds the available space after the '('.
13288         my $need_lp_break_open = $must_break_open;
13289         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
13290             my $columns_if_unbroken = $rOpts_maximum_line_length -
13291               total_line_length( $i_opening_minus, $i_opening_paren );
13292             $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
13293               || ( $max_length[1] > $columns_if_unbroken )
13294               || ( $first_term_length > $columns_if_unbroken );
13295         }
13296
13297         # Specify if the list must have an even number of fields or not.
13298         # It is generally safest to assume an even number, because the
13299         # list items might be a hash list.  But if we can be sure that
13300         # it is not a hash, then we can allow an odd number for more
13301         # flexibility.
13302         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
13303
13304         if (   $identifier_count >= $item_count - 1
13305             || $is_assignment{$next_nonblank_type}
13306             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
13307           )
13308         {
13309             $odd_or_even = 1;
13310         }
13311
13312         # do we have a long first term which should be
13313         # left on a line by itself?
13314         my $use_separate_first_term = (
13315             $odd_or_even == 1       # only if we can use 1 field/line
13316               && $item_count > 3    # need several items
13317               && $first_term_length >
13318               2 * $max_length[0] - 2    # need long first term
13319               && $first_term_length >
13320               2 * $max_length[1] - 2    # need long first term
13321         );
13322
13323         # or do we know from the type of list that the first term should
13324         # be placed alone?
13325         if ( !$use_separate_first_term ) {
13326             if ( $is_keyword_with_special_leading_term{$list_type} ) {
13327                 $use_separate_first_term = 1;
13328
13329                 # should the container be broken open?
13330                 if ( $item_count < 3 ) {
13331                     if ( $i_first_comma - $i_opening_paren < 4 ) {
13332                         $$rdo_not_break_apart = 1;
13333                     }
13334                 }
13335                 elsif ($first_term_length < 20
13336                     && $i_first_comma - $i_opening_paren < 4 )
13337                 {
13338                     my $columns = table_columns_available($i_first_comma);
13339                     if ( $first_term_length < $columns ) {
13340                         $$rdo_not_break_apart = 1;
13341                     }
13342                 }
13343             }
13344         }
13345
13346         # if so,
13347         if ($use_separate_first_term) {
13348
13349             # ..set a break and update starting values
13350             $use_separate_first_term = 1;
13351             set_forced_breakpoint($i_first_comma);
13352             $i_opening_paren = $i_first_comma;
13353             $i_first_comma   = $$rcomma_index[1];
13354             $item_count--;
13355             return if $comma_count == 1;
13356             shift @item_lengths;
13357             shift @i_term_begin;
13358             shift @i_term_end;
13359             shift @i_term_comma;
13360         }
13361
13362         # if not, update the metrics to include the first term
13363         else {
13364             if ( $first_term_length > $max_length[0] ) {
13365                 $max_length[0] = $first_term_length;
13366             }
13367         }
13368
13369         # Field width parameters
13370         my $pair_width = ( $max_length[0] + $max_length[1] );
13371         my $max_width  =
13372           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
13373
13374         # Number of free columns across the page width for laying out tables
13375         my $columns = table_columns_available($i_first_comma);
13376
13377         # Estimated maximum number of fields which fit this space
13378         # This will be our first guess
13379         my $number_of_fields_max =
13380           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
13381             $pair_width );
13382         my $number_of_fields = $number_of_fields_max;
13383
13384         # Find the best-looking number of fields
13385         # and make this our second guess if possible
13386         my ( $number_of_fields_best, $ri_ragged_break_list,
13387             $new_identifier_count )
13388           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
13389             $max_width );
13390
13391         if (   $number_of_fields_best != 0
13392             && $number_of_fields_best < $number_of_fields_max )
13393         {
13394             $number_of_fields = $number_of_fields_best;
13395         }
13396
13397         # ----------------------------------------------------------------------
13398         # If we are crowded and the -lp option is being used, try to
13399         # undo some indentation
13400         # ----------------------------------------------------------------------
13401         if (
13402             $rOpts_line_up_parentheses
13403             && (
13404                 $number_of_fields == 0
13405                 || (   $number_of_fields == 1
13406                     && $number_of_fields != $number_of_fields_best )
13407             )
13408           )
13409         {
13410             my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
13411             if ( $available_spaces > 0 ) {
13412
13413                 my $spaces_wanted = $max_width - $columns;    # for 1 field
13414
13415                 if ( $number_of_fields_best == 0 ) {
13416                     $number_of_fields_best =
13417                       get_maximum_fields_wanted( \@item_lengths );
13418                 }
13419
13420                 if ( $number_of_fields_best != 1 ) {
13421                     my $spaces_wanted_2 =
13422                       1 + $pair_width - $columns;             # for 2 fields
13423                     if ( $available_spaces > $spaces_wanted_2 ) {
13424                         $spaces_wanted = $spaces_wanted_2;
13425                     }
13426                 }
13427
13428                 if ( $spaces_wanted > 0 ) {
13429                     my $deleted_spaces =
13430                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
13431
13432                     # redo the math
13433                     if ( $deleted_spaces > 0 ) {
13434                         $columns = table_columns_available($i_first_comma);
13435                         $number_of_fields_max =
13436                           maximum_number_of_fields( $columns, $odd_or_even,
13437                             $max_width, $pair_width );
13438                         $number_of_fields = $number_of_fields_max;
13439
13440                         if (   $number_of_fields_best == 1
13441                             && $number_of_fields >= 1 )
13442                         {
13443                             $number_of_fields = $number_of_fields_best;
13444                         }
13445                     }
13446                 }
13447             }
13448         }
13449
13450         # try for one column if two won't work
13451         if ( $number_of_fields <= 0 ) {
13452             $number_of_fields = int( $columns / $max_width );
13453         }
13454
13455         # The user can place an upper bound on the number of fields,
13456         # which can be useful for doing maintenance on tables
13457         if (   $rOpts_maximum_fields_per_table
13458             && $number_of_fields > $rOpts_maximum_fields_per_table )
13459         {
13460             $number_of_fields = $rOpts_maximum_fields_per_table;
13461         }
13462
13463         # How many columns (characters) and lines would this container take
13464         # if no additional whitespace were added?
13465         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
13466             $i_effective_last_comma + 1 );
13467         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
13468         my $packed_lines = 1 + int( $packed_columns / $columns );
13469
13470         # are we an item contained in an outer list?
13471         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
13472
13473         if ( $number_of_fields <= 0 ) {
13474
13475 #         #---------------------------------------------------------------
13476 #         # We're in trouble.  We can't find a single field width that works.
13477 #         # There is no simple answer here; we may have a single long list
13478 #         # item, or many.
13479 #         #---------------------------------------------------------------
13480 #
13481 #         In many cases, it may be best to not force a break if there is just one
13482 #         comma, because the standard continuation break logic will do a better
13483 #         job without it.
13484 #
13485 #         In the common case that all but one of the terms can fit
13486 #         on a single line, it may look better not to break open the
13487 #         containing parens.  Consider, for example
13488 #
13489 #             $color =
13490 #               join ( '/',
13491 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
13492 #                 keys %colors );
13493 #
13494 #         which will look like this with the container broken:
13495 #
13496 #             $color = join (
13497 #                 '/',
13498 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
13499 #             );
13500 #
13501 #         Here is an example of this rule for a long last term:
13502 #
13503 #             log_message( 0, 256, 128,
13504 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
13505 #
13506 #         And here is an example with a long first term:
13507 #
13508 #         $s = sprintf(
13509 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
13510 #             $r, $pu, $ps, $cu, $cs, $tt
13511 #           )
13512 #           if $style eq 'all';
13513
13514             my $i_last_comma    = $$rcomma_index[ $comma_count - 1 ];
13515             my $long_last_term  = excess_line_length( 0, $i_last_comma ) <= 0;
13516             my $long_first_term =
13517               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
13518
13519             # break at every comma ...
13520             if (
13521
13522                 # if requested by user or is best looking
13523                 $number_of_fields_best == 1
13524
13525                 # or if this is a sublist of a larger list
13526                 || $in_hierarchical_list
13527
13528                 # or if multiple commas and we dont have a long first or last
13529                 # term
13530                 || ( $comma_count > 1
13531                     && !( $long_last_term || $long_first_term ) )
13532               )
13533             {
13534                 foreach ( 0 .. $comma_count - 1 ) {
13535                     set_forced_breakpoint( $$rcomma_index[$_] );
13536                 }
13537             }
13538             elsif ($long_last_term) {
13539
13540                 set_forced_breakpoint($i_last_comma);
13541                 $$rdo_not_break_apart = 1 unless $must_break_open;
13542             }
13543             elsif ($long_first_term) {
13544
13545                 set_forced_breakpoint($i_first_comma);
13546             }
13547             else {
13548
13549                 # let breaks be defined by default bond strength logic
13550             }
13551             return;
13552         }
13553
13554         # --------------------------------------------------------
13555         # We have a tentative field count that seems to work.
13556         # How many lines will this require?
13557         # --------------------------------------------------------
13558         my $formatted_lines = $item_count / ($number_of_fields);
13559         if ( $formatted_lines != int $formatted_lines ) {
13560             $formatted_lines = 1 + int $formatted_lines;
13561         }
13562
13563         # So far we've been trying to fill out to the right margin.  But
13564         # compact tables are easier to read, so let's see if we can use fewer
13565         # fields without increasing the number of lines.
13566         $number_of_fields =
13567           compactify_table( $item_count, $number_of_fields, $formatted_lines,
13568             $odd_or_even );
13569
13570         # How many spaces across the page will we fill?
13571         my $columns_per_line =
13572           ( int $number_of_fields / 2 ) * $pair_width +
13573           ( $number_of_fields % 2 ) * $max_width;
13574
13575         my $formatted_columns;
13576
13577         if ( $number_of_fields > 1 ) {
13578             $formatted_columns =
13579               ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
13580                   $max_width );
13581         }
13582         else {
13583             $formatted_columns = $max_width * $item_count;
13584         }
13585         if ( $formatted_columns < $packed_columns ) {
13586             $formatted_columns = $packed_columns;
13587         }
13588
13589         my $unused_columns = $formatted_columns - $packed_columns;
13590
13591         # set some empirical parameters to help decide if we should try to
13592         # align; high sparsity does not look good, especially with few lines
13593         my $sparsity = ($unused_columns) / ($formatted_columns);
13594         my $max_allowed_sparsity =
13595             ( $item_count < 3 ) ? 0.1
13596           : ( $packed_lines == 1 ) ? 0.15
13597           : ( $packed_lines == 2 ) ? 0.4
13598           : 0.7;
13599
13600         # Begin check for shortcut methods, which avoid treating a list
13601         # as a table for relatively small parenthesized lists.  These
13602         # are usually easier to read if not formatted as tables.
13603         if (
13604             $packed_lines <= 2    # probably can fit in 2 lines
13605             && $item_count < 9    # doesn't have too many items
13606             && $opening_environment eq 'BLOCK'    # not a sub-container
13607             && $opening_token       eq '('        # is paren list
13608           )
13609         {
13610
13611             # Shortcut method 1: for -lp and just one comma:
13612             # This is a no-brainer, just break at the comma.
13613             if (
13614                 $rOpts_line_up_parentheses        # -lp
13615                 && $item_count == 2               # two items, one comma
13616                 && !$must_break_open
13617               )
13618             {
13619                 my $i_break = $$rcomma_index[0];
13620                 set_forced_breakpoint($i_break);
13621                 $$rdo_not_break_apart = 1;
13622                 set_non_alignment_flags( $comma_count, $rcomma_index );
13623                 return;
13624
13625             }
13626
13627             # method 2 is for most small ragged lists which might look
13628             # best if not displayed as a table.
13629             if (
13630                 ( $number_of_fields == 2 && $item_count == 3 )
13631                 || (
13632                     $new_identifier_count > 0    # isn't all quotes
13633                     && $sparsity > 0.15
13634                 )    # would be fairly spaced gaps if aligned
13635               )
13636             {
13637
13638                 my $break_count =
13639                   set_ragged_breakpoints( \@i_term_comma,
13640                     $ri_ragged_break_list );
13641                 ++$break_count if ($use_separate_first_term);
13642
13643                 # NOTE: we should really use the true break count here,
13644                 # which can be greater if there are large terms and
13645                 # little space, but usually this will work well enough.
13646                 unless ($must_break_open) {
13647
13648                     if ( $break_count <= 1 ) {
13649                         $$rdo_not_break_apart = 1;
13650                     }
13651                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
13652                     {
13653                         $$rdo_not_break_apart = 1;
13654                     }
13655                 }
13656                 set_non_alignment_flags( $comma_count, $rcomma_index );
13657                 return;
13658             }
13659
13660         }    # end shortcut methods
13661
13662         # debug stuff
13663
13664         FORMATTER_DEBUG_FLAG_SPARSE && do {
13665             print
13666 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
13667
13668         };
13669
13670         #---------------------------------------------------------------
13671         # Compound List Rule 2:
13672         # If this list is too long for one line, and it is an item of a
13673         # larger list, then we must format it, regardless of sparsity
13674         # (ian.t).  One reason that we have to do this is to trigger
13675         # Compound List Rule 1, above, which causes breaks at all commas of
13676         # all outer lists.  In this way, the structure will be properly
13677         # displayed.
13678         #---------------------------------------------------------------
13679
13680         # Decide if this list is too long for one line unless broken
13681         my $total_columns = table_columns_available($i_opening_paren);
13682         my $too_long      = $packed_columns > $total_columns;
13683
13684         # For a paren list, include the length of the token just before the
13685         # '(' because this is likely a sub call, and we would have to
13686         # include the sub name on the same line as the list.  This is still
13687         # imprecise, but not too bad.  (steve.t)
13688         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
13689
13690             $too_long =
13691               excess_line_length( $i_opening_minus,
13692                 $i_effective_last_comma + 1 ) > 0;
13693         }
13694
13695         # FIXME: For an item after a '=>', try to include the length of the
13696         # thing before the '=>'.  This is crude and should be improved by
13697         # actually looking back token by token.
13698         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
13699             my $i_opening_minus = $i_opening_paren - 4;
13700             if ( $i_opening_minus >= 0 ) {
13701                 $too_long =
13702                   excess_line_length( $i_opening_minus,
13703                     $i_effective_last_comma + 1 ) > 0;
13704             }
13705         }
13706
13707         # Always break lists contained in '[' and '{' if too long for 1 line,
13708         # and always break lists which are too long and part of a more complex
13709         # structure.
13710         my $must_break_open_container = $must_break_open
13711           || ( $too_long
13712             && ( $in_hierarchical_list || $opening_token ne '(' ) );
13713
13714 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
13715
13716         #---------------------------------------------------------------
13717         # The main decision:
13718         # Now decide if we will align the data into aligned columns.  Do not
13719         # attempt to align columns if this is a tiny table or it would be
13720         # too spaced.  It seems that the more packed lines we have, the
13721         # sparser the list that can be allowed and still look ok.
13722         #---------------------------------------------------------------
13723
13724         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
13725             || ( $formatted_lines < 2 )
13726             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
13727           )
13728         {
13729
13730             #---------------------------------------------------------------
13731             # too sparse: would look ugly if aligned in a table;
13732             #---------------------------------------------------------------
13733
13734             # use old breakpoints if this is a 'big' list
13735             # FIXME: goal is to improve set_ragged_breakpoints so that
13736             # this is not necessary.
13737             if ( $packed_lines > 2 && $item_count > 10 ) {
13738                 write_logfile_entry("List sparse: using old breakpoints\n");
13739                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
13740             }
13741
13742             # let the continuation logic handle it if 2 lines
13743             else {
13744
13745                 my $break_count =
13746                   set_ragged_breakpoints( \@i_term_comma,
13747                     $ri_ragged_break_list );
13748                 ++$break_count if ($use_separate_first_term);
13749
13750                 unless ($must_break_open_container) {
13751                     if ( $break_count <= 1 ) {
13752                         $$rdo_not_break_apart = 1;
13753                     }
13754                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
13755                     {
13756                         $$rdo_not_break_apart = 1;
13757                     }
13758                 }
13759                 set_non_alignment_flags( $comma_count, $rcomma_index );
13760             }
13761             return;
13762         }
13763
13764         #---------------------------------------------------------------
13765         # go ahead and format as a table
13766         #---------------------------------------------------------------
13767         write_logfile_entry(
13768             "List: auto formatting with $number_of_fields fields/row\n");
13769
13770         my $j_first_break =
13771           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
13772
13773         for (
13774             my $j = $j_first_break ;
13775             $j < $comma_count ;
13776             $j += $number_of_fields
13777           )
13778         {
13779             my $i = $$rcomma_index[$j];
13780             set_forced_breakpoint($i);
13781         }
13782         return;
13783     }
13784 }
13785
13786 sub set_non_alignment_flags {
13787
13788     # set flag which indicates that these commas should not be
13789     # aligned
13790     my ( $comma_count, $rcomma_index ) = @_;
13791     foreach ( 0 .. $comma_count - 1 ) {
13792         $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
13793     }
13794 }
13795
13796 sub study_list_complexity {
13797
13798     # Look for complex tables which should be formatted with one term per line.
13799     # Returns the following:
13800     #
13801     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
13802     #    which are hard to read
13803     #  $number_of_fields_best = suggested number of fields based on
13804     #    complexity; = 0 if any number may be used.
13805     #
13806     my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
13807     my $item_count            = @{$ri_term_begin};
13808     my $complex_item_count    = 0;
13809     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
13810     my $i_max                 = @{$ritem_lengths} - 1;
13811     ##my @item_complexity;
13812
13813     my $i_last_last_break = -3;
13814     my $i_last_break      = -2;
13815     my @i_ragged_break_list;
13816
13817     my $definitely_complex = 30;
13818     my $definitely_simple  = 12;
13819     my $quote_count        = 0;
13820
13821     for my $i ( 0 .. $i_max ) {
13822         my $ib = $ri_term_begin->[$i];
13823         my $ie = $ri_term_end->[$i];
13824
13825         # define complexity: start with the actual term length
13826         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
13827
13828         ##TBD: join types here and check for variations
13829         ##my $str=join "", @tokens_to_go[$ib..$ie];
13830
13831         my $is_quote = 0;
13832         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
13833             $is_quote = 1;
13834             $quote_count++;
13835         }
13836         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
13837             $quote_count++;
13838         }
13839
13840         if ( $ib eq $ie ) {
13841             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
13842                 $complex_item_count++;
13843                 $weighted_length *= 2;
13844             }
13845             else {
13846             }
13847         }
13848         else {
13849             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
13850                 $complex_item_count++;
13851                 $weighted_length *= 2;
13852             }
13853             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
13854                 $weighted_length += 4;
13855             }
13856         }
13857
13858         # add weight for extra tokens.
13859         $weighted_length += 2 * ( $ie - $ib );
13860
13861 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
13862 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
13863
13864 ##push @item_complexity, $weighted_length;
13865
13866         # now mark a ragged break after this item it if it is 'long and
13867         # complex':
13868         if ( $weighted_length >= $definitely_complex ) {
13869
13870             # if we broke after the previous term
13871             # then break before it too
13872             if (   $i_last_break == $i - 1
13873                 && $i > 1
13874                 && $i_last_last_break != $i - 2 )
13875             {
13876
13877                 ## FIXME: don't strand a small term
13878                 pop @i_ragged_break_list;
13879                 push @i_ragged_break_list, $i - 2;
13880                 push @i_ragged_break_list, $i - 1;
13881             }
13882
13883             push @i_ragged_break_list, $i;
13884             $i_last_last_break = $i_last_break;
13885             $i_last_break      = $i;
13886         }
13887
13888         # don't break before a small last term -- it will
13889         # not look good on a line by itself.
13890         elsif ($i == $i_max
13891             && $i_last_break == $i - 1
13892             && $weighted_length <= $definitely_simple )
13893         {
13894             pop @i_ragged_break_list;
13895         }
13896     }
13897
13898     my $identifier_count = $i_max + 1 - $quote_count;
13899
13900     # Need more tuning here..
13901     if (   $max_width > 12
13902         && $complex_item_count > $item_count / 2
13903         && $number_of_fields_best != 2 )
13904     {
13905         $number_of_fields_best = 1;
13906     }
13907
13908     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
13909 }
13910
13911 sub get_maximum_fields_wanted {
13912
13913     # Not all tables look good with more than one field of items.
13914     # This routine looks at a table and decides if it should be
13915     # formatted with just one field or not.
13916     # This coding is still under development.
13917     my ($ritem_lengths) = @_;
13918
13919     my $number_of_fields_best = 0;
13920
13921     # For just a few items, we tentatively assume just 1 field.
13922     my $item_count = @{$ritem_lengths};
13923     if ( $item_count <= 5 ) {
13924         $number_of_fields_best = 1;
13925     }
13926
13927     # For larger tables, look at it both ways and see what looks best
13928     else {
13929
13930         my $is_odd            = 1;
13931         my @max_length        = ( 0, 0 );
13932         my @last_length_2     = ( undef, undef );
13933         my @first_length_2    = ( undef, undef );
13934         my $last_length       = undef;
13935         my $total_variation_1 = 0;
13936         my $total_variation_2 = 0;
13937         my @total_variation_2 = ( 0, 0 );
13938         for ( my $j = 0 ; $j < $item_count ; $j++ ) {
13939
13940             $is_odd = 1 - $is_odd;
13941             my $length = $ritem_lengths->[$j];
13942             if ( $length > $max_length[$is_odd] ) {
13943                 $max_length[$is_odd] = $length;
13944             }
13945
13946             if ( defined($last_length) ) {
13947                 my $dl = abs( $length - $last_length );
13948                 $total_variation_1 += $dl;
13949             }
13950             $last_length = $length;
13951
13952             my $ll = $last_length_2[$is_odd];
13953             if ( defined($ll) ) {
13954                 my $dl = abs( $length - $ll );
13955                 $total_variation_2[$is_odd] += $dl;
13956             }
13957             else {
13958                 $first_length_2[$is_odd] = $length;
13959             }
13960             $last_length_2[$is_odd] = $length;
13961         }
13962         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
13963
13964         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
13965         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
13966             $number_of_fields_best = 1;
13967         }
13968     }
13969     return ($number_of_fields_best);
13970 }
13971
13972 sub table_columns_available {
13973     my $i_first_comma = shift;
13974     my $columns       =
13975       $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
13976
13977     # Patch: the vertical formatter does not line up lines whose lengths
13978     # exactly equal the available line length because of allowances
13979     # that must be made for side comments.  Therefore, the number of
13980     # available columns is reduced by 1 character.
13981     $columns -= 1;
13982     return $columns;
13983 }
13984
13985 sub maximum_number_of_fields {
13986
13987     # how many fields will fit in the available space?
13988     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
13989     my $max_pairs        = int( $columns / $pair_width );
13990     my $number_of_fields = $max_pairs * 2;
13991     if (   $odd_or_even == 1
13992         && $max_pairs * $pair_width + $max_width <= $columns )
13993     {
13994         $number_of_fields++;
13995     }
13996     return $number_of_fields;
13997 }
13998
13999 sub compactify_table {
14000
14001     # given a table with a certain number of fields and a certain number
14002     # of lines, see if reducing the number of fields will make it look
14003     # better.
14004     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
14005     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
14006         my $min_fields;
14007
14008         for (
14009             $min_fields = $number_of_fields ;
14010             $min_fields >= $odd_or_even
14011             && $min_fields * $formatted_lines >= $item_count ;
14012             $min_fields -= $odd_or_even
14013           )
14014         {
14015             $number_of_fields = $min_fields;
14016         }
14017     }
14018     return $number_of_fields;
14019 }
14020
14021 sub set_ragged_breakpoints {
14022
14023     # Set breakpoints in a list that cannot be formatted nicely as a
14024     # table.
14025     my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
14026
14027     my $break_count = 0;
14028     foreach (@$ri_ragged_break_list) {
14029         my $j = $ri_term_comma->[$_];
14030         if ($j) {
14031             set_forced_breakpoint($j);
14032             $break_count++;
14033         }
14034     }
14035     return $break_count;
14036 }
14037
14038 sub copy_old_breakpoints {
14039     my ( $i_first_comma, $i_last_comma ) = @_;
14040     for my $i ( $i_first_comma .. $i_last_comma ) {
14041         if ( $old_breakpoint_to_go[$i] ) {
14042             set_forced_breakpoint($i);
14043         }
14044     }
14045 }
14046
14047 sub set_nobreaks {
14048     my ( $i, $j ) = @_;
14049     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
14050
14051         FORMATTER_DEBUG_FLAG_NOBREAK && do {
14052             my ( $a, $b, $c ) = caller();
14053             print(
14054 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
14055             );
14056         };
14057
14058         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
14059     }
14060
14061     # shouldn't happen; non-critical error
14062     else {
14063         FORMATTER_DEBUG_FLAG_NOBREAK && do {
14064             my ( $a, $b, $c ) = caller();
14065             print(
14066 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
14067             );
14068         };
14069     }
14070 }
14071
14072 sub set_fake_breakpoint {
14073
14074     # Just bump up the breakpoint count as a signal that there are breaks.
14075     # This is useful if we have breaks but may want to postpone deciding where
14076     # to make them.
14077     $forced_breakpoint_count++;
14078 }
14079
14080 sub set_forced_breakpoint {
14081     my $i = shift;
14082
14083     return unless defined $i && $i >= 0;
14084
14085     # when called with certain tokens, use bond strengths to decide
14086     # if we break before or after it
14087     my $token = $tokens_to_go[$i];
14088
14089     if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
14090         if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14091     }
14092
14093     # breaks are forced before 'if' and 'unless'
14094     elsif ( $is_if_unless{$token} ) { $i-- }
14095
14096     if ( $i >= 0 && $i <= $max_index_to_go ) {
14097         my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14098
14099         FORMATTER_DEBUG_FLAG_FORCE && do {
14100             my ( $a, $b, $c ) = caller();
14101             print
14102 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
14103         };
14104
14105         if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
14106             $forced_breakpoint_to_go[$i_nonblank] = 1;
14107
14108             if ( $i_nonblank > $index_max_forced_break ) {
14109                 $index_max_forced_break = $i_nonblank;
14110             }
14111             $forced_breakpoint_count++;
14112             $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
14113               $i_nonblank;
14114
14115             # if we break at an opening container..break at the closing
14116             if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
14117                 set_closing_breakpoint($i_nonblank);
14118             }
14119         }
14120     }
14121 }
14122
14123 sub clear_breakpoint_undo_stack {
14124     $forced_breakpoint_undo_count = 0;
14125 }
14126
14127 sub undo_forced_breakpoint_stack {
14128
14129     my $i_start = shift;
14130     if ( $i_start < 0 ) {
14131         $i_start = 0;
14132         my ( $a, $b, $c ) = caller();
14133         warning(
14134 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
14135         );
14136     }
14137
14138     while ( $forced_breakpoint_undo_count > $i_start ) {
14139         my $i =
14140           $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14141         if ( $i >= 0 && $i <= $max_index_to_go ) {
14142             $forced_breakpoint_to_go[$i] = 0;
14143             $forced_breakpoint_count--;
14144
14145             FORMATTER_DEBUG_FLAG_UNDOBP && do {
14146                 my ( $a, $b, $c ) = caller();
14147                 print(
14148 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
14149                 );
14150             };
14151         }
14152
14153         # shouldn't happen, but not a critical error
14154         else {
14155             FORMATTER_DEBUG_FLAG_UNDOBP && do {
14156                 my ( $a, $b, $c ) = caller();
14157                 print(
14158 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
14159                 );
14160             };
14161         }
14162     }
14163 }
14164
14165 sub recombine_breakpoints {
14166
14167     # sub set_continuation_breaks is very liberal in setting line breaks
14168     # for long lines, always setting breaks at good breakpoints, even
14169     # when that creates small lines.  Occasionally small line fragments
14170     # are produced which would look better if they were combined.
14171     # That's the task of this routine, recombine_breakpoints.
14172     my ( $ri_first, $ri_last ) = @_;
14173     my $more_to_do = 1;
14174
14175     # Keep looping until there are no more possible recombinations
14176     my $nmax_last = @$ri_last;
14177     while ($more_to_do) {
14178         my $n_best = 0;
14179         my $bs_best;
14180         my $n;
14181         my $nmax = @$ri_last - 1;
14182
14183         # safety check..
14184         unless ( $nmax < $nmax_last ) {
14185
14186             # shouldn't happen because splice below decreases nmax on each pass:
14187             # but i get paranoid sometimes
14188             die "Program bug-infinite loop in recombine breakpoints\n";
14189         }
14190         $nmax_last  = $nmax;
14191         $more_to_do = 0;
14192
14193         # loop over all remaining lines...
14194         for $n ( 1 .. $nmax ) {
14195
14196             #----------------------------------------------------------
14197             # Indexes of the endpoints of the two lines are:
14198             #
14199             #  ---left---- | ---right---
14200             #  $if   $imid | $imidr   $il
14201             #
14202             # We want to decide if we should join tokens $imid to $imidr
14203             #----------------------------------------------------------
14204             my $if    = $$ri_first[ $n - 1 ];
14205             my $il    = $$ri_last[$n];
14206             my $imid  = $$ri_last[ $n - 1 ];
14207             my $imidr = $$ri_first[$n];
14208
14209 #print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
14210
14211             #----------------------------------------------------------
14212             # Start of special recombination rules
14213             # These are ad-hoc rules which have been found to work ok.
14214             # Skip to next pair to avoid re-combination.
14215             #----------------------------------------------------------
14216
14217             # a terminal '{' should stay where it is
14218             next if ( $n == $nmax && $types_to_go[$imidr] eq '{' );
14219
14220             #----------------------------------------------------------
14221             # examine token at $imid  (right end of first line of pair)
14222             #----------------------------------------------------------
14223
14224             # an isolated '}' may join with a ';' terminated segment
14225             if ( $types_to_go[$imid] eq '}' ) {
14226                 next
14227                   unless (
14228
14229                     # join } and ;
14230                     ( ( $if == $imid ) && ( $types_to_go[$il] eq ';' ) )
14231
14232                     # handle '.' and '?' below
14233                     || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
14234                   );
14235             }
14236
14237             # do not recombine lines with ending &&, ||, or :
14238             elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
14239                 next unless $want_break_before{ $types_to_go[$imid] };
14240             }
14241
14242             # for lines ending in a comma...
14243             elsif ( $types_to_go[$imid] eq ',' ) {
14244
14245                 # an isolated '},' may join with an identifier + ';'
14246                 # this is useful for the class of a 'bless' statement (bless.t)
14247                 if (   $types_to_go[$if] eq '}'
14248                     && $types_to_go[$imidr] eq 'i' )
14249                 {
14250                     next
14251                       unless ( ( $if == ( $imid - 1 ) )
14252                         && ( $il == ( $imidr + 1 ) )
14253                         && ( $types_to_go[$il] eq ';' ) );
14254
14255                     # override breakpoint
14256                     $forced_breakpoint_to_go[$imid] = 0;
14257                 }
14258
14259                 # but otherwise, do not recombine unless this will leave
14260                 # just 1 more line
14261                 else {
14262                     next unless ( $n + 1 >= $nmax );
14263                 }
14264             }
14265
14266             # opening paren..
14267             elsif ( $types_to_go[$imid] eq '(' ) {
14268
14269                 # No longer doing this
14270             }
14271
14272             elsif ( $types_to_go[$imid] eq ')' ) {
14273
14274                 # No longer doing this
14275             }
14276
14277             # keep a terminal colon
14278             elsif ( $types_to_go[$imid] eq ':' ) {
14279                 next;
14280             }
14281
14282             # keep a terminal for-semicolon
14283             elsif ( $types_to_go[$imid] eq 'f' ) {
14284                 next;
14285             }
14286
14287             # if '=' at end of line ...
14288             elsif ( $is_assignment{ $types_to_go[$imid] } ) {
14289
14290                 # otherwise always ok to join isolated '='
14291                 unless ( $if == $imid ) {
14292
14293                     my $is_math = (
14294                         ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
14295
14296                         # note no '$' in pattern because -> can
14297                         # start long identifier
14298                           && !grep { $_ =~ /^(->|=>|[\,])/ }
14299                           @types_to_go[ $imidr .. $il ]
14300                     );
14301
14302                     # retain the break after the '=' unless ...
14303                     next
14304                       unless (
14305
14306                         # '=' is followed by a number and looks like math
14307                         ( $types_to_go[$imidr] eq 'n' && $is_math )
14308
14309                         # or followed by a scalar and looks like math
14310                         || (   ( $types_to_go[$imidr] eq 'i' )
14311                             && ( $tokens_to_go[$imidr] =~ /^\$/ )
14312                             && $is_math )
14313
14314                         # or followed by a single "short" token
14315                         # ('12' is arbitrary)
14316                         || ( $il == $imidr
14317                             && token_sequence_length( $imidr, $imidr ) < 12 )
14318
14319                       );
14320                 }
14321                 unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
14322                     $forced_breakpoint_to_go[$imid] = 0;
14323                 }
14324             }
14325
14326             # for keywords..
14327             elsif ( $types_to_go[$imid] eq 'k' ) {
14328
14329                 # make major control keywords stand out
14330                 # (recombine.t)
14331                 next
14332                   if (
14333
14334                     #/^(last|next|redo|return)$/
14335                     $is_last_next_redo_return{ $tokens_to_go[$imid] }
14336                   );
14337
14338                 if ( $is_and_or{ $tokens_to_go[$imid] } ) {
14339                     next unless $want_break_before{ $tokens_to_go[$imid] };
14340                 }
14341             }
14342
14343             #----------------------------------------------------------
14344             # examine token at $imidr (left end of second line of pair)
14345             #----------------------------------------------------------
14346
14347             # do not recombine lines with leading &&, ||, or :
14348             if ( $types_to_go[$imidr] =~ /^(|:|\&\&|\|\|)$/ ) {
14349                 next if $want_break_before{ $types_to_go[$imidr] };
14350             }
14351
14352             # Identify and recombine a broken ?/: chain
14353             elsif ( $types_to_go[$imidr] eq '?' ) {
14354
14355                 # indexes of line first tokens --
14356                 #  mm  - line before previous line
14357                 #  f   - previous line
14358                 #     <-- this line
14359                 #  ff  - next line
14360                 #  fff - line after next
14361                 my $iff  = $n < $nmax      ? $$ri_first[ $n + 1 ] : -1;
14362                 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
14363                 my $imm  = $n > 1          ? $$ri_first[ $n - 2 ] : -1;
14364                 my $seqno = $type_sequence_to_go[$imidr];
14365                 my $f_ok  =
14366                   (      $types_to_go[$if] eq ':'
14367                       && $type_sequence_to_go[$if] ==
14368                       $seqno - TYPE_SEQUENCE_INCREMENT );
14369                 my $mm_ok =
14370                   (      $imm >= 0
14371                       && $types_to_go[$imm] eq ':'
14372                       && $type_sequence_to_go[$imm] ==
14373                       $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
14374
14375                 my $ff_ok =
14376                   (      $iff > 0
14377                       && $types_to_go[$iff] eq ':'
14378                       && $type_sequence_to_go[$iff] == $seqno );
14379                 my $fff_ok =
14380                   (      $ifff > 0
14381                       && $types_to_go[$ifff] eq ':'
14382                       && $type_sequence_to_go[$ifff] ==
14383                       $seqno + TYPE_SEQUENCE_INCREMENT );
14384
14385                 # we require that this '?' be part of a correct sequence
14386                 # of 3 in a row or else no recombination is done.
14387                 next
14388                   unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
14389                 $forced_breakpoint_to_go[$imid] = 0;
14390             }
14391
14392             # do not recombine lines with leading '.'
14393             elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
14394                 my $i_next_nonblank = $imidr + 1;
14395                 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
14396                     $i_next_nonblank++;
14397                 }
14398
14399                 next
14400                   unless (
14401
14402      #      ... unless there is just one and we can reduce this to
14403      #      two lines if we do.  For example, this :
14404      #
14405      #                $bodyA .=
14406      #                  '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
14407      #
14408      #      looks better than this:
14409      #                $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
14410      #                   . '$args .= $pat;'
14411
14412                     (
14413                            $n == 2
14414                         && $n == $nmax
14415                         && $types_to_go[$if] ne $types_to_go[$imidr]
14416                     )
14417
14418                     #
14419                     #      ... or this would strand a short quote , like this
14420                     #                . "some long qoute"
14421                     #                . "\n";
14422                     #
14423
14424                     || (   $types_to_go[$i_next_nonblank] eq 'Q'
14425                         && $i_next_nonblank >= $il - 1
14426                         && length( $tokens_to_go[$i_next_nonblank] ) <
14427                         $rOpts_short_concatenation_item_length )
14428                   );
14429             }
14430
14431             # handle leading keyword..
14432             elsif ( $types_to_go[$imidr] eq 'k' ) {
14433
14434                 # handle leading "and" and "or"
14435                 if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
14436
14437                     # Decide if we will combine a single terminal 'and' and
14438                     # 'or' after an 'if' or 'unless'.  We should consider the
14439                     # possible vertical alignment, and visual clutter.
14440
14441   #     This looks best with the 'and' on the same line as the 'if':
14442   #
14443   #         $a = 1
14444   #           if $seconds and $nu < 2;
14445   #
14446   #     But this looks better as shown:
14447   #
14448   #         $a = 1
14449   #           if !$this->{Parents}{$_}
14450   #           or $this->{Parents}{$_} eq $_;
14451   #
14452   #     Eventually, it would be nice to look for similarities (such as 'this' or
14453   #     'Parents'), but for now I'm using a simple rule that says that the
14454   #     resulting line length must not be more than half the maximum line length
14455   #     (making it 80/2 = 40 characters by default).
14456
14457                     next
14458                       unless (
14459                         $n == $nmax    # if this is the last line
14460                         && $types_to_go[$il] eq ';'    # ending in ';'
14461                         && $types_to_go[$if] eq 'k'    # after 'if' or 'unless'
14462                                                        #   /^(if|unless)$/
14463                         && $is_if_unless{ $tokens_to_go[$if] }
14464
14465                         # and if this doesn't make a long last line
14466                         && total_line_length( $if, $il ) <=
14467                         $half_maximum_line_length
14468                       );
14469
14470                     # override breakpoint
14471                     $forced_breakpoint_to_go[$imid] = 0;
14472                 }
14473
14474                 # handle leading "if" and "unless"
14475                 elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
14476
14477                     # FIXME: This is still experimental..may not be too useful
14478                     next
14479                       unless (
14480                         $n == $nmax    # if this is the last line
14481                         && $types_to_go[$il] eq ';'    # ending in ';'
14482                         && $types_to_go[$if] eq 'k'
14483
14484                         #   /^(and|or)$/
14485                         && $is_and_or{ $tokens_to_go[$if] }
14486
14487                         # and if this doesn't make a long last line
14488                         && total_line_length( $if, $il ) <=
14489                         $half_maximum_line_length
14490                       );
14491
14492                     # override breakpoint
14493                     $forced_breakpoint_to_go[$imid] = 0;
14494                 }
14495
14496                 # handle all other leading keywords
14497                 else {
14498
14499                     # keywords look best at start of lines,
14500                     # but combine things like "1 while"
14501
14502                     unless ( $is_assignment{ $types_to_go[$imid] } ) {
14503                         next
14504                           if ( ( $types_to_go[$imid] ne 'k' )
14505                             && ( $tokens_to_go[$imidr] !~ /^(while)$/ ) );
14506                     }
14507                 }
14508             }
14509
14510             # similar treatment of && and || as above for 'and' and 'or':
14511             elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
14512
14513                 # maybe looking at something like:
14514                 #   unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
14515
14516                 next
14517                   unless (
14518                     $n == $nmax    # if this is the last line
14519                     && $types_to_go[$il] eq ';'    # ending in ';'
14520                     && $types_to_go[$if] eq 'k'    # after an 'if' or 'unless'
14521                                                    #   /^(if|unless)$/
14522                     && $is_if_unless{ $tokens_to_go[$if] }
14523
14524                     # and if this doesn't make a long last line
14525                     && total_line_length( $if, $il ) <=
14526                     $half_maximum_line_length
14527                   );
14528
14529                 # override breakpoint
14530                 $forced_breakpoint_to_go[$imid] = 0;
14531             }
14532
14533             # honor hard breakpoints
14534             next if ( $forced_breakpoint_to_go[$imid] > 0 );
14535
14536             #----------------------------------------------------------
14537             # end of special recombination rules
14538             #----------------------------------------------------------
14539
14540             my $bs = $bond_strength_to_go[$imid];
14541
14542             # combined line cannot be too long
14543             next
14544               if excess_line_length( $if, $il ) > 0;
14545
14546             # do not recombine if we would skip in indentation levels
14547             if ( $n < $nmax ) {
14548                 my $if_next = $$ri_first[ $n + 1 ];
14549                 next
14550                   if (
14551                        $levels_to_go[$if] < $levels_to_go[$imidr]
14552                     && $levels_to_go[$imidr] < $levels_to_go[$if_next]
14553
14554                     # but an isolated 'if (' is undesirable
14555                     && !(
14556                            $n == 1
14557                         && $imid - $if <= 2
14558                         && $types_to_go[$if]  eq 'k'
14559                         && $tokens_to_go[$if] eq 'if'
14560                         && $tokens_to_go[$imid] ne '('
14561                     )
14562
14563                     #
14564                   );
14565             }
14566
14567             # honor no-break's
14568             next if ( $bs == NO_BREAK );
14569
14570             # remember the pair with the greatest bond strength
14571             if ( !$n_best ) {
14572                 $n_best  = $n;
14573                 $bs_best = $bs;
14574             }
14575             else {
14576
14577                 if ( $bs > $bs_best ) {
14578                     $n_best  = $n;
14579                     $bs_best = $bs;
14580                 }
14581
14582                 # we have 2 or more candidates, so need another pass
14583                 $more_to_do++;
14584             }
14585         }
14586
14587         # recombine the pair with the greatest bond strength
14588         if ($n_best) {
14589             splice @$ri_first, $n_best, 1;
14590             splice @$ri_last, $n_best - 1, 1;
14591         }
14592     }
14593     return ( $ri_first, $ri_last );
14594 }
14595
14596 sub set_continuation_breaks {
14597
14598     # Define an array of indexes for inserting newline characters to
14599     # keep the line lengths below the maximum desired length.  There is
14600     # an implied break after the last token, so it need not be included.
14601     # We'll break at points where the bond strength is lowest.
14602
14603     my $saw_good_break = shift;
14604     my @i_first        = ();      # the first index to output
14605     my @i_last         = ();      # the last index to output
14606     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
14607     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
14608
14609     set_bond_strengths();
14610
14611     my $imin = 0;
14612     my $imax = $max_index_to_go;
14613     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
14614     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
14615     my $i_begin = $imin;
14616
14617     my $leading_spaces          = leading_spaces_to_go($imin);
14618     my $line_count              = 0;
14619     my $last_break_strength     = NO_BREAK;
14620     my $i_last_break            = -1;
14621     my $max_bias                = 0.001;
14622     my $tiny_bias               = 0.0001;
14623     my $leading_alignment_token = "";
14624     my $leading_alignment_type  = "";
14625
14626     # see if any ?/:'s are in order
14627     my $colons_in_order = 1;
14628     my $last_tok        = "";
14629     my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
14630     foreach (@colon_list) {
14631         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
14632         $last_tok = $_;
14633     }
14634
14635     # This is a sufficient but not necessary condition for colon chain
14636     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
14637
14638     while ( $i_begin <= $imax ) {
14639         my $lowest_strength        = NO_BREAK;
14640         my $starting_sum           = $lengths_to_go[$i_begin];
14641         my $i_lowest               = -1;
14642         my $i_test                 = -1;
14643         my $lowest_next_token      = '';
14644         my $lowest_next_type       = 'b';
14645         my $i_lowest_next_nonblank = -1;
14646
14647         # loop to find next break point
14648         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
14649             my $type            = $types_to_go[$i_test];
14650             my $token           = $tokens_to_go[$i_test];
14651             my $next_type       = $types_to_go[ $i_test + 1 ];
14652             my $next_token      = $tokens_to_go[ $i_test + 1 ];
14653             my $i_next_nonblank =
14654               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
14655             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
14656             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
14657             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
14658             my $strength                 = $bond_strength_to_go[$i_test];
14659             my $must_break               = 0;
14660
14661             # FIXME: TESTING: Might want to be able to break after these
14662             # force an immediate break at certain operators
14663             # with lower level than the start of the line
14664             if (
14665                 (
14666                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
14667                     || (   $next_nonblank_type eq 'k'
14668                         && $next_nonblank_token =~ /^(and|or)$/ )
14669                 )
14670                 && ( $nesting_depth_to_go[$i_begin] >
14671                     $nesting_depth_to_go[$i_next_nonblank] )
14672               )
14673             {
14674                 set_forced_breakpoint($i_next_nonblank);
14675             }
14676
14677             if (
14678
14679                 # Try to put a break where requested by scan_list
14680                 $forced_breakpoint_to_go[$i_test]
14681
14682                 # break between ) { in a continued line so that the '{' can
14683                 # be outdented
14684                 # See similar logic in scan_list which catches instances
14685                 # where a line is just something like ') {'
14686                 || (   $line_count
14687                     && ( $token eq ')' )
14688                     && ( $next_nonblank_type eq '{' )
14689                     && ($next_nonblank_block_type)
14690                     && !$rOpts->{'opening-brace-always-on-right'} )
14691
14692                 # There is an implied forced break at a terminal opening brace
14693                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
14694
14695               )
14696             {
14697
14698                 # Forced breakpoints must sometimes be overridden, for example
14699                 # because of a side comment causing a NO_BREAK.  It is easier
14700                 # to catch this here than when they are set.
14701                 if ( $strength < NO_BREAK ) {
14702                     $strength   = $lowest_strength - $tiny_bias;
14703                     $must_break = 1;
14704                 }
14705             }
14706
14707             # quit if a break here would put a good terminal token on
14708             # the next line and we already have a possible break
14709             if (
14710                    !$must_break
14711                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
14712                 && (
14713                     (
14714                         $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
14715                         - $starting_sum
14716                     ) > $rOpts_maximum_line_length
14717                 )
14718               )
14719             {
14720                 last if ( $i_lowest >= 0 );
14721             }
14722
14723             # Avoid a break which would strand a single punctuation
14724             # token.  For example, we do not want to strand a leading
14725             # '.' which is followed by a long quoted string.
14726             if (
14727                    !$must_break
14728                 && ( $i_test == $i_begin )
14729                 && ( $i_test < $imax )
14730                 && ( $token eq $type )
14731                 && (
14732                     (
14733                         $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
14734                         $starting_sum
14735                     ) <= $rOpts_maximum_line_length
14736                 )
14737               )
14738             {
14739                 $i_test++;
14740
14741                 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
14742                     $i_test++;
14743                 }
14744                 redo;
14745             }
14746
14747             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
14748             {
14749
14750                 # break at previous best break if it would have produced
14751                 # a leading alignment of certain common tokens, and it
14752                 # is different from the latest candidate break
14753                 last
14754                   if ($leading_alignment_type);
14755
14756                 # Force at least one breakpoint if old code had good
14757                 # break It is only called if a breakpoint is required or
14758                 # desired.  This will probably need some adjustments
14759                 # over time.  A goal is to try to be sure that, if a new
14760                 # side comment is introduced into formated text, then
14761                 # the same breakpoints will occur.  scbreak.t
14762                 last
14763                   if (
14764                     $i_test == $imax                # we are at the end
14765                     && !$forced_breakpoint_count    #
14766                     && $saw_good_break              # old line had good break
14767                     && $type =~ /^[#;\{]$/          # and this line ends in
14768                                                     # ';' or side comment
14769                     && $i_last_break < 0        # and we haven't made a break
14770                     && $i_lowest > 0            # and we saw a possible break
14771                     && $i_lowest < $imax - 1    # (but not just before this ;)
14772                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
14773                   );
14774
14775                 $lowest_strength        = $strength;
14776                 $i_lowest               = $i_test;
14777                 $lowest_next_token      = $next_nonblank_token;
14778                 $lowest_next_type       = $next_nonblank_type;
14779                 $i_lowest_next_nonblank = $i_next_nonblank;
14780                 last if $must_break;
14781
14782                 # set flags to remember if a break here will produce a
14783                 # leading alignment of certain common tokens
14784                 if (
14785                        $line_count > 0
14786                     && $i_test < $imax
14787                     && ( $lowest_strength - $last_break_strength <= $max_bias )
14788                     && ( $nesting_depth_to_go[$i_begin] >=
14789                         $nesting_depth_to_go[$i_next_nonblank] )
14790                     && (
14791                         (
14792                                $types_to_go[$i_begin] =~ /^(\.|\&\&|\|\||:)$/
14793                             && $types_to_go[$i_begin] eq $next_nonblank_type
14794                         )
14795                         || (   $tokens_to_go[$i_begin] =~ /^(and|or)$/
14796                             && $tokens_to_go[$i_begin] eq $next_nonblank_token )
14797                     )
14798                   )
14799                 {
14800                     $leading_alignment_token = $next_nonblank_token;
14801                     $leading_alignment_type  = $next_nonblank_type;
14802                 }
14803             }
14804
14805             my $too_long =
14806               ( $i_test >= $imax )
14807               ? 1
14808               : (
14809                 (
14810                     $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
14811                       $starting_sum
14812                 ) > $rOpts_maximum_line_length
14813               );
14814
14815             FORMATTER_DEBUG_FLAG_BREAK
14816               && print
14817 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
14818
14819             # allow one extra terminal token after exceeding line length
14820             # if it would strand this token.
14821             if (   $rOpts_fuzzy_line_length
14822                 && $too_long
14823                 && ( $i_lowest == $i_test )
14824                 && ( length($token) > 1 )
14825                 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
14826             {
14827                 $too_long = 0;
14828             }
14829
14830             last
14831               if (
14832                 ( $i_test == $imax )    # we're done if no more tokens,
14833                 || (
14834                     ( $i_lowest >= 0 )    # or no more space and we have a break
14835                     && $too_long
14836                 )
14837               );
14838         }
14839
14840         # it's always ok to break at imax if no other break was found
14841         if ( $i_lowest < 0 ) { $i_lowest = $imax }
14842
14843         # semi-final index calculation
14844         my $i_next_nonblank = (
14845             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
14846             ? $i_lowest + 2
14847             : $i_lowest + 1
14848         );
14849         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
14850         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14851
14852         #-------------------------------------------------------
14853         # ?/: rule 1 : if a break here will separate a '?' on this
14854         # line from its closing ':', then break at the '?' instead.
14855         #-------------------------------------------------------
14856         my $i;
14857         foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
14858             next unless ( $tokens_to_go[$i] eq '?' );
14859
14860             # do not break if probable sequence of ?/: statements
14861             next if ($is_colon_chain);
14862
14863             # do not break if statement is broken by side comment
14864             next
14865               if (
14866                 $tokens_to_go[$max_index_to_go] eq '#'
14867                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
14868                     $max_index_to_go ) !~ /^[\;\}]$/
14869               );
14870
14871             # no break needed if matching : is also on the line
14872             next
14873               if ( $mate_index_to_go[$i] >= 0
14874                 && $mate_index_to_go[$i] <= $i_next_nonblank );
14875
14876             $i_lowest = $i;
14877             if ( $want_break_before{'?'} ) { $i_lowest-- }
14878             last;
14879         }
14880
14881         # final index calculation
14882         $i_next_nonblank = (
14883             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
14884             ? $i_lowest + 2
14885             : $i_lowest + 1
14886         );
14887         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
14888         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14889
14890         FORMATTER_DEBUG_FLAG_BREAK
14891           && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
14892
14893         #-------------------------------------------------------
14894         # ?/: rule 2 : if we break at a '?', then break at its ':'
14895         #
14896         # Note: this rule is also in sub scan_list to handle a break
14897         # at the start and end of a line (in case breaks are dictated
14898         # by side comments).
14899         #-------------------------------------------------------
14900         if ( $next_nonblank_type eq '?' ) {
14901             set_closing_breakpoint($i_next_nonblank);
14902         }
14903         elsif ( $types_to_go[$i_lowest] eq '?' ) {
14904             set_closing_breakpoint($i_lowest);
14905         }
14906
14907         #-------------------------------------------------------
14908         # ?/: rule 3 : if we break at a ':' then we save
14909         # its location for further work below.  We may need to go
14910         # back and break at its '?'.
14911         #-------------------------------------------------------
14912         if ( $next_nonblank_type eq ':' ) {
14913             push @i_colon_breaks, $i_next_nonblank;
14914         }
14915         elsif ( $types_to_go[$i_lowest] eq ':' ) {
14916             push @i_colon_breaks, $i_lowest;
14917         }
14918
14919         # here we should set breaks for all '?'/':' pairs which are
14920         # separated by this line
14921
14922         $line_count++;
14923
14924         # save this line segment, after trimming blanks at the ends
14925         push( @i_first,
14926             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
14927         push( @i_last,
14928             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
14929
14930         # set a forced breakpoint at a container opening, if necessary, to
14931         # signal a break at a closing container.  Excepting '(' for now.
14932         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
14933             && !$forced_breakpoint_to_go[$i_lowest] )
14934         {
14935             set_closing_breakpoint($i_lowest);
14936         }
14937
14938         # get ready to go again
14939         $i_begin                 = $i_lowest + 1;
14940         $last_break_strength     = $lowest_strength;
14941         $i_last_break            = $i_lowest;
14942         $leading_alignment_token = "";
14943         $leading_alignment_type  = "";
14944         $lowest_next_token       = '';
14945         $lowest_next_type        = 'b';
14946
14947         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
14948             $i_begin++;
14949         }
14950
14951         # update indentation size
14952         if ( $i_begin <= $imax ) {
14953             $leading_spaces = leading_spaces_to_go($i_begin);
14954         }
14955     }
14956
14957     #-------------------------------------------------------
14958     # ?/: rule 4 -- if we broke at a ':', then break at
14959     # corresponding '?' unless this is a chain of ?: expressions
14960     #-------------------------------------------------------
14961     if (@i_colon_breaks) {
14962
14963         # using a simple method for deciding if we are in a ?/: chain --
14964         # this is a chain if it has multiple ?/: pairs all in order;
14965         # otherwise not.
14966         # Note that if line starts in a ':' we count that above as a break
14967         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
14968
14969         unless ($is_chain) {
14970             my @insert_list = ();
14971             foreach (@i_colon_breaks) {
14972                 my $i_question = $mate_index_to_go[$_];
14973                 if ( $i_question >= 0 ) {
14974                     if ( $want_break_before{'?'} ) {
14975                         $i_question--;
14976                         if (   $i_question > 0
14977                             && $types_to_go[$i_question] eq 'b' )
14978                         {
14979                             $i_question--;
14980                         }
14981                     }
14982
14983                     if ( $i_question >= 0 ) {
14984                         push @insert_list, $i_question;
14985                     }
14986                 }
14987                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
14988             }
14989         }
14990     }
14991     return \@i_first, \@i_last;
14992 }
14993
14994 sub insert_additional_breaks {
14995
14996     # this routine will add line breaks at requested locations after
14997     # sub set_continuation_breaks has made preliminary breaks.
14998
14999     my ( $ri_break_list, $ri_first, $ri_last ) = @_;
15000     my $i_f;
15001     my $i_l;
15002     my $line_number = 0;
15003     my $i_break_left;
15004     foreach $i_break_left ( sort @$ri_break_list ) {
15005
15006         $i_f = $$ri_first[$line_number];
15007         $i_l = $$ri_last[$line_number];
15008         while ( $i_break_left >= $i_l ) {
15009             $line_number++;
15010
15011             # shouldn't happen unless caller passes bad indexes
15012             if ( $line_number >= @$ri_last ) {
15013                 warning(
15014 "Non-fatal program bug: couldn't set break at $i_break_left\n"
15015                 );
15016                 report_definite_bug();
15017                 return;
15018             }
15019             $i_f = $$ri_first[$line_number];
15020             $i_l = $$ri_last[$line_number];
15021         }
15022
15023         my $i_break_right = $i_break_left + 1;
15024         if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
15025
15026         if (   $i_break_left >= $i_f
15027             && $i_break_left < $i_l
15028             && $i_break_right > $i_f
15029             && $i_break_right <= $i_l )
15030         {
15031             splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
15032             splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
15033         }
15034     }
15035 }
15036
15037 sub set_closing_breakpoint {
15038
15039     # set a breakpoint at a matching closing token
15040     # at present, this is only used to break at a ':' which matches a '?'
15041     my $i_break = shift;
15042
15043     if ( $mate_index_to_go[$i_break] >= 0 ) {
15044
15045         # CAUTION: infinite recursion possible here:
15046         #   set_closing_breakpoint calls set_forced_breakpoint, and
15047         #   set_forced_breakpoint call set_closing_breakpoint
15048         #   ( test files attrib.t, BasicLyx.pm.html).
15049         # Don't reduce the '2' in the statement below
15050         if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
15051
15052             # break before } ] and ), but sub set_forced_breakpoint will decide
15053             # to break before or after a ? and :
15054             my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
15055             set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
15056         }
15057     }
15058     else {
15059         my $type_sequence = $type_sequence_to_go[$i_break];
15060         if ($type_sequence) {
15061             my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
15062             $postponed_breakpoint{$type_sequence} = 1;
15063         }
15064     }
15065 }
15066
15067 # check to see if output line tabbing agrees with input line
15068 # this can be very useful for debugging a script which has an extra
15069 # or missing brace
15070 sub compare_indentation_levels {
15071
15072     my ( $python_indentation_level, $structural_indentation_level ) = @_;
15073     if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
15074         $last_tabbing_disagreement = $input_line_number;
15075
15076         if ($in_tabbing_disagreement) {
15077         }
15078         else {
15079             $tabbing_disagreement_count++;
15080
15081             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15082                 write_logfile_entry(
15083 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
15084                 );
15085             }
15086             $in_tabbing_disagreement    = $input_line_number;
15087             $first_tabbing_disagreement = $in_tabbing_disagreement
15088               unless ($first_tabbing_disagreement);
15089         }
15090     }
15091     else {
15092
15093         if ($in_tabbing_disagreement) {
15094
15095             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15096                 write_logfile_entry(
15097 "End indentation disagreement from input line $in_tabbing_disagreement\n"
15098                 );
15099
15100                 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
15101                     write_logfile_entry(
15102                         "No further tabbing disagreements will be noted\n");
15103                 }
15104             }
15105             $in_tabbing_disagreement = 0;
15106         }
15107     }
15108 }
15109
15110 #####################################################################
15111 #
15112 # the Perl::Tidy::IndentationItem class supplies items which contain
15113 # how much whitespace should be used at the start of a line
15114 #
15115 #####################################################################
15116
15117 package Perl::Tidy::IndentationItem;
15118
15119 # Indexes for indentation items
15120 use constant SPACES             => 0;     # total leading white spaces
15121 use constant LEVEL              => 1;     # the indentation 'level'
15122 use constant CI_LEVEL           => 2;     # the 'continuation level'
15123 use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
15124                                           # for this level
15125 use constant CLOSED             => 4;     # index where we saw closing '}'
15126 use constant COMMA_COUNT        => 5;     # how many commas at this level?
15127 use constant SEQUENCE_NUMBER    => 6;     # output batch number
15128 use constant INDEX              => 7;     # index in output batch list
15129 use constant HAVE_CHILD         => 8;     # any dependents?
15130 use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
15131                                           # we would like to move to get
15132                                           # alignment (negative if left)
15133 use constant ALIGN_PAREN        => 10;    # do we want to try to align
15134                                           # with an opening structure?
15135 use constant MARKED             => 11;    # if visited by corrector logic
15136 use constant STACK_DEPTH        => 12;    # indentation nesting depth
15137 use constant STARTING_INDEX     => 13;    # first token index of this level
15138 use constant ARROW_COUNT        => 14;    # how many =>'s
15139
15140 sub new {
15141
15142     # Create an 'indentation_item' which describes one level of leading
15143     # whitespace when the '-lp' indentation is used.  We return
15144     # a reference to an anonymous array of associated variables.
15145     # See above constants for storage scheme.
15146     my (
15147         $class,               $spaces,           $level,
15148         $ci_level,            $available_spaces, $index,
15149         $gnu_sequence_number, $align_paren,      $stack_depth,
15150         $starting_index,
15151       )
15152       = @_;
15153     my $closed            = -1;
15154     my $arrow_count       = 0;
15155     my $comma_count       = 0;
15156     my $have_child        = 0;
15157     my $want_right_spaces = 0;
15158     my $marked            = 0;
15159     bless [
15160         $spaces,              $level,          $ci_level,
15161         $available_spaces,    $closed,         $comma_count,
15162         $gnu_sequence_number, $index,          $have_child,
15163         $want_right_spaces,   $align_paren,    $marked,
15164         $stack_depth,         $starting_index, $arrow_count,
15165     ], $class;
15166 }
15167
15168 sub permanently_decrease_AVAILABLE_SPACES {
15169
15170     # make a permanent reduction in the available indentation spaces
15171     # at one indentation item.  NOTE: if there are child nodes, their
15172     # total SPACES must be reduced by the caller.
15173
15174     my ( $item, $spaces_needed ) = @_;
15175     my $available_spaces = $item->get_AVAILABLE_SPACES();
15176     my $deleted_spaces   =
15177       ( $available_spaces > $spaces_needed )
15178       ? $spaces_needed
15179       : $available_spaces;
15180     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
15181     $item->decrease_SPACES($deleted_spaces);
15182     $item->set_RECOVERABLE_SPACES(0);
15183
15184     return $deleted_spaces;
15185 }
15186
15187 sub tentatively_decrease_AVAILABLE_SPACES {
15188
15189     # We are asked to tentatively delete $spaces_needed of indentation
15190     # for a indentation item.  We may want to undo this later.  NOTE: if
15191     # there are child nodes, their total SPACES must be reduced by the
15192     # caller.
15193     my ( $item, $spaces_needed ) = @_;
15194     my $available_spaces = $item->get_AVAILABLE_SPACES();
15195     my $deleted_spaces   =
15196       ( $available_spaces > $spaces_needed )
15197       ? $spaces_needed
15198       : $available_spaces;
15199     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
15200     $item->decrease_SPACES($deleted_spaces);
15201     $item->increase_RECOVERABLE_SPACES($deleted_spaces);
15202     return $deleted_spaces;
15203 }
15204
15205 sub get_STACK_DEPTH {
15206     my $self = shift;
15207     return $self->[STACK_DEPTH];
15208 }
15209
15210 sub get_SPACES {
15211     my $self = shift;
15212     return $self->[SPACES];
15213 }
15214
15215 sub get_MARKED {
15216     my $self = shift;
15217     return $self->[MARKED];
15218 }
15219
15220 sub set_MARKED {
15221     my ( $self, $value ) = @_;
15222     if ( defined($value) ) {
15223         $self->[MARKED] = $value;
15224     }
15225     return $self->[MARKED];
15226 }
15227
15228 sub get_AVAILABLE_SPACES {
15229     my $self = shift;
15230     return $self->[AVAILABLE_SPACES];
15231 }
15232
15233 sub decrease_SPACES {
15234     my ( $self, $value ) = @_;
15235     if ( defined($value) ) {
15236         $self->[SPACES] -= $value;
15237     }
15238     return $self->[SPACES];
15239 }
15240
15241 sub decrease_AVAILABLE_SPACES {
15242     my ( $self, $value ) = @_;
15243     if ( defined($value) ) {
15244         $self->[AVAILABLE_SPACES] -= $value;
15245     }
15246     return $self->[AVAILABLE_SPACES];
15247 }
15248
15249 sub get_ALIGN_PAREN {
15250     my $self = shift;
15251     return $self->[ALIGN_PAREN];
15252 }
15253
15254 sub get_RECOVERABLE_SPACES {
15255     my $self = shift;
15256     return $self->[RECOVERABLE_SPACES];
15257 }
15258
15259 sub set_RECOVERABLE_SPACES {
15260     my ( $self, $value ) = @_;
15261     if ( defined($value) ) {
15262         $self->[RECOVERABLE_SPACES] = $value;
15263     }
15264     return $self->[RECOVERABLE_SPACES];
15265 }
15266
15267 sub increase_RECOVERABLE_SPACES {
15268     my ( $self, $value ) = @_;
15269     if ( defined($value) ) {
15270         $self->[RECOVERABLE_SPACES] += $value;
15271     }
15272     return $self->[RECOVERABLE_SPACES];
15273 }
15274
15275 sub get_CI_LEVEL {
15276     my $self = shift;
15277     return $self->[CI_LEVEL];
15278 }
15279
15280 sub get_LEVEL {
15281     my $self = shift;
15282     return $self->[LEVEL];
15283 }
15284
15285 sub get_SEQUENCE_NUMBER {
15286     my $self = shift;
15287     return $self->[SEQUENCE_NUMBER];
15288 }
15289
15290 sub get_INDEX {
15291     my $self = shift;
15292     return $self->[INDEX];
15293 }
15294
15295 sub get_STARTING_INDEX {
15296     my $self = shift;
15297     return $self->[STARTING_INDEX];
15298 }
15299
15300 sub set_HAVE_CHILD {
15301     my ( $self, $value ) = @_;
15302     if ( defined($value) ) {
15303         $self->[HAVE_CHILD] = $value;
15304     }
15305     return $self->[HAVE_CHILD];
15306 }
15307
15308 sub get_HAVE_CHILD {
15309     my $self = shift;
15310     return $self->[HAVE_CHILD];
15311 }
15312
15313 sub set_ARROW_COUNT {
15314     my ( $self, $value ) = @_;
15315     if ( defined($value) ) {
15316         $self->[ARROW_COUNT] = $value;
15317     }
15318     return $self->[ARROW_COUNT];
15319 }
15320
15321 sub get_ARROW_COUNT {
15322     my $self = shift;
15323     return $self->[ARROW_COUNT];
15324 }
15325
15326 sub set_COMMA_COUNT {
15327     my ( $self, $value ) = @_;
15328     if ( defined($value) ) {
15329         $self->[COMMA_COUNT] = $value;
15330     }
15331     return $self->[COMMA_COUNT];
15332 }
15333
15334 sub get_COMMA_COUNT {
15335     my $self = shift;
15336     return $self->[COMMA_COUNT];
15337 }
15338
15339 sub set_CLOSED {
15340     my ( $self, $value ) = @_;
15341     if ( defined($value) ) {
15342         $self->[CLOSED] = $value;
15343     }
15344     return $self->[CLOSED];
15345 }
15346
15347 sub get_CLOSED {
15348     my $self = shift;
15349     return $self->[CLOSED];
15350 }
15351
15352 #####################################################################
15353 #
15354 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
15355 # contain a single output line
15356 #
15357 #####################################################################
15358
15359 package Perl::Tidy::VerticalAligner::Line;
15360
15361 {
15362
15363     use strict;
15364     use Carp;
15365
15366     use constant JMAX                      => 0;
15367     use constant JMAX_ORIGINAL_LINE        => 1;
15368     use constant RTOKENS                   => 2;
15369     use constant RFIELDS                   => 3;
15370     use constant RPATTERNS                 => 4;
15371     use constant INDENTATION               => 5;
15372     use constant LEADING_SPACE_COUNT       => 6;
15373     use constant OUTDENT_LONG_LINES        => 7;
15374     use constant LIST_TYPE                 => 8;
15375     use constant IS_HANGING_SIDE_COMMENT   => 9;
15376     use constant RALIGNMENTS               => 10;
15377     use constant MAXIMUM_LINE_LENGTH       => 11;
15378     use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
15379
15380     my %_index_map;
15381     $_index_map{jmax}                      = JMAX;
15382     $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
15383     $_index_map{rtokens}                   = RTOKENS;
15384     $_index_map{rfields}                   = RFIELDS;
15385     $_index_map{rpatterns}                 = RPATTERNS;
15386     $_index_map{indentation}               = INDENTATION;
15387     $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
15388     $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
15389     $_index_map{list_type}                 = LIST_TYPE;
15390     $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
15391     $_index_map{ralignments}               = RALIGNMENTS;
15392     $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
15393     $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
15394
15395     my @_default_data = ();
15396     $_default_data[JMAX]                      = undef;
15397     $_default_data[JMAX_ORIGINAL_LINE]        = undef;
15398     $_default_data[RTOKENS]                   = undef;
15399     $_default_data[RFIELDS]                   = undef;
15400     $_default_data[RPATTERNS]                 = undef;
15401     $_default_data[INDENTATION]               = undef;
15402     $_default_data[LEADING_SPACE_COUNT]       = undef;
15403     $_default_data[OUTDENT_LONG_LINES]        = undef;
15404     $_default_data[LIST_TYPE]                 = undef;
15405     $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
15406     $_default_data[RALIGNMENTS]               = [];
15407     $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
15408     $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
15409
15410     {
15411
15412         # methods to count object population
15413         my $_count = 0;
15414         sub get_count        { $_count; }
15415         sub _increment_count { ++$_count }
15416         sub _decrement_count { --$_count }
15417     }
15418
15419     # Constructor may be called as a class method
15420     sub new {
15421         my ( $caller, %arg ) = @_;
15422         my $caller_is_obj = ref($caller);
15423         my $class = $caller_is_obj || $caller;
15424         no strict "refs";
15425         my $self = bless [], $class;
15426
15427         $self->[RALIGNMENTS] = [];
15428
15429         my $index;
15430         foreach ( keys %_index_map ) {
15431             $index = $_index_map{$_};
15432             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
15433             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
15434             else { $self->[$index] = $_default_data[$index] }
15435         }
15436
15437         $self->_increment_count();
15438         return $self;
15439     }
15440
15441     sub DESTROY {
15442         $_[0]->_decrement_count();
15443     }
15444
15445     sub get_jmax                      { $_[0]->[JMAX] }
15446     sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
15447     sub get_rtokens                   { $_[0]->[RTOKENS] }
15448     sub get_rfields                   { $_[0]->[RFIELDS] }
15449     sub get_rpatterns                 { $_[0]->[RPATTERNS] }
15450     sub get_indentation               { $_[0]->[INDENTATION] }
15451     sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
15452     sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
15453     sub get_list_type                 { $_[0]->[LIST_TYPE] }
15454     sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
15455     sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
15456
15457     sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
15458     sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
15459     sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
15460     sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
15461
15462     sub get_starting_column {
15463         $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
15464     }
15465
15466     sub increment_column {
15467         $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
15468     }
15469     sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
15470
15471     sub current_field_width {
15472         my $self = shift;
15473         my ($j) = @_;
15474         if ( $j == 0 ) {
15475             return $self->get_column($j);
15476         }
15477         else {
15478             return $self->get_column($j) - $self->get_column( $j - 1 );
15479         }
15480     }
15481
15482     sub field_width_growth {
15483         my $self = shift;
15484         my $j    = shift;
15485         return $self->get_column($j) - $self->get_starting_column($j);
15486     }
15487
15488     sub starting_field_width {
15489         my $self = shift;
15490         my $j    = shift;
15491         if ( $j == 0 ) {
15492             return $self->get_starting_column($j);
15493         }
15494         else {
15495             return $self->get_starting_column($j) -
15496               $self->get_starting_column( $j - 1 );
15497         }
15498     }
15499
15500     sub increase_field_width {
15501
15502         my $self = shift;
15503         my ( $j, $pad ) = @_;
15504         my $jmax = $self->get_jmax();
15505         for my $k ( $j .. $jmax ) {
15506             $self->increment_column( $k, $pad );
15507         }
15508     }
15509
15510     sub get_available_space_on_right {
15511         my $self = shift;
15512         my $jmax = $self->get_jmax();
15513         return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
15514     }
15515
15516     sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
15517     sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
15518     sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
15519     sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
15520     sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
15521     sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
15522     sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
15523     sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
15524     sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
15525     sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
15526     sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }
15527
15528 }
15529
15530 #####################################################################
15531 #
15532 # the Perl::Tidy::VerticalAligner::Alignment class holds information
15533 # on a single column being aligned
15534 #
15535 #####################################################################
15536 package Perl::Tidy::VerticalAligner::Alignment;
15537
15538 {
15539
15540     use strict;
15541
15542     #use Carp;
15543
15544     # Symbolic array indexes
15545     use constant COLUMN          => 0;    # the current column number
15546     use constant STARTING_COLUMN => 1;    # column number when created
15547     use constant MATCHING_TOKEN  => 2;    # what token we are matching
15548     use constant STARTING_LINE   => 3;    # the line index of creation
15549     use constant ENDING_LINE     => 4;    # the most recent line to use it
15550     use constant SAVED_COLUMN    => 5;    # the most recent line to use it
15551     use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
15552                                           # (just its index in an array)
15553
15554     # Correspondence between variables and array indexes
15555     my %_index_map;
15556     $_index_map{column}          = COLUMN;
15557     $_index_map{starting_column} = STARTING_COLUMN;
15558     $_index_map{matching_token}  = MATCHING_TOKEN;
15559     $_index_map{starting_line}   = STARTING_LINE;
15560     $_index_map{ending_line}     = ENDING_LINE;
15561     $_index_map{saved_column}    = SAVED_COLUMN;
15562     $_index_map{serial_number}   = SERIAL_NUMBER;
15563
15564     my @_default_data = ();
15565     $_default_data[COLUMN]          = undef;
15566     $_default_data[STARTING_COLUMN] = undef;
15567     $_default_data[MATCHING_TOKEN]  = undef;
15568     $_default_data[STARTING_LINE]   = undef;
15569     $_default_data[ENDING_LINE]     = undef;
15570     $_default_data[SAVED_COLUMN]    = undef;
15571     $_default_data[SERIAL_NUMBER]   = undef;
15572
15573     # class population count
15574     {
15575         my $_count = 0;
15576         sub get_count        { $_count; }
15577         sub _increment_count { ++$_count }
15578         sub _decrement_count { --$_count }
15579     }
15580
15581     # constructor
15582     sub new {
15583         my ( $caller, %arg ) = @_;
15584         my $caller_is_obj = ref($caller);
15585         my $class = $caller_is_obj || $caller;
15586         no strict "refs";
15587         my $self = bless [], $class;
15588
15589         foreach ( keys %_index_map ) {
15590             my $index = $_index_map{$_};
15591             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
15592             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
15593             else { $self->[$index] = $_default_data[$index] }
15594         }
15595         $self->_increment_count();
15596         return $self;
15597     }
15598
15599     sub DESTROY {
15600         $_[0]->_decrement_count();
15601     }
15602
15603     sub get_column          { return $_[0]->[COLUMN] }
15604     sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
15605     sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
15606     sub get_starting_line   { return $_[0]->[STARTING_LINE] }
15607     sub get_ending_line     { return $_[0]->[ENDING_LINE] }
15608     sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
15609
15610     sub set_column          { $_[0]->[COLUMN]          = $_[1] }
15611     sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
15612     sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
15613     sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
15614     sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
15615     sub increment_column { $_[0]->[COLUMN] += $_[1] }
15616
15617     sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
15618     sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
15619
15620 }
15621
15622 package Perl::Tidy::VerticalAligner;
15623
15624 # The Perl::Tidy::VerticalAligner package collects output lines and
15625 # attempts to line up certain common tokens, such as => and #, which are
15626 # identified by the calling routine.
15627 #
15628 # There are two main routines: append_line and flush.  Append acts as a
15629 # storage buffer, collecting lines into a group which can be vertically
15630 # aligned.  When alignment is no longer possible or desirable, it dumps
15631 # the group to flush.
15632 #
15633 #     append_line -----> flush
15634 #
15635 #     collects          writes
15636 #     vertical          one
15637 #     groups            group
15638
15639 BEGIN {
15640
15641     # Caution: these debug flags produce a lot of output
15642     # They should all be 0 except when debugging small scripts
15643
15644     use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
15645     use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
15646
15647     my $debug_warning = sub {
15648         print "VALIGN_DEBUGGING with key $_[0]\n";
15649     };
15650
15651     VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
15652     VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
15653
15654 }
15655
15656 use vars qw(
15657   $vertical_aligner_self
15658   $current_line
15659   $maximum_alignment_index
15660   $ralignment_list
15661   $maximum_jmax_seen
15662   $minimum_jmax_seen
15663   $previous_minimum_jmax_seen
15664   $previous_maximum_jmax_seen
15665   $maximum_line_index
15666   $group_level
15667   $group_type
15668   $group_maximum_gap
15669   $marginal_match
15670   $last_group_level_written
15671   $last_leading_space_count
15672   $extra_indent_ok
15673   $zero_count
15674   @group_lines
15675   $last_comment_column
15676   $last_side_comment_line_number
15677   $last_side_comment_length
15678   $last_side_comment_level
15679   $outdented_line_count
15680   $first_outdented_line_at
15681   $last_outdented_line_at
15682   $diagnostics_object
15683   $logger_object
15684   $file_writer_object
15685   @side_comment_history
15686   $comment_leading_space_count
15687
15688   $cached_line_text
15689   $cached_line_type
15690   $cached_line_flag
15691   $cached_seqno
15692   $cached_line_valid
15693
15694   $rOpts
15695
15696   $rOpts_maximum_line_length
15697   $rOpts_continuation_indentation
15698   $rOpts_indent_columns
15699   $rOpts_tabs
15700   $rOpts_entab_leading_whitespace
15701
15702   $rOpts_minimum_space_to_comment
15703
15704 );
15705
15706 sub initialize {
15707
15708     my $class;
15709
15710     ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
15711       = @_;
15712
15713     # variables describing the entire space group:
15714
15715     $ralignment_list            = [];
15716     $group_level                = 0;
15717     $last_group_level_written   = -1;
15718     $extra_indent_ok            = 0;    # can we move all lines to the right?
15719     $last_side_comment_length   = 0;
15720     $maximum_jmax_seen          = 0;
15721     $minimum_jmax_seen          = 0;
15722     $previous_minimum_jmax_seen = 0;
15723     $previous_maximum_jmax_seen = 0;
15724
15725     # variables describing each line of the group
15726     @group_lines = ();                  # list of all lines in group
15727
15728     $outdented_line_count          = 0;
15729     $first_outdented_line_at       = 0;
15730     $last_outdented_line_at        = 0;
15731     $last_side_comment_line_number = 0;
15732     $last_side_comment_level       = -1;
15733
15734     # most recent 3 side comments; [ line number, column ]
15735     $side_comment_history[0] = [ -300, 0 ];
15736     $side_comment_history[1] = [ -200, 0 ];
15737     $side_comment_history[2] = [ -100, 0 ];
15738
15739     # write_leader_and_string cache:
15740     $cached_line_text  = "";
15741     $cached_line_type  = 0;
15742     $cached_line_flag  = 0;
15743     $cached_seqno      = 0;
15744     $cached_line_valid = 0;
15745
15746     # frequently used parameters
15747     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
15748     $rOpts_tabs                     = $rOpts->{'tabs'};
15749     $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
15750     $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
15751     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
15752
15753     forget_side_comment();
15754
15755     initialize_for_new_group();
15756
15757     $vertical_aligner_self = {};
15758     bless $vertical_aligner_self, $class;
15759     return $vertical_aligner_self;
15760 }
15761
15762 sub initialize_for_new_group {
15763     $maximum_line_index      = -1;      # lines in the current group
15764     $maximum_alignment_index = -1;      # alignments in current group
15765     $zero_count              = 0;       # count consecutive lines without tokens
15766     $current_line            = undef;   # line being matched for alignment
15767     $group_maximum_gap       = 0;       # largest gap introduced
15768     $group_type              = "";
15769     $marginal_match          = 0;
15770     $comment_leading_space_count = 0;
15771     $last_leading_space_count    = 0;
15772 }
15773
15774 # interface to Perl::Tidy::Diagnostics routines
15775 sub write_diagnostics {
15776     if ($diagnostics_object) {
15777         $diagnostics_object->write_diagnostics(@_);
15778     }
15779 }
15780
15781 # interface to Perl::Tidy::Logger routines
15782 sub warning {
15783     if ($logger_object) {
15784         $logger_object->warning(@_);
15785     }
15786 }
15787
15788 sub write_logfile_entry {
15789     if ($logger_object) {
15790         $logger_object->write_logfile_entry(@_);
15791     }
15792 }
15793
15794 sub report_definite_bug {
15795     if ($logger_object) {
15796         $logger_object->report_definite_bug();
15797     }
15798 }
15799
15800 sub get_SPACES {
15801
15802     # return the number of leading spaces associated with an indentation
15803     # variable $indentation is either a constant number of spaces or an
15804     # object with a get_SPACES method.
15805     my $indentation = shift;
15806     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
15807 }
15808
15809 sub get_RECOVERABLE_SPACES {
15810
15811     # return the number of spaces (+ means shift right, - means shift left)
15812     # that we would like to shift a group of lines with the same indentation
15813     # to get them to line up with their opening parens
15814     my $indentation = shift;
15815     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
15816 }
15817
15818 sub get_STACK_DEPTH {
15819
15820     my $indentation = shift;
15821     return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
15822 }
15823
15824 sub make_alignment {
15825     my ( $col, $token ) = @_;
15826
15827     # make one new alignment at column $col which aligns token $token
15828     ++$maximum_alignment_index;
15829     my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
15830         column          => $col,
15831         starting_column => $col,
15832         matching_token  => $token,
15833         starting_line   => $maximum_line_index,
15834         ending_line     => $maximum_line_index,
15835         serial_number   => $maximum_alignment_index,
15836     );
15837     $ralignment_list->[$maximum_alignment_index] = $alignment;
15838     return $alignment;
15839 }
15840
15841 sub dump_alignments {
15842     print
15843 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
15844     for my $i ( 0 .. $maximum_alignment_index ) {
15845         my $column          = $ralignment_list->[$i]->get_column();
15846         my $starting_column = $ralignment_list->[$i]->get_starting_column();
15847         my $matching_token  = $ralignment_list->[$i]->get_matching_token();
15848         my $starting_line   = $ralignment_list->[$i]->get_starting_line();
15849         my $ending_line     = $ralignment_list->[$i]->get_ending_line();
15850         print
15851 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
15852     }
15853 }
15854
15855 sub save_alignment_columns {
15856     for my $i ( 0 .. $maximum_alignment_index ) {
15857         $ralignment_list->[$i]->save_column();
15858     }
15859 }
15860
15861 sub restore_alignment_columns {
15862     for my $i ( 0 .. $maximum_alignment_index ) {
15863         $ralignment_list->[$i]->restore_column();
15864     }
15865 }
15866
15867 sub forget_side_comment {
15868     $last_comment_column = 0;
15869 }
15870
15871 sub append_line {
15872
15873     # sub append is called to place one line in the current vertical group.
15874     #
15875     # The input parameters are:
15876     #     $level = indentation level of this line
15877     #     $rfields = reference to array of fields
15878     #     $rpatterns = reference to array of patterns, one per field
15879     #     $rtokens   = reference to array of tokens starting fields 1,2,..
15880     #
15881     # Here is an example of what this package does.  In this example,
15882     # we are trying to line up both the '=>' and the '#'.
15883     #
15884     #         '18' => 'grave',    #   \`
15885     #         '19' => 'acute',    #   `'
15886     #         '20' => 'caron',    #   \v
15887     # <-tabs-><f1-><--field 2 ---><-f3->
15888     # |            |              |    |
15889     # |            |              |    |
15890     # col1        col2         col3 col4
15891     #
15892     # The calling routine has already broken the entire line into 3 fields as
15893     # indicated.  (So the work of identifying promising common tokens has
15894     # already been done).
15895     #
15896     # In this example, there will be 2 tokens being matched: '=>' and '#'.
15897     # They are the leading parts of fields 2 and 3, but we do need to know
15898     # what they are so that we can dump a group of lines when these tokens
15899     # change.
15900     #
15901     # The fields contain the actual characters of each field.  The patterns
15902     # are like the fields, but they contain mainly token types instead
15903     # of tokens, so they have fewer characters.  They are used to be
15904     # sure we are matching fields of similar type.
15905     #
15906     # In this example, there will be 4 column indexes being adjusted.  The
15907     # first one is always at zero.  The interior columns are at the start of
15908     # the matching tokens, and the last one tracks the maximum line length.
15909     #
15910     # Basically, each time a new line comes in, it joins the current vertical
15911     # group if possible.  Otherwise it causes the current group to be dumped
15912     # and a new group is started.
15913     #
15914     # For each new group member, the column locations are increased, as
15915     # necessary, to make room for the new fields.  When the group is finally
15916     # output, these column numbers are used to compute the amount of spaces of
15917     # padding needed for each field.
15918     #
15919     # Programming note: the fields are assumed not to have any tab characters.
15920     # Tabs have been previously removed except for tabs in quoted strings and
15921     # side comments.  Tabs in these fields can mess up the column counting.
15922     # The log file warns the user if there are any such tabs.
15923
15924     my (
15925         $level,                     $level_end,
15926         $indentation,               $rfields,
15927         $rtokens,                   $rpatterns,
15928         $is_forced_break,           $outdent_long_lines,
15929         $is_terminal_statement,     $do_not_pad,
15930         $rvertical_tightness_flags, $level_jump,
15931       )
15932       = @_;
15933
15934     # number of fields is $jmax
15935     # number of tokens between fields is $jmax-1
15936     my $jmax = $#{$rfields};
15937     $previous_minimum_jmax_seen = $minimum_jmax_seen;
15938     $previous_maximum_jmax_seen = $maximum_jmax_seen;
15939
15940     my $leading_space_count = get_SPACES($indentation);
15941
15942     # set outdented flag to be sure we either align within statements or
15943     # across statement boundaries, but not both.
15944     my $is_outdented = $last_leading_space_count > $leading_space_count;
15945     $last_leading_space_count = $leading_space_count;
15946
15947     # Patch: undo for hanging side comment
15948     my $is_hanging_side_comment =
15949       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
15950     $is_outdented = 0 if $is_hanging_side_comment;
15951
15952     VALIGN_DEBUG_FLAG_APPEND0 && do {
15953         print
15954 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
15955     };
15956
15957     # Validate cached line if necessary: If we can produce a container
15958     # with just 2 lines total by combining an existing cached opening
15959     # token with the closing token to follow, then we will mark both
15960     # cached flags as valid.
15961     if ($rvertical_tightness_flags) {
15962         if (   $maximum_line_index <= 0
15963             && $cached_line_type
15964             && $rvertical_tightness_flags->[2] == $cached_seqno )
15965         {
15966             $rvertical_tightness_flags->[3] ||= 1;
15967             $cached_line_valid              ||= 1;
15968         }
15969     }
15970
15971     # do not join an opening block brace with an unbalanced line
15972     # unless requested with a flag value of 2
15973     if (   $cached_line_type == 3
15974         && $maximum_line_index < 0
15975         && $cached_line_flag < 2
15976         && $level_jump != 0 )
15977     {
15978         $cached_line_valid = 0;
15979     }
15980
15981     # patch until new aligner is finished
15982     if ($do_not_pad) { my_flush() }
15983
15984     # shouldn't happen:
15985     if ( $level < 0 ) { $level = 0 }
15986
15987     # do not align code across indentation level changes
15988     if ( $level != $group_level || $is_outdented ) {
15989
15990         # we are allowed to shift a group of lines to the right if its
15991         # level is greater than the previous and next group
15992         $extra_indent_ok =
15993           ( $level < $group_level && $last_group_level_written < $group_level );
15994
15995         my_flush();
15996
15997         # If we know that this line will get flushed out by itself because
15998         # of level changes, we can leave the extra_indent_ok flag set.
15999         # That way, if we get an external flush call, we will still be
16000         # able to do some -lp alignment if necessary.
16001         $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
16002
16003         $group_level = $level;
16004
16005         # wait until after the above flush to get the leading space
16006         # count because it may have been changed if the -icp flag is in
16007         # effect
16008         $leading_space_count = get_SPACES($indentation);
16009
16010     }
16011
16012     # --------------------------------------------------------------------
16013     # Patch to collect outdentable block COMMENTS
16014     # --------------------------------------------------------------------
16015     my $is_blank_line = "";
16016     my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
16017     if ( $group_type eq 'COMMENT' ) {
16018         if (
16019             (
16020                    $is_block_comment
16021                 && $outdent_long_lines
16022                 && $leading_space_count == $comment_leading_space_count
16023             )
16024             || $is_blank_line
16025           )
16026         {
16027             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16028             return;
16029         }
16030         else {
16031             my_flush();
16032         }
16033     }
16034
16035     # --------------------------------------------------------------------
16036     # Step 1. Handle simple line of code with no fields to match.
16037     # --------------------------------------------------------------------
16038     if ( $jmax <= 0 ) {
16039         $zero_count++;
16040
16041         if ( $maximum_line_index >= 0
16042             && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
16043         {
16044
16045             # flush the current group if it has some aligned columns..
16046             if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
16047
16048             # flush current group if we are just collecting side comments..
16049             elsif (
16050
16051                 # ...and we haven't seen a comment lately
16052                 ( $zero_count > 3 )
16053
16054                 # ..or if this new line doesn't fit to the left of the comments
16055                 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
16056                     $group_lines[0]->get_column(0) )
16057               )
16058             {
16059                 my_flush();
16060             }
16061         }
16062
16063         # patch to start new COMMENT group if this comment may be outdented
16064         if (   $is_block_comment
16065             && $outdent_long_lines
16066             && $maximum_line_index < 0 )
16067         {
16068             $group_type                           = 'COMMENT';
16069             $comment_leading_space_count          = $leading_space_count;
16070             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16071             return;
16072         }
16073
16074         # just write this line directly if no current group, no side comment,
16075         # and no space recovery is needed.
16076         if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
16077         {
16078             write_leader_and_string( $leading_space_count, $$rfields[0], 0,
16079                 $outdent_long_lines, $rvertical_tightness_flags );
16080             return;
16081         }
16082     }
16083     else {
16084         $zero_count = 0;
16085     }
16086
16087     # programming check: (shouldn't happen)
16088     # an error here implies an incorrect call was made
16089     if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
16090         warning(
16091 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
16092         );
16093         report_definite_bug();
16094     }
16095
16096     # --------------------------------------------------------------------
16097     # create an object to hold this line
16098     # --------------------------------------------------------------------
16099     my $new_line = new Perl::Tidy::VerticalAligner::Line(
16100         jmax                      => $jmax,
16101         jmax_original_line        => $jmax,
16102         rtokens                   => $rtokens,
16103         rfields                   => $rfields,
16104         rpatterns                 => $rpatterns,
16105         indentation               => $indentation,
16106         leading_space_count       => $leading_space_count,
16107         outdent_long_lines        => $outdent_long_lines,
16108         list_type                 => "",
16109         is_hanging_side_comment   => $is_hanging_side_comment,
16110         maximum_line_length       => $rOpts->{'maximum-line-length'},
16111         rvertical_tightness_flags => $rvertical_tightness_flags,
16112     );
16113
16114     # --------------------------------------------------------------------
16115     # It simplifies things to create a zero length side comment
16116     # if none exists.
16117     # --------------------------------------------------------------------
16118     make_side_comment( $new_line, $level_end );
16119
16120     # --------------------------------------------------------------------
16121     # Decide if this is a simple list of items.
16122     # There are 3 list types: none, comma, comma-arrow.
16123     # We use this below to be less restrictive in deciding what to align.
16124     # --------------------------------------------------------------------
16125     if ($is_forced_break) {
16126         decide_if_list($new_line);
16127     }
16128
16129     if ($current_line) {
16130
16131         # --------------------------------------------------------------------
16132         # Allow hanging side comment to join current group, if any
16133         # This will help keep side comments aligned, because otherwise we
16134         # will have to start a new group, making alignment less likely.
16135         # --------------------------------------------------------------------
16136         join_hanging_comment( $new_line, $current_line )
16137           if $is_hanging_side_comment;
16138
16139         # --------------------------------------------------------------------
16140         # If there is just one previous line, and it has more fields
16141         # than the new line, try to join fields together to get a match with
16142         # the new line.  At the present time, only a single leading '=' is
16143         # allowed to be compressed out.  This is useful in rare cases where
16144         # a table is forced to use old breakpoints because of side comments,
16145         # and the table starts out something like this:
16146         #   my %MonthChars = ('0', 'Jan',   # side comment
16147         #                     '1', 'Feb',
16148         #                     '2', 'Mar',
16149         # Eliminating the '=' field will allow the remaining fields to line up.
16150         # This situation does not occur if there are no side comments
16151         # because scan_list would put a break after the opening '('.
16152         # --------------------------------------------------------------------
16153         eliminate_old_fields( $new_line, $current_line );
16154
16155         # --------------------------------------------------------------------
16156         # If the new line has more fields than the current group,
16157         # see if we can match the first fields and combine the remaining
16158         # fields of the new line.
16159         # --------------------------------------------------------------------
16160         eliminate_new_fields( $new_line, $current_line );
16161
16162         # --------------------------------------------------------------------
16163         # Flush previous group unless all common tokens and patterns match..
16164         # --------------------------------------------------------------------
16165         check_match( $new_line, $current_line );
16166
16167         # --------------------------------------------------------------------
16168         # See if there is space for this line in the current group (if any)
16169         # --------------------------------------------------------------------
16170         if ($current_line) {
16171             check_fit( $new_line, $current_line );
16172         }
16173     }
16174
16175     # --------------------------------------------------------------------
16176     # Append this line to the current group (or start new group)
16177     # --------------------------------------------------------------------
16178     accept_line($new_line);
16179
16180     # Future update to allow this to vary:
16181     $current_line = $new_line if ( $maximum_line_index == 0 );
16182
16183     # --------------------------------------------------------------------
16184     # Step 8. Some old debugging stuff
16185     # --------------------------------------------------------------------
16186     VALIGN_DEBUG_FLAG_APPEND && do {
16187         print "APPEND fields:";
16188         dump_array(@$rfields);
16189         print "APPEND tokens:";
16190         dump_array(@$rtokens);
16191         print "APPEND patterns:";
16192         dump_array(@$rpatterns);
16193         dump_alignments();
16194     };
16195 }
16196
16197 sub join_hanging_comment {
16198
16199     my $line = shift;
16200     my $jmax = $line->get_jmax();
16201     return 0 unless $jmax == 1;    # must be 2 fields
16202     my $rtokens = $line->get_rtokens();
16203     return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
16204     my $rfields = $line->get_rfields();
16205     return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
16206     my $old_line            = shift;
16207     my $maximum_field_index = $old_line->get_jmax();
16208     return 0
16209       unless $maximum_field_index > $jmax;    # the current line has more fields
16210     my $rpatterns = $line->get_rpatterns();
16211
16212     $line->set_is_hanging_side_comment(1);
16213     $jmax = $maximum_field_index;
16214     $line->set_jmax($jmax);
16215     $$rfields[$jmax]         = $$rfields[1];
16216     $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
16217     $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
16218     for ( my $j = 1 ; $j < $jmax ; $j++ ) {
16219         $$rfields[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
16220         $$rtokens[ $j - 1 ]   = "";
16221         $$rpatterns[ $j - 1 ] = "";
16222     }
16223     return 1;
16224 }
16225
16226 sub eliminate_old_fields {
16227
16228     my $new_line = shift;
16229     my $jmax     = $new_line->get_jmax();
16230     if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
16231     if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
16232
16233     # there must be one previous line
16234     return unless ( $maximum_line_index == 0 );
16235
16236     my $old_line            = shift;
16237     my $maximum_field_index = $old_line->get_jmax();
16238
16239     # this line must have fewer fields
16240     return unless $maximum_field_index > $jmax;
16241
16242     # Identify specific cases where field elimination is allowed:
16243     # case=1: both lines have comma-separated lists, and the first
16244     #         line has an equals
16245     # case=2: both lines have leading equals
16246
16247     # case 1 is the default
16248     my $case = 1;
16249
16250     # See if case 2: both lines have leading '='
16251     # We'll require smiliar leading patterns in this case
16252     my $old_rtokens   = $old_line->get_rtokens();
16253     my $rtokens       = $new_line->get_rtokens();
16254     my $rpatterns     = $new_line->get_rpatterns();
16255     my $old_rpatterns = $old_line->get_rpatterns();
16256     if (   $rtokens->[0] =~ /^=\d*$/
16257         && $old_rtokens->[0]   eq $rtokens->[0]
16258         && $old_rpatterns->[0] eq $rpatterns->[0] )
16259     {
16260         $case = 2;
16261     }
16262
16263     # not too many fewer fields in new line for case 1
16264     return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
16265
16266     # case 1 must have side comment
16267     my $old_rfields = $old_line->get_rfields();
16268     return
16269       if ( $case == 1
16270         && length( $$old_rfields[$maximum_field_index] ) == 0 );
16271
16272     my $rfields = $new_line->get_rfields();
16273
16274     my $hid_equals = 0;
16275
16276     my @new_alignments        = ();
16277     my @new_fields            = ();
16278     my @new_matching_patterns = ();
16279     my @new_matching_tokens   = ();
16280
16281     my $j = 0;
16282     my $k;
16283     my $current_field   = '';
16284     my $current_pattern = '';
16285
16286     # loop over all old tokens
16287     my $in_match = 0;
16288     for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
16289         $current_field   .= $$old_rfields[$k];
16290         $current_pattern .= $$old_rpatterns[$k];
16291         last if ( $j > $jmax - 1 );
16292
16293         if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
16294             $in_match                  = 1;
16295             $new_fields[$j]            = $current_field;
16296             $new_matching_patterns[$j] = $current_pattern;
16297             $current_field             = '';
16298             $current_pattern           = '';
16299             $new_matching_tokens[$j]   = $$old_rtokens[$k];
16300             $new_alignments[$j]        = $old_line->get_alignment($k);
16301             $j++;
16302         }
16303         else {
16304
16305             if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
16306                 last if ( $case == 2 );    # avoid problems with stuff
16307                                            # like:   $a=$b=$c=$d;
16308                 $hid_equals = 1;
16309             }
16310             last
16311               if ( $in_match && $case == 1 )
16312               ;    # disallow gaps in matching field types in case 1
16313         }
16314     }
16315
16316     # Modify the current state if we are successful.
16317     # We must exactly reach the ends of both lists for success.
16318     if (   ( $j == $jmax )
16319         && ( $current_field eq '' )
16320         && ( $case != 1 || $hid_equals ) )
16321     {
16322         $k = $maximum_field_index;
16323         $current_field   .= $$old_rfields[$k];
16324         $current_pattern .= $$old_rpatterns[$k];
16325         $new_fields[$j]            = $current_field;
16326         $new_matching_patterns[$j] = $current_pattern;
16327
16328         $new_alignments[$j] = $old_line->get_alignment($k);
16329         $maximum_field_index = $j;
16330
16331         $old_line->set_alignments(@new_alignments);
16332         $old_line->set_jmax($jmax);
16333         $old_line->set_rtokens( \@new_matching_tokens );
16334         $old_line->set_rfields( \@new_fields );
16335         $old_line->set_rpatterns( \@$rpatterns );
16336     }
16337 }
16338
16339 # create an empty side comment if none exists
16340 sub make_side_comment {
16341     my $new_line  = shift;
16342     my $level_end = shift;
16343     my $jmax      = $new_line->get_jmax();
16344     my $rtokens   = $new_line->get_rtokens();
16345
16346     # if line does not have a side comment...
16347     if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
16348         my $rfields   = $new_line->get_rfields();
16349         my $rpatterns = $new_line->get_rpatterns();
16350         $$rtokens[$jmax]     = '#';
16351         $$rfields[ ++$jmax ] = '';
16352         $$rpatterns[$jmax]   = '#';
16353         $new_line->set_jmax($jmax);
16354         $new_line->set_jmax_original_line($jmax);
16355     }
16356
16357     # line has a side comment..
16358     else {
16359
16360         # don't remember old side comment location for very long
16361         my $line_number = $vertical_aligner_self->get_output_line_number();
16362         my $rfields     = $new_line->get_rfields();
16363         if (
16364             $line_number - $last_side_comment_line_number > 12
16365
16366             # and don't remember comment location across block level changes
16367             || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
16368           )
16369         {
16370             forget_side_comment();
16371         }
16372         $last_side_comment_line_number = $line_number;
16373         $last_side_comment_level       = $level_end;
16374     }
16375 }
16376
16377 sub decide_if_list {
16378
16379     my $line = shift;
16380
16381     # A list will be taken to be a line with a forced break in which all
16382     # of the field separators are commas or comma-arrows (except for the
16383     # trailing #)
16384
16385     # List separator tokens are things like ',3'   or '=>2',
16386     # where the trailing digit is the nesting depth.  Allow braces
16387     # to allow nested list items.
16388     my $rtokens    = $line->get_rtokens();
16389     my $test_token = $$rtokens[0];
16390     if ( $test_token =~ /^(\,|=>)/ ) {
16391         my $list_type = $test_token;
16392         my $jmax      = $line->get_jmax();
16393
16394         foreach ( 1 .. $jmax - 2 ) {
16395             if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
16396                 $list_type = "";
16397                 last;
16398             }
16399         }
16400         $line->set_list_type($list_type);
16401     }
16402 }
16403
16404 sub eliminate_new_fields {
16405
16406     return unless ( $maximum_line_index >= 0 );
16407     my $new_line = shift;
16408     my $old_line = shift;
16409     my $jmax     = $new_line->get_jmax();
16410
16411     my $old_rtokens   = $old_line->get_rtokens();
16412     my $rtokens       = $new_line->get_rtokens();
16413     my $is_assignment =
16414       ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
16415
16416     # must be monotonic variation
16417     return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
16418
16419     # must be more fields in the new line
16420     my $maximum_field_index = $old_line->get_jmax();
16421     return unless ( $maximum_field_index < $jmax );
16422
16423     unless ($is_assignment) {
16424         return
16425           unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
16426           ;    # only if monotonic
16427
16428         # never combine fields of a comma list
16429         return
16430           unless ( $maximum_field_index > 1 )
16431           && ( $new_line->get_list_type() !~ /^,/ );
16432     }
16433
16434     my $rfields       = $new_line->get_rfields();
16435     my $rpatterns     = $new_line->get_rpatterns();
16436     my $old_rpatterns = $old_line->get_rpatterns();
16437
16438     # loop over all old tokens except comment
16439     my $match = 1;
16440     my $k;
16441     for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
16442         if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
16443             || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
16444         {
16445             $match = 0;
16446             last;
16447         }
16448     }
16449
16450     # first tokens agree, so combine new tokens
16451     if ($match) {
16452         for $k ( $maximum_field_index .. $jmax - 1 ) {
16453
16454             $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
16455             $$rfields[$k] = "";
16456             $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
16457             $$rpatterns[$k] = "";
16458         }
16459
16460         $$rtokens[ $maximum_field_index - 1 ] = '#';
16461         $$rfields[$maximum_field_index]       = $$rfields[$jmax];
16462         $$rpatterns[$maximum_field_index]     = $$rpatterns[$jmax];
16463         $jmax                                 = $maximum_field_index;
16464     }
16465     $new_line->set_jmax($jmax);
16466 }
16467
16468 sub check_match {
16469
16470     my $new_line = shift;
16471     my $old_line = shift;
16472
16473     my $jmax                = $new_line->get_jmax();
16474     my $maximum_field_index = $old_line->get_jmax();
16475
16476     # flush if this line has too many fields
16477     if ( $jmax > $maximum_field_index ) { my_flush(); return }
16478
16479     # flush if adding this line would make a non-monotonic field count
16480     if (
16481         ( $maximum_field_index > $jmax )    # this has too few fields
16482         && (
16483             ( $previous_minimum_jmax_seen < $jmax )  # and wouldn't be monotonic
16484             || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
16485         )
16486       )
16487     {
16488         my_flush();
16489         return;
16490     }
16491
16492     # otherwise append this line if everything matches
16493     my $jmax_original_line      = $new_line->get_jmax_original_line();
16494     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
16495     my $rtokens                 = $new_line->get_rtokens();
16496     my $rfields                 = $new_line->get_rfields();
16497     my $rpatterns               = $new_line->get_rpatterns();
16498     my $list_type               = $new_line->get_list_type();
16499
16500     my $group_list_type = $old_line->get_list_type();
16501     my $old_rpatterns   = $old_line->get_rpatterns();
16502     my $old_rtokens     = $old_line->get_rtokens();
16503
16504     my $jlimit = $jmax - 1;
16505     if ( $maximum_field_index > $jmax ) {
16506         $jlimit = $jmax_original_line;
16507         --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
16508     }
16509
16510     my $everything_matches = 1;
16511
16512     # common list types always match
16513     unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
16514         || $is_hanging_side_comment )
16515     {
16516
16517         my $leading_space_count = $new_line->get_leading_space_count();
16518         my $saw_equals          = 0;
16519         for my $j ( 0 .. $jlimit ) {
16520             my $match = 1;
16521
16522             my $old_tok = $$old_rtokens[$j];
16523             my $new_tok = $$rtokens[$j];
16524
16525             # dumb down the match after an equals
16526             if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
16527                 $new_tok = $1;
16528                 $old_tok =~ s/\+.*$//;
16529             }
16530             if ( $new_tok =~ /^=\d*$/ ) { $saw_equals = 1 }
16531
16532             # we never match if the matching tokens differ
16533             if (   $j < $jlimit
16534                 && $old_tok ne $new_tok )
16535             {
16536                 $match = 0;
16537             }
16538
16539             # otherwise, if patterns match, we always have a match.
16540             # However, if patterns don't match, we have to be careful...
16541             elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
16542
16543                 # We have to be very careful about aligning commas when the
16544                 # pattern's don't match, because it can be worse to create an
16545                 # alignment where none is needed than to omit one.  The current
16546                 # rule: if we are within a matching sub call (indicated by '+'
16547                 # in the matching token), we'll allow a marginal match, but
16548                 # otherwise not.
16549                 #
16550                 # Here's an example where we'd like to align the '='
16551                 #  my $cfile = File::Spec->catfile( 't',    'callext.c' );
16552                 #  my $inc   = File::Spec->catdir( 'Basic', 'Core' );
16553                 # because the function names differ.
16554                 # Future alignment logic should make this unnecessary.
16555                 #
16556                 # Here's an example where the ','s are not contained in a call.
16557                 # The first line below should probably not match the next two:
16558                 #   ( $a, $b ) = ( $b, $r );
16559                 #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
16560                 #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
16561                 if ( $new_tok =~ /^,/ ) {
16562                     if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
16563                         $marginal_match = 1;
16564                     }
16565                     else {
16566                         $match = 0;
16567                     }
16568                 }
16569
16570                 # parens don't align well unless patterns match
16571                 elsif ( $new_tok =~ /^\(/ ) {
16572                     $match = 0;
16573                 }
16574
16575                 # Handle an '=' alignment with different patterns to
16576                 # the left.
16577                 elsif ( $new_tok =~ /^=\d*$/ ) {
16578
16579                     $saw_equals = 1;
16580
16581                     # It is best to be a little restrictive when
16582                     # aligning '=' tokens.  Here is an example of
16583                     # two lines that we will not align:
16584                     #       my $variable=6;
16585                     #       $bb=4;
16586                     # The problem is that one is a 'my' declaration,
16587                     # and the other isn't, so they're not very similar.
16588                     # We will filter these out by comparing the first
16589                     # letter of the pattern.  This is crude, but works
16590                     # well enough.
16591                     if (
16592                         substr( $$old_rpatterns[$j], 0, 1 ) ne
16593                         substr( $$rpatterns[$j], 0, 1 ) )
16594                     {
16595                         $match = 0;
16596                     }
16597
16598                     # If we pass that test, we'll call it a marginal match.
16599                     # Here is an example of a marginal match:
16600                     #       $done{$$op} = 1;
16601                     #       $op         = compile_bblock($op);
16602                     # The left tokens are both identifiers, but
16603                     # one accesses a hash and the other doesn't.
16604                     # We'll let this be a tentative match and undo
16605                     # it later if we don't find more than 2 lines
16606                     # in the group.
16607                     elsif ( $maximum_line_index == 0 ) {
16608                         $marginal_match = 1;
16609                     }
16610                 }
16611             }
16612
16613             # Don't let line with fewer fields increase column widths
16614             # ( align3.t )
16615             if ( $maximum_field_index > $jmax ) {
16616                 my $pad =
16617                   length( $$rfields[$j] ) - $old_line->current_field_width($j);
16618
16619                 if ( $j == 0 ) {
16620                     $pad += $leading_space_count;
16621                 }
16622
16623                 # TESTING: suspend this rule to allow last lines to join
16624                 if ( $pad > 0 ) { $match = 0; }
16625             }
16626
16627             unless ($match) {
16628                 $everything_matches = 0;
16629                 last;
16630             }
16631         }
16632     }
16633
16634     if ( $maximum_field_index > $jmax ) {
16635
16636         if ($everything_matches) {
16637
16638             my $comment = $$rfields[$jmax];
16639             for $jmax ( $jlimit .. $maximum_field_index ) {
16640                 $$rtokens[$jmax]     = $$old_rtokens[$jmax];
16641                 $$rfields[ ++$jmax ] = '';
16642                 $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
16643             }
16644             $$rfields[$jmax] = $comment;
16645             $new_line->set_jmax($jmax);
16646         }
16647     }
16648
16649     my_flush() unless ($everything_matches);
16650 }
16651
16652 sub check_fit {
16653
16654     return unless ( $maximum_line_index >= 0 );
16655     my $new_line = shift;
16656     my $old_line = shift;
16657
16658     my $jmax                    = $new_line->get_jmax();
16659     my $leading_space_count     = $new_line->get_leading_space_count();
16660     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
16661     my $rtokens                 = $new_line->get_rtokens();
16662     my $rfields                 = $new_line->get_rfields();
16663     my $rpatterns               = $new_line->get_rpatterns();
16664
16665     my $group_list_type = $group_lines[0]->get_list_type();
16666
16667     my $padding_so_far    = 0;
16668     my $padding_available = $old_line->get_available_space_on_right();
16669
16670     # save current columns in case this doesn't work
16671     save_alignment_columns();
16672
16673     my ( $j, $pad, $eight );
16674     my $maximum_field_index = $old_line->get_jmax();
16675     for $j ( 0 .. $jmax ) {
16676
16677         ## testing patch to avoid excessive gaps in previous lines,
16678         # due to a line of fewer fields.
16679         #   return join( ".",
16680         #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
16681         #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
16682         ## MOVED BELOW AS A TEST
16683         ##next if ($jmax < $maximum_field_index && $j==$jmax-1);
16684
16685         $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
16686
16687         if ( $j == 0 ) {
16688             $pad += $leading_space_count;
16689         }
16690
16691         # remember largest gap of the group, excluding gap to side comment
16692         if (   $pad < 0
16693             && $group_maximum_gap < -$pad
16694             && $j > 0
16695             && $j < $jmax - 1 )
16696         {
16697             $group_maximum_gap = -$pad;
16698         }
16699
16700         next if $pad < 0;
16701
16702         # This line will need space; lets see if we want to accept it..
16703         if (
16704
16705             # not if this won't fit
16706             ( $pad > $padding_available )
16707
16708             # previously, there were upper bounds placed on padding here
16709             # (maximum_whitespace_columns), but they were not really helpful
16710
16711           )
16712         {
16713
16714             # revert to starting state then flush; things didn't work out
16715             restore_alignment_columns();
16716             my_flush();
16717             last;
16718         }
16719
16720         # TESTING PATCH moved from above to be sure we fit
16721         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
16722
16723         # looks ok, squeeze this field in
16724         $old_line->increase_field_width( $j, $pad );
16725         $padding_available -= $pad;
16726
16727         # remember largest gap of the group, excluding gap to side comment
16728         if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
16729             $group_maximum_gap = $pad;
16730         }
16731     }
16732 }
16733
16734 sub accept_line {
16735
16736     my $new_line = shift;
16737     $group_lines[ ++$maximum_line_index ] = $new_line;
16738
16739     # initialize field lengths if starting new group
16740     if ( $maximum_line_index == 0 ) {
16741
16742         my $jmax    = $new_line->get_jmax();
16743         my $rfields = $new_line->get_rfields();
16744         my $rtokens = $new_line->get_rtokens();
16745         my $j;
16746         my $col = $new_line->get_leading_space_count();
16747
16748         for $j ( 0 .. $jmax ) {
16749             $col += length( $$rfields[$j] );
16750
16751             # create initial alignments for the new group
16752             my $token = "";
16753             if ( $j < $jmax ) { $token = $$rtokens[$j] }
16754             my $alignment = make_alignment( $col, $token );
16755             $new_line->set_alignment( $j, $alignment );
16756         }
16757
16758         $maximum_jmax_seen = $jmax;
16759         $minimum_jmax_seen = $jmax;
16760     }
16761
16762     # use previous alignments otherwise
16763     else {
16764         my @new_alignments =
16765           $group_lines[ $maximum_line_index - 1 ]->get_alignments();
16766         $new_line->set_alignments(@new_alignments);
16767     }
16768 }
16769
16770 sub dump_array {
16771
16772     # debug routine to dump array contents
16773     local $" = ')(';
16774     print "(@_)\n";
16775 }
16776
16777 # flush() sends the current Perl::Tidy::VerticalAligner group down the
16778 # pipeline to Perl::Tidy::FileWriter.
16779
16780 # This is the external flush, which also empties the cache
16781 sub flush {
16782
16783     if ( $maximum_line_index < 0 ) {
16784         if ($cached_line_type) {
16785             $file_writer_object->write_code_line( $cached_line_text . "\n" );
16786             $cached_line_type = 0;
16787             $cached_line_text = "";
16788         }
16789     }
16790     else {
16791         my_flush();
16792     }
16793 }
16794
16795 # This is the internal flush, which leaves the cache intact
16796 sub my_flush {
16797
16798     return if ( $maximum_line_index < 0 );
16799
16800     # handle a group of comment lines
16801     if ( $group_type eq 'COMMENT' ) {
16802
16803         VALIGN_DEBUG_FLAG_APPEND0 && do {
16804             my ( $a, $b, $c ) = caller();
16805             print
16806 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
16807
16808         };
16809         my $leading_space_count = $comment_leading_space_count;
16810         my $leading_string      = get_leading_string($leading_space_count);
16811
16812         # zero leading space count if any lines are too long
16813         my $max_excess = 0;
16814         for my $i ( 0 .. $maximum_line_index ) {
16815             my $str    = $group_lines[$i];
16816             my $excess =
16817               length($str) + $leading_space_count - $rOpts_maximum_line_length;
16818             if ( $excess > $max_excess ) {
16819                 $max_excess = $excess;
16820             }
16821         }
16822
16823         if ( $max_excess > 0 ) {
16824             $leading_space_count -= $max_excess;
16825             if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
16826             $last_outdented_line_at =
16827               $file_writer_object->get_output_line_number();
16828             unless ($outdented_line_count) {
16829                 $first_outdented_line_at = $last_outdented_line_at;
16830             }
16831             $outdented_line_count += ( $maximum_line_index + 1 );
16832         }
16833
16834         # write the group of lines
16835         my $outdent_long_lines = 0;
16836         for my $i ( 0 .. $maximum_line_index ) {
16837             write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
16838                 $outdent_long_lines, "" );
16839         }
16840     }
16841
16842     # handle a group of code lines
16843     else {
16844
16845         VALIGN_DEBUG_FLAG_APPEND0 && do {
16846             my $group_list_type = $group_lines[0]->get_list_type();
16847             my ( $a, $b, $c ) = caller();
16848             my $maximum_field_index = $group_lines[0]->get_jmax();
16849             print
16850 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
16851
16852         };
16853
16854         # some small groups are best left unaligned
16855         my $do_not_align = decide_if_aligned();
16856
16857         # optimize side comment location
16858         $do_not_align = adjust_side_comment($do_not_align);
16859
16860         # recover spaces for -lp option if possible
16861         my $extra_leading_spaces = get_extra_leading_spaces();
16862
16863         # all lines of this group have the same basic leading spacing
16864         my $group_leader_length = $group_lines[0]->get_leading_space_count();
16865
16866         # add extra leading spaces if helpful
16867         my $min_ci_gap =
16868           improve_continuation_indentation( $do_not_align,
16869             $group_leader_length );
16870
16871         # loop to output all lines
16872         for my $i ( 0 .. $maximum_line_index ) {
16873             my $line = $group_lines[$i];
16874             write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
16875                 $group_leader_length, $extra_leading_spaces );
16876         }
16877     }
16878     initialize_for_new_group();
16879 }
16880
16881 sub decide_if_aligned {
16882
16883     # Do not try to align two lines which are not really similar
16884     return unless $maximum_line_index == 1;
16885
16886     my $group_list_type = $group_lines[0]->get_list_type();
16887
16888     my $do_not_align = (
16889
16890         # always align lists
16891         !$group_list_type
16892
16893           && (
16894
16895             # don't align if it was just a marginal match
16896             $marginal_match
16897
16898             # don't align two lines with big gap
16899             || $group_maximum_gap > 12
16900
16901             # or lines with differing number of alignment tokens
16902             || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
16903           )
16904     );
16905
16906     # But try to convert them into a simple comment group if the first line
16907     # a has side comment
16908     my $rfields             = $group_lines[0]->get_rfields();
16909     my $maximum_field_index = $group_lines[0]->get_jmax();
16910     if (   $do_not_align
16911         && ( $maximum_line_index > 0 )
16912         && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
16913     {
16914         combine_fields();
16915         $do_not_align = 0;
16916     }
16917     return $do_not_align;
16918 }
16919
16920 sub adjust_side_comment {
16921
16922     my $do_not_align = shift;
16923
16924     # let's see if we can move the side comment field out a little
16925     # to improve readability (the last field is always a side comment field)
16926     my $have_side_comment       = 0;
16927     my $first_side_comment_line = -1;
16928     my $maximum_field_index     = $group_lines[0]->get_jmax();
16929     for my $i ( 0 .. $maximum_line_index ) {
16930         my $line = $group_lines[$i];
16931
16932         if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
16933             $have_side_comment       = 1;
16934             $first_side_comment_line = $i;
16935             last;
16936         }
16937     }
16938
16939     my $kmax = $maximum_field_index + 1;
16940
16941     if ($have_side_comment) {
16942
16943         my $line = $group_lines[0];
16944
16945         # the maximum space without exceeding the line length:
16946         my $avail = $line->get_available_space_on_right();
16947
16948         # try to use the previous comment column
16949         my $side_comment_column = $line->get_column( $kmax - 2 );
16950         my $move                = $last_comment_column - $side_comment_column;
16951
16952 ##        my $sc_line0 = $side_comment_history[0]->[0];
16953 ##        my $sc_col0  = $side_comment_history[0]->[1];
16954 ##        my $sc_line1 = $side_comment_history[1]->[0];
16955 ##        my $sc_col1  = $side_comment_history[1]->[1];
16956 ##        my $sc_line2 = $side_comment_history[2]->[0];
16957 ##        my $sc_col2  = $side_comment_history[2]->[1];
16958 ##
16959 ##        # FUTURE UPDATES:
16960 ##        # Be sure to ignore 'do not align' and  '} # end comments'
16961 ##        # Find first $move > 0 and $move <= $avail as follows:
16962 ##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
16963 ##        # 2. try sc_col2 if (line-sc_line2) < 12
16964 ##        # 3. try min possible space, plus up to 8,
16965 ##        # 4. try min possible space
16966
16967         if ( $kmax > 0 && !$do_not_align ) {
16968
16969             # but if this doesn't work, give up and use the minimum space
16970             if ( $move > $avail ) {
16971                 $move = $rOpts_minimum_space_to_comment - 1;
16972             }
16973
16974             # but we want some minimum space to the comment
16975             my $min_move = $rOpts_minimum_space_to_comment - 1;
16976             if (   $move >= 0
16977                 && $last_side_comment_length > 0
16978                 && ( $first_side_comment_line == 0 )
16979                 && $group_level == $last_group_level_written )
16980             {
16981                 $min_move = 0;
16982             }
16983
16984             if ( $move < $min_move ) {
16985                 $move = $min_move;
16986             }
16987
16988             # prevously, an upper bound was placed on $move here,
16989             # (maximum_space_to_comment), but it was not helpful
16990
16991             # don't exceed the available space
16992             if ( $move > $avail ) { $move = $avail }
16993
16994             # we can only increase space, never decrease
16995             if ( $move > 0 ) {
16996                 $line->increase_field_width( $maximum_field_index - 1, $move );
16997             }
16998
16999             # remember this column for the next group
17000             $last_comment_column = $line->get_column( $kmax - 2 );
17001         }
17002         else {
17003
17004             # try to at least line up the existing side comment location
17005             if ( $kmax > 0 && $move > 0 && $move < $avail ) {
17006                 $line->increase_field_width( $maximum_field_index - 1, $move );
17007                 $do_not_align = 0;
17008             }
17009
17010             # reset side comment column if we can't align
17011             else {
17012                 forget_side_comment();
17013             }
17014         }
17015     }
17016     return $do_not_align;
17017 }
17018
17019 sub improve_continuation_indentation {
17020     my ( $do_not_align, $group_leader_length ) = @_;
17021
17022     # See if we can increase the continuation indentation
17023     # to move all continuation lines closer to the next field
17024     # (unless it is a comment).
17025     #
17026     # '$min_ci_gap'is the extra indentation that we may need to introduce.
17027     # We will only introduce this to fields which already have some ci.
17028     # Without this variable, we would occasionally get something like this
17029     # (Complex.pm):
17030     #
17031     # use overload '+' => \&plus,
17032     #   '-'            => \&minus,
17033     #   '*'            => \&multiply,
17034     #   ...
17035     #   'tan'          => \&tan,
17036     #   'atan2'        => \&atan2,
17037     #
17038     # Whereas with this variable, we can shift variables over to get this:
17039     #
17040     # use overload '+' => \&plus,
17041     #          '-'     => \&minus,
17042     #          '*'     => \&multiply,
17043     #          ...
17044     #          'tan'   => \&tan,
17045     #          'atan2' => \&atan2,
17046
17047     ## BUB: Deactivated####################
17048     # The trouble with this patch is that it may, for example,
17049     # move in some 'or's  or ':'s, and leave some out, so that the
17050     # left edge alignment suffers.
17051     return 0;
17052     ###########################################
17053
17054     my $maximum_field_index = $group_lines[0]->get_jmax();
17055
17056     my $min_ci_gap = $rOpts_maximum_line_length;
17057     if ( $maximum_field_index > 1 && !$do_not_align ) {
17058
17059         for my $i ( 0 .. $maximum_line_index ) {
17060             my $line                = $group_lines[$i];
17061             my $leading_space_count = $line->get_leading_space_count();
17062             my $rfields             = $line->get_rfields();
17063
17064             my $gap = $line->get_column(0) - $leading_space_count -
17065               length( $$rfields[0] );
17066
17067             if ( $leading_space_count > $group_leader_length ) {
17068                 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
17069             }
17070         }
17071
17072         if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
17073             $min_ci_gap = 0;
17074         }
17075     }
17076     else {
17077         $min_ci_gap = 0;
17078     }
17079     return $min_ci_gap;
17080 }
17081
17082 sub write_vertically_aligned_line {
17083
17084     my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
17085         $extra_leading_spaces )
17086       = @_;
17087     my $rfields                   = $line->get_rfields();
17088     my $leading_space_count       = $line->get_leading_space_count();
17089     my $outdent_long_lines        = $line->get_outdent_long_lines();
17090     my $maximum_field_index       = $line->get_jmax();
17091     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
17092
17093     # add any extra spaces
17094     if ( $leading_space_count > $group_leader_length ) {
17095         $leading_space_count += $min_ci_gap;
17096     }
17097
17098     my $str = $$rfields[0];
17099
17100     # loop to concatenate all fields of this line and needed padding
17101     my $total_pad_count = 0;
17102     my ( $j, $pad );
17103     for $j ( 1 .. $maximum_field_index ) {
17104
17105         # skip zero-length side comments
17106         last
17107           if ( ( $j == $maximum_field_index )
17108             && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
17109           );
17110
17111         # compute spaces of padding before this field
17112         my $col = $line->get_column( $j - 1 );
17113         $pad = $col - ( length($str) + $leading_space_count );
17114
17115         if ($do_not_align) {
17116             $pad =
17117               ( $j < $maximum_field_index )
17118               ? 0
17119               : $rOpts_minimum_space_to_comment - 1;
17120         }
17121
17122         # accumulate the padding
17123         if ( $pad > 0 ) { $total_pad_count += $pad; }
17124
17125         # add this field
17126         if ( !defined $$rfields[$j] ) {
17127             write_diagnostics("UNDEFined field at j=$j\n");
17128         }
17129
17130         # only add padding when we have a finite field;
17131         # this avoids extra terminal spaces if we have empty fields
17132         if ( length( $$rfields[$j] ) > 0 ) {
17133             $str .= ' ' x $total_pad_count;
17134             $total_pad_count = 0;
17135             $str .= $$rfields[$j];
17136         }
17137
17138         # update side comment history buffer
17139         if ( $j == $maximum_field_index ) {
17140             my $lineno = $file_writer_object->get_output_line_number();
17141             shift @side_comment_history;
17142             push @side_comment_history, [ $lineno, $col ];
17143         }
17144     }
17145
17146     my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
17147
17148     # ship this line off
17149     write_leader_and_string( $leading_space_count + $extra_leading_spaces,
17150         $str, $side_comment_length, $outdent_long_lines,
17151         $rvertical_tightness_flags );
17152 }
17153
17154 sub get_extra_leading_spaces {
17155
17156     #----------------------------------------------------------
17157     # Define any extra indentation space (for the -lp option).
17158     # Here is why:
17159     # If a list has side comments, sub scan_list must dump the
17160     # list before it sees everything.  When this happens, it sets
17161     # the indentation to the standard scheme, but notes how
17162     # many spaces it would have liked to use.  We may be able
17163     # to recover that space here in the event that that all of the
17164     # lines of a list are back together again.
17165     #----------------------------------------------------------
17166
17167     my $extra_leading_spaces = 0;
17168     if ($extra_indent_ok) {
17169         my $object = $group_lines[0]->get_indentation();
17170         if ( ref($object) ) {
17171             my $extra_indentation_spaces_wanted =
17172               get_RECOVERABLE_SPACES($object);
17173
17174             # all indentation objects must be the same
17175             my $i;
17176             for $i ( 1 .. $maximum_line_index ) {
17177                 if ( $object != $group_lines[$i]->get_indentation() ) {
17178                     $extra_indentation_spaces_wanted = 0;
17179                     last;
17180                 }
17181             }
17182
17183             if ($extra_indentation_spaces_wanted) {
17184
17185                 # the maximum space without exceeding the line length:
17186                 my $avail = $group_lines[0]->get_available_space_on_right();
17187                 $extra_leading_spaces =
17188                   ( $avail > $extra_indentation_spaces_wanted )
17189                   ? $extra_indentation_spaces_wanted
17190                   : $avail;
17191
17192                 # update the indentation object because with -icp the terminal
17193                 # ');' will use the same adjustment.
17194                 $object->permanently_decrease_AVAILABLE_SPACES(
17195                     -$extra_leading_spaces );
17196             }
17197         }
17198     }
17199     return $extra_leading_spaces;
17200 }
17201
17202 sub combine_fields {
17203
17204     # combine all fields except for the comment field  ( sidecmt.t )
17205     my ( $j, $k );
17206     my $maximum_field_index = $group_lines[0]->get_jmax();
17207     for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
17208         my $line    = $group_lines[$j];
17209         my $rfields = $line->get_rfields();
17210         foreach ( 1 .. $maximum_field_index - 1 ) {
17211             $$rfields[0] .= $$rfields[$_];
17212         }
17213         $$rfields[1] = $$rfields[$maximum_field_index];
17214
17215         $line->set_jmax(1);
17216         $line->set_column( 0, 0 );
17217         $line->set_column( 1, 0 );
17218
17219     }
17220     $maximum_field_index = 1;
17221
17222     for $j ( 0 .. $maximum_line_index ) {
17223         my $line    = $group_lines[$j];
17224         my $rfields = $line->get_rfields();
17225         for $k ( 0 .. $maximum_field_index ) {
17226             my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
17227             if ( $k == 0 ) {
17228                 $pad += $group_lines[$j]->get_leading_space_count();
17229             }
17230
17231             if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
17232
17233         }
17234     }
17235 }
17236
17237 sub get_output_line_number {
17238
17239     # the output line number reported to a caller is the number of items
17240     # written plus the number of items in the buffer
17241     my $self = shift;
17242     1 + $maximum_line_index + $file_writer_object->get_output_line_number();
17243 }
17244
17245 sub write_leader_and_string {
17246
17247     my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
17248         $rvertical_tightness_flags )
17249       = @_;
17250
17251     my $leading_string = get_leading_string($leading_space_count);
17252
17253     # handle outdenting of long lines:
17254     if ($outdent_long_lines) {
17255         my $excess =
17256           length($str) - $side_comment_length + $leading_space_count -
17257           $rOpts_maximum_line_length;
17258         if ( $excess > 0 ) {
17259             $leading_string         = "";
17260             $last_outdented_line_at =
17261               $file_writer_object->get_output_line_number();
17262
17263             unless ($outdented_line_count) {
17264                 $first_outdented_line_at = $last_outdented_line_at;
17265             }
17266             $outdented_line_count++;
17267         }
17268     }
17269
17270     # Unpack any recombination data; it was packed by
17271     # sub send_lines_to_vertical_aligner. Contents:
17272     #
17273     #   [0] type: 1=opening  2=closing  3=opening block brace
17274     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
17275     #             if closing: spaces of padding to use
17276     #   [2] sequence number of container
17277     #   [3] valid flag: do not append if this flag is false
17278     #
17279     my ( $open_or_close, $tightness_flag, $seqno, $valid );
17280     if ($rvertical_tightness_flags) {
17281         ( $open_or_close, $tightness_flag, $seqno, $valid ) =
17282           @{$rvertical_tightness_flags};
17283     }
17284
17285     # handle any cached line ..
17286     # either append this line to it or write it out
17287     if ($cached_line_text) {
17288
17289         if ( !$cached_line_valid ) {
17290             $file_writer_object->write_code_line( $cached_line_text . "\n" );
17291         }
17292
17293         # handle cached line with opening container token
17294         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
17295
17296             my $gap = $leading_space_count - length($cached_line_text);
17297
17298             # handle option of just one tight opening per line:
17299             if ( $cached_line_flag == 1 ) {
17300                 if ( defined($open_or_close) && $open_or_close == 1 ) {
17301                     $gap = -1;
17302                 }
17303             }
17304
17305             if ( $gap >= 0 ) {
17306                 $leading_string = $cached_line_text . ' ' x $gap;
17307             }
17308             else {
17309                 $file_writer_object->write_code_line(
17310                     $cached_line_text . "\n" );
17311             }
17312         }
17313
17314         # handle cached line to place before this closing container token
17315         else {
17316             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
17317
17318             if ( length($test_line) <= $rOpts_maximum_line_length ) {
17319                 $str            = $test_line;
17320                 $leading_string = "";
17321             }
17322             else {
17323                 $file_writer_object->write_code_line(
17324                     $cached_line_text . "\n" );
17325             }
17326         }
17327     }
17328     $cached_line_type = 0;
17329     $cached_line_text = "";
17330
17331     my $line = $leading_string . $str;
17332
17333     # write or cache this line
17334     if ( !$rvertical_tightness_flags || $side_comment_length > 0 ) {
17335         $file_writer_object->write_code_line( $line . "\n" );
17336     }
17337     else {
17338         $cached_line_text  = $line;
17339         $cached_line_type  = $open_or_close;
17340         $cached_line_flag  = $tightness_flag;
17341         $cached_seqno      = $seqno;
17342         $cached_line_valid = $valid;
17343     }
17344
17345     $last_group_level_written = $group_level;
17346     $last_side_comment_length = $side_comment_length;
17347     $extra_indent_ok          = 0;
17348 }
17349
17350 {    # begin get_leading_string
17351
17352     my @leading_string_cache;
17353
17354     sub get_leading_string {
17355
17356         # define the leading whitespace string for this line..
17357         my $leading_whitespace_count = shift;
17358
17359         # Handle case of zero whitespace, which includes multi-line quotes
17360         # (which may have a finite level; this prevents tab problems)
17361         if ( $leading_whitespace_count <= 0 ) {
17362             return "";
17363         }
17364
17365         # look for previous result
17366         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
17367             return $leading_string_cache[$leading_whitespace_count];
17368         }
17369
17370         # must compute a string for this number of spaces
17371         my $leading_string;
17372
17373         # Handle simple case of no tabs
17374         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
17375             || $rOpts_indent_columns <= 0 )
17376         {
17377             $leading_string = ( ' ' x $leading_whitespace_count );
17378         }
17379
17380         # Handle entab option
17381         elsif ($rOpts_entab_leading_whitespace) {
17382             my $space_count =
17383               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
17384             my $tab_count =
17385               int(
17386                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
17387             $leading_string = "\t" x $tab_count . ' ' x $space_count;
17388         }
17389
17390         # Handle option of one tab per level
17391         else {
17392             $leading_string = ( "\t" x $group_level );
17393             my $space_count =
17394               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
17395
17396             # shouldn't happen:
17397             if ( $space_count < 0 ) {
17398                 warning(
17399 "Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
17400                 );
17401                 $leading_string = ( ' ' x $leading_whitespace_count );
17402             }
17403             else {
17404                 $leading_string .= ( ' ' x $space_count );
17405             }
17406         }
17407         $leading_string_cache[$leading_whitespace_count] = $leading_string;
17408         return $leading_string;
17409     }
17410 }    # end get_leading_string
17411
17412 sub report_anything_unusual {
17413     my $self = shift;
17414     if ( $outdented_line_count > 0 ) {
17415         write_logfile_entry(
17416             "$outdented_line_count long lines were outdented:\n");
17417         write_logfile_entry(
17418             "  First at output line $first_outdented_line_at\n");
17419
17420         if ( $outdented_line_count > 1 ) {
17421             write_logfile_entry(
17422                 "   Last at output line $last_outdented_line_at\n");
17423         }
17424         write_logfile_entry(
17425             "  use -noll to prevent outdenting, -l=n to increase line length\n"
17426         );
17427         write_logfile_entry("\n");
17428     }
17429 }
17430
17431 #####################################################################
17432 #
17433 # the Perl::Tidy::FileWriter class writes the output file
17434 #
17435 #####################################################################
17436
17437 package Perl::Tidy::FileWriter;
17438
17439 # Maximum number of little messages; probably need not be changed.
17440 use constant MAX_NAG_MESSAGES => 6;
17441
17442 sub write_logfile_entry {
17443     my $self          = shift;
17444     my $logger_object = $self->{_logger_object};
17445     if ($logger_object) {
17446         $logger_object->write_logfile_entry(@_);
17447     }
17448 }
17449
17450 sub new {
17451     my $class = shift;
17452     my ( $line_sink_object, $rOpts, $logger_object ) = @_;
17453
17454     bless {
17455         _line_sink_object           => $line_sink_object,
17456         _logger_object              => $logger_object,
17457         _rOpts                      => $rOpts,
17458         _output_line_number         => 1,
17459         _consecutive_blank_lines    => 0,
17460         _consecutive_nonblank_lines => 0,
17461         _first_line_length_error    => 0,
17462         _max_line_length_error      => 0,
17463         _last_line_length_error     => 0,
17464         _first_line_length_error_at => 0,
17465         _max_line_length_error_at   => 0,
17466         _last_line_length_error_at  => 0,
17467         _line_length_error_count    => 0,
17468         _max_output_line_length     => 0,
17469         _max_output_line_length_at  => 0,
17470     }, $class;
17471 }
17472
17473 sub tee_on {
17474     my $self = shift;
17475     $self->{_line_sink_object}->tee_on();
17476 }
17477
17478 sub tee_off {
17479     my $self = shift;
17480     $self->{_line_sink_object}->tee_off();
17481 }
17482
17483 sub get_output_line_number {
17484     my $self = shift;
17485     return $self->{_output_line_number};
17486 }
17487
17488 sub decrement_output_line_number {
17489     my $self = shift;
17490     $self->{_output_line_number}--;
17491 }
17492
17493 sub get_consecutive_nonblank_lines {
17494     my $self = shift;
17495     return $self->{_consecutive_nonblank_lines};
17496 }
17497
17498 sub reset_consecutive_blank_lines {
17499     my $self = shift;
17500     $self->{_consecutive_blank_lines} = 0;
17501 }
17502
17503 sub want_blank_line {
17504     my $self = shift;
17505     unless ( $self->{_consecutive_blank_lines} ) {
17506         $self->write_blank_code_line();
17507     }
17508 }
17509
17510 sub write_blank_code_line {
17511     my $self  = shift;
17512     my $rOpts = $self->{_rOpts};
17513     return
17514       if ( $self->{_consecutive_blank_lines} >=
17515         $rOpts->{'maximum-consecutive-blank-lines'} );
17516     $self->{_consecutive_blank_lines}++;
17517     $self->{_consecutive_nonblank_lines} = 0;
17518     $self->write_line("\n");
17519 }
17520
17521 sub write_code_line {
17522     my $self = shift;
17523     my $a    = shift;
17524
17525     if ( $a =~ /^\s*$/ ) {
17526         my $rOpts = $self->{_rOpts};
17527         return
17528           if ( $self->{_consecutive_blank_lines} >=
17529             $rOpts->{'maximum-consecutive-blank-lines'} );
17530         $self->{_consecutive_blank_lines}++;
17531         $self->{_consecutive_nonblank_lines} = 0;
17532     }
17533     else {
17534         $self->{_consecutive_blank_lines} = 0;
17535         $self->{_consecutive_nonblank_lines}++;
17536     }
17537     $self->write_line($a);
17538 }
17539
17540 sub write_line {
17541     my $self = shift;
17542     my $a    = shift;
17543
17544     # TODO: go through and see if the test is necessary here
17545     if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
17546
17547     $self->{_line_sink_object}->write_line($a);
17548
17549     # This calculation of excess line length ignores any internal tabs
17550     my $rOpts  = $self->{_rOpts};
17551     my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
17552     if ( $a =~ /^\t+/g ) {
17553         $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
17554     }
17555
17556     # Note that we just incremented output line number to future value
17557     # so we must subtract 1 for current line number
17558     if ( length($a) > 1 + $self->{_max_output_line_length} ) {
17559         $self->{_max_output_line_length}    = length($a) - 1;
17560         $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
17561     }
17562
17563     if ( $exceed > 0 ) {
17564         my $output_line_number = $self->{_output_line_number};
17565         $self->{_last_line_length_error}    = $exceed;
17566         $self->{_last_line_length_error_at} = $output_line_number - 1;
17567         if ( $self->{_line_length_error_count} == 0 ) {
17568             $self->{_first_line_length_error}    = $exceed;
17569             $self->{_first_line_length_error_at} = $output_line_number - 1;
17570         }
17571
17572         if (
17573             $self->{_last_line_length_error} > $self->{_max_line_length_error} )
17574         {
17575             $self->{_max_line_length_error}    = $exceed;
17576             $self->{_max_line_length_error_at} = $output_line_number - 1;
17577         }
17578
17579         if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
17580             $self->write_logfile_entry(
17581                 "Line length exceeded by $exceed characters\n");
17582         }
17583         $self->{_line_length_error_count}++;
17584     }
17585
17586 }
17587
17588 sub report_line_length_errors {
17589     my $self                    = shift;
17590     my $rOpts                   = $self->{_rOpts};
17591     my $line_length_error_count = $self->{_line_length_error_count};
17592     if ( $line_length_error_count == 0 ) {
17593         $self->write_logfile_entry(
17594             "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
17595         my $max_output_line_length    = $self->{_max_output_line_length};
17596         my $max_output_line_length_at = $self->{_max_output_line_length_at};
17597         $self->write_logfile_entry(
17598 "  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
17599         );
17600
17601     }
17602     else {
17603
17604         my $word = ( $line_length_error_count > 1 ) ? "s" : "";
17605         $self->write_logfile_entry(
17606 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
17607         );
17608
17609         $word = ( $line_length_error_count > 1 ) ? "First" : "";
17610         my $first_line_length_error    = $self->{_first_line_length_error};
17611         my $first_line_length_error_at = $self->{_first_line_length_error_at};
17612         $self->write_logfile_entry(
17613 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
17614         );
17615
17616         if ( $line_length_error_count > 1 ) {
17617             my $max_line_length_error     = $self->{_max_line_length_error};
17618             my $max_line_length_error_at  = $self->{_max_line_length_error_at};
17619             my $last_line_length_error    = $self->{_last_line_length_error};
17620             my $last_line_length_error_at = $self->{_last_line_length_error_at};
17621             $self->write_logfile_entry(
17622 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
17623             );
17624             $self->write_logfile_entry(
17625 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
17626             );
17627         }
17628     }
17629 }
17630
17631 #####################################################################
17632 #
17633 # The Perl::Tidy::Debugger class shows line tokenization
17634 #
17635 #####################################################################
17636
17637 package Perl::Tidy::Debugger;
17638
17639 sub new {
17640
17641     my ( $class, $filename ) = @_;
17642
17643     bless {
17644         _debug_file        => $filename,
17645         _debug_file_opened => 0,
17646         _fh                => undef,
17647     }, $class;
17648 }
17649
17650 sub really_open_debug_file {
17651
17652     my $self       = shift;
17653     my $debug_file = $self->{_debug_file};
17654     my $fh;
17655     unless ( $fh = IO::File->new("> $debug_file") ) {
17656         warn("can't open $debug_file: $!\n");
17657     }
17658     $self->{_debug_file_opened} = 1;
17659     $self->{_fh}                = $fh;
17660     print $fh
17661       "Use -dump-token-types (-dtt) to get a list of token type codes\n";
17662 }
17663
17664 sub close_debug_file {
17665
17666     my $self = shift;
17667     my $fh   = $self->{_fh};
17668     if ( $self->{_debug_file_opened} ) {
17669
17670         eval { $self->{_fh}->close() };
17671     }
17672 }
17673
17674 sub write_debug_entry {
17675
17676     # This is a debug dump routine which may be modified as necessary
17677     # to dump tokens on a line-by-line basis.  The output will be written
17678     # to the .DEBUG file when the -D flag is entered.
17679     my $self           = shift;
17680     my $line_of_tokens = shift;
17681
17682     my $input_line        = $line_of_tokens->{_line_text};
17683     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
17684     my $rtokens           = $line_of_tokens->{_rtokens};
17685     my $rlevels           = $line_of_tokens->{_rlevels};
17686     my $rslevels          = $line_of_tokens->{_rslevels};
17687     my $rblock_type       = $line_of_tokens->{_rblock_type};
17688     my $input_line_number = $line_of_tokens->{_line_number};
17689     my $line_type         = $line_of_tokens->{_line_type};
17690
17691     my ( $j, $num );
17692
17693     my $token_str              = "$input_line_number: ";
17694     my $reconstructed_original = "$input_line_number: ";
17695     my $block_str              = "$input_line_number: ";
17696
17697     #$token_str .= "$line_type: ";
17698     #$reconstructed_original .= "$line_type: ";
17699
17700     my $pattern   = "";
17701     my @next_char = ( '"', '"' );
17702     my $i_next    = 0;
17703     unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
17704     my $fh = $self->{_fh};
17705
17706     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
17707
17708         # testing patterns
17709         if ( $$rtoken_type[$j] eq 'k' ) {
17710             $pattern .= $$rtokens[$j];
17711         }
17712         else {
17713             $pattern .= $$rtoken_type[$j];
17714         }
17715         $reconstructed_original .= $$rtokens[$j];
17716         $block_str              .= "($$rblock_type[$j])";
17717         $num = length( $$rtokens[$j] );
17718         my $type_str = $$rtoken_type[$j];
17719
17720         # be sure there are no blank tokens (shouldn't happen)
17721         # This can only happen if a programming error has been made
17722         # because all valid tokens are non-blank
17723         if ( $type_str eq ' ' ) {
17724             print $fh "BLANK TOKEN on the next line\n";
17725             $type_str = $next_char[$i_next];
17726             $i_next   = 1 - $i_next;
17727         }
17728
17729         if ( length($type_str) == 1 ) {
17730             $type_str = $type_str x $num;
17731         }
17732         $token_str .= $type_str;
17733     }
17734
17735     # Write what you want here ...
17736     # print $fh "$input_line\n";
17737     # print $fh "$pattern\n";
17738     print $fh "$reconstructed_original\n";
17739     print $fh "$token_str\n";
17740
17741     #print $fh "$block_str\n";
17742 }
17743
17744 #####################################################################
17745 #
17746 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
17747 # method for returning the next line to be parsed, as well as a
17748 # 'peek_ahead()' method
17749 #
17750 # The input parameter is an object with a 'get_line()' method
17751 # which returns the next line to be parsed
17752 #
17753 #####################################################################
17754
17755 package Perl::Tidy::LineBuffer;
17756
17757 sub new {
17758
17759     my $class              = shift;
17760     my $line_source_object = shift;
17761
17762     return bless {
17763         _line_source_object => $line_source_object,
17764         _rlookahead_buffer  => [],
17765     }, $class;
17766 }
17767
17768 sub peek_ahead {
17769     my $self               = shift;
17770     my $buffer_index       = shift;
17771     my $line               = undef;
17772     my $line_source_object = $self->{_line_source_object};
17773     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
17774     if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
17775         $line = $$rlookahead_buffer[$buffer_index];
17776     }
17777     else {
17778         $line = $line_source_object->get_line();
17779         push( @$rlookahead_buffer, $line );
17780     }
17781     return $line;
17782 }
17783
17784 sub get_line {
17785     my $self               = shift;
17786     my $line               = undef;
17787     my $line_source_object = $self->{_line_source_object};
17788     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
17789
17790     if ( scalar(@$rlookahead_buffer) ) {
17791         $line = shift @$rlookahead_buffer;
17792     }
17793     else {
17794         $line = $line_source_object->get_line();
17795     }
17796     return $line;
17797 }
17798
17799 ########################################################################
17800 #
17801 # the Perl::Tidy::Tokenizer package is essentially a filter which
17802 # reads lines of perl source code from a source object and provides
17803 # corresponding tokenized lines through its get_line() method.  Lines
17804 # flow from the source_object to the caller like this:
17805 #
17806 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
17807 #   get_line()         get_line()           get_line()     line_of_tokens
17808 #
17809 # The source object can be any object with a get_line() method which
17810 # supplies one line (a character string) perl call.
17811 # The LineBuffer object is created by the Tokenizer.
17812 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
17813 # containing one tokenized line for each call to its get_line() method.
17814 #
17815 # WARNING: This is not a real class yet.  Only one tokenizer my be used.
17816 #
17817 ########################################################################
17818
17819 package Perl::Tidy::Tokenizer;
17820
17821 BEGIN {
17822
17823     # Caution: these debug flags produce a lot of output
17824     # They should all be 0 except when debugging small scripts
17825
17826     use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
17827     use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
17828     use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
17829     use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
17830     use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
17831
17832     my $debug_warning = sub {
17833         print "TOKENIZER_DEBUGGING with key $_[0]\n";
17834     };
17835
17836     TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
17837     TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
17838     TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
17839     TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
17840     TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
17841
17842 }
17843
17844 use Carp;
17845 use vars qw{
17846   $tokenizer_self
17847   $level_in_tokenizer
17848   $slevel_in_tokenizer
17849   $nesting_token_string
17850   $nesting_type_string
17851   $nesting_block_string
17852   $nesting_block_flag
17853   $nesting_list_string
17854   $nesting_list_flag
17855   $saw_negative_indentation
17856   $id_scan_state
17857   $last_nonblank_token
17858   $last_nonblank_type
17859   $last_nonblank_block_type
17860   $last_nonblank_container_type
17861   $last_nonblank_type_sequence
17862   $last_last_nonblank_token
17863   $last_last_nonblank_type
17864   $last_last_nonblank_block_type
17865   $last_last_nonblank_container_type
17866   $last_last_nonblank_type_sequence
17867   $last_nonblank_prototype
17868   $statement_type
17869   $identifier
17870   $in_quote
17871   $quote_type
17872   $quote_character
17873   $quote_pos
17874   $quote_depth
17875   $allowed_quote_modifiers
17876   $paren_depth
17877   @paren_type
17878   @paren_semicolon_count
17879   @paren_structural_type
17880   $brace_depth
17881   @brace_type
17882   @brace_structural_type
17883   @brace_statement_type
17884   @brace_context
17885   @brace_package
17886   $square_bracket_depth
17887   @square_bracket_type
17888   @square_bracket_structural_type
17889   @depth_array
17890   @starting_line_of_current_depth
17891   @current_depth
17892   @current_sequence_number
17893   @nesting_sequence_number
17894   @lower_case_labels_at
17895   $saw_v_string
17896   %is_constant
17897   %is_user_function
17898   %user_function_prototype
17899   %saw_function_definition
17900   $max_token_index
17901   $peeked_ahead
17902   $current_package
17903   $unexpected_error_count
17904   $input_line
17905   $input_line_number
17906   $rpretokens
17907   $rpretoken_map
17908   $rpretoken_type
17909   $want_paren
17910   $context
17911   @slevel_stack
17912   $ci_string_in_tokenizer
17913   $continuation_string_in_tokenizer
17914   $in_statement_continuation
17915   $started_looking_for_here_target_at
17916   $nearly_matched_here_target_at
17917
17918   %is_indirect_object_taker
17919   %is_block_operator
17920   %expecting_operator_token
17921   %expecting_operator_types
17922   %expecting_term_types
17923   %expecting_term_token
17924   %is_block_function
17925   %is_block_list_function
17926   %is_digraph
17927   %is_file_test_operator
17928   %is_trigraph
17929   %is_valid_token_type
17930   %is_keyword
17931   %is_code_block_token
17932   %really_want_term
17933   @opening_brace_names
17934   @closing_brace_names
17935   %is_keyword_taking_list
17936   %is_q_qq_qw_qx_qr_s_y_tr_m
17937 };
17938
17939 # possible values of operator_expected()
17940 use constant TERM     => -1;
17941 use constant UNKNOWN  => 0;
17942 use constant OPERATOR => 1;
17943
17944 # possible values of context
17945 use constant SCALAR_CONTEXT  => -1;
17946 use constant UNKNOWN_CONTEXT => 0;
17947 use constant LIST_CONTEXT    => 1;
17948
17949 # Maximum number of little messages; probably need not be changed.
17950 use constant MAX_NAG_MESSAGES => 6;
17951
17952 {
17953
17954     # methods to count instances
17955     my $_count = 0;
17956     sub get_count        { $_count; }
17957     sub _increment_count { ++$_count }
17958     sub _decrement_count { --$_count }
17959 }
17960
17961 sub DESTROY {
17962     $_[0]->_decrement_count();
17963 }
17964
17965 sub new {
17966
17967     my $class = shift;
17968
17969     # Note: 'tabs' and 'indent_columns' are temporary and should be
17970     # removed asap
17971     my %defaults = (
17972         source_object       => undef,
17973         debugger_object     => undef,
17974         diagnostics_object  => undef,
17975         logger_object       => undef,
17976         starting_level      => undef,
17977         indent_columns      => 4,
17978         tabs                => 0,
17979         look_for_hash_bang  => 0,
17980         trim_qw             => 1,
17981         look_for_autoloader => 1,
17982         look_for_selfloader => 1,
17983     );
17984     my %args = ( %defaults, @_ );
17985
17986     # we are given an object with a get_line() method to supply source lines
17987     my $source_object = $args{source_object};
17988
17989     # we create another object with a get_line() and peek_ahead() method
17990     my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
17991
17992     # Tokenizer state data is as follows:
17993     # _rhere_target_list    reference to list of here-doc targets
17994     # _here_doc_target      the target string for a here document
17995     # _here_quote_character the type of here-doc quoting (" ' ` or none)
17996     #                       to determine if interpolation is done
17997     # _quote_target         character we seek if chasing a quote
17998     # _line_start_quote     line where we started looking for a long quote
17999     # _in_here_doc          flag indicating if we are in a here-doc
18000     # _in_pod               flag set if we are in pod documentation
18001     # _in_error             flag set if we saw severe error (binary in script)
18002     # _in_data              flag set if we are in __DATA__ section
18003     # _in_end               flag set if we are in __END__ section
18004     # _in_format            flag set if we are in a format description
18005     # _in_quote             flag telling if we are chasing a quote
18006     # _starting_level       indentation level of first line
18007     # _input_tabstr         string denoting one indentation level of input file
18008     # _know_input_tabstr    flag indicating if we know _input_tabstr
18009     # _line_buffer_object   object with get_line() method to supply source code
18010     # _diagnostics_object   place to write debugging information
18011     $tokenizer_self = {
18012         _rhere_target_list    => undef,
18013         _in_here_doc          => 0,
18014         _here_doc_target      => "",
18015         _here_quote_character => "",
18016         _in_data              => 0,
18017         _in_end               => 0,
18018         _in_format            => 0,
18019         _in_error             => 0,
18020         _in_pod               => 0,
18021         _in_quote             => 0,
18022         _quote_target         => "",
18023         _line_start_quote     => -1,
18024         _starting_level       => $args{starting_level},
18025         _know_starting_level  => defined( $args{starting_level} ),
18026         _tabs                 => $args{tabs},
18027         _indent_columns       => $args{indent_columns},
18028         _look_for_hash_bang   => $args{look_for_hash_bang},
18029         _trim_qw              => $args{trim_qw},
18030         _input_tabstr         => "",
18031         _know_input_tabstr    => -1,
18032         _last_line_number     => 0,
18033         _saw_perl_dash_P      => 0,
18034         _saw_perl_dash_w      => 0,
18035         _saw_use_strict       => 0,
18036         _look_for_autoloader  => $args{look_for_autoloader},
18037         _look_for_selfloader  => $args{look_for_selfloader},
18038         _saw_autoloader       => 0,
18039         _saw_selfloader       => 0,
18040         _saw_hash_bang        => 0,
18041         _saw_end              => 0,
18042         _saw_data             => 0,
18043         _saw_lc_filehandle    => 0,
18044         _started_tokenizing   => 0,
18045         _line_buffer_object   => $line_buffer_object,
18046         _debugger_object      => $args{debugger_object},
18047         _diagnostics_object   => $args{diagnostics_object},
18048         _logger_object        => $args{logger_object},
18049     };
18050
18051     prepare_for_a_new_file();
18052     find_starting_indentation_level();
18053
18054     bless $tokenizer_self, $class;
18055
18056     # This is not a full class yet, so die if an attempt is made to
18057     # create more than one object.
18058
18059     if ( _increment_count() > 1 ) {
18060         confess
18061 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
18062     }
18063
18064     return $tokenizer_self;
18065
18066 }
18067
18068 # interface to Perl::Tidy::Logger routines
18069 sub warning {
18070     my $logger_object = $tokenizer_self->{_logger_object};
18071     if ($logger_object) {
18072         $logger_object->warning(@_);
18073     }
18074 }
18075
18076 sub complain {
18077     my $logger_object = $tokenizer_self->{_logger_object};
18078     if ($logger_object) {
18079         $logger_object->complain(@_);
18080     }
18081 }
18082
18083 sub write_logfile_entry {
18084     my $logger_object = $tokenizer_self->{_logger_object};
18085     if ($logger_object) {
18086         $logger_object->write_logfile_entry(@_);
18087     }
18088 }
18089
18090 sub interrupt_logfile {
18091     my $logger_object = $tokenizer_self->{_logger_object};
18092     if ($logger_object) {
18093         $logger_object->interrupt_logfile();
18094     }
18095 }
18096
18097 sub resume_logfile {
18098     my $logger_object = $tokenizer_self->{_logger_object};
18099     if ($logger_object) {
18100         $logger_object->resume_logfile();
18101     }
18102 }
18103
18104 sub increment_brace_error {
18105     my $logger_object = $tokenizer_self->{_logger_object};
18106     if ($logger_object) {
18107         $logger_object->increment_brace_error();
18108     }
18109 }
18110
18111 sub report_definite_bug {
18112     my $logger_object = $tokenizer_self->{_logger_object};
18113     if ($logger_object) {
18114         $logger_object->report_definite_bug();
18115     }
18116 }
18117
18118 sub brace_warning {
18119     my $logger_object = $tokenizer_self->{_logger_object};
18120     if ($logger_object) {
18121         $logger_object->brace_warning(@_);
18122     }
18123 }
18124
18125 sub get_saw_brace_error {
18126     my $logger_object = $tokenizer_self->{_logger_object};
18127     if ($logger_object) {
18128         $logger_object->get_saw_brace_error();
18129     }
18130     else {
18131         0;
18132     }
18133 }
18134
18135 # interface to Perl::Tidy::Diagnostics routines
18136 sub write_diagnostics {
18137     if ( $tokenizer_self->{_diagnostics_object} ) {
18138         $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
18139     }
18140 }
18141
18142 sub report_tokenization_errors {
18143
18144     my $self = shift;
18145
18146     my $level = get_indentation_level();
18147     if ( $level != $tokenizer_self->{_starting_level} ) {
18148         warning("final indentation level: $level\n");
18149     }
18150
18151     check_final_nesting_depths();
18152
18153     if ( $tokenizer_self->{_look_for_hash_bang}
18154         && !$tokenizer_self->{_saw_hash_bang} )
18155     {
18156         warning(
18157             "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
18158     }
18159
18160     if ( $tokenizer_self->{_in_format} ) {
18161         warning("hit EOF while in format description\n");
18162     }
18163
18164     # this check may be removed after a year or so
18165     if ( $tokenizer_self->{_saw_lc_filehandle} ) {
18166
18167         warning( <<'EOM' );
18168 ------------------------------------------------------------------------
18169 PLEASE NOTE: If you get this message, it is because perltidy noticed
18170 possible ambiguous syntax at one or more places in your script, as
18171 noted above.  The problem is with statements accepting indirect objects,
18172 such as print and printf statements of the form
18173
18174     print bareword ( $etc
18175
18176 Perltidy needs your help in deciding if 'bareword' is a filehandle or a
18177 function call.  The problem is the space between 'bareword' and '('.  If
18178 'bareword' is a function call, you should remove the trailing space.  If
18179 'bareword' is a filehandle, you should avoid the opening paren or else
18180 globally capitalize 'bareword' to be BAREWORD.  So the above line
18181 would be:
18182
18183     print bareword( $etc    # function
18184 or
18185     print bareword @list    # filehandle
18186 or
18187     print BAREWORD ( $etc   # filehandle
18188
18189 If you want to keep the line as it is, and are sure it is correct,
18190 you can use -w=0 to prevent this message.
18191 ------------------------------------------------------------------------
18192 EOM
18193
18194     }
18195
18196     if ( $tokenizer_self->{_in_pod} ) {
18197
18198         # Just write log entry if this is after __END__ or __DATA__
18199         # because this happens to often, and it is not likely to be
18200         # a parsing error.
18201         if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
18202             write_logfile_entry(
18203 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
18204             );
18205         }
18206
18207         else {
18208             complain(
18209 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
18210             );
18211         }
18212
18213     }
18214
18215     if ( $tokenizer_self->{_in_here_doc} ) {
18216         my $here_doc_target = $tokenizer_self->{_here_doc_target};
18217         if ($here_doc_target) {
18218             warning(
18219 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
18220             );
18221         }
18222         else {
18223             warning(
18224 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
18225             );
18226         }
18227         if ($nearly_matched_here_target_at) {
18228             warning(
18229 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
18230             );
18231         }
18232     }
18233
18234     if ( $tokenizer_self->{_in_quote} ) {
18235         my $line_start_quote = $tokenizer_self->{_line_start_quote};
18236         my $quote_target     = $tokenizer_self->{_quote_target};
18237         warning(
18238 "hit EOF seeking end of quote/pattern starting at line $line_start_quote ending in $quote_target\n"
18239         );
18240     }
18241
18242     unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
18243         if ( $] < 5.006 ) {
18244             write_logfile_entry("Suggest including '-w parameter'\n");
18245         }
18246         else {
18247             write_logfile_entry("Suggest including 'use warnings;'\n");
18248         }
18249     }
18250
18251     if ( $tokenizer_self->{_saw_perl_dash_P} ) {
18252         write_logfile_entry("Use of -P parameter for defines is discouraged\n");
18253     }
18254
18255     unless ( $tokenizer_self->{_saw_use_strict} ) {
18256         write_logfile_entry("Suggest including 'use strict;'\n");
18257     }
18258
18259     # it is suggested that lables have at least one upper case character
18260     # for legibility and to avoid code breakage as new keywords are introduced
18261     if (@lower_case_labels_at) {
18262         my $num = @lower_case_labels_at;
18263         write_logfile_entry(
18264             "Suggest using upper case characters in label(s)\n");
18265         local $" = ')(';
18266         write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
18267     }
18268 }
18269
18270 sub report_v_string {
18271
18272     # warn if this version can't handle v-strings
18273     my $tok = shift;
18274     $saw_v_string = $input_line_number;
18275     if ( $] < 5.006 ) {
18276         warning(
18277 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
18278         );
18279     }
18280 }
18281
18282 sub get_input_line_number {
18283     return $tokenizer_self->{_last_line_number};
18284 }
18285
18286 # returns the next tokenized line
18287 sub get_line {
18288
18289     my $self = shift;
18290
18291     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
18292
18293     return undef unless ($input_line);
18294
18295     $tokenizer_self->{_last_line_number}++;
18296
18297     # Find and remove what characters terminate this line, including any
18298     # control r
18299     my $input_line_separator = "";
18300     if ( chomp($input_line) ) { $input_line_separator = $/ }
18301
18302     # TODO: what other characters should be included here?
18303     if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
18304         $input_line_separator = $2 . $input_line_separator;
18305     }
18306
18307     # for backwards compatability we keep the line text terminated with
18308     # a newline character
18309     $input_line .= "\n";
18310
18311     my $input_line_number = $tokenizer_self->{_last_line_number};
18312
18313     # create a data structure describing this line which will be
18314     # returned to the caller.
18315
18316     # _line_type codes are:
18317     #   SYSTEM         - system-specific code before hash-bang line
18318     #   CODE           - line of perl code (including comments)
18319     #   POD_START      - line starting pod, such as '=head'
18320     #   POD            - pod documentation text
18321     #   POD_END        - last line of pod section, '=cut'
18322     #   HERE           - text of here-document
18323     #   HERE_END       - last line of here-doc (target word)
18324     #   FORMAT         - format section
18325     #   FORMAT_END     - last line of format section, '.'
18326     #   DATA_START     - __DATA__ line
18327     #   DATA           - unidentified text following __DATA__
18328     #   END_START      - __END__ line
18329     #   END            - unidentified text following __END__
18330     #   ERROR          - we are in big trouble, probably not a perl script
18331
18332     # Other variables:
18333     #   _curly_brace_depth     - depth of curly braces at start of line
18334     #   _square_bracket_depth  - depth of square brackets at start of line
18335     #   _paren_depth           - depth of parens at start of line
18336     #   _starting_in_quote     - this line continues a multi-line quote
18337     #                            (so don't trim leading blanks!)
18338     #   _ending_in_quote       - this line ends in a multi-line quote
18339     #                            (so don't trim trailing blanks!)
18340     my $line_of_tokens = {
18341         _line_type                => 'EOF',
18342         _line_text                => $input_line,
18343         _line_number              => $input_line_number,
18344         _rtoken_type              => undef,
18345         _rtokens                  => undef,
18346         _rlevels                  => undef,
18347         _rslevels                 => undef,
18348         _rblock_type              => undef,
18349         _rcontainer_type          => undef,
18350         _rcontainer_environment   => undef,
18351         _rtype_sequence           => undef,
18352         _rnesting_tokens          => undef,
18353         _rci_levels               => undef,
18354         _rnesting_blocks          => undef,
18355         _python_indentation_level => -1,                      ## 0,
18356         _starting_in_quote        =>
18357           ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
18358         _ending_in_quote      => 0,
18359         _curly_brace_depth    => $brace_depth,
18360         _square_bracket_depth => $square_bracket_depth,
18361         _paren_depth          => $paren_depth,
18362         _quote_character      => '',
18363     };
18364
18365     # must print line unchanged if we are in a here document
18366     if ( $tokenizer_self->{_in_here_doc} ) {
18367
18368         $line_of_tokens->{_line_type} = 'HERE';
18369         my $here_doc_target      = $tokenizer_self->{_here_doc_target};
18370         my $here_quote_character = $tokenizer_self->{_here_quote_character};
18371         my $candidate_target     = $input_line;
18372         chomp $candidate_target;
18373         if ( $candidate_target eq $here_doc_target ) {
18374             $nearly_matched_here_target_at = undef;
18375             $line_of_tokens->{_line_type} = 'HERE_END';
18376             write_logfile_entry("Exiting HERE document $here_doc_target\n");
18377
18378             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
18379             if (@$rhere_target_list) {    # there can be multiple here targets
18380                 ( $here_doc_target, $here_quote_character ) =
18381                   @{ shift @$rhere_target_list };
18382                 $tokenizer_self->{_here_doc_target}      = $here_doc_target;
18383                 $tokenizer_self->{_here_quote_character} =
18384                   $here_quote_character;
18385                 write_logfile_entry(
18386                     "Entering HERE document $here_doc_target\n");
18387                 $nearly_matched_here_target_at      = undef;
18388                 $started_looking_for_here_target_at = $input_line_number;
18389             }
18390             else {
18391                 $tokenizer_self->{_in_here_doc}          = 0;
18392                 $tokenizer_self->{_here_doc_target}      = "";
18393                 $tokenizer_self->{_here_quote_character} = "";
18394             }
18395         }
18396
18397         # check for error of extra whitespace
18398         # note for PERL6: leading whitespace is allowed
18399         else {
18400             $candidate_target =~ s/\s*$//;
18401             $candidate_target =~ s/^\s*//;
18402             if ( $candidate_target eq $here_doc_target ) {
18403                 $nearly_matched_here_target_at = $input_line_number;
18404             }
18405         }
18406         return $line_of_tokens;
18407     }
18408
18409     # must print line unchanged if we are in a format section
18410     elsif ( $tokenizer_self->{_in_format} ) {
18411
18412         if ( $input_line =~ /^\.[\s#]*$/ ) {
18413             write_logfile_entry("Exiting format section\n");
18414             $tokenizer_self->{_in_format} = 0;
18415             $line_of_tokens->{_line_type} = 'FORMAT_END';
18416         }
18417         else {
18418             $line_of_tokens->{_line_type} = 'FORMAT';
18419         }
18420         return $line_of_tokens;
18421     }
18422
18423     # must print line unchanged if we are in pod documentation
18424     elsif ( $tokenizer_self->{_in_pod} ) {
18425
18426         $line_of_tokens->{_line_type} = 'POD';
18427         if ( $input_line =~ /^=cut/ ) {
18428             $line_of_tokens->{_line_type} = 'POD_END';
18429             write_logfile_entry("Exiting POD section\n");
18430             $tokenizer_self->{_in_pod} = 0;
18431         }
18432         if ( $input_line =~ /^\#\!.*perl\b/ ) {
18433             warning("Hash-bang in pod can cause perl to fail! \n");
18434         }
18435
18436         return $line_of_tokens;
18437     }
18438
18439     # must print line unchanged if we have seen a severe error (i.e., we
18440     # are seeing illegal tokens and connot continue.  Syntax errors do
18441     # not pass this route).  Calling routine can decide what to do, but
18442     # the default can be to just pass all lines as if they were after __END__
18443     elsif ( $tokenizer_self->{_in_error} ) {
18444         $line_of_tokens->{_line_type} = 'ERROR';
18445         return $line_of_tokens;
18446     }
18447
18448     # print line unchanged if we are __DATA__ section
18449     elsif ( $tokenizer_self->{_in_data} ) {
18450
18451         # ...but look for POD
18452         # Note that the _in_data and _in_end flags remain set
18453         # so that we return to that state after seeing the
18454         # end of a pod section
18455         if ( $input_line =~ /^=(?!cut)/ ) {
18456             $line_of_tokens->{_line_type} = 'POD_START';
18457             write_logfile_entry("Entering POD section\n");
18458             $tokenizer_self->{_in_pod} = 1;
18459             return $line_of_tokens;
18460         }
18461         else {
18462             $line_of_tokens->{_line_type} = 'DATA';
18463             return $line_of_tokens;
18464         }
18465     }
18466
18467     # print line unchanged if we are in __END__ section
18468     elsif ( $tokenizer_self->{_in_end} ) {
18469
18470         # ...but look for POD
18471         # Note that the _in_data and _in_end flags remain set
18472         # so that we return to that state after seeing the
18473         # end of a pod section
18474         if ( $input_line =~ /^=(?!cut)/ ) {
18475             $line_of_tokens->{_line_type} = 'POD_START';
18476             write_logfile_entry("Entering POD section\n");
18477             $tokenizer_self->{_in_pod} = 1;
18478             return $line_of_tokens;
18479         }
18480         else {
18481             $line_of_tokens->{_line_type} = 'END';
18482             return $line_of_tokens;
18483         }
18484     }
18485
18486     # check for a hash-bang line if we haven't seen one
18487     if ( !$tokenizer_self->{_saw_hash_bang} ) {
18488         if ( $input_line =~ /^\#\!.*perl\b/ ) {
18489             $tokenizer_self->{_saw_hash_bang} = $input_line_number;
18490
18491             # check for -w and -P flags
18492             if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
18493                 $tokenizer_self->{_saw_perl_dash_P} = 1;
18494             }
18495
18496             if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
18497                 $tokenizer_self->{_saw_perl_dash_w} = 1;
18498             }
18499
18500             if (   ( $input_line_number > 1 )
18501                 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
18502             {
18503
18504                 # this is helpful for VMS systems; we may have accidentally
18505                 # tokenized some DCL commands
18506                 if ( $tokenizer_self->{_started_tokenizing} ) {
18507                     warning(
18508 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
18509                     );
18510                 }
18511                 else {
18512                     complain("Useless hash-bang after line 1\n");
18513                 }
18514             }
18515
18516             # Report the leading hash-bang as a system line
18517             # This will prevent -dac from deleting it
18518             else {
18519                 $line_of_tokens->{_line_type} = 'SYSTEM';
18520                 return $line_of_tokens;
18521             }
18522         }
18523     }
18524
18525     # wait for a hash-bang before parsing if the user invoked us with -x
18526     if ( $tokenizer_self->{_look_for_hash_bang}
18527         && !$tokenizer_self->{_saw_hash_bang} )
18528     {
18529         $line_of_tokens->{_line_type} = 'SYSTEM';
18530         return $line_of_tokens;
18531     }
18532
18533     # a first line of the form ': #' will be marked as SYSTEM
18534     # since lines of this form may be used by tcsh
18535     if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
18536         $line_of_tokens->{_line_type} = 'SYSTEM';
18537         return $line_of_tokens;
18538     }
18539
18540     # now we know that it is ok to tokenize the line...
18541     # the line tokenizer will modify any of these private variables:
18542     #        _rhere_target_list
18543     #        _in_data
18544     #        _in_end
18545     #        _in_format
18546     #        _in_error
18547     #        _in_pod
18548     #        _in_quote
18549     my $ending_in_quote_last = $tokenizer_self->{_in_quote};
18550     tokenize_this_line($line_of_tokens);
18551
18552     # Now finish defining the return structure and return it
18553     $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
18554
18555     # handle severe error (binary data in script)
18556     if ( $tokenizer_self->{_in_error} ) {
18557         $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
18558         warning("Giving up after error\n");
18559         $line_of_tokens->{_line_type} = 'ERROR';
18560         reset_indentation_level(0);          # avoid error messages
18561         return $line_of_tokens;
18562     }
18563
18564     # handle start of pod documentation
18565     if ( $tokenizer_self->{_in_pod} ) {
18566
18567         # This gets tricky..above a __DATA__ or __END__ section, perl
18568         # accepts '=cut' as the start of pod section. But afterwards,
18569         # only pod utilities see it and they may ignore an =cut without
18570         # leading =head.  In any case, this isn't good.
18571         if ( $input_line =~ /^=cut\b/ ) {
18572             if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
18573                 complain("=cut while not in pod ignored\n");
18574                 $tokenizer_self->{_in_pod}    = 0;
18575                 $line_of_tokens->{_line_type} = 'POD_STOP';
18576             }
18577             else {
18578                 $line_of_tokens->{_line_type} = 'POD_END';
18579                 complain(
18580 "=cut starts a pod section .. this can fool pod utilities.\n"
18581                 );
18582                 write_logfile_entry("Entering POD section\n");
18583             }
18584         }
18585
18586         else {
18587             $line_of_tokens->{_line_type} = 'POD_START';
18588             write_logfile_entry("Entering POD section\n");
18589         }
18590
18591         return $line_of_tokens;
18592     }
18593
18594     # update indentation levels for log messages
18595     if ( $input_line !~ /^\s*$/ ) {
18596         my $rlevels                      = $line_of_tokens->{_rlevels};
18597         my $structural_indentation_level = $$rlevels[0];
18598         my ( $python_indentation_level, $msg ) =
18599           find_indentation_level( $input_line, $structural_indentation_level );
18600         if ($msg) { write_logfile_entry("$msg") }
18601         if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
18602             $line_of_tokens->{_python_indentation_level} =
18603               $python_indentation_level;
18604         }
18605     }
18606
18607     # see if this line contains here doc targets
18608     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
18609     if (@$rhere_target_list) {
18610
18611         #my $here_doc_target = shift @$rhere_target_list;
18612         my ( $here_doc_target, $here_quote_character ) =
18613           @{ shift @$rhere_target_list };
18614         $tokenizer_self->{_in_here_doc}          = 1;
18615         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
18616         $tokenizer_self->{_here_quote_character} = $here_quote_character;
18617         write_logfile_entry("Entering HERE document $here_doc_target\n");
18618         $started_looking_for_here_target_at = $input_line_number;
18619     }
18620
18621     # NOTE: __END__ and __DATA__ statements are written unformatted
18622     # because they can theoretically contain additional characters
18623     # which are not tokenized (and cannot be read with <DATA> either!).
18624     if ( $tokenizer_self->{_in_data} ) {
18625         $line_of_tokens->{_line_type} = 'DATA_START';
18626         write_logfile_entry("Starting __DATA__ section\n");
18627         $tokenizer_self->{_saw_data} = 1;
18628
18629         # keep parsing after __DATA__ if use SelfLoader was seen
18630         if ( $tokenizer_self->{_saw_selfloader} ) {
18631             $tokenizer_self->{_in_data} = 0;
18632             write_logfile_entry(
18633                 "SelfLoader seen, continuing; -nlsl deactivates\n");
18634         }
18635
18636         return $line_of_tokens;
18637     }
18638
18639     elsif ( $tokenizer_self->{_in_end} ) {
18640         $line_of_tokens->{_line_type} = 'END_START';
18641         write_logfile_entry("Starting __END__ section\n");
18642         $tokenizer_self->{_saw_end} = 1;
18643
18644         # keep parsing after __END__ if use AutoLoader was seen
18645         if ( $tokenizer_self->{_saw_autoloader} ) {
18646             $tokenizer_self->{_in_end} = 0;
18647             write_logfile_entry(
18648                 "AutoLoader seen, continuing; -nlal deactivates\n");
18649         }
18650         return $line_of_tokens;
18651     }
18652
18653     # now, finally, we know that this line is type 'CODE'
18654     $line_of_tokens->{_line_type} = 'CODE';
18655
18656     # remember if we have seen any real code
18657     if (   !$tokenizer_self->{_started_tokenizing}
18658         && $input_line !~ /^\s*$/
18659         && $input_line !~ /^\s*#/ )
18660     {
18661         $tokenizer_self->{_started_tokenizing} = 1;
18662     }
18663
18664     if ( $tokenizer_self->{_debugger_object} ) {
18665         $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
18666     }
18667
18668     # Note: if keyword 'format' occurs in this line code, it is still CODE
18669     # (keyword 'format' need not start a line)
18670     if ( $tokenizer_self->{_in_format} ) {
18671         write_logfile_entry("Entering format section\n");
18672     }
18673
18674     if ( $tokenizer_self->{_in_quote}
18675         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
18676     {
18677
18678         if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
18679             $tokenizer_self->{_line_start_quote} = $input_line_number;
18680             $tokenizer_self->{_quote_target}     = $quote_target;
18681             write_logfile_entry(
18682                 "Start multi-line quote or pattern ending in $quote_target\n");
18683         }
18684     }
18685     elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
18686         and !$tokenizer_self->{_in_quote} )
18687     {
18688         $tokenizer_self->{_line_start_quote} = -1;
18689         write_logfile_entry("End of multi-line quote or pattern\n");
18690     }
18691
18692     # we are returning a line of CODE
18693     return $line_of_tokens;
18694 }
18695
18696 sub find_starting_indentation_level {
18697
18698     my $starting_level    = 0;
18699     my $know_input_tabstr = -1;    # flag for find_indentation_level
18700
18701     # use value if given as parameter
18702     if ( $tokenizer_self->{_know_starting_level} ) {
18703         $starting_level = $tokenizer_self->{_starting_level};
18704     }
18705
18706     # if we know there is a hash_bang line, the level must be zero
18707     elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
18708         $tokenizer_self->{_know_starting_level} = 1;
18709     }
18710
18711     # otherwise figure it out from the input file
18712     else {
18713         my $line;
18714         my $i                            = 0;
18715         my $structural_indentation_level = -1; # flag for find_indentation_level
18716
18717         my $msg = "";
18718         while ( $line =
18719             $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
18720         {
18721
18722             # if first line is #! then assume starting level is zero
18723             if ( $i == 1 && $line =~ /^\#\!/ ) {
18724                 $starting_level = 0;
18725                 last;
18726             }
18727             next if ( $line =~ /^\s*#/ );      # must not be comment
18728             next if ( $line =~ /^\s*$/ );      # must not be blank
18729             ( $starting_level, $msg ) =
18730               find_indentation_level( $line, $structural_indentation_level );
18731             if ($msg) { write_logfile_entry("$msg") }
18732             last;
18733         }
18734         $msg = "Line $i implies starting-indentation-level = $starting_level\n";
18735
18736         if ( $starting_level > 0 ) {
18737
18738             my $input_tabstr = $tokenizer_self->{_input_tabstr};
18739             if ( $input_tabstr eq "\t" ) {
18740                 $msg .= "by guessing input tabbing uses 1 tab per level\n";
18741             }
18742             else {
18743                 my $cols = length($input_tabstr);
18744                 $msg .=
18745                   "by guessing input tabbing uses $cols blanks per level\n";
18746             }
18747         }
18748         write_logfile_entry("$msg");
18749     }
18750     $tokenizer_self->{_starting_level} = $starting_level;
18751     reset_indentation_level($starting_level);
18752 }
18753
18754 # Find indentation level given a input line.  At the same time, try to
18755 # figure out the input tabbing scheme.
18756 #
18757 # There are two types of calls:
18758 #
18759 # Type 1: $structural_indentation_level < 0
18760 #  In this case we have to guess $input_tabstr to figure out the level.
18761 #
18762 # Type 2: $structural_indentation_level >= 0
18763 #  In this case the level of this line is known, and this routine can
18764 #  update the tabbing string, if still unknown, to make the level correct.
18765
18766 sub find_indentation_level {
18767     my ( $line, $structural_indentation_level ) = @_;
18768     my $level = 0;
18769     my $msg   = "";
18770
18771     my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
18772     my $input_tabstr      = $tokenizer_self->{_input_tabstr};
18773
18774     # find leading whitespace
18775     my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
18776
18777     # make first guess at input tabbing scheme if necessary
18778     if ( $know_input_tabstr < 0 ) {
18779
18780         $know_input_tabstr = 0;
18781
18782         if ( $tokenizer_self->{_tabs} ) {
18783             $input_tabstr = "\t";
18784             if ( length($leading_whitespace) > 0 ) {
18785                 if ( $leading_whitespace !~ /\t/ ) {
18786
18787                     my $cols = $tokenizer_self->{_indent_columns};
18788
18789                     if ( length($leading_whitespace) < $cols ) {
18790                         $cols = length($leading_whitespace);
18791                     }
18792                     $input_tabstr = " " x $cols;
18793                 }
18794             }
18795         }
18796         else {
18797             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
18798
18799             if ( length($leading_whitespace) > 0 ) {
18800                 if ( $leading_whitespace =~ /^\t/ ) {
18801                     $input_tabstr = "\t";
18802                 }
18803             }
18804         }
18805         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
18806         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
18807     }
18808
18809     # determine the input tabbing scheme if possible
18810     if (   ( $know_input_tabstr == 0 )
18811         && ( length($leading_whitespace) > 0 )
18812         && ( $structural_indentation_level > 0 ) )
18813     {
18814         my $saved_input_tabstr = $input_tabstr;
18815
18816         # check for common case of one tab per indentation level
18817         if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
18818             if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
18819                 $input_tabstr = "\t";
18820                 $msg          = "Guessing old indentation was tab character\n";
18821             }
18822         }
18823
18824         else {
18825
18826             # detab any tabs based on 8 blanks per tab
18827             my $entabbed = "";
18828             if ( $leading_whitespace =~ s/^\t+/        /g ) {
18829                 $entabbed = "entabbed";
18830             }
18831
18832             # now compute tabbing from number of spaces
18833             my $columns =
18834               length($leading_whitespace) / $structural_indentation_level;
18835             if ( $columns == int $columns ) {
18836                 $msg =
18837                   "Guessing old indentation was $columns $entabbed spaces\n";
18838             }
18839             else {
18840                 $columns = int $columns;
18841                 $msg     =
18842 "old indentation is unclear, using $columns $entabbed spaces\n";
18843             }
18844             $input_tabstr = " " x $columns;
18845         }
18846         $know_input_tabstr                    = 1;
18847         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
18848         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
18849
18850         # see if mistakes were made
18851         if ( ( $tokenizer_self->{_starting_level} > 0 )
18852             && !$tokenizer_self->{_know_starting_level} )
18853         {
18854
18855             if ( $input_tabstr ne $saved_input_tabstr ) {
18856                 complain(
18857 "I made a bad starting level guess; rerun with a value for -sil \n"
18858                 );
18859             }
18860         }
18861     }
18862
18863     # use current guess at input tabbing to get input indentation level
18864     #
18865     # Patch to handle a common case of entabbed leading whitespace
18866     # If the leading whitespace equals 4 spaces and we also have
18867     # tabs, detab the input whitespace assuming 8 spaces per tab.
18868     if ( length($input_tabstr) == 4 ) {
18869         $leading_whitespace =~ s/^\t+/        /g;
18870     }
18871
18872     if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
18873         my $pos = 0;
18874
18875         while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
18876         {
18877             $pos += $len_tab;
18878             $level++;
18879         }
18880     }
18881     return ( $level, $msg );
18882 }
18883
18884 sub dump_token_types {
18885     my $class = shift;
18886     my $fh    = shift;
18887
18888     # This should be the latest list of token types in use
18889     # adding NEW_TOKENS: add a comment here
18890     print $fh <<'END_OF_LIST';
18891
18892 Here is a list of the token types currently used for lines of type 'CODE'.
18893 For the following tokens, the "type" of a token is just the token itself.
18894
18895 .. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
18896 ( ) <= >= == =~ !~ != ++ -- /= x=
18897 ... **= <<= >>= &&= ||= <=>
18898 , + - / * | % ! x ~ = \ ? : . < > ^ &
18899
18900 The following additional token types are defined:
18901
18902  type    meaning
18903     b    blank (white space)
18904     {    indent: opening structural curly brace or square bracket or paren
18905          (code block, anonymous hash reference, or anonymous array reference)
18906     }    outdent: right structural curly brace or square bracket or paren
18907     [    left non-structural square bracket (enclosing an array index)
18908     ]    right non-structural square bracket
18909     (    left non-structural paren (all but a list right of an =)
18910     )    right non-structural parena
18911     L    left non-structural curly brace (enclosing a key)
18912     R    right non-structural curly brace
18913     ;    terminal semicolon
18914     f    indicates a semicolon in a "for" statement
18915     h    here_doc operator <<
18916     #    a comment
18917     Q    indicates a quote or pattern
18918     q    indicates a qw quote block
18919     k    a perl keyword
18920     C    user-defined constant or constant function (with void prototype = ())
18921     U    user-defined function taking parameters
18922     G    user-defined function taking block parameter (like grep/map/eval)
18923     M    (unused, but reserved for subroutine definition name)
18924     P    (unused, but -html uses it to label pod text)
18925     t    type indicater such as %,$,@,*,&,sub
18926     w    bare word (perhaps a subroutine call)
18927     i    identifier of some type (with leading %, $, @, *, &, sub, -> )
18928     n    a number
18929     v    a v-string
18930     F    a file test operator (like -e)
18931     Y    File handle
18932     Z    identifier in indirect object slot: may be file handle, object
18933     J    LABEL:  code block label
18934     j    LABEL after next, last, redo, goto
18935     p    unary +
18936     m    unary -
18937     pp   pre-increment operator ++
18938     mm   pre-decrement operator --
18939     A    : used as attribute separator
18940
18941     Here are the '_line_type' codes used internally:
18942     SYSTEM         - system-specific code before hash-bang line
18943     CODE           - line of perl code (including comments)
18944     POD_START      - line starting pod, such as '=head'
18945     POD            - pod documentation text
18946     POD_END        - last line of pod section, '=cut'
18947     HERE           - text of here-document
18948     HERE_END       - last line of here-doc (target word)
18949     FORMAT         - format section
18950     FORMAT_END     - last line of format section, '.'
18951     DATA_START     - __DATA__ line
18952     DATA           - unidentified text following __DATA__
18953     END_START      - __END__ line
18954     END            - unidentified text following __END__
18955     ERROR          - we are in big trouble, probably not a perl script
18956 END_OF_LIST
18957 }
18958
18959 # This is a currently unused debug routine
18960 sub dump_functions {
18961
18962     my $fh = *STDOUT;
18963     my ( $pkg, $sub );
18964     foreach $pkg ( keys %is_user_function ) {
18965         print $fh "\nnon-constant subs in package $pkg\n";
18966
18967         foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
18968             my $msg = "";
18969             if ( $is_block_list_function{$pkg}{$sub} ) {
18970                 $msg = 'block_list';
18971             }
18972
18973             if ( $is_block_function{$pkg}{$sub} ) {
18974                 $msg = 'block';
18975             }
18976             print $fh "$sub $msg\n";
18977         }
18978     }
18979
18980     foreach $pkg ( keys %is_constant ) {
18981         print $fh "\nconstants and constant subs in package $pkg\n";
18982
18983         foreach $sub ( keys %{ $is_constant{$pkg} } ) {
18984             print $fh "$sub\n";
18985         }
18986     }
18987 }
18988
18989 sub prepare_for_a_new_file {
18990     $saw_negative_indentation = 0;
18991     $id_scan_state            = '';
18992     $statement_type           = '';     # '' or 'use' or 'sub..' or 'case..'
18993     $last_nonblank_token      = ';';    # the only possible starting state which
18994     $last_nonblank_type       = ';';    # will make a leading brace a code block
18995     $last_nonblank_block_type = '';
18996     $last_nonblank_container_type      = '';
18997     $last_nonblank_type_sequence       = '';
18998     $last_last_nonblank_token          = ';';
18999     $last_last_nonblank_type           = ';';
19000     $last_last_nonblank_block_type     = '';
19001     $last_last_nonblank_container_type = '';
19002     $last_last_nonblank_type_sequence  = '';
19003     $last_nonblank_prototype           = "";
19004     $identifier                        = '';
19005     $in_quote   = 0;     # flag telling if we are chasing a quote, and what kind
19006     $quote_type = 'Q';
19007     $quote_character = "";    # character we seek if chasing a quote
19008     $quote_pos   = 0;  # next character index to check for case of alphanum char
19009     $quote_depth = 0;
19010     $allowed_quote_modifiers                     = "";
19011     $paren_depth                                 = 0;
19012     $brace_depth                                 = 0;
19013     $square_bracket_depth                        = 0;
19014     $current_package                             = "main";
19015     @current_depth[ 0 .. $#closing_brace_names ] =
19016       (0) x scalar @closing_brace_names;
19017     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
19018       ( 0 .. $#closing_brace_names );
19019     @current_sequence_number = ();
19020
19021     $paren_type[$paren_depth]            = '';
19022     $paren_semicolon_count[$paren_depth] = 0;
19023     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
19024     $brace_structural_type[$brace_depth]                   = '';
19025     $brace_statement_type[$brace_depth]                    = "";
19026     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
19027     $paren_structural_type[$brace_depth]                   = '';
19028     $square_bracket_type[$square_bracket_depth]            = '';
19029     $square_bracket_structural_type[$square_bracket_depth] = '';
19030     $brace_package[$paren_depth]                           = $current_package;
19031     %is_constant                      = ();             # user-defined constants
19032     %is_user_function                 = ();             # user-defined functions
19033     %user_function_prototype          = ();             # their prototypes
19034     %is_block_function                = ();
19035     %is_block_list_function           = ();
19036     %saw_function_definition          = ();
19037     $unexpected_error_count           = 0;
19038     $want_paren                       = "";
19039     $context                          = UNKNOWN_CONTEXT;
19040     @slevel_stack                     = ();
19041     $ci_string_in_tokenizer           = "";
19042     $continuation_string_in_tokenizer = "0";
19043     $in_statement_continuation        = 0;
19044     @lower_case_labels_at             = ();
19045     $saw_v_string         = 0;      # for warning of v-strings on older perl
19046     $nesting_token_string = "";
19047     $nesting_type_string  = "";
19048     $nesting_block_string = '1';    # initially in a block
19049     $nesting_block_flag   = 1;
19050     $nesting_list_string  = '0';    # initially not in a list
19051     $nesting_list_flag    = 0;      # initially not in a list
19052     $nearly_matched_here_target_at = undef;
19053 }
19054
19055 sub get_quote_target {
19056     return matching_end_token($quote_character);
19057 }
19058
19059 sub get_indentation_level {
19060     return $level_in_tokenizer;
19061 }
19062
19063 sub reset_indentation_level {
19064     $level_in_tokenizer  = $_[0];
19065     $slevel_in_tokenizer = $_[0];
19066     push @slevel_stack, $slevel_in_tokenizer;
19067 }
19068
19069 {    # begin tokenize_this_line
19070
19071     use constant BRACE          => 0;
19072     use constant SQUARE_BRACKET => 1;
19073     use constant PAREN          => 2;
19074     use constant QUESTION_COLON => 3;
19075
19076     my (
19077         $block_type,      $container_type,       $expecting,
19078         $here_doc_target, $here_quote_character, $i,
19079         $i_tok,           $last_nonblank_i,      $next_tok,
19080         $next_type,       $prototype,            $rtoken_map,
19081         $rtoken_type,     $rtokens,              $tok,
19082         $type,            $type_sequence,
19083     );
19084
19085     my @output_token_list     = ();    # stack of output token indexes
19086     my @output_token_type     = ();    # token types
19087     my @output_block_type     = ();    # types of code block
19088     my @output_container_type = ();    # paren types, such as if, elsif, ..
19089     my @output_type_sequence  = ();    # nesting sequential number
19090
19091     my @here_target_list = ();         # list of here-doc target strings
19092
19093     # ------------------------------------------------------------
19094     # beginning of various scanner interfaces to simplify coding
19095     # ------------------------------------------------------------
19096     sub scan_bare_identifier {
19097         ( $i, $tok, $type, $prototype ) =
19098           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
19099             $rtoken_map );
19100     }
19101
19102     sub scan_identifier {
19103         ( $i, $tok, $type, $id_scan_state, $identifier ) =
19104           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens );
19105     }
19106
19107     sub scan_id {
19108         ( $i, $tok, $type, $id_scan_state ) =
19109           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
19110             $id_scan_state );
19111     }
19112
19113     my $number;
19114
19115     sub scan_number {
19116         ( $i, $type, $number ) =
19117           scan_number_do( $input_line, $i, $rtoken_map, $type );
19118     }
19119
19120     # a sub to warn if token found where term expected
19121     sub error_if_expecting_TERM {
19122         if ( $expecting == TERM ) {
19123             if ( $really_want_term{$last_nonblank_type} ) {
19124                 unexpected( $tok, "term", $i_tok, $last_nonblank_i );
19125                 1;
19126             }
19127         }
19128     }
19129
19130     # a sub to warn if token found where operator expected
19131     sub error_if_expecting_OPERATOR {
19132         if ( $expecting == OPERATOR ) {
19133             my $thing = defined $_[0] ? $_[0] : $tok;
19134             unexpected( $thing, "operator", $i_tok, $last_nonblank_i );
19135             if ( $i_tok == 0 ) {
19136                 interrupt_logfile();
19137                 warning("Missing ';' above?\n");
19138                 resume_logfile();
19139             }
19140             1;
19141         }
19142     }
19143
19144     # ------------------------------------------------------------
19145     # end scanner interfaces
19146     # ------------------------------------------------------------
19147
19148     my %is_for_foreach;
19149     @_ = qw(for foreach);
19150     @is_for_foreach{@_} = (1) x scalar(@_);
19151
19152     my %is_my_our;
19153     @_ = qw(my our);
19154     @is_my_our{@_} = (1) x scalar(@_);
19155
19156     # These keywords may introduce blocks after parenthesized expressions,
19157     # in the form:
19158     # keyword ( .... ) { BLOCK }
19159     # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
19160     my %is_blocktype_with_paren;
19161     @_ = qw(if elsif unless while until for foreach switch case given when);
19162     @is_blocktype_with_paren{@_} = (1) x scalar(@_);
19163
19164     # ------------------------------------------------------------
19165     # begin hash of code for handling most token types
19166     # ------------------------------------------------------------
19167     my $tokenization_code = {
19168
19169         # no special code for these types yet, but syntax checks
19170         # could be added
19171
19172 ##      '!'   => undef,
19173 ##      '!='  => undef,
19174 ##      '!~'  => undef,
19175 ##      '%='  => undef,
19176 ##      '&&=' => undef,
19177 ##      '&='  => undef,
19178 ##      '+='  => undef,
19179 ##      '-='  => undef,
19180 ##      '..'  => undef,
19181 ##      '..'  => undef,
19182 ##      '...' => undef,
19183 ##      '.='  => undef,
19184 ##      '<<=' => undef,
19185 ##      '<='  => undef,
19186 ##      '<=>' => undef,
19187 ##      '<>'  => undef,
19188 ##      '='   => undef,
19189 ##      '=='  => undef,
19190 ##      '=~'  => undef,
19191 ##      '>='  => undef,
19192 ##      '>>'  => undef,
19193 ##      '>>=' => undef,
19194 ##      '\\'  => undef,
19195 ##      '^='  => undef,
19196 ##      '|='  => undef,
19197 ##      '||=' => undef,
19198 ##      '~'   => undef,
19199
19200         '>' => sub {
19201             error_if_expecting_TERM()
19202               if ( $expecting == TERM );
19203         },
19204         '|' => sub {
19205             error_if_expecting_TERM()
19206               if ( $expecting == TERM );
19207         },
19208         '$' => sub {
19209
19210             # start looking for a scalar
19211             error_if_expecting_OPERATOR("Scalar")
19212               if ( $expecting == OPERATOR );
19213             scan_identifier();
19214
19215             if ( $identifier eq '$^W' ) {
19216                 $tokenizer_self->{_saw_perl_dash_w} = 1;
19217             }
19218
19219             # Check for indentifier in indirect object slot
19220             # (vorboard.pl, sort.t).  Something like:
19221             #   /^(print|printf|sort|exec|system)$/
19222             if (
19223                 $is_indirect_object_taker{$last_nonblank_token}
19224
19225                 || ( ( $last_nonblank_token eq '(' )
19226                     && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
19227                 || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
19228               )
19229             {
19230                 $type = 'Z';
19231             }
19232         },
19233         '(' => sub {
19234
19235             ++$paren_depth;
19236             $paren_semicolon_count[$paren_depth] = 0;
19237             if ($want_paren) {
19238                 $container_type = $want_paren;
19239                 $want_paren     = "";
19240             }
19241             else {
19242                 $container_type = $last_nonblank_token;
19243
19244                 # We can check for a syntax error here of unexpected '(',
19245                 # but this is going to get messy...
19246                 if (
19247                     $expecting == OPERATOR
19248
19249                     # be sure this is not a method call of the form
19250                     # &method(...), $method->(..), &{method}(...),
19251                     # $ref[2](list) is ok & short for $ref[2]->(list)
19252                     # NOTE: at present, braces in something like &{ xxx }
19253                     # are not marked as a block, we might have a method call
19254                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
19255
19256                   )
19257                 {
19258
19259                     # ref: camel 3 p 703.
19260                     if ( $last_last_nonblank_token eq 'do' ) {
19261                         complain(
19262 "do SUBROUTINE is deprecated; consider & or -> notation\n"
19263                         );
19264                     }
19265                     else {
19266
19267                         # if this is an empty list, (), then it is not an
19268                         # error; for example, we might have a constant pi and
19269                         # invoke it with pi() or just pi;
19270                         my ( $next_nonblank_token, $i_next ) =
19271                           find_next_nonblank_token( $i, $rtokens );
19272                         if ( $next_nonblank_token ne ')' ) {
19273                             my $hint;
19274                             error_if_expecting_OPERATOR('(');
19275
19276                             if ( $last_nonblank_type eq 'C' ) {
19277                                 $hint =
19278                                   "$last_nonblank_token has a void prototype\n";
19279                             }
19280                             elsif ( $last_nonblank_type eq 'i' ) {
19281                                 if (   $i_tok > 0
19282                                     && $last_nonblank_token =~ /^\$/ )
19283                                 {
19284                                     $hint =
19285 "Do you mean '$last_nonblank_token->(' ?\n";
19286                                 }
19287                             }
19288                             if ($hint) {
19289                                 interrupt_logfile();
19290                                 warning($hint);
19291                                 resume_logfile();
19292                             }
19293                         } ## end if ( $next_nonblank_token...
19294                     } ## end else [ if ( $last_last_nonblank_token...
19295                 } ## end if ( $expecting == OPERATOR...
19296             }
19297             $paren_type[$paren_depth] = $container_type;
19298             $type_sequence = increase_nesting_depth( PAREN, $i_tok );
19299
19300             # propagate types down through nested parens
19301             # for example: the second paren in 'if ((' would be structural
19302             # since the first is.
19303
19304             if ( $last_nonblank_token eq '(' ) {
19305                 $type = $last_nonblank_type;
19306             }
19307
19308             #     We exclude parens as structural after a ',' because it
19309             #     causes subtle problems with continuation indentation for
19310             #     something like this, where the first 'or' will not get
19311             #     indented.
19312             #
19313             #         assert(
19314             #             __LINE__,
19315             #             ( not defined $check )
19316             #               or ref $check
19317             #               or $check eq "new"
19318             #               or $check eq "old",
19319             #         );
19320             #
19321             #     Likewise, we exclude parens where a statement can start
19322             #     because of problems with continuation indentation, like
19323             #     these:
19324             #
19325             #         ($firstline =~ /^#\!.*perl/)
19326             #         and (print $File::Find::name, "\n")
19327             #           and (return 1);
19328             #
19329             #         (ref($usage_fref) =~ /CODE/)
19330             #         ? &$usage_fref
19331             #           : (&blast_usage, &blast_params, &blast_general_params);
19332
19333             else {
19334                 $type = '{';
19335             }
19336
19337             if ( $last_nonblank_type eq ')' ) {
19338                 warning(
19339                     "Syntax error? found token '$last_nonblank_type' then '('\n"
19340                 );
19341             }
19342             $paren_structural_type[$paren_depth] = $type;
19343
19344         },
19345         ')' => sub {
19346             $type_sequence = decrease_nesting_depth( PAREN, $i_tok );
19347
19348             if ( $paren_structural_type[$paren_depth] eq '{' ) {
19349                 $type = '}';
19350             }
19351
19352             $container_type = $paren_type[$paren_depth];
19353
19354             #    /^(for|foreach)$/
19355             if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
19356                 my $num_sc = $paren_semicolon_count[$paren_depth];
19357                 if ( $num_sc > 0 && $num_sc != 2 ) {
19358                     warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
19359                 }
19360             }
19361
19362             if ( $paren_depth > 0 ) { $paren_depth-- }
19363         },
19364         ',' => sub {
19365             if ( $last_nonblank_type eq ',' ) {
19366                 complain("Repeated ','s \n");
19367             }
19368 ##                FIXME: need to move this elsewhere, perhaps check after a '('
19369 ##                elsif ($last_nonblank_token eq '(') {
19370 ##                    warning("Leading ','s illegal in some versions of perl\n");
19371 ##                }
19372         },
19373         ';' => sub {
19374             $context        = UNKNOWN_CONTEXT;
19375             $statement_type = '';
19376
19377             #    /^(for|foreach)$/
19378             if ( $is_for_foreach{ $paren_type[$paren_depth] } )
19379             {    # mark ; in for loop
19380
19381                 # Be careful: we do not want a semicolon such as the
19382                 # following to be included:
19383                 #
19384                 #    for (sort {strcoll($a,$b);} keys %investments) {
19385
19386                 if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
19387                     && $square_bracket_depth ==
19388                     $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
19389                 {
19390
19391                     $type = 'f';
19392                     $paren_semicolon_count[$paren_depth]++;
19393                 }
19394             }
19395
19396         },
19397         '"' => sub {
19398             error_if_expecting_OPERATOR("String")
19399               if ( $expecting == OPERATOR );
19400             $in_quote                = 1;
19401             $type                    = 'Q';
19402             $allowed_quote_modifiers = "";
19403         },
19404         "'" => sub {
19405             error_if_expecting_OPERATOR("String")
19406               if ( $expecting == OPERATOR );
19407             $in_quote                = 1;
19408             $type                    = 'Q';
19409             $allowed_quote_modifiers = "";
19410         },
19411         '`' => sub {
19412             error_if_expecting_OPERATOR("String")
19413               if ( $expecting == OPERATOR );
19414             $in_quote                = 1;
19415             $type                    = 'Q';
19416             $allowed_quote_modifiers = "";
19417         },
19418         '/' => sub {
19419             my $is_pattern;
19420
19421             if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
19422                 my $msg;
19423                 ( $is_pattern, $msg ) =
19424                   guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );
19425
19426                 if ($msg) {
19427                     write_diagnostics("DIVIDE:$msg\n");
19428                     write_logfile_entry($msg);
19429                 }
19430             }
19431             else { $is_pattern = ( $expecting == TERM ) }
19432
19433             if ($is_pattern) {
19434                 $in_quote                = 1;
19435                 $type                    = 'Q';
19436                 $allowed_quote_modifiers = '[cgimosx]';
19437             }
19438             else {    # not a pattern; check for a /= token
19439
19440                 if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
19441                     $i++;
19442                     $tok  = '/=';
19443                     $type = $tok;
19444                 }
19445
19446                 #DEBUG - collecting info on what tokens follow a divide
19447                 # for development of guessing algorithm
19448                 #if ( numerator_expected( $i, $rtokens ) < 0 ) {
19449                 #    #write_diagnostics( "DIVIDE? $input_line\n" );
19450                 #}
19451             }
19452         },
19453         '{' => sub {
19454
19455             # if we just saw a ')', we will label this block with
19456             # its type.  We need to do this to allow sub
19457             # code_block_type to determine if this brace starts a
19458             # code block or anonymous hash.  (The type of a paren
19459             # pair is the preceding token, such as 'if', 'else',
19460             # etc).
19461             $container_type = "";
19462
19463             # ATTRS: for a '{' following an attribute list, reset
19464             # things to look like we just saw the sub name
19465             if ( $statement_type =~ /^sub/ ) {
19466                 $last_nonblank_token = $statement_type;
19467                 $last_nonblank_type  = 'i';
19468                 $statement_type      = "";
19469             }
19470
19471             # patch for SWITCH/CASE: hide these keywords from an immediately
19472             # following opening brace
19473             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
19474                 && $statement_type eq $last_nonblank_token )
19475             {
19476                 $last_nonblank_token = ";";
19477             }
19478
19479             elsif ( $last_nonblank_token eq ')' ) {
19480                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
19481
19482                 # defensive move in case of a nesting error (pbug.t)
19483                 # in which this ')' had no previous '('
19484                 # this nesting error will have been caught
19485                 if ( !defined($last_nonblank_token) ) {
19486                     $last_nonblank_token = 'if';
19487                 }
19488
19489                 # check for syntax error here;
19490                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
19491                     my $list = join( ' ', sort keys %is_blocktype_with_paren );
19492                     warning(
19493                         "syntax error at ') {', didn't see one of: $list\n");
19494                 }
19495             }
19496
19497             # patch for paren-less for/foreach glitch, part 2.
19498             # see note below under 'qw'
19499             elsif ($last_nonblank_token eq 'qw'
19500                 && $is_for_foreach{$want_paren} )
19501             {
19502                 $last_nonblank_token = $want_paren;
19503                 if ( $last_last_nonblank_token eq $want_paren ) {
19504                     warning(
19505 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
19506                     );
19507
19508                 }
19509                 $want_paren = "";
19510             }
19511
19512             # now identify which of the three possible types of
19513             # curly braces we have: hash index container, anonymous
19514             # hash reference, or code block.
19515
19516             # non-structural (hash index) curly brace pair
19517             # get marked 'L' and 'R'
19518             if ( is_non_structural_brace() ) {
19519                 $type = 'L';
19520
19521                 # patch for SWITCH/CASE:
19522                 # allow paren-less identifier after 'when'
19523                 # if the brace is preceded by a space
19524                 if (   $statement_type eq 'when'
19525                     && $last_nonblank_type      eq 'i'
19526                     && $last_last_nonblank_type eq 'k'
19527                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
19528                 {
19529                     $type       = '{';
19530                     $block_type = $statement_type;
19531                 }
19532             }
19533
19534             # code and anonymous hash have the same type, '{', but are
19535             # distinguished by 'block_type',
19536             # which will be blank for an anonymous hash
19537             else {
19538
19539                 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type );
19540
19541                 # patch to promote bareword type to function taking block
19542                 if (   $block_type
19543                     && $last_nonblank_type eq 'w'
19544                     && $last_nonblank_i >= 0 )
19545                 {
19546                     if ( $output_token_type[$last_nonblank_i] eq 'w' ) {
19547                         $output_token_type[$last_nonblank_i] = 'G';
19548                     }
19549                 }
19550
19551                 # patch for SWITCH/CASE: if we find a stray opening block brace
19552                 # where we might accept a 'case' or 'when' block, then take it
19553                 if (   $statement_type eq 'case'
19554                     || $statement_type eq 'when' )
19555                 {
19556                     if ( !$block_type || $block_type eq '}' ) {
19557                         $block_type = $statement_type;
19558                     }
19559                 }
19560             }
19561             $brace_type[ ++$brace_depth ] = $block_type;
19562             $brace_package[$brace_depth] = $current_package;
19563             $type_sequence = increase_nesting_depth( BRACE, $i_tok );
19564             $brace_structural_type[$brace_depth] = $type;
19565             $brace_context[$brace_depth]         = $context;
19566             $brace_statement_type[$brace_depth]  = $statement_type;
19567         },
19568         '}' => sub {
19569             $block_type = $brace_type[$brace_depth];
19570             if ($block_type) { $statement_type = '' }
19571             if ( defined( $brace_package[$brace_depth] ) ) {
19572                 $current_package = $brace_package[$brace_depth];
19573             }
19574
19575             # can happen on brace error (caught elsewhere)
19576             else {
19577             }
19578             $type_sequence = decrease_nesting_depth( BRACE, $i_tok );
19579
19580             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
19581                 $type = 'R';
19582             }
19583
19584             # propagate type information for 'do' and 'eval' blocks.
19585             # This is necessary to enable us to know if an operator
19586             # or term is expected next
19587             if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
19588                 $tok = $brace_type[$brace_depth];
19589             }
19590
19591             $context        = $brace_context[$brace_depth];
19592             $statement_type = $brace_statement_type[$brace_depth];
19593             if ( $brace_depth > 0 ) { $brace_depth--; }
19594         },
19595         '&' => sub {    # maybe sub call? start looking
19596
19597             # We have to check for sub call unless we are sure we
19598             # are expecting an operator.  This example from s2p
19599             # got mistaken as a q operator in an early version:
19600             #   print BODY &q(<<'EOT');
19601             if ( $expecting != OPERATOR ) {
19602                 scan_identifier();
19603             }
19604             else {
19605             }
19606         },
19607         '<' => sub {    # angle operator or less than?
19608
19609             if ( $expecting != OPERATOR ) {
19610                 ( $i, $type ) =
19611                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
19612                     $expecting );
19613
19614             }
19615             else {
19616             }
19617         },
19618         '?' => sub {    # ?: conditional or starting pattern?
19619
19620             my $is_pattern;
19621
19622             if ( $expecting == UNKNOWN ) {
19623
19624                 my $msg;
19625                 ( $is_pattern, $msg ) =
19626                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map );
19627
19628                 if ($msg) { write_logfile_entry($msg) }
19629             }
19630             else { $is_pattern = ( $expecting == TERM ) }
19631
19632             if ($is_pattern) {
19633                 $in_quote                = 1;
19634                 $type                    = 'Q';
19635                 $allowed_quote_modifiers = '[cgimosx]';    # TBD:check this
19636             }
19637             else {
19638
19639                 $type_sequence =
19640                   increase_nesting_depth( QUESTION_COLON, $i_tok );
19641             }
19642         },
19643         '*' => sub {    # typeglob, or multiply?
19644
19645             if ( $expecting == TERM ) {
19646                 scan_identifier();
19647             }
19648             else {
19649
19650                 if ( $$rtokens[ $i + 1 ] eq '=' ) {
19651                     $tok  = '*=';
19652                     $type = $tok;
19653                     $i++;
19654                 }
19655                 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
19656                     $tok  = '**';
19657                     $type = $tok;
19658                     $i++;
19659                     if ( $$rtokens[ $i + 1 ] eq '=' ) {
19660                         $tok  = '**=';
19661                         $type = $tok;
19662                         $i++;
19663                     }
19664                 }
19665             }
19666         },
19667         '.' => sub {    # what kind of . ?
19668
19669             if ( $expecting != OPERATOR ) {
19670                 scan_number();
19671                 if ( $type eq '.' ) {
19672                     error_if_expecting_TERM()
19673                       if ( $expecting == TERM );
19674                 }
19675             }
19676             else {
19677             }
19678         },
19679         ':' => sub {
19680
19681             # if this is the first nonblank character, call it a label
19682             # since perl seems to just swallow it
19683             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
19684                 $type = 'J';
19685             }
19686
19687             # ATTRS: check for a ':' which introduces an attribute list
19688             # (this might eventually get its own token type)
19689             elsif ( $statement_type =~ /^sub/ ) {
19690                 $type = 'A';
19691             }
19692
19693             # check for scalar attribute, such as
19694             # my $foo : shared = 1;
19695             elsif ($is_my_our{$statement_type}
19696                 && $current_depth[QUESTION_COLON] == 0 )
19697             {
19698                 $type = 'A';
19699             }
19700
19701             # otherwise, it should be part of a ?/: operator
19702             else {
19703                 $type_sequence =
19704                   decrease_nesting_depth( QUESTION_COLON, $i_tok );
19705                 if ( $last_nonblank_token eq '?' ) {
19706                     warning("Syntax error near ? :\n");
19707                 }
19708             }
19709         },
19710         '+' => sub {    # what kind of plus?
19711
19712             if ( $expecting == TERM ) {
19713                 scan_number();
19714
19715                 # unary plus is safest assumption if not a number
19716                 if ( !defined($number) ) { $type = 'p'; }
19717             }
19718             elsif ( $expecting == OPERATOR ) {
19719             }
19720             else {
19721                 if ( $next_type eq 'w' ) { $type = 'p' }
19722             }
19723         },
19724         '@' => sub {
19725
19726             error_if_expecting_OPERATOR("Array")
19727               if ( $expecting == OPERATOR );
19728             scan_identifier();
19729         },
19730         '%' => sub {    # hash or modulo?
19731
19732             # first guess is hash if no following blank
19733             if ( $expecting == UNKNOWN ) {
19734                 if ( $next_type ne 'b' ) { $expecting = TERM }
19735             }
19736             if ( $expecting == TERM ) {
19737                 scan_identifier();
19738             }
19739         },
19740         '[' => sub {
19741             $square_bracket_type[ ++$square_bracket_depth ] =
19742               $last_nonblank_token;
19743             $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok );
19744
19745             # It may seem odd, but structural square brackets have
19746             # type '{' and '}'.  This simplifies the indentation logic.
19747             if ( !is_non_structural_brace() ) {
19748                 $type = '{';
19749             }
19750             $square_bracket_structural_type[$square_bracket_depth] = $type;
19751         },
19752         ']' => sub {
19753             $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok );
19754
19755             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
19756             {
19757                 $type = '}';
19758             }
19759             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
19760         },
19761         '-' => sub {    # what kind of minus?
19762
19763             if ( ( $expecting != OPERATOR )
19764                 && $is_file_test_operator{$next_tok} )
19765             {
19766                 $i++;
19767                 $tok .= $next_tok;
19768                 $type = 'F';
19769             }
19770             elsif ( $expecting == TERM ) {
19771                 scan_number();
19772
19773                 # maybe part of bareword token? unary is safest
19774                 if ( !defined($number) ) { $type = 'm'; }
19775
19776             }
19777             elsif ( $expecting == OPERATOR ) {
19778             }
19779             else {
19780
19781                 if ( $next_type eq 'w' ) {
19782                     $type = 'm';
19783                 }
19784             }
19785         },
19786
19787         '^' => sub {
19788
19789             # check for special variables like ${^WARNING_BITS}
19790             if ( $expecting == TERM ) {
19791
19792                 # FIXME: this should work but will not catch errors
19793                 # because we also have to be sure that previous token is
19794                 # a type character ($,@,%).
19795                 if ( $last_nonblank_token eq '{'
19796                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
19797                 {
19798
19799                     if ( $next_tok eq 'W' ) {
19800                         $tokenizer_self->{_saw_perl_dash_w} = 1;
19801                     }
19802                     $tok  = $tok . $next_tok;
19803                     $i    = $i + 1;
19804                     $type = 'w';
19805                 }
19806
19807                 else {
19808                     unless ( error_if_expecting_TERM() ) {
19809
19810                         # Something like this is valid but strange:
19811                         # undef ^I;
19812                         complain("The '^' seems unusual here\n");
19813                     }
19814                 }
19815             }
19816         },
19817
19818         '::' => sub {    # probably a sub call
19819             scan_bare_identifier();
19820         },
19821         '<<' => sub {    # maybe a here-doc?
19822             return
19823               unless ( $i < $max_token_index )
19824               ;          # here-doc not possible if end of line
19825
19826             if ( $expecting != OPERATOR ) {
19827                 my ($found_target);
19828                 ( $found_target, $here_doc_target, $here_quote_character, $i ) =
19829                   find_here_doc( $expecting, $i, $rtokens, $rtoken_map );
19830
19831                 if ($found_target) {
19832                     push @here_target_list,
19833                       [ $here_doc_target, $here_quote_character ];
19834                     $type = 'h';
19835                     if ( length($here_doc_target) > 80 ) {
19836                         my $truncated = substr( $here_doc_target, 0, 80 );
19837                         complain("Long here-target: '$truncated' ...\n");
19838                     }
19839                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
19840                         complain(
19841                             "Unconventional here-target: '$here_doc_target'\n"
19842                         );
19843                     }
19844                 }
19845                 elsif ( $expecting == TERM ) {
19846
19847                     # shouldn't happen..
19848                     warning("Program bug; didn't find here doc target\n");
19849                     report_definite_bug();
19850                 }
19851             }
19852             else {
19853             }
19854         },
19855         '->' => sub {
19856
19857             # if -> points to a bare word, we must scan for an identifier,
19858             # otherwise something like ->y would look like the y operator
19859             scan_identifier();
19860         },
19861
19862         # type = 'pp' for pre-increment, '++' for post-increment
19863         '++' => sub {
19864             if ( $expecting == TERM ) { $type = 'pp' }
19865             elsif ( $expecting == UNKNOWN ) {
19866                 my ( $next_nonblank_token, $i_next ) =
19867                   find_next_nonblank_token( $i, $rtokens );
19868                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
19869             }
19870         },
19871
19872         '=>' => sub {
19873             if ( $last_nonblank_type eq $tok ) {
19874                 complain("Repeated '=>'s \n");
19875             }
19876         },
19877
19878         # type = 'mm' for pre-decrement, '--' for post-decrement
19879         '--' => sub {
19880
19881             if ( $expecting == TERM ) { $type = 'mm' }
19882             elsif ( $expecting == UNKNOWN ) {
19883                 my ( $next_nonblank_token, $i_next ) =
19884                   find_next_nonblank_token( $i, $rtokens );
19885                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
19886             }
19887         },
19888
19889         '&&' => sub {
19890             error_if_expecting_TERM()
19891               if ( $expecting == TERM );
19892         },
19893
19894         '||' => sub {
19895             error_if_expecting_TERM()
19896               if ( $expecting == TERM );
19897         },
19898     };
19899
19900     # ------------------------------------------------------------
19901     # end hash of code for handling individual token types
19902     # ------------------------------------------------------------
19903
19904     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
19905
19906     # These block types terminate statements and do not need a trailing
19907     # semicolon
19908     # patched for SWITCH/CASE:
19909     my %is_zero_continuation_block_type;
19910     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
19911       if elsif else unless while until for foreach switch case given when);
19912     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
19913
19914     my %is_not_zero_continuation_block_type;
19915     @_ = qw(sort grep map do eval);
19916     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
19917
19918     my %is_logical_container;
19919     @_ = qw(if elsif unless while and or not && !  || for foreach);
19920     @is_logical_container{@_} = (1) x scalar(@_);
19921
19922     my %is_binary_type;
19923     @_ = qw(|| &&);
19924     @is_binary_type{@_} = (1) x scalar(@_);
19925
19926     my %is_binary_keyword;
19927     @_ = qw(and or eq ne cmp);
19928     @is_binary_keyword{@_} = (1) x scalar(@_);
19929
19930     # 'L' is token for opening { at hash key
19931     my %is_opening_type;
19932     @_ = qw" L { ( [ ";
19933     @is_opening_type{@_} = (1) x scalar(@_);
19934
19935     # 'R' is token for closing } at hash key
19936     my %is_closing_type;
19937     @_ = qw" R } ) ] ";
19938     @is_closing_type{@_} = (1) x scalar(@_);
19939
19940     my %is_redo_last_next_goto;
19941     @_ = qw(redo last next goto);
19942     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
19943
19944     my %is_use_require;
19945     @_ = qw(use require);
19946     @is_use_require{@_} = (1) x scalar(@_);
19947
19948     my %is_sub_package;
19949     @_ = qw(sub package);
19950     @is_sub_package{@_} = (1) x scalar(@_);
19951
19952     # This hash holds the hash key in $tokenizer_self for these keywords:
19953     my %is_format_END_DATA = (
19954         'format'   => '_in_format',
19955         '__END__'  => '_in_end',
19956         '__DATA__' => '_in_data',
19957     );
19958
19959     # ref: camel 3 p 147,
19960     # but perl may accept undocumented flags
19961     my %quote_modifiers = (
19962         's'  => '[cegimosx]',
19963         'y'  => '[cds]',
19964         'tr' => '[cds]',
19965         'm'  => '[cgimosx]',
19966         'qr' => '[imosx]',
19967         'q'  => "",
19968         'qq' => "",
19969         'qw' => "",
19970         'qx' => "",
19971     );
19972
19973     # table showing how many quoted things to look for after quote operator..
19974     # s, y, tr have 2 (pattern and replacement)
19975     # others have 1 (pattern only)
19976     my %quote_items = (
19977         's'  => 2,
19978         'y'  => 2,
19979         'tr' => 2,
19980         'm'  => 1,
19981         'qr' => 1,
19982         'q'  => 1,
19983         'qq' => 1,
19984         'qw' => 1,
19985         'qx' => 1,
19986     );
19987
19988     sub tokenize_this_line {
19989
19990   # This routine breaks a line of perl code into tokens which are of use in
19991   # indentation and reformatting.  One of my goals has been to define tokens
19992   # such that a newline may be inserted between any pair of tokens without
19993   # changing or invalidating the program. This version comes close to this,
19994   # although there are necessarily a few exceptions which must be caught by
19995   # the formatter.  Many of these involve the treatment of bare words.
19996   #
19997   # The tokens and their types are returned in arrays.  See previous
19998   # routine for their names.
19999   #
20000   # See also the array "valid_token_types" in the BEGIN section for an
20001   # up-to-date list.
20002   #
20003   # To simplify things, token types are either a single character, or they
20004   # are identical to the tokens themselves.
20005   #
20006   # As a debugging aid, the -D flag creates a file containing a side-by-side
20007   # comparison of the input string and its tokenization for each line of a file.
20008   # This is an invaluable debugging aid.
20009   #
20010   # In addition to tokens, and some associated quantities, the tokenizer
20011   # also returns flags indication any special line types.  These include
20012   # quotes, here_docs, formats.
20013   #
20014   # -----------------------------------------------------------------------
20015   #
20016   # How to add NEW_TOKENS:
20017   #
20018   # New token types will undoubtedly be needed in the future both to keep up
20019   # with changes in perl and to help adapt the tokenizer to other applications.
20020   #
20021   # Here are some notes on the minimal steps.  I wrote these notes while
20022   # adding the 'v' token type for v-strings, which are things like version
20023   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
20024   # can use your editor to search for the string "NEW_TOKENS" to find the
20025   # appropriate sections to change):
20026   #
20027   # *. Try to talk somebody else into doing it!  If not, ..
20028   #
20029   # *. Make a backup of your current version in case things don't work out!
20030   #
20031   # *. Think of a new, unused character for the token type, and add to
20032   # the array @valid_token_types in the BEGIN section of this package.
20033   # For example, I used 'v' for v-strings.
20034   #
20035   # *. Implement coding to recognize the $type of the token in this routine.
20036   # This is the hardest part, and is best done by immitating or modifying
20037   # some of the existing coding.  For example, to recognize v-strings, I
20038   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
20039   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
20040   #
20041   # *. Update sub operator_expected.  This update is critically important but
20042   # the coding is trivial.  Look at the comments in that routine for help.
20043   # For v-strings, which should behave like numbers, I just added 'v' to the
20044   # regex used to handle numbers and strings (types 'n' and 'Q').
20045   #
20046   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
20047   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
20048   # skip this step and take the default at first, then adjust later to get
20049   # desired results.  For adding type 'v', I looked at sub bond_strength and
20050   # saw that number type 'n' was using default strengths, so I didn't do
20051   # anything.  I may tune it up someday if I don't like the way line
20052   # breaks with v-strings look.
20053   #
20054   # *. Implement a 'whitespace' rule in sub set_white_space_flag in
20055   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
20056   # and saw that type 'n' used spaces on both sides, so I just added 'v'
20057   # to the array @spaces_both_sides.
20058   #
20059   # *. Update HtmlWriter package so that users can colorize the token as
20060   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
20061   # that package.  For v-strings, I initially chose to use a default color
20062   # equal to the default for numbers, but it might be nice to change that
20063   # eventually.
20064   #
20065   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
20066   #
20067   # *. Run lots and lots of debug tests.  Start with special files designed
20068   # to test the new token type.  Run with the -D flag to create a .DEBUG
20069   # file which shows the tokenization.  When these work ok, test as many old
20070   # scripts as possible.  Start with all of the '.t' files in the 'test'
20071   # directory of the distribution file.  Compare .tdy output with previous
20072   # version and updated version to see the differences.  Then include as
20073   # many more files as possible. My own technique has been to collect a huge
20074   # number of perl scripts (thousands!) into one directory and run perltidy
20075   # *, then run diff between the output of the previous version and the
20076   # current version.
20077   #
20078   # -----------------------------------------------------------------------
20079
20080         my $line_of_tokens = shift;
20081         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
20082
20083         # patch while coding change is underway
20084         # make callers private data to allow access
20085         # $tokenizer_self = $caller_tokenizer_self;
20086
20087         # extract line number for use in error messages
20088         $input_line_number = $line_of_tokens->{_line_number};
20089
20090         # check for pod documentation
20091         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
20092
20093             # must not be in multi-line quote
20094             # and must not be in an eqn
20095             if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
20096             {
20097                 $tokenizer_self->{_in_pod} = 1;
20098                 return;
20099             }
20100         }
20101
20102         $input_line = $untrimmed_input_line;
20103
20104         chomp $input_line;
20105
20106         # trim start of this line unless we are continuing a quoted line
20107         # do not trim end because we might end in a quote (test: deken4.pl)
20108         # Perl::Tidy::Formatter will delete needless trailing blanks
20109         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
20110             $input_line =~ s/^\s*//;    # trim left end
20111         }
20112
20113         # re-initialize for the main loop
20114         @output_token_list     = ();    # stack of output token indexes
20115         @output_token_type     = ();    # token types
20116         @output_block_type     = ();    # types of code block
20117         @output_container_type = ();    # paren types, such as if, elsif, ..
20118         @output_type_sequence  = ();    # nesting sequential number
20119
20120         $tok             = $last_nonblank_token;
20121         $type            = $last_nonblank_type;
20122         $prototype       = $last_nonblank_prototype;
20123         $last_nonblank_i = -1;
20124         $block_type      = $last_nonblank_block_type;
20125         $container_type  = $last_nonblank_container_type;
20126         $type_sequence   = $last_nonblank_type_sequence;
20127         @here_target_list = ();         # list of here-doc target strings
20128
20129         $peeked_ahead = 0;
20130
20131         # tokenization is done in two stages..
20132         # stage 1 is a very simple pre-tokenization
20133         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
20134
20135         # a little optimization for a full-line comment
20136         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
20137             $max_tokens_wanted = 1    # no use tokenizing a comment
20138         }
20139
20140         # start by breaking the line into pre-tokens
20141         ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
20142           pre_tokenize( $input_line, $max_tokens_wanted );
20143
20144         $max_token_index = scalar(@$rpretokens) - 1;
20145         push( @$rpretokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
20146         push( @$rpretoken_map,  0,   0,   0 );     # shouldn't be referenced
20147         push( @$rpretoken_type, 'b', 'b', 'b' );
20148
20149         # temporary copies while coding change is underway
20150         ( $rtokens, $rtoken_map, $rtoken_type ) =
20151           ( $rpretokens, $rpretoken_map, $rpretoken_type );
20152
20153         # initialize for main loop
20154         for $i ( 0 .. $max_token_index + 3 ) {
20155             $output_token_type[$i]     = "";
20156             $output_block_type[$i]     = "";
20157             $output_container_type[$i] = "";
20158             $output_type_sequence[$i]  = "";
20159         }
20160         $i     = -1;
20161         $i_tok = -1;
20162
20163         # ------------------------------------------------------------
20164         # begin main tokenization loop
20165         # ------------------------------------------------------------
20166
20167         # we are looking at each pre-token of one line and combining them
20168         # into tokens
20169         while ( ++$i <= $max_token_index ) {
20170
20171             if ($in_quote) {    # continue looking for end of a quote
20172                 $type = $quote_type;
20173
20174                 unless (@output_token_list) {  # initialize if continuation line
20175                     push( @output_token_list, $i );
20176                     $output_token_type[$i] = $type;
20177
20178                 }
20179                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
20180
20181                 # scan for the end of the quote or pattern
20182                 ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
20183                   do_quote( $i, $in_quote, $quote_character, $quote_pos,
20184                     $quote_depth, $rtokens, $rtoken_map );
20185
20186                 # all done if we didn't find it
20187                 last if ($in_quote);
20188
20189                 # re-initialize for next search
20190                 $quote_character = '';
20191                 $quote_pos       = 0;
20192                 $quote_type      = 'Q';
20193                 last if ( ++$i > $max_token_index );
20194
20195                 # look for any modifiers
20196                 if ($allowed_quote_modifiers) {
20197
20198                     # check for exact quote modifiers
20199                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
20200                         my $str = $$rtokens[$i];
20201                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) { }
20202
20203                         if ( defined( pos($str) ) ) {
20204
20205                             # matched
20206                             if ( pos($str) == length($str) ) {
20207                                 last if ( ++$i > $max_token_index );
20208                             }
20209
20210                             # Looks like a joined quote modifier
20211                             # and keyword, maybe something like
20212                             # s/xxx/yyy/gefor @k=...
20213                             # Example is "galgen.pl".  Would have to split
20214                             # the word and insert a new token in the
20215                             # pre-token list.  This is so rare that I haven't
20216                             # done it.  Will just issue a warning citation.
20217
20218                             # This error might also be triggered if my quote
20219                             # modifier characters are incomplete
20220                             else {
20221                                 warning(<<EOM);
20222
20223 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
20224 Please put a space between quote modifiers and trailing keywords.
20225 EOM
20226
20227                            # print "token $$rtokens[$i]\n";
20228                            # my $num = length($str) - pos($str);
20229                            # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
20230                            # print "continuing with new token $$rtokens[$i]\n";
20231
20232                                 # skipping past this token does least damage
20233                                 last if ( ++$i > $max_token_index );
20234                             }
20235                         }
20236                         else {
20237
20238                             # example file: rokicki4.pl
20239                             # This error might also be triggered if my quote
20240                             # modifier characters are incomplete
20241                             write_logfile_entry(
20242 "Note: found word $str at quote modifier location\n"
20243                             );
20244                         }
20245                     }
20246
20247                     # re-initialize
20248                     $allowed_quote_modifiers = "";
20249                 }
20250             }
20251
20252             unless ( $tok =~ /^\s*$/ ) {
20253
20254                 # try to catch some common errors
20255                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
20256
20257                     if ( $last_nonblank_token eq 'eq' ) {
20258                         complain("Should 'eq' be '==' here ?\n");
20259                     }
20260                     elsif ( $last_nonblank_token eq 'ne' ) {
20261                         complain("Should 'ne' be '!=' here ?\n");
20262                     }
20263                 }
20264
20265                 $last_last_nonblank_token          = $last_nonblank_token;
20266                 $last_last_nonblank_type           = $last_nonblank_type;
20267                 $last_last_nonblank_block_type     = $last_nonblank_block_type;
20268                 $last_last_nonblank_container_type =
20269                   $last_nonblank_container_type;
20270                 $last_last_nonblank_type_sequence =
20271                   $last_nonblank_type_sequence;
20272                 $last_nonblank_token          = $tok;
20273                 $last_nonblank_type           = $type;
20274                 $last_nonblank_prototype      = $prototype;
20275                 $last_nonblank_block_type     = $block_type;
20276                 $last_nonblank_container_type = $container_type;
20277                 $last_nonblank_type_sequence  = $type_sequence;
20278                 $last_nonblank_i              = $i_tok;
20279             }
20280
20281             # store previous token type
20282             if ( $i_tok >= 0 ) {
20283                 $output_token_type[$i_tok]     = $type;
20284                 $output_block_type[$i_tok]     = $block_type;
20285                 $output_container_type[$i_tok] = $container_type;
20286                 $output_type_sequence[$i_tok]  = $type_sequence;
20287             }
20288             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
20289             my $pre_type = $$rtoken_type[$i];    # and type
20290             $tok  = $pre_tok;
20291             $type = $pre_type;                   # to be modified as necessary
20292             $block_type = "";    # blank for all tokens except code block braces
20293             $container_type = "";    # blank for all tokens except some parens
20294             $type_sequence  = "";    # blank for all tokens except ?/:
20295             $prototype = "";    # blank for all tokens except user defined subs
20296             $i_tok     = $i;
20297
20298             # this pre-token will start an output token
20299             push( @output_token_list, $i_tok );
20300
20301             # continue gathering identifier if necessary
20302             # but do not start on blanks and comments
20303             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
20304
20305                 if ( $id_scan_state =~ /^(sub|package)/ ) {
20306                     scan_id();
20307                 }
20308                 else {
20309                     scan_identifier();
20310                 }
20311
20312                 last if ($id_scan_state);
20313                 next if ( ( $i > 0 ) || $type );
20314
20315                 # didn't find any token; start over
20316                 $type = $pre_type;
20317                 $tok  = $pre_tok;
20318             }
20319
20320             # handle whitespace tokens..
20321             next if ( $type eq 'b' );
20322             my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
20323             my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
20324
20325             # Build larger tokens where possible, since we are not in a quote.
20326             #
20327             # First try to assemble digraphs.  The following tokens are
20328             # excluded and handled specially:
20329             # '/=' is excluded because the / might start a pattern.
20330             # 'x=' is excluded since it might be $x=, with $ on previous line
20331             # '**' and *= might be typeglobs of punctuation variables
20332             # I have allowed tokens starting with <, such as <=,
20333             # because I don't think these could be valid angle operators.
20334             # test file: storrs4.pl
20335             my $test_tok = $tok . $$rtokens[ $i + 1 ];
20336
20337             if (
20338                 $is_digraph{$test_tok}
20339                 && ( $test_tok ne '/=' )    # might be pattern
20340                 && ( $test_tok ne 'x=' )    # might be $x
20341                 && ( $test_tok ne '**' )    # typeglob?
20342                 && ( $test_tok ne '*=' )    # typeglob?
20343               )
20344             {
20345                 $tok = $test_tok;
20346                 $i++;
20347
20348                 # Now try to assemble trigraphs.  Note that all possible
20349                 # perl trigraphs can be constructed by appending a character
20350                 # to a digraph.
20351                 $test_tok = $tok . $$rtokens[ $i + 1 ];
20352
20353                 if ( $is_trigraph{$test_tok} ) {
20354                     $tok = $test_tok;
20355                     $i++;
20356                 }
20357             }
20358             $type      = $tok;
20359             $next_tok  = $$rtokens[ $i + 1 ];
20360             $next_type = $$rtoken_type[ $i + 1 ];
20361
20362             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
20363                 local $" = ')(';
20364                 my @debug_list = (
20365                     $last_nonblank_token,      $tok,
20366                     $next_tok,                 $brace_depth,
20367                     $brace_type[$brace_depth], $paren_depth,
20368                     $paren_type[$paren_depth]
20369                 );
20370                 print "TOKENIZE:(@debug_list)\n";
20371             };
20372
20373             ###############################################################
20374             # We have the next token, $tok.
20375             # Now we have to examine this token and decide what it is
20376             # and define its $type
20377             #
20378             # section 1: bare words
20379             ###############################################################
20380
20381             if ( $pre_type eq 'w' ) {
20382                 $expecting = operator_expected( $prev_type, $tok, $next_type );
20383                 my ( $next_nonblank_token, $i_next ) =
20384                   find_next_nonblank_token( $i, $rtokens );
20385
20386                 # quote a word followed by => operator
20387                 if ( $next_nonblank_token eq '=' ) {
20388
20389                     if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
20390                         if ( $is_constant{$current_package}{$tok} ) {
20391                             $type = 'C';
20392                         }
20393                         elsif ( $is_user_function{$current_package}{$tok} ) {
20394                             $type      = 'U';
20395                             $prototype =
20396                               $user_function_prototype{$current_package}{$tok};
20397                         }
20398                         elsif ( $tok =~ /^v\d+$/ ) {
20399                             $type = 'v';
20400                             unless ($saw_v_string) { report_v_string($tok) }
20401                         }
20402                         else { $type = 'w' }
20403
20404                         next;
20405                     }
20406                 }
20407
20408                 # quote a bare word within braces..like xxx->{s}; note that we
20409                 # must be sure this is not a structural brace, to avoid
20410                 # mistaking {s} in the following for a quoted bare word:
20411                 #     for(@[){s}bla}BLA}
20412                 if (   ( $last_nonblank_type eq 'L' )
20413                     && ( $next_nonblank_token eq '}' ) )
20414                 {
20415                     $type = 'w';
20416                     next;
20417                 }
20418
20419                 # a bare word immediately followed by :: is not a keyword;
20420                 # use $tok_kw when testing for keywords to avoid a mistake
20421                 my $tok_kw = $tok;
20422                 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
20423                 {
20424                     $tok_kw .= '::';
20425                 }
20426
20427                 # handle operator x (now we know it isn't $x=)
20428                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
20429                     if ( $tok eq 'x' ) {
20430
20431                         if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
20432                             $tok  = 'x=';
20433                             $type = $tok;
20434                             $i++;
20435                         }
20436                         else {
20437                             $type = 'x';
20438                         }
20439                     }
20440
20441                     # FIXME: Patch: mark something like x4 as an integer for now
20442                     # It gets fixed downstream.  This is easier than
20443                     # splitting the pretoken.
20444                     else {
20445                         $type = 'n';
20446                     }
20447                 }
20448
20449                 elsif ( ( $tok eq 'strict' )
20450                     and ( $last_nonblank_token eq 'use' ) )
20451                 {
20452                     $tokenizer_self->{_saw_use_strict} = 1;
20453                     scan_bare_identifier();
20454                 }
20455
20456                 elsif ( ( $tok eq 'warnings' )
20457                     and ( $last_nonblank_token eq 'use' ) )
20458                 {
20459                     $tokenizer_self->{_saw_perl_dash_w} = 1;
20460
20461                     # scan as identifier, so that we pick up something like:
20462                     # use warnings::register
20463                     scan_bare_identifier();
20464                 }
20465
20466                 elsif (
20467                        $tok eq 'AutoLoader'
20468                     && $tokenizer_self->{_look_for_autoloader}
20469                     && (
20470                         $last_nonblank_token eq 'use'
20471
20472                         # these regexes are from AutoSplit.pm, which we want
20473                         # to mimic
20474                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
20475                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
20476                     )
20477                   )
20478                 {
20479                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
20480                     $tokenizer_self->{_saw_autoloader}      = 1;
20481                     $tokenizer_self->{_look_for_autoloader} = 0;
20482                     scan_bare_identifier();
20483                 }
20484
20485                 elsif (
20486                        $tok eq 'SelfLoader'
20487                     && $tokenizer_self->{_look_for_selfloader}
20488                     && (   $last_nonblank_token eq 'use'
20489                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
20490                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
20491                   )
20492                 {
20493                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
20494                     $tokenizer_self->{_saw_selfloader}      = 1;
20495                     $tokenizer_self->{_look_for_selfloader} = 0;
20496                     scan_bare_identifier();
20497                 }
20498
20499                 elsif ( ( $tok eq 'constant' )
20500                     and ( $last_nonblank_token eq 'use' ) )
20501                 {
20502                     scan_bare_identifier();
20503                     my ( $next_nonblank_token, $i_next ) =
20504                       find_next_nonblank_token( $i, $rtokens );
20505
20506                     if ($next_nonblank_token) {
20507
20508                         if ( $is_keyword{$next_nonblank_token} ) {
20509                             warning(
20510 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
20511                             );
20512                         }
20513
20514                         # FIXME: could check for error in which next token is
20515                         # not a word (number, punctuation, ..)
20516                         else {
20517                             $is_constant{$current_package}
20518                               {$next_nonblank_token} = 1;
20519                         }
20520                     }
20521                 }
20522
20523                 # various quote operators
20524                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
20525                     if ( $expecting == OPERATOR ) {
20526
20527                         # patch for paren-less for/foreach glitch, part 1
20528                         # perl will accept this construct as valid:
20529                         #
20530                         #    foreach my $key qw\Uno Due Tres Quadro\ {
20531                         #        print "Set $key\n";
20532                         #    }
20533                         unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
20534                         {
20535                             error_if_expecting_OPERATOR();
20536                         }
20537                     }
20538                     $in_quote                = $quote_items{$tok};
20539                     $allowed_quote_modifiers = $quote_modifiers{$tok};
20540
20541                    # All quote types are 'Q' except possibly qw quotes.
20542                    # qw quotes are special in that they may generally be trimmed
20543                    # of leading and trailing whitespace.  So they are given a
20544                    # separate type, 'q', unless requested otherwise.
20545                     $type =
20546                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
20547                       ? 'q'
20548                       : 'Q';
20549                     $quote_type = $type;
20550                 }
20551
20552                 # check for a statement label
20553                 elsif (
20554                        ( $next_nonblank_token eq ':' )
20555                     && ( $$rtokens[ $i_next + 1 ] ne ':' )
20556                     && ( $i_next <= $max_token_index )    # colon on same line
20557                     && label_ok()
20558                   )
20559                 {
20560                     if ( $tok !~ /A-Z/ ) {
20561                         push @lower_case_labels_at, $input_line_number;
20562                     }
20563                     $type = 'J';
20564                     $tok .= ':';
20565                     $i = $i_next;
20566                     next;
20567                 }
20568
20569                 #      'sub' || 'package'
20570                 elsif ( $is_sub_package{$tok_kw} ) {
20571                     error_if_expecting_OPERATOR()
20572                       if ( $expecting == OPERATOR );
20573                     scan_id();
20574                 }
20575
20576                 # Note on token types for format, __DATA__, __END__:
20577                 # It simplifies things to give these type ';', so that when we
20578                 # start rescanning we will be expecting a token of type TERM.
20579                 # We will switch to type 'k' before outputting the tokens.
20580                 elsif ( $is_format_END_DATA{$tok_kw} ) {
20581                     $type = ';';    # make tokenizer look for TERM next
20582                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
20583                     last;
20584                 }
20585
20586                 elsif ( $is_keyword{$tok_kw} ) {
20587                     $type = 'k';
20588
20589                     # Since for and foreach may not be followed immediately
20590                     # by an opening paren, we have to remember which keyword
20591                     # is associated with the next '('
20592                     if ( $is_for_foreach{$tok} ) {
20593                         if ( new_statement_ok() ) {
20594                             $want_paren = $tok;
20595                         }
20596                     }
20597
20598                     # recognize 'use' statements, which are special
20599                     elsif ( $is_use_require{$tok} ) {
20600                         $statement_type = $tok;
20601                         error_if_expecting_OPERATOR()
20602                           if ( $expecting == OPERATOR );
20603                     }
20604
20605                     # remember my and our to check for trailing ": shared"
20606                     elsif ( $is_my_our{$tok} ) {
20607                         $statement_type = $tok;
20608                     }
20609
20610                     # Check for misplaced 'elsif' and 'else', but allow isolated
20611                     # else or elsif blocks to be formatted.  This is indicated
20612                     # by a last noblank token of ';'
20613                     elsif ( $tok eq 'elsif' ) {
20614                         if (   $last_nonblank_token ne ';'
20615                             && $last_nonblank_block_type !~
20616                             /^(if|elsif|unless)$/ )
20617                         {
20618                             warning(
20619 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
20620                             );
20621                         }
20622                     }
20623                     elsif ( $tok eq 'else' ) {
20624
20625                         # patched for SWITCH/CASE
20626                         if (   $last_nonblank_token ne ';'
20627                             && $last_nonblank_block_type !~
20628                             /^(if|elsif|unless|case|when)$/ )
20629                         {
20630                             warning(
20631 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
20632                             );
20633                         }
20634                     }
20635                     elsif ( $tok eq 'continue' ) {
20636                         if (   $last_nonblank_token ne ';'
20637                             && $last_nonblank_block_type !~
20638                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
20639                         {
20640
20641                             # note: ';' '{' and '}' in list above
20642                             # because continues can follow bare blocks;
20643                             # ':' is labeled block
20644                             warning("'$tok' should follow a block\n");
20645                         }
20646                     }
20647
20648                     # patch for SWITCH/CASE if 'case' and 'when are
20649                     # treated as keywords.
20650                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
20651                         $statement_type = $tok;    # next '{' is block
20652                     }
20653                 }
20654
20655                 # check for inline label following
20656                 #         /^(redo|last|next|goto)$/
20657                 elsif (( $last_nonblank_type eq 'k' )
20658                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
20659                 {
20660                     $type = 'j';
20661                     next;
20662                 }
20663
20664                 # something else --
20665                 else {
20666
20667                     scan_bare_identifier();
20668                     if ( $type eq 'w' ) {
20669
20670                         if ( $expecting == OPERATOR ) {
20671
20672                             # don't complain about possible indirect object
20673                             # notation.
20674                             # For example:
20675                             #   package main;
20676                             #   sub new($) { ... }
20677                             #   $b = new A::;  # calls A::new
20678                             #   $c = new A;    # same thing but suspicious
20679                             # This will call A::new but we have a 'new' in
20680                             # main:: which looks like a constant.
20681                             #
20682                             if ( $last_nonblank_type eq 'C' ) {
20683                                 if ( $tok !~ /::$/ ) {
20684                                     complain(<<EOM);
20685 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
20686        Maybe indirectet object notation?
20687 EOM
20688                                 }
20689                             }
20690                             else {
20691                                 error_if_expecting_OPERATOR("bareword");
20692                             }
20693                         }
20694
20695                         # mark bare words immediately followed by a paren as
20696                         # functions
20697                         $next_tok = $$rtokens[ $i + 1 ];
20698                         if ( $next_tok eq '(' ) {
20699                             $type = 'U';
20700                         }
20701
20702                         # mark bare words following a file test operator as
20703                         # something that will expect an operator next.
20704                         # patch 072901: unless followed immediately by a paren,
20705                         # in which case it must be a function call (pid.t)
20706                         if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) {
20707                             $type = 'C';
20708                         }
20709
20710                         # patch for SWITCH/CASE if 'case' and 'when are
20711                         # not treated as keywords:
20712                         if (
20713                             (
20714                                    $tok                      eq 'case'
20715                                 && $brace_type[$brace_depth] eq 'switch'
20716                             )
20717                             || (   $tok eq 'when'
20718                                 && $brace_type[$brace_depth] eq 'given' )
20719                           )
20720                         {
20721                             $statement_type = $tok;    # next '{' is block
20722                             $type = 'k';    # for keyword syntax coloring
20723                         }
20724
20725                         # patch for SWITCH/CASE if switch and given not keywords
20726                         # Switch is not a perl 5 keyword, but we will gamble
20727                         # and mark switch followed by paren as a keyword.  This
20728                         # is only necessary to get html syntax coloring nice,
20729                         # and does not commit this as being a switch/case.
20730                         if ( $next_nonblank_token eq '('
20731                             && ( $tok eq 'switch' || $tok eq 'given' ) )
20732                         {
20733                             $type = 'k';    # for keyword syntax coloring
20734                         }
20735                     }
20736                 }
20737             }
20738
20739             ###############################################################
20740             # section 2: strings of digits
20741             ###############################################################
20742             elsif ( $pre_type eq 'd' ) {
20743                 $expecting = operator_expected( $prev_type, $tok, $next_type );
20744                 error_if_expecting_OPERATOR("Number")
20745                   if ( $expecting == OPERATOR );
20746                 scan_number();
20747                 if ( !defined($number) ) {
20748
20749                     # shouldn't happen - we should always get a number
20750                     warning("non-number beginning with digit--program bug\n");
20751                     report_definite_bug();
20752                 }
20753             }
20754
20755             ###############################################################
20756             # section 3: all other tokens
20757             ###############################################################
20758
20759             else {
20760                 last if ( $tok eq '#' );
20761                 my $code = $tokenization_code->{$tok};
20762                 if ($code) {
20763                     $expecting =
20764                       operator_expected( $prev_type, $tok, $next_type );
20765                     $code->();
20766                     redo if $in_quote;
20767                 }
20768             }
20769         }
20770
20771         # -----------------------------
20772         # end of main tokenization loop
20773         # -----------------------------
20774
20775         if ( $i_tok >= 0 ) {
20776             $output_token_type[$i_tok]     = $type;
20777             $output_block_type[$i_tok]     = $block_type;
20778             $output_container_type[$i_tok] = $container_type;
20779             $output_type_sequence[$i_tok]  = $type_sequence;
20780         }
20781
20782         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
20783             $last_last_nonblank_token          = $last_nonblank_token;
20784             $last_last_nonblank_type           = $last_nonblank_type;
20785             $last_last_nonblank_block_type     = $last_nonblank_block_type;
20786             $last_last_nonblank_container_type = $last_nonblank_container_type;
20787             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
20788             $last_nonblank_token               = $tok;
20789             $last_nonblank_type                = $type;
20790             $last_nonblank_block_type          = $block_type;
20791             $last_nonblank_container_type      = $container_type;
20792             $last_nonblank_type_sequence       = $type_sequence;
20793             $last_nonblank_prototype           = $prototype;
20794         }
20795
20796         # reset indentation level if necessary at a sub or package
20797         # in an attempt to recover from a nesting error
20798         if ( $level_in_tokenizer < 0 ) {
20799             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
20800                 reset_indentation_level(0);
20801                 brace_warning("resetting level to 0 at $1 $2\n");
20802             }
20803         }
20804
20805         # all done tokenizing this line ...
20806         # now prepare the final list of tokens and types
20807
20808         my @token_type     = ();   # stack of output token types
20809         my @block_type     = ();   # stack of output code block types
20810         my @container_type = ();   # stack of output code container types
20811         my @type_sequence  = ();   # stack of output type sequence numbers
20812         my @tokens         = ();   # output tokens
20813         my @levels         = ();   # structural brace levels of output tokens
20814         my @slevels        = ();   # secondary nesting levels of output tokens
20815         my @nesting_tokens = ();   # string of tokens leading to this depth
20816         my @nesting_types  = ();   # string of token types leading to this depth
20817         my @nesting_blocks = ();   # string of block types leading to this depth
20818         my @nesting_lists  = ();   # string of list types leading to this depth
20819         my @ci_string = ();  # string needed to compute continuation indentation
20820         my @container_environment = ();    # BLOCK or LIST
20821         my $container_environment = '';
20822         my $im                    = -1;    # previous $i value
20823         my $num;
20824         my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
20825
20826 # =head1 Computing Token Indentation
20827 #
20828 #     The final section of the tokenizer forms tokens and also computes
20829 #     parameters needed to find indentation.  It is much easier to do it
20830 #     in the tokenizer than elsewhere.  Here is a brief description of how
20831 #     indentation is computed.  Perl::Tidy computes indentation as the sum
20832 #     of 2 terms:
20833 #
20834 #     (1) structural indentation, such as if/else/elsif blocks
20835 #     (2) continuation indentation, such as long parameter call lists.
20836 #
20837 #     These are occasionally called primary and secondary indentation.
20838 #
20839 #     Structural indentation is introduced by tokens of type '{', although
20840 #     the actual tokens might be '{', '(', or '['.  Structural indentation
20841 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
20842 #     is 4 characters if the standard indentation scheme is used.
20843 #
20844 #     Continuation indentation is introduced whenever a line at BLOCK level
20845 #     is broken before its termination.  Default continuation indentation
20846 #     is 2 characters in the standard indentation scheme.
20847 #
20848 #     Both types of indentation may be nested arbitrarily deep and
20849 #     interlaced.  The distinction between the two is somewhat arbitrary.
20850 #
20851 #     For each token, we will define two variables which would apply if
20852 #     the current statement were broken just before that token, so that
20853 #     that token started a new line:
20854 #
20855 #     $level = the structural indentation level,
20856 #     $ci_level = the continuation indentation level
20857 #
20858 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
20859 #     assuming defaults.  However, in some special cases it is customary
20860 #     to modify $ci_level from this strict value.
20861 #
20862 #     The total structural indentation is easy to compute by adding and
20863 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
20864 #     running value of this variable is $level_in_tokenizer.
20865 #
20866 #     The total continuation is much more difficult to compute, and requires
20867 #     several variables.  These veriables are:
20868 #
20869 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
20870 #       each indentation level, if there are intervening open secondary
20871 #       structures just prior to that level.
20872 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
20873 #       if the last token at that level is "continued", meaning that it
20874 #       is not the first token of an expression.
20875 #     $nesting_block_string = a string of 1's and 0's indicating, for each
20876 #       indentation level, if the level is of type BLOCK or not.
20877 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
20878 #     $nesting_list_string = a string of 1's and 0's indicating, for each
20879 #       indentation level, if it is is appropriate for list formatting.
20880 #       If so, continuation indentation is used to indent long list items.
20881 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
20882 #     @slevel_stack = a stack of total nesting depths at each
20883 #       structural indentation level, where "total nesting depth" means
20884 #       the nesting depth that would occur if every nesting token -- '{', '[',
20885 #       and '(' -- , regardless of context, is used to compute a nesting
20886 #       depth.
20887
20888         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
20889         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
20890
20891         my ( $ci_string_i, $level_i, $nesting_block_string_i,
20892             $nesting_list_string_i, $nesting_token_string_i,
20893             $nesting_type_string_i, );
20894
20895         foreach $i (@output_token_list) {  # scan the list of pre-tokens indexes
20896
20897             # self-checking for valid token types
20898             my $type = $output_token_type[$i];
20899             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
20900             $level_i = $level_in_tokenizer;
20901
20902             # This can happen by running perltidy on non-scripts
20903             # although it could also be bug introduced by programming change.
20904             # Perl silently accepts a 032 (^Z) and takes it as the end
20905             if ( !$is_valid_token_type{$type} ) {
20906                 my $val = ord($type);
20907                 warning(
20908                     "unexpected character decimal $val ($type) in script\n");
20909                 $tokenizer_self->{_in_error} = 1;
20910             }
20911
20912             # ----------------------------------------------------------------
20913             # TOKEN TYPE PATCHES
20914             #  output __END__, __DATA__, and format as type 'k' instead of ';'
20915             # to make html colors correct, etc.
20916             my $fix_type = $type;
20917             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
20918
20919             # output anonymous 'sub' as keyword
20920             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
20921
20922             # -----------------------------------------------------------------
20923
20924             $nesting_token_string_i = $nesting_token_string;
20925             $nesting_type_string_i  = $nesting_type_string;
20926             $nesting_block_string_i = $nesting_block_string;
20927             $nesting_list_string_i  = $nesting_list_string;
20928
20929             # set primary indentation levels based on structural braces
20930             # Note: these are set so that the leading braces have a HIGHER
20931             # level than their CONTENTS, which is convenient for indentation
20932             # Also, define continuation indentation for each token.
20933             if ( $type eq '{' || $type eq 'L' ) {
20934
20935                 # use environment before updating
20936                 $container_environment =
20937                     $nesting_block_flag ? 'BLOCK'
20938                   : $nesting_list_flag  ? 'LIST'
20939                   : "";
20940
20941                 # if the difference between total nesting levels is not 1,
20942                 # there are intervening non-structural nesting types between
20943                 # this '{' and the previous unclosed '{'
20944                 my $intervening_secondary_structure = 0;
20945                 if (@slevel_stack) {
20946                     $intervening_secondary_structure =
20947                       $slevel_in_tokenizer - $slevel_stack[-1];
20948                 }
20949
20950      # =head1 Continuation Indentation
20951      #
20952      # Having tried setting continuation indentation both in the formatter and
20953      # in the tokenizer, I can say that setting it in the tokenizer is much,
20954      # much easier.  The formatter already has too much to do, and can't
20955      # make decisions on line breaks without knowing what 'ci' will be at
20956      # arbitrary locations.
20957      #
20958      # But a problem with setting the continuation indentation (ci) here
20959      # in the tokenizer is that we do not know where line breaks will actually
20960      # be.  As a result, we don't know if we should propagate continuation
20961      # indentation to higher levels of structure.
20962      #
20963      # For nesting of only structural indentation, we never need to do this.
20964      # For example, in a long if statement, like this
20965      #
20966      #   if ( !$output_block_type[$i]
20967      #     && ($in_statement_continuation) )
20968      #   {           <--outdented
20969      #       do_something();
20970      #   }
20971      #
20972      # the second line has ci but we do normally give the lines within the BLOCK
20973      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
20974      #
20975      # But consider something like this, where we have created a break after
20976      # an opening paren on line 1, and the paren is not (currently) a
20977      # structural indentation token:
20978      #
20979      # my $file = $menubar->Menubutton(
20980      #   qw/-text File -underline 0 -menuitems/ => [
20981      #       [
20982      #           Cascade    => '~View',
20983      #           -menuitems => [
20984      #           ...
20985      #
20986      # The second line has ci, so it would seem reasonable to propagate it
20987      # down, giving the third line 1 ci + 1 indentation.  This suggests the
20988      # following rule, which is currently used to propagating ci down: if there
20989      # are any non-structural opening parens (or brackets, or braces), before
20990      # an opening structural brace, then ci is propagated down, and otherwise
20991      # not.  The variable $intervening_secondary_structure contains this
20992      # information for the current token, and the string
20993      # "$ci_string_in_tokenizer" is a stack of previous values of this
20994      # variable.
20995
20996                 # save the current states
20997                 push( @slevel_stack, 1 + $slevel_in_tokenizer );
20998                 $level_in_tokenizer++;
20999
21000                 if ( $output_block_type[$i] ) {
21001                     $nesting_block_flag = 1;
21002                     $nesting_block_string .= '1';
21003                 }
21004                 else {
21005                     $nesting_block_flag = 0;
21006                     $nesting_block_string .= '0';
21007                 }
21008
21009                 # we will use continuation indentation within containers
21010                 # which are not blocks and not logical expressions
21011                 my $bit = 0;
21012                 if ( !$output_block_type[$i] ) {
21013
21014                     # propagate flag down at nested open parens
21015                     if ( $output_container_type[$i] eq '(' ) {
21016                         $bit = 1 if $nesting_list_flag;
21017                     }
21018
21019                   # use list continuation if not a logical grouping
21020                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
21021                     else {
21022                         $bit = 1
21023                           unless
21024                           $is_logical_container{ $output_container_type[$i] };
21025                     }
21026                 }
21027                 $nesting_list_string .= $bit;
21028                 $nesting_list_flag = $bit;
21029
21030                 $ci_string_in_tokenizer .=
21031                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
21032                 $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
21033                 $continuation_string_in_tokenizer .=
21034                   ( $in_statement_continuation > 0 ) ? '1' : '0';
21035
21036    #  Sometimes we want to give an opening brace continuation indentation,
21037    #  and sometimes not.  For code blocks, we don't do it, so that the leading
21038    #  '{' gets outdented, like this:
21039    #
21040    #   if ( !$output_block_type[$i]
21041    #     && ($in_statement_continuation) )
21042    #   {           <--outdented
21043    #
21044    #  For other types, we will give them continuation indentation.  For example,
21045    #  here is how a list looks with the opening paren indented:
21046    #
21047    #     @LoL =
21048    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
21049    #         [ "homer", "marge", "bart" ], );
21050    #
21051    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
21052
21053                 my $total_ci = $ci_string_sum;
21054                 if (
21055                     !$output_block_type[$i]    # patch: skip for BLOCK
21056                     && ($in_statement_continuation)
21057                   )
21058                 {
21059                     $total_ci += $in_statement_continuation
21060                       unless ( $ci_string_in_tokenizer =~ /1$/ );
21061                 }
21062
21063                 $ci_string_i               = $total_ci;
21064                 $in_statement_continuation = 0;
21065             }
21066
21067             elsif ( $type eq '}' || $type eq 'R' ) {
21068
21069                 # only a nesting error in the script would prevent popping here
21070                 if ( @slevel_stack > 1 ) { pop(@slevel_stack); }
21071
21072                 $level_i = --$level_in_tokenizer;
21073
21074                 # restore previous level values
21075                 if ( length($nesting_block_string) > 1 )
21076                 {    # true for valid script
21077                     chop $nesting_block_string;
21078                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
21079                     chop $nesting_list_string;
21080                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
21081
21082                     chop $ci_string_in_tokenizer;
21083                     $ci_string_sum =
21084                       ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
21085
21086                     $in_statement_continuation =
21087                       chop $continuation_string_in_tokenizer;
21088
21089                     # zero continuation flag at terminal BLOCK '}' which
21090                     # ends a statement.
21091                     if ( $output_block_type[$i] ) {
21092
21093                         # ...These include non-anonymous subs
21094                         # note: could be sub ::abc { or sub 'abc
21095                         if ( $output_block_type[$i] =~ m/^sub\s*/gc ) {
21096
21097                          # note: older versions of perl require the /gc modifier
21098                          # here or else the \G does not work.
21099                             if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) {
21100                                 $in_statement_continuation = 0;
21101                             }
21102                         }
21103
21104 # ...and include all block types except user subs with
21105 # block prototypes and these: (sort|grep|map|do|eval)
21106 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
21107                         elsif (
21108                             $is_zero_continuation_block_type{ $output_block_type
21109                                   [$i] } )
21110                         {
21111                             $in_statement_continuation = 0;
21112                         }
21113
21114                         # ..but these are not terminal types:
21115                         #     /^(sort|grep|map|do|eval)$/ )
21116                         elsif (
21117                             $is_not_zero_continuation_block_type{
21118                                 $output_block_type[$i] } )
21119                         {
21120                         }
21121
21122                         # ..and a block introduced by a label
21123                         # /^\w+\s*:$/gc ) {
21124                         elsif ( $output_block_type[$i] =~ /:$/ ) {
21125                             $in_statement_continuation = 0;
21126                         }
21127
21128                         # ..nor user function with block prototype
21129                         else {
21130                         }
21131                     }
21132
21133                     # If we are in a list, then
21134                     # we must set continuatoin indentation at the closing
21135                     # paren of something like this (paren after $check):
21136                     #     assert(
21137                     #         __LINE__,
21138                     #         ( not defined $check )
21139                     #           or ref $check
21140                     #           or $check eq "new"
21141                     #           or $check eq "old",
21142                     #     );
21143                     elsif ( $tok eq ')' ) {
21144                         $in_statement_continuation = 1
21145                           if $output_container_type[$i] =~ /^[;,\{\}]$/;
21146                     }
21147                 }
21148
21149                 # use environment after updating
21150                 $container_environment =
21151                     $nesting_block_flag ? 'BLOCK'
21152                   : $nesting_list_flag  ? 'LIST'
21153                   : "";
21154                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
21155                 $nesting_block_string_i = $nesting_block_string;
21156                 $nesting_list_string_i  = $nesting_list_string;
21157             }
21158
21159             # not a structural indentation type..
21160             else {
21161
21162                 $container_environment =
21163                     $nesting_block_flag ? 'BLOCK'
21164                   : $nesting_list_flag  ? 'LIST'
21165                   : "";
21166
21167                 # zero the continuation indentation at certain tokens so
21168                 # that they will be at the same level as its container.  For
21169                 # commas, this simplifies the -lp indentation logic, which
21170                 # counts commas.  For ?: it makes them stand out.
21171                 if ($nesting_list_flag) {
21172                     if ( $type =~ /^[,\?\:]$/ ) {
21173                         $in_statement_continuation = 0;
21174                     }
21175                 }
21176
21177                 # be sure binary operators get continuation indentation
21178                 if (
21179                     $container_environment
21180                     && (   $type eq 'k' && $is_binary_keyword{$tok}
21181                         || $is_binary_type{$type} )
21182                   )
21183                 {
21184                     $in_statement_continuation = 1;
21185                 }
21186
21187                 # continuation indentation is sum of any open ci from previous
21188                 # levels plus the current level
21189                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
21190
21191                 # update continuation flag ...
21192                 # if this isn't a blank or comment..
21193                 if ( $type ne 'b' && $type ne '#' ) {
21194
21195                     # and we are in a BLOCK
21196                     if ($nesting_block_flag) {
21197
21198                         # the next token after a ';' and label starts a new stmt
21199                         if ( $type eq ';' || $type eq 'J' ) {
21200                             $in_statement_continuation = 0;
21201                         }
21202
21203                         # otherwise, we are continuing the current statement
21204                         else {
21205                             $in_statement_continuation = 1;
21206                         }
21207                     }
21208
21209                     # if we are not in a BLOCK..
21210                     else {
21211
21212                         # do not use continuation indentation if not list
21213                         # environment (could be within if/elsif clause)
21214                         if ( !$nesting_list_flag ) {
21215                             $in_statement_continuation = 0;
21216                         }
21217
21218                        # otherwise, the next token after a ',' starts a new term
21219                         elsif ( $type eq ',' ) {
21220                             $in_statement_continuation = 0;
21221                         }
21222
21223                         # otherwise, we are continuing the current term
21224                         else {
21225                             $in_statement_continuation = 1;
21226                         }
21227                     }
21228                 }
21229             }
21230
21231             if ( $level_in_tokenizer < 0 ) {
21232                 unless ($saw_negative_indentation) {
21233                     $saw_negative_indentation = 1;
21234                     warning("Starting negative indentation\n");
21235                 }
21236             }
21237
21238             # set secondary nesting levels based on all continment token types
21239             # Note: these are set so that the nesting depth is the depth
21240             # of the PREVIOUS TOKEN, which is convenient for setting
21241             # the stength of token bonds
21242             my $slevel_i = $slevel_in_tokenizer;
21243
21244             #    /^[L\{\(\[]$/
21245             if ( $is_opening_type{$type} ) {
21246                 $slevel_in_tokenizer++;
21247                 $nesting_token_string .= $tok;
21248                 $nesting_type_string  .= $type;
21249             }
21250
21251             #       /^[R\}\)\]]$/
21252             elsif ( $is_closing_type{$type} ) {
21253                 $slevel_in_tokenizer--;
21254                 my $char = chop $nesting_token_string;
21255
21256                 if ( $char ne $matching_start_token{$tok} ) {
21257                     $nesting_token_string .= $char . $tok;
21258                     $nesting_type_string  .= $type;
21259                 }
21260                 else {
21261                     chop $nesting_type_string;
21262                 }
21263             }
21264
21265             push( @block_type,            $output_block_type[$i] );
21266             push( @ci_string,             $ci_string_i );
21267             push( @container_environment, $container_environment );
21268             push( @container_type,        $output_container_type[$i] );
21269             push( @levels,                $level_i );
21270             push( @nesting_tokens,        $nesting_token_string_i );
21271             push( @nesting_types,         $nesting_type_string_i );
21272             push( @slevels,               $slevel_i );
21273             push( @token_type,            $fix_type );
21274             push( @type_sequence,         $output_type_sequence[$i] );
21275             push( @nesting_blocks,        $nesting_block_string );
21276             push( @nesting_lists,         $nesting_list_string );
21277
21278             # now form the previous token
21279             if ( $im >= 0 ) {
21280                 $num =
21281                   $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
21282
21283                 if ( $num > 0 ) {
21284                     push( @tokens,
21285                         substr( $input_line, $$rtoken_map[$im], $num ) );
21286                 }
21287             }
21288             $im = $i;
21289         }
21290
21291         $num = length($input_line) - $$rtoken_map[$im];    # make the last token
21292         if ( $num > 0 ) {
21293             push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
21294         }
21295
21296         $tokenizer_self->{_in_quote}          = $in_quote;
21297         $tokenizer_self->{_rhere_target_list} = \@here_target_list;
21298
21299         $line_of_tokens->{_rtoken_type}            = \@token_type;
21300         $line_of_tokens->{_rtokens}                = \@tokens;
21301         $line_of_tokens->{_rblock_type}            = \@block_type;
21302         $line_of_tokens->{_rcontainer_type}        = \@container_type;
21303         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
21304         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
21305         $line_of_tokens->{_rlevels}                = \@levels;
21306         $line_of_tokens->{_rslevels}               = \@slevels;
21307         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
21308         $line_of_tokens->{_rci_levels}             = \@ci_string;
21309         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
21310
21311         return;
21312     }
21313 }    # end tokenize_this_line
21314
21315 sub new_statement_ok {
21316
21317     # return true if the current token can start a new statement
21318
21319     return label_ok()    # a label would be ok here
21320
21321       || $last_nonblank_type eq 'J';    # or we follow a label
21322
21323 }
21324
21325 sub label_ok {
21326
21327     # Decide if a bare word followed by a colon here is a label
21328
21329     # if it follows an opening or closing code block curly brace..
21330     if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
21331         && $last_nonblank_type eq $last_nonblank_token )
21332     {
21333
21334         # it is a label if and only if the curly encloses a code block
21335         return $brace_type[$brace_depth];
21336     }
21337
21338     # otherwise, it is a label if and only if it follows a ';'
21339     # (real or fake)
21340     else {
21341         return ( $last_nonblank_type eq ';' );
21342     }
21343 }
21344
21345 sub code_block_type {
21346
21347     # Decide if this is a block of code, and its type.
21348     # Must be called only when $type = $token = '{'
21349     # The problem is to distinguish between the start of a block of code
21350     # and the start of an anonymous hash reference
21351     # Returns "" if not code block, otherwise returns 'last_nonblank_token'
21352     # to indicate the type of code block.  (For example, 'last_nonblank_token'
21353     # might be 'if' for an if block, 'else' for an else block, etc).
21354
21355     # handle case of multiple '{'s
21356
21357 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
21358
21359     my ( $i, $rtokens, $rtoken_type ) = @_;
21360     if (   $last_nonblank_token eq '{'
21361         && $last_nonblank_type eq $last_nonblank_token )
21362     {
21363
21364         # opening brace where a statement may appear is probably
21365         # a code block but might be and anonymous hash reference
21366         if ( $brace_type[$brace_depth] ) {
21367             return decide_if_code_block( $i, $rtokens, $rtoken_type );
21368         }
21369
21370         # cannot start a code block within an anonymous hash
21371         else {
21372             return "";
21373         }
21374     }
21375
21376     elsif ( $last_nonblank_token eq ';' ) {
21377
21378         # an opening brace where a statement may appear is probably
21379         # a code block but might be and anonymous hash reference
21380         return decide_if_code_block( $i, $rtokens, $rtoken_type );
21381     }
21382
21383     # handle case of '}{'
21384     elsif ($last_nonblank_token eq '}'
21385         && $last_nonblank_type eq $last_nonblank_token )
21386     {
21387
21388         # a } { situation ...
21389         # could be hash reference after code block..(blktype1.t)
21390         if ($last_nonblank_block_type) {
21391             return decide_if_code_block( $i, $rtokens, $rtoken_type );
21392         }
21393
21394         # must be a block if it follows a closing hash reference
21395         else {
21396             return $last_nonblank_token;
21397         }
21398     }
21399
21400     # NOTE: braces after type characters start code blocks, but for
21401     # simplicity these are not identified as such.  See also
21402     # sub is_non_structural_brace.
21403     # elsif ( $last_nonblank_type eq 't' ) {
21404     #    return $last_nonblank_token;
21405     # }
21406
21407     # brace after label:
21408     elsif ( $last_nonblank_type eq 'J' ) {
21409         return $last_nonblank_token;
21410     }
21411
21412 # otherwise, look at previous token.  This must be a code block if
21413 # it follows any of these:
21414 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
21415     elsif ( $is_code_block_token{$last_nonblank_token} ) {
21416         return $last_nonblank_token;
21417     }
21418
21419     # or a sub definition
21420     elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
21421         && $last_nonblank_token =~ /^sub\b/ )
21422     {
21423         return $last_nonblank_token;
21424     }
21425
21426     # user-defined subs with block parameters (like grep/map/eval)
21427     elsif ( $last_nonblank_type eq 'G' ) {
21428         return $last_nonblank_token;
21429     }
21430
21431     # check bareword
21432     elsif ( $last_nonblank_type eq 'w' ) {
21433         return decide_if_code_block( $i, $rtokens, $rtoken_type );
21434     }
21435
21436     # anything else must be anonymous hash reference
21437     else {
21438         return "";
21439     }
21440 }
21441
21442 sub decide_if_code_block {
21443
21444     my ( $i, $rtokens, $rtoken_type ) = @_;
21445     my ( $next_nonblank_token, $i_next ) =
21446       find_next_nonblank_token( $i, $rtokens );
21447
21448     # we are at a '{' where a statement may appear.
21449     # We must decide if this brace starts an anonymous hash or a code
21450     # block.
21451     # return "" if anonymous hash, and $last_nonblank_token otherwise
21452
21453     # initialize to be code BLOCK
21454     my $code_block_type = $last_nonblank_token;
21455
21456     # Check for the common case of an empty anonymous hash reference:
21457     # Maybe something like sub { { } }
21458     if ( $next_nonblank_token eq '}' ) {
21459         $code_block_type = "";
21460     }
21461
21462     else {
21463
21464         # To guess if this '{' is an anonymous hash reference, look ahead
21465         # and test as follows:
21466         #
21467         # it is a hash reference if next come:
21468         #   - a string or digit followed by a comma or =>
21469         #   - bareword followed by =>
21470         # otherwise it is a code block
21471         #
21472         # Examples of anonymous hash ref:
21473         # {'aa',};
21474         # {1,2}
21475         #
21476         # Examples of code blocks:
21477         # {1; print "hello\n", 1;}
21478         # {$a,1};
21479
21480         # We are only going to look ahead one more (nonblank/comment) line.
21481         # Strange formatting could cause a bad guess, but that's unlikely.
21482         my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
21483         my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
21484         my ( $rpre_tokens, $rpre_types ) =
21485           peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
21486                                                        # generous, and prevents
21487                                                        # wasting lots of
21488                                                        # time in mangled files
21489         if ( defined($rpre_types) && @$rpre_types ) {
21490             push @pre_types,  @$rpre_types;
21491             push @pre_tokens, @$rpre_tokens;
21492         }
21493
21494         # put a sentinal token to simplify stopping the search
21495         push @pre_types, '}';
21496
21497         my $jbeg = 0;
21498         $jbeg = 1 if $pre_types[0] eq 'b';
21499
21500         # first look for one of these
21501         #  - bareword
21502         #  - bareword with leading -
21503         #  - digit
21504         #  - quoted string
21505         my $j = $jbeg;
21506         if ( $pre_types[$j] =~ /^[\'\"]/ ) {
21507
21508             # find the closing quote; don't worry about escapes
21509             my $quote_mark = $pre_types[$j];
21510             for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
21511                 if ( $pre_types[$k] eq $quote_mark ) {
21512                     $j = $k + 1;
21513                     my $next = $pre_types[$j];
21514                     last;
21515                 }
21516             }
21517         }
21518         elsif ( $pre_types[$j] eq 'd' ) {
21519             $j++;
21520         }
21521         elsif ( $pre_types[$j] eq 'w' ) {
21522             unless ( $is_keyword{ $pre_tokens[$j] } ) {
21523                 $j++;
21524             }
21525         }
21526         elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
21527             $j++;
21528         }
21529         if ( $j > $jbeg ) {
21530
21531             $j++ if $pre_types[$j] eq 'b';
21532
21533             # it's a hash ref if a comma or => follow next
21534             if ( $pre_types[$j] eq ','
21535                 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
21536             {
21537                 $code_block_type = "";
21538             }
21539         }
21540     }
21541
21542     return $code_block_type;
21543 }
21544
21545 sub unexpected {
21546
21547     # report unexpected token type and show where it is
21548     my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;
21549     $unexpected_error_count++;
21550     if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
21551         my $msg = "found $found where $expecting expected";
21552         my $pos = $$rpretoken_map[$i_tok];
21553         interrupt_logfile();
21554         my ( $offset, $numbered_line, $underline ) =
21555           make_numbered_line( $input_line_number, $input_line, $pos );
21556         $underline = write_on_underline( $underline, $pos - $offset, '^' );
21557
21558         my $trailer = "";
21559         if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
21560             my $pos_prev = $$rpretoken_map[$last_nonblank_i];
21561             my $num;
21562             if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
21563                 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
21564             }
21565             else {
21566                 $num = $pos - $pos_prev;
21567             }
21568             if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
21569
21570             $underline =
21571               write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
21572             $trailer = " (previous token underlined)";
21573         }
21574         warning( $numbered_line . "\n" );
21575         warning( $underline . "\n" );
21576         warning( $msg . $trailer . "\n" );
21577         resume_logfile();
21578     }
21579 }
21580
21581 sub indicate_error {
21582     my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
21583     interrupt_logfile();
21584     warning($msg);
21585     write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
21586     resume_logfile();
21587 }
21588
21589 sub write_error_indicator_pair {
21590     my ( $line_number, $input_line, $pos, $carrat ) = @_;
21591     my ( $offset, $numbered_line, $underline ) =
21592       make_numbered_line( $line_number, $input_line, $pos );
21593     $underline = write_on_underline( $underline, $pos - $offset, $carrat );
21594     warning( $numbered_line . "\n" );
21595     $underline =~ s/\s*$//;
21596     warning( $underline . "\n" );
21597 }
21598
21599 sub make_numbered_line {
21600
21601     #  Given an input line, its line number, and a character position of
21602     #  interest, create a string not longer than 80 characters of the form
21603     #     $lineno: sub_string
21604     #  such that the sub_string of $str contains the position of interest
21605     #
21606     #  Here is an example of what we want, in this case we add trailing
21607     #  '...' because the line is long.
21608     #
21609     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
21610     #
21611     #  Here is another example, this time in which we used leading '...'
21612     #  because of excessive length:
21613     #
21614     # 2: ... er of the World Wide Web Consortium's
21615     #
21616     #  input parameters are:
21617     #   $lineno = line number
21618     #   $str = the text of the line
21619     #   $pos = position of interest (the error) : 0 = first character
21620     #
21621     #   We return :
21622     #     - $offset = an offset which corrects the position in case we only
21623     #       display part of a line, such that $pos-$offset is the effective
21624     #       position from the start of the displayed line.
21625     #     - $numbered_line = the numbered line as above,
21626     #     - $underline = a blank 'underline' which is all spaces with the same
21627     #       number of characters as the numbered line.
21628
21629     my ( $lineno, $str, $pos ) = @_;
21630     my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
21631     my $excess = length($str) - $offset - 68;
21632     my $numc   = ( $excess > 0 ) ? 68 : undef;
21633
21634     if ( defined($numc) ) {
21635         if ( $offset == 0 ) {
21636             $str = substr( $str, $offset, $numc - 4 ) . " ...";
21637         }
21638         else {
21639             $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
21640         }
21641     }
21642     else {
21643
21644         if ( $offset == 0 ) {
21645         }
21646         else {
21647             $str = "... " . substr( $str, $offset + 4 );
21648         }
21649     }
21650
21651     my $numbered_line = sprintf( "%d: ", $lineno );
21652     $offset -= length($numbered_line);
21653     $numbered_line .= $str;
21654     my $underline = " " x length($numbered_line);
21655     return ( $offset, $numbered_line, $underline );
21656 }
21657
21658 sub write_on_underline {
21659
21660     # The "underline" is a string that shows where an error is; it starts
21661     # out as a string of blanks with the same length as the numbered line of
21662     # code above it, and we have to add marking to show where an error is.
21663     # In the example below, we want to write the string '--^' just below
21664     # the line of bad code:
21665     #
21666     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
21667     #                 ---^
21668     # We are given the current underline string, plus a position and a
21669     # string to write on it.
21670     #
21671     # In the above example, there will be 2 calls to do this:
21672     # First call:  $pos=19, pos_chr=^
21673     # Second call: $pos=16, pos_chr=---
21674     #
21675     # This is a trivial thing to do with substr, but there is some
21676     # checking to do.
21677
21678     my ( $underline, $pos, $pos_chr ) = @_;
21679
21680     # check for error..shouldn't happen
21681     unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
21682         return $underline;
21683     }
21684     my $excess = length($pos_chr) + $pos - length($underline);
21685     if ( $excess > 0 ) {
21686         $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
21687     }
21688     substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
21689     return ($underline);
21690 }
21691
21692 sub is_non_structural_brace {
21693
21694     # Decide if a brace or bracket is structural or non-structural
21695     # by looking at the previous token and type
21696
21697     # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
21698     # Tentatively deactivated because it caused the wrong operator expectation
21699     # for this code:
21700     #      $user = @vars[1] / 100;
21701     # Must update sub operator_expected before re-implementing.
21702     # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
21703     #    return 0;
21704     # }
21705
21706     # NOTE: braces after type characters start code blocks, but for
21707     # simplicity these are not identified as such.  See also
21708     # sub code_block_type
21709     # if ($last_nonblank_type eq 't') {return 0}
21710
21711     # otherwise, it is non-structural if it is decorated
21712     # by type information.
21713     # For example, the '{' here is non-structural:   ${xxx}
21714     (
21715         $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
21716
21717           # or if we follow a hash or array closing curly brace or bracket
21718           # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
21719           # because the first '}' would have been given type 'R'
21720           || $last_nonblank_type =~ /^([R\]])$/
21721     );
21722 }
21723
21724 sub operator_expected {
21725
21726     # Many perl symbols have two or more meanings.  For example, '<<'
21727     # can be a shift operator or a here-doc operator.  The
21728     # interpretation of these symbols depends on the current state of
21729     # the tokenizer, which may either be expecting a term or an
21730     # operator.  For this example, a << would be a shift if an operator
21731     # is expected, and a here-doc if a term is expected.  This routine
21732     # is called to make this decision for any current token.  It returns
21733     # one of three possible values:
21734     #
21735     #     OPERATOR - operator expected (or at least, not a term)
21736     #     UNKNOWN  - can't tell
21737     #     TERM     - a term is expected (or at least, not an operator)
21738     #
21739     # The decision is based on what has been seen so far.  This
21740     # information is stored in the "$last_nonblank_type" and
21741     # "$last_nonblank_token" variables.  For example, if the
21742     # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
21743     # if $last_nonblank_type is 'n' (numeric), we are expecting an
21744     # OPERATOR.
21745     #
21746     # If a UNKNOWN is returned, the calling routine must guess. A major
21747     # goal of this tokenizer is to minimize the possiblity of returning
21748     # UNKNOWN, because a wrong guess can spoil the formatting of a
21749     # script.
21750     #
21751     # adding NEW_TOKENS: it is critically important that this routine be
21752     # updated to allow it to determine if an operator or term is to be
21753     # expected after the new token.  Doing this simply involves adding
21754     # the new token character to one of the regexes in this routine or
21755     # to one of the hash lists
21756     # that it uses, which are initialized in the BEGIN section.
21757
21758     my ( $prev_type, $tok, $next_type ) = @_;
21759     my $op_expected = UNKNOWN;
21760
21761 # Note: function prototype is available for token type 'U' for future
21762 # program development.  It contains the leading and trailing parens,
21763 # and no blanks.  It might be used to eliminate token type 'C', for
21764 # example (prototype = '()'). Thus:
21765 # if ($last_nonblank_type eq 'U') {
21766 #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
21767 # }
21768
21769     # A possible filehandle (or object) requires some care...
21770     if ( $last_nonblank_type eq 'Z' ) {
21771
21772         # angle.t
21773         if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
21774             $op_expected = UNKNOWN;
21775         }
21776
21777         # For possible file handle like "$a", Perl uses weird parsing rules.
21778         # For example:
21779         # print $a/2,"/hi";   - division
21780         # print $a / 2,"/hi"; - division
21781         # print $a/ 2,"/hi";  - division
21782         # print $a /2,"/hi";  - pattern (and error)!
21783         elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
21784             $op_expected = TERM;
21785         }
21786
21787         # Note when an operation is being done where a
21788         # filehandle might be expected, since a change in whitespace
21789         # could change the interpretation of the statement.
21790         else {
21791             if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
21792                 complain("operator in print statement not recommended\n");
21793                 $op_expected = OPERATOR;
21794             }
21795         }
21796     }
21797
21798     # handle something after 'do' and 'eval'
21799     elsif ( $is_block_operator{$last_nonblank_token} ) {
21800
21801         # something like $a = eval "expression";
21802         #                          ^
21803         if ( $last_nonblank_type eq 'k' ) {
21804             $op_expected = TERM;    # expression or list mode following keyword
21805         }
21806
21807         # something like $a = do { BLOCK } / 2;
21808         #                                  ^
21809         else {
21810             $op_expected = OPERATOR;    # block mode following }
21811         }
21812     }
21813
21814     # handle bare word..
21815     elsif ( $last_nonblank_type eq 'w' ) {
21816
21817         # unfortunately, we can't tell what type of token to expect next
21818         # after most bare words
21819         $op_expected = UNKNOWN;
21820     }
21821
21822     # operator, but not term possible after these types
21823     # Note: moved ')' from type to token because parens in list context
21824     # get marked as '{' '}' now.  This is a minor glitch in the following:
21825     #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
21826     #
21827     elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
21828         || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
21829     {
21830         $op_expected = OPERATOR;
21831
21832         # in a 'use' statement, numbers and v-strings are not really
21833         # numbers, so to avoid incorrect error messages, we will
21834         # mark them as unknown for now (use.t)
21835         if (   ( $statement_type eq 'use' )
21836             && ( $last_nonblank_type =~ /^[nv]$/ ) )
21837         {
21838             $op_expected = UNKNOWN;
21839         }
21840     }
21841
21842     # no operator after many keywords, such as "die", "warn", etc
21843     elsif ( $expecting_term_token{$last_nonblank_token} ) {
21844         $op_expected = TERM;
21845     }
21846
21847     # no operator after things like + - **  (i.e., other operators)
21848     elsif ( $expecting_term_types{$last_nonblank_type} ) {
21849         $op_expected = TERM;
21850     }
21851
21852     # a few operators, like "time", have an empty prototype () and so
21853     # take no parameters but produce a value to operate on
21854     elsif ( $expecting_operator_token{$last_nonblank_token} ) {
21855         $op_expected = OPERATOR;
21856     }
21857
21858     # post-increment and decrement produce values to be operated on
21859     elsif ( $expecting_operator_types{$last_nonblank_type} ) {
21860         $op_expected = OPERATOR;
21861     }
21862
21863     # no value to operate on after sub block
21864     elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
21865
21866     # a right brace here indicates the end of a simple block.
21867     # all non-structural right braces have type 'R'
21868     # all braces associated with block operator keywords have been given those
21869     # keywords as "last_nonblank_token" and caught above.
21870     # (This statement is order dependent, and must come after checking
21871     # $last_nonblank_token).
21872     elsif ( $last_nonblank_type eq '}' ) {
21873         $op_expected = TERM;
21874     }
21875
21876     # something else..what did I forget?
21877     else {
21878
21879         # collecting diagnostics on unknown operator types..see what was missed
21880         $op_expected = UNKNOWN;
21881         write_diagnostics(
21882 "OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
21883         );
21884     }
21885
21886     TOKENIZER_DEBUG_FLAG_EXPECT && do {
21887         print
21888 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
21889     };
21890     return $op_expected;
21891 }
21892
21893 # The following routines keep track of nesting depths of the nesting
21894 # types, ( [ { and ?.  This is necessary for determining the indentation
21895 # level, and also for debugging programs.  Not only do they keep track of
21896 # nesting depths of the individual brace types, but they check that each
21897 # of the other brace types is balanced within matching pairs.  For
21898 # example, if the program sees this sequence:
21899 #
21900 #         {  ( ( ) }
21901 #
21902 # then it can determine that there is an extra left paren somewhere
21903 # between the { and the }.  And so on with every other possible
21904 # combination of outer and inner brace types.  For another
21905 # example:
21906 #
21907 #         ( [ ..... ]  ] )
21908 #
21909 # which has an extra ] within the parens.
21910 #
21911 # The brace types have indexes 0 .. 3 which are indexes into
21912 # the matrices.
21913 #
21914 # The pair ? : are treated as just another nesting type, with ? acting
21915 # as the opening brace and : acting as the closing brace.
21916 #
21917 # The matrix
21918 #
21919 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
21920 #
21921 # saves the nesting depth of brace type $b (where $b is either of the other
21922 # nesting types) when brace type $a enters a new depth.  When this depth
21923 # decreases, a check is made that the current depth of brace types $b is
21924 # unchanged, or otherwise there must have been an error.  This can
21925 # be very useful for localizing errors, particularly when perl runs to
21926 # the end of a large file (such as this one) and announces that there
21927 # is a problem somewhere.
21928 #
21929 # A numerical sequence number is maintained for every nesting type,
21930 # so that each matching pair can be uniquely identified in a simple
21931 # way.
21932
21933 sub increase_nesting_depth {
21934     my ( $a, $i_tok ) = @_;
21935     my $b;
21936     $current_depth[$a]++;
21937
21938     # Sequence numbers increment by number of items.  This keeps
21939     # a unique set of numbers but still allows the relative location
21940     # of any type to be determined.
21941     $nesting_sequence_number[$a] += scalar(@closing_brace_names);
21942     my $seqno = $nesting_sequence_number[$a];
21943     $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
21944
21945     my $pos = $$rpretoken_map[$i_tok];
21946     $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
21947       [ $input_line_number, $input_line, $pos ];
21948
21949     for $b ( 0 .. $#closing_brace_names ) {
21950         next if ( $b == $a );
21951         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
21952     }
21953     return $seqno;
21954 }
21955
21956 sub decrease_nesting_depth {
21957
21958     my ( $a, $i_tok ) = @_;
21959     my $pos = $$rpretoken_map[$i_tok];
21960     my $b;
21961     my $seqno = 0;
21962
21963     if ( $current_depth[$a] > 0 ) {
21964
21965         $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
21966
21967         # check that any brace types $b contained within are balanced
21968         for $b ( 0 .. $#closing_brace_names ) {
21969             next if ( $b == $a );
21970
21971             unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
21972                 $current_depth[$b] )
21973             {
21974                 my $diff = $current_depth[$b] -
21975                   $depth_array[$a][$b][ $current_depth[$a] ];
21976
21977                 # don't whine too many times
21978                 my $saw_brace_error = get_saw_brace_error();
21979                 if (
21980                     $saw_brace_error <= MAX_NAG_MESSAGES
21981
21982                     # if too many closing types have occured, we probably
21983                     # already caught this error
21984                     && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
21985                   )
21986                 {
21987                     interrupt_logfile();
21988                     my $rsl =
21989                       $starting_line_of_current_depth[$a][ $current_depth[$a] ];
21990                     my $sl  = $$rsl[0];
21991                     my $rel = [ $input_line_number, $input_line, $pos ];
21992                     my $el  = $$rel[0];
21993                     my ($ess);
21994
21995                     if ( $diff == 1 || $diff == -1 ) {
21996                         $ess = '';
21997                     }
21998                     else {
21999                         $ess = 's';
22000                     }
22001                     my $bname =
22002                       ( $diff > 0 )
22003                       ? $opening_brace_names[$b]
22004                       : $closing_brace_names[$b];
22005                     write_error_indicator_pair( @$rsl, '^' );
22006                     my $msg = <<"EOM";
22007 Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
22008 EOM
22009
22010                     if ( $diff > 0 ) {
22011                         my $rml =
22012                           $starting_line_of_current_depth[$b]
22013                           [ $current_depth[$b] ];
22014                         my $ml = $$rml[0];
22015                         $msg .=
22016 "    The most recent un-matched $bname is on line $ml\n";
22017                         write_error_indicator_pair( @$rml, '^' );
22018                     }
22019                     write_error_indicator_pair( @$rel, '^' );
22020                     warning($msg);
22021                     resume_logfile();
22022                 }
22023                 increment_brace_error();
22024             }
22025         }
22026         $current_depth[$a]--;
22027     }
22028     else {
22029
22030         my $saw_brace_error = get_saw_brace_error();
22031         if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
22032             my $msg = <<"EOM";
22033 There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
22034 EOM
22035             indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
22036         }
22037         increment_brace_error();
22038     }
22039     return $seqno;
22040 }
22041
22042 sub check_final_nesting_depths {
22043     my ($a);
22044
22045     for $a ( 0 .. $#closing_brace_names ) {
22046
22047         if ( $current_depth[$a] ) {
22048             my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
22049             my $sl  = $$rsl[0];
22050             my $msg = <<"EOM";
22051 Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
22052 The most recent un-matched $opening_brace_names[$a] is on line $sl
22053 EOM
22054             indicate_error( $msg, @$rsl, '^' );
22055             increment_brace_error();
22056         }
22057     }
22058 }
22059
22060 sub numerator_expected {
22061
22062     # this is a filter for a possible numerator, in support of guessing
22063     # for the / pattern delimiter token.
22064     # returns -
22065     #   1 - yes
22066     #   0 - can't tell
22067     #  -1 - no
22068     # Note: I am using the convention that variables ending in
22069     # _expected have these 3 possible values.
22070     my ( $i, $rtokens ) = @_;
22071     my $next_token = $$rtokens[ $i + 1 ];
22072     if ( $next_token eq '=' ) { $i++; }    # handle /=
22073     my ( $next_nonblank_token, $i_next ) =
22074       find_next_nonblank_token( $i, $rtokens );
22075
22076     if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
22077         1;
22078     }
22079     else {
22080
22081         if ( $next_nonblank_token =~ /^\s*$/ ) {
22082             0;
22083         }
22084         else {
22085             -1;
22086         }
22087     }
22088 }
22089
22090 sub pattern_expected {
22091
22092     # This is the start of a filter for a possible pattern.
22093     # It looks at the token after a possbible pattern and tries to
22094     # determine if that token could end a pattern.
22095     # returns -
22096     #   1 - yes
22097     #   0 - can't tell
22098     #  -1 - no
22099     my ( $i, $rtokens ) = @_;
22100     my $next_token = $$rtokens[ $i + 1 ];
22101     if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
22102     my ( $next_nonblank_token, $i_next ) =
22103       find_next_nonblank_token( $i, $rtokens );
22104
22105     # list of tokens which may follow a pattern
22106     # (can probably be expanded)
22107     if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
22108     {
22109         1;
22110     }
22111     else {
22112
22113         if ( $next_nonblank_token =~ /^\s*$/ ) {
22114             0;
22115         }
22116         else {
22117             -1;
22118         }
22119     }
22120 }
22121
22122 sub find_next_nonblank_token_on_this_line {
22123     my ( $i, $rtokens ) = @_;
22124     my $next_nonblank_token;
22125
22126     if ( $i < $max_token_index ) {
22127         $next_nonblank_token = $$rtokens[ ++$i ];
22128
22129         if ( $next_nonblank_token =~ /^\s*$/ ) {
22130
22131             if ( $i < $max_token_index ) {
22132                 $next_nonblank_token = $$rtokens[ ++$i ];
22133             }
22134         }
22135     }
22136     else {
22137         $next_nonblank_token = "";
22138     }
22139     return ( $next_nonblank_token, $i );
22140 }
22141
22142 sub find_next_nonblank_token {
22143     my ( $i, $rtokens ) = @_;
22144
22145     if ( $i >= $max_token_index ) {
22146
22147         if ( !$peeked_ahead ) {
22148             $peeked_ahead = 1;
22149             $rtokens      = peek_ahead_for_nonblank_token($rtokens);
22150         }
22151     }
22152     my $next_nonblank_token = $$rtokens[ ++$i ];
22153
22154     if ( $next_nonblank_token =~ /^\s*$/ ) {
22155         $next_nonblank_token = $$rtokens[ ++$i ];
22156     }
22157     return ( $next_nonblank_token, $i );
22158 }
22159
22160 sub peek_ahead_for_n_nonblank_pre_tokens {
22161
22162     # returns next n pretokens if they exist
22163     # returns undef's if hits eof without seeing any pretokens
22164     my $max_pretokens = shift;
22165     my $line;
22166     my $i = 0;
22167     my ( $rpre_tokens, $rmap, $rpre_types );
22168
22169     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
22170     {
22171         $line =~ s/^\s*//;    # trim leading blanks
22172         next if ( length($line) <= 0 );    # skip blank
22173         next if ( $line =~ /^#/ );         # skip comment
22174         ( $rpre_tokens, $rmap, $rpre_types ) =
22175           pre_tokenize( $line, $max_pretokens );
22176         last;
22177     }
22178     return ( $rpre_tokens, $rpre_types );
22179 }
22180
22181 # look ahead for next non-blank, non-comment line of code
22182 sub peek_ahead_for_nonblank_token {
22183     my $rtokens = shift;
22184     my $line;
22185     my $i = 0;
22186
22187     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
22188     {
22189         $line =~ s/^\s*//;    # trim leading blanks
22190         next if ( length($line) <= 0 );    # skip blank
22191         next if ( $line =~ /^#/ );         # skip comment
22192         my ( $rtok, $rmap, $rtype ) =
22193           pre_tokenize( $line, 2 );        # only need 2 pre-tokens
22194         my $j = $max_token_index + 1;
22195         my $tok;
22196
22197         foreach $tok (@$rtok) {
22198             last if ( $tok =~ "\n" );
22199             $$rtokens[ ++$j ] = $tok;
22200         }
22201         last;
22202     }
22203     return $rtokens;
22204 }
22205
22206 sub pre_tokenize {
22207
22208     # Break a string, $str, into a sequence of preliminary tokens.  We
22209     # are interested in these types of tokens:
22210     #   words       (type='w'),            example: 'max_tokens_wanted'
22211     #   digits      (type = 'd'),          example: '0755'
22212     #   whitespace  (type = 'b'),          example: '   '
22213     #   any other single character (i.e. punct; type = the character itself).
22214     # We cannot do better than this yet because we might be in a quoted
22215     # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
22216     # tokens.
22217     my ( $str, $max_tokens_wanted ) = @_;
22218
22219     # we return references to these 3 arrays:
22220     my @tokens    = ();     # array of the tokens themselves
22221     my @token_map = (0);    # string position of start of each token
22222     my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
22223
22224     do {
22225
22226         # whitespace
22227         if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
22228
22229         # numbers
22230         # note that this must come before words!
22231         elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
22232
22233         # words
22234         elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
22235
22236         # single-character punctuation
22237         elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
22238
22239         # that's all..
22240         else {
22241             return ( \@tokens, \@token_map, \@type );
22242         }
22243
22244         push @tokens,    $1;
22245         push @token_map, pos($str);
22246
22247     } while ( --$max_tokens_wanted != 0 );
22248
22249     return ( \@tokens, \@token_map, \@type );
22250 }
22251
22252 sub show_tokens {
22253
22254     # this is an old debug routine
22255     my ( $rtokens, $rtoken_map ) = @_;
22256     my $num = scalar(@$rtokens);
22257     my $i;
22258
22259     for ( $i = 0 ; $i < $num ; $i++ ) {
22260         my $len = length( $$rtokens[$i] );
22261         print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
22262     }
22263 }
22264
22265 sub find_angle_operator_termination {
22266
22267     # We are looking at a '<' and want to know if it is an angle operator.
22268     # We are to return:
22269     #   $i = pretoken index of ending '>' if found, current $i otherwise
22270     #   $type = 'Q' if found, '>' otherwise
22271     my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
22272     my $i    = $i_beg;
22273     my $type = '<';
22274     pos($input_line) = 1 + $$rtoken_map[$i];
22275
22276     my $filter;
22277
22278     # we just have to find the next '>' if a term is expected
22279     if ( $expecting == TERM ) { $filter = '[\>]' }
22280
22281     # we have to guess if we don't know what is expected
22282     elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
22283
22284     # shouldn't happen - we shouldn't be here if operator is expected
22285     else { warning("Program Bug in find_angle_operator_termination\n") }
22286
22287     # To illustrate what we might be looking at, in case we are
22288     # guessing, here are some examples of valid angle operators
22289     # (or file globs):
22290     #  <tmp_imp/*>
22291     #  <FH>
22292     #  <$fh>
22293     #  <*.c *.h>
22294     #  <_>
22295     #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
22296     #  <${PREFIX}*img*.$IMAGE_TYPE>
22297     #  <img*.$IMAGE_TYPE>
22298     #  <Timg*.$IMAGE_TYPE>
22299     #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
22300     #
22301     # Here are some examples of lines which do not have angle operators:
22302     #  return undef unless $self->[2]++ < $#{$self->[1]};
22303     #  < 2  || @$t >
22304     #
22305     # the following line from dlister.pl caused trouble:
22306     #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
22307     #
22308     # If the '<' starts an angle operator, it must end on this line and
22309     # it must not have certain characters like ';' and '=' in it.  I use
22310     # this to limit the testing.  This filter should be improved if
22311     # possible.
22312
22313     if ( $input_line =~ /($filter)/g ) {
22314
22315         if ( $1 eq '>' ) {
22316
22317             # We MAY have found an angle operator termination if we get
22318             # here, but we need to do more to be sure we haven't been
22319             # fooled.
22320             my $pos = pos($input_line);
22321
22322             my $pos_beg = $$rtoken_map[$i];
22323             my $str     = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
22324
22325             ######################################debug#####
22326             #write_diagnostics( "ANGLE? :$str\n");
22327             #print "ANGLE: found $1 at pos=$pos\n";
22328             ######################################debug#####
22329             $type = 'Q';
22330             my $error;
22331             ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
22332
22333             # It may be possible that a quote ends midway in a pretoken.
22334             # If this happens, it may be necessary to split the pretoken.
22335             if ($error) {
22336                 warning(
22337                     "Possible tokinization error..please check this line\n");
22338                 report_possible_bug();
22339             }
22340
22341             # Now let's see where we stand....
22342             # OK if math op not possible
22343             if ( $expecting == TERM ) {
22344             }
22345
22346             # OK if there are no more than 2 pre-tokens inside
22347             # (not possible to write 2 token math between < and >)
22348             # This catches most common cases
22349             elsif ( $i <= $i_beg + 3 ) {
22350                 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
22351             }
22352
22353             # Not sure..
22354             else {
22355
22356                 # Let's try a Brace Test: any braces inside must balance
22357                 my $br = 0;
22358                 while ( $str =~ /\{/g ) { $br++ }
22359                 while ( $str =~ /\}/g ) { $br-- }
22360                 my $sb = 0;
22361                 while ( $str =~ /\[/g ) { $sb++ }
22362                 while ( $str =~ /\]/g ) { $sb-- }
22363                 my $pr = 0;
22364                 while ( $str =~ /\(/g ) { $pr++ }
22365                 while ( $str =~ /\)/g ) { $pr-- }
22366
22367                 # if braces do not balance - not angle operator
22368                 if ( $br || $sb || $pr ) {
22369                     $i    = $i_beg;
22370                     $type = '<';
22371                     write_diagnostics(
22372                         "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
22373                 }
22374
22375                 # we should keep doing more checks here...to be continued
22376                 # Tentatively accepting this as a valid angle operator.
22377                 # There are lots more things that can be checked.
22378                 else {
22379                     write_diagnostics(
22380                         "ANGLE-Guessing yes: $str expecting=$expecting\n");
22381                     write_logfile_entry("Guessing angle operator here: $str\n");
22382                 }
22383             }
22384         }
22385
22386         # didn't find ending >
22387         else {
22388             if ( $expecting == TERM ) {
22389                 warning("No ending > for angle operator\n");
22390             }
22391         }
22392     }
22393     return ( $i, $type );
22394 }
22395
22396 sub inverse_pretoken_map {
22397
22398     # Starting with the current pre_token index $i, scan forward until
22399     # finding the index of the next pre_token whose position is $pos.
22400     my ( $i, $pos, $rtoken_map ) = @_;
22401     my $error = 0;
22402
22403     while ( ++$i <= $max_token_index ) {
22404
22405         if ( $pos <= $$rtoken_map[$i] ) {
22406
22407             # Let the calling routine handle errors in which we do not
22408             # land on a pre-token boundary.  It can happen by running
22409             # perltidy on some non-perl scripts, for example.
22410             if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
22411             $i--;
22412             last;
22413         }
22414     }
22415     return ( $i, $error );
22416 }
22417
22418 sub guess_if_pattern_or_conditional {
22419
22420     # this routine is called when we have encountered a ? following an
22421     # unknown bareword, and we must decide if it starts a pattern or not
22422     # input parameters:
22423     #   $i - token index of the ? starting possible pattern
22424     # output parameters:
22425     #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
22426     #   msg = a warning or diagnostic message
22427     my ( $i, $rtokens, $rtoken_map ) = @_;
22428     my $is_pattern = 0;
22429     my $msg        = "guessing that ? after $last_nonblank_token starts a ";
22430
22431     if ( $i >= $max_token_index ) {
22432         $msg .= "conditional (no end to pattern found on the line)\n";
22433     }
22434     else {
22435         my $ibeg = $i;
22436         $i = $ibeg + 1;
22437         my $next_token = $$rtokens[$i];    # first token after ?
22438
22439         # look for a possible ending ? on this line..
22440         my $in_quote        = 1;
22441         my $quote_depth     = 0;
22442         my $quote_character = '';
22443         my $quote_pos       = 0;
22444         ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22445           follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
22446             $quote_pos, $quote_depth );
22447
22448         if ($in_quote) {
22449
22450             # we didn't find an ending ? on this line,
22451             # so we bias towards conditional
22452             $is_pattern = 0;
22453             $msg .= "conditional (no ending ? on this line)\n";
22454
22455             # we found an ending ?, so we bias towards a pattern
22456         }
22457         else {
22458
22459             if ( pattern_expected( $i, $rtokens ) >= 0 ) {
22460                 $is_pattern = 1;
22461                 $msg .= "pattern (found ending ? and pattern expected)\n";
22462             }
22463             else {
22464                 $msg .= "pattern (uncertain, but found ending ?)\n";
22465             }
22466         }
22467     }
22468     return ( $is_pattern, $msg );
22469 }
22470
22471 sub guess_if_pattern_or_division {
22472
22473     # this routine is called when we have encountered a / following an
22474     # unknown bareword, and we must decide if it starts a pattern or is a
22475     # division
22476     # input parameters:
22477     #   $i - token index of the / starting possible pattern
22478     # output parameters:
22479     #   $is_pattern = 0 if probably division,  =1 if probably a pattern
22480     #   msg = a warning or diagnostic message
22481     my ( $i, $rtokens, $rtoken_map ) = @_;
22482     my $is_pattern = 0;
22483     my $msg        = "guessing that / after $last_nonblank_token starts a ";
22484
22485     if ( $i >= $max_token_index ) {
22486         "division (no end to pattern found on the line)\n";
22487     }
22488     else {
22489         my $ibeg = $i;
22490         my $divide_expected = numerator_expected( $i, $rtokens );
22491         $i = $ibeg + 1;
22492         my $next_token = $$rtokens[$i];    # first token after slash
22493
22494         # look for a possible ending / on this line..
22495         my $in_quote        = 1;
22496         my $quote_depth     = 0;
22497         my $quote_character = '';
22498         my $quote_pos       = 0;
22499         ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22500           follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
22501             $quote_pos, $quote_depth );
22502
22503         if ($in_quote) {
22504
22505             # we didn't find an ending / on this line,
22506             # so we bias towards division
22507             if ( $divide_expected >= 0 ) {
22508                 $is_pattern = 0;
22509                 $msg .= "division (no ending / on this line)\n";
22510             }
22511             else {
22512                 $msg        = "multi-line pattern (division not possible)\n";
22513                 $is_pattern = 1;
22514             }
22515
22516         }
22517
22518         # we found an ending /, so we bias towards a pattern
22519         else {
22520
22521             if ( pattern_expected( $i, $rtokens ) >= 0 ) {
22522
22523                 if ( $divide_expected >= 0 ) {
22524
22525                     if ( $i - $ibeg > 60 ) {
22526                         $msg .= "division (matching / too distant)\n";
22527                         $is_pattern = 0;
22528                     }
22529                     else {
22530                         $msg .= "pattern (but division possible too)\n";
22531                         $is_pattern = 1;
22532                     }
22533                 }
22534                 else {
22535                     $is_pattern = 1;
22536                     $msg .= "pattern (division not possible)\n";
22537                 }
22538             }
22539             else {
22540
22541                 if ( $divide_expected >= 0 ) {
22542                     $is_pattern = 0;
22543                     $msg .= "division (pattern not possible)\n";
22544                 }
22545                 else {
22546                     $is_pattern = 1;
22547                     $msg .=
22548                       "pattern (uncertain, but division would not work here)\n";
22549                 }
22550             }
22551         }
22552     }
22553     return ( $is_pattern, $msg );
22554 }
22555
22556 sub find_here_doc {
22557
22558     # find the target of a here document, if any
22559     # input parameters:
22560     #   $i - token index of the second < of <<
22561     #   ($i must be less than the last token index if this is called)
22562     # output parameters:
22563     #   $found_target = 0 didn't find target; =1 found target
22564     #   HERE_TARGET - the target string (may be empty string)
22565     #   $i - unchanged if not here doc,
22566     #    or index of the last token of the here target
22567     my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
22568     my $ibeg                 = $i;
22569     my $found_target         = 0;
22570     my $here_doc_target      = '';
22571     my $here_quote_character = '';
22572     my ( $next_nonblank_token, $i_next_nonblank, $next_token );
22573     $next_token = $$rtokens[ $i + 1 ];
22574
22575     # perl allows a backslash before the target string (heredoc.t)
22576     my $backslash = 0;
22577     if ( $next_token eq '\\' ) {
22578         $backslash  = 1;
22579         $next_token = $$rtokens[ $i + 2 ];
22580     }
22581
22582     ( $next_nonblank_token, $i_next_nonblank ) =
22583       find_next_nonblank_token_on_this_line( $i, $rtokens );
22584
22585     if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
22586
22587         my $in_quote    = 1;
22588         my $quote_depth = 0;
22589         my $quote_pos   = 0;
22590
22591         ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
22592           follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
22593             $here_quote_character, $quote_pos, $quote_depth );
22594
22595         if ($in_quote) {    # didn't find end of quote, so no target found
22596             $i = $ibeg;
22597         }
22598         else {              # found ending quote
22599             my $j;
22600             $found_target = 1;
22601
22602             my $tokj;
22603             for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
22604                 $tokj = $$rtokens[$j];
22605
22606                 # we have to remove any backslash before the quote character
22607                 # so that the here-doc-target exactly matches this string
22608                 next
22609                   if ( $tokj eq "\\"
22610                     && $j < $i - 1
22611                     && $$rtokens[ $j + 1 ] eq $here_quote_character );
22612                 $here_doc_target .= $tokj;
22613             }
22614         }
22615     }
22616
22617     elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
22618         $found_target = 1;
22619         write_logfile_entry(
22620             "found blank here-target after <<; suggest using \"\"\n");
22621         $i = $ibeg;
22622     }
22623     elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
22624
22625         my $here_doc_expected;
22626         if ( $expecting == UNKNOWN ) {
22627             $here_doc_expected = guess_if_here_doc($next_token);
22628         }
22629         else {
22630             $here_doc_expected = 1;
22631         }
22632
22633         if ($here_doc_expected) {
22634             $found_target    = 1;
22635             $here_doc_target = $next_token;
22636             $i               = $ibeg + 1;
22637         }
22638
22639     }
22640     else {
22641
22642         if ( $expecting == TERM ) {
22643             $found_target = 1;
22644             write_logfile_entry("Note: bare here-doc operator <<\n");
22645         }
22646         else {
22647             $i = $ibeg;
22648         }
22649     }
22650
22651     # patch to neglect any prepended backslash
22652     if ( $found_target && $backslash ) { $i++ }
22653
22654     return ( $found_target, $here_doc_target, $here_quote_character, $i );
22655 }
22656
22657 # try to resolve here-doc vs. shift by looking ahead for
22658 # non-code or the end token (currently only looks for end token)
22659 # returns 1 if it is probably a here doc, 0 if not
22660 sub guess_if_here_doc {
22661
22662     # This is how many lines we will search for a target as part of the
22663     # guessing strategy.  It is a constant because there is probably
22664     # little reason to change it.
22665     use constant HERE_DOC_WINDOW => 40;
22666
22667     my $next_token        = shift;
22668     my $here_doc_expected = 0;
22669     my $line;
22670     my $k   = 0;
22671     my $msg = "checking <<";
22672
22673     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
22674     {
22675         chomp $line;
22676
22677         if ( $line =~ /^$next_token$/ ) {
22678             $msg .= " -- found target $next_token ahead $k lines\n";
22679             $here_doc_expected = 1;    # got it
22680             last;
22681         }
22682         last if ( $k >= HERE_DOC_WINDOW );
22683     }
22684
22685     unless ($here_doc_expected) {
22686
22687         if ( !defined($line) ) {
22688             $here_doc_expected = -1;    # hit eof without seeing target
22689             $msg .= " -- must be shift; target $next_token not in file\n";
22690
22691         }
22692         else {                          # still unsure..taking a wild guess
22693
22694             if ( !$is_constant{$current_package}{$next_token} ) {
22695                 $here_doc_expected = 1;
22696                 $msg .=
22697                   " -- guessing it's a here-doc ($next_token not a constant)\n";
22698             }
22699             else {
22700                 $msg .=
22701                   " -- guessing it's a shift ($next_token is a constant)\n";
22702             }
22703         }
22704     }
22705     write_logfile_entry($msg);
22706     return $here_doc_expected;
22707 }
22708
22709 sub do_quote {
22710
22711     # follow (or continue following) quoted string or pattern
22712     # $in_quote return code:
22713     #   0 - ok, found end
22714     #   1 - still must find end of quote whose target is $quote_character
22715     #   2 - still looking for end of first of two quotes
22716     my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
22717         $rtoken_map )
22718       = @_;
22719
22720     if ( $in_quote == 2 ) {    # two quotes/patterns to follow
22721         my $ibeg = $i;
22722         ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22723           follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
22724             $quote_pos, $quote_depth );
22725
22726         if ( $in_quote == 1 ) {
22727             if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
22728             $quote_character = '';
22729         }
22730     }
22731
22732     if ( $in_quote == 1 ) {    # one (more) quote to follow
22733         my $ibeg = $i;
22734         ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
22735           follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
22736             $quote_pos, $quote_depth );
22737     }
22738     return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
22739 }
22740
22741 sub scan_number_do {
22742
22743     #  scan a number in any of the formats that Perl accepts
22744     #  Underbars (_) are allowed in decimal numbers.
22745     #  input parameters -
22746     #      $input_line  - the string to scan
22747     #      $i           - pre_token index to start scanning
22748     #    $rtoken_map    - reference to the pre_token map giving starting
22749     #                    character position in $input_line of token $i
22750     #  output parameters -
22751     #    $i            - last pre_token index of the number just scanned
22752     #    number        - the number (characters); or undef if not a number
22753
22754     my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
22755     my $pos_beg = $$rtoken_map[$i];
22756     my $pos;
22757     my $i_begin = $i;
22758     my $number  = undef;
22759     my $type    = $input_type;
22760
22761     my $first_char = substr( $input_line, $pos_beg, 1 );
22762
22763     # Look for bad starting characters; Shouldn't happen..
22764     if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
22765         warning("Program bug - scan_number given character $first_char\n");
22766         report_definite_bug();
22767         return ( $i, $type, $number );
22768     }
22769
22770     # handle v-string without leading 'v' character ('Two Dot' rule)
22771     # (vstring.t)
22772     pos($input_line) = $pos_beg;
22773     if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
22774         $pos = pos($input_line);
22775         my $numc = $pos - $pos_beg;
22776         $number = substr( $input_line, $pos_beg, $numc );
22777         $type = 'v';
22778         unless ($saw_v_string) { report_v_string($number) }
22779     }
22780
22781     # handle octal, hex, binary
22782     if ( !defined($number) ) {
22783         pos($input_line) = $pos_beg;
22784         if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
22785         {
22786             $pos = pos($input_line);
22787             my $numc = $pos - $pos_beg;
22788             $number = substr( $input_line, $pos_beg, $numc );
22789             $type = 'n';
22790         }
22791     }
22792
22793     # handle decimal
22794     if ( !defined($number) ) {
22795         pos($input_line) = $pos_beg;
22796
22797         if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
22798             $pos = pos($input_line);
22799
22800             # watch out for things like 0..40 which would give 0. by this;
22801             if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
22802                 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
22803             {
22804                 $pos--;
22805             }
22806             my $numc = $pos - $pos_beg;
22807             $number = substr( $input_line, $pos_beg, $numc );
22808             $type = 'n';
22809         }
22810     }
22811
22812     # filter out non-numbers like e + - . e2  .e3 +e6
22813     # the rule: at least one digit, and any 'e' must be preceded by a digit
22814     if (
22815         $number !~ /\d/    # no digits
22816         || (   $number =~ /^(.*)[eE]/
22817             && $1 !~ /\d/ )    # or no digits before the 'e'
22818       )
22819     {
22820         $number = undef;
22821         $type   = $input_type;
22822         return ( $i, $type, $number );
22823     }
22824
22825     # Found a number; now we must convert back from character position
22826     # to pre_token index. An error here implies user syntax error.
22827     # An example would be an invalid octal number like '009'.
22828     my $error;
22829     ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
22830     if ($error) { warning("Possibly invalid number\n") }
22831
22832     return ( $i, $type, $number );
22833 }
22834
22835 sub scan_bare_identifier_do {
22836
22837     # this routine is called to scan a token starting with an alphanumeric
22838     # variable or package separator, :: or '.
22839
22840     my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
22841     my $i_begin = $i;
22842     my $package = undef;
22843
22844     my $i_beg = $i;
22845
22846     # we have to back up one pretoken at a :: since each : is one pretoken
22847     if ( $tok eq '::' ) { $i_beg-- }
22848     if ( $tok eq '->' ) { $i_beg-- }
22849     my $pos_beg = $$rtoken_map[$i_beg];
22850     pos($input_line) = $pos_beg;
22851
22852     #  Examples:
22853     #   A::B::C
22854     #   A::
22855     #   ::A
22856     #   A'B
22857     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
22858
22859         my $pos  = pos($input_line);
22860         my $numc = $pos - $pos_beg;
22861         $tok = substr( $input_line, $pos_beg, $numc );
22862
22863         # type 'w' includes anything without leading type info
22864         # ($,%,@,*) including something like abc::def::ghi
22865         $type = 'w';
22866
22867         my $sub_name = "";
22868         if ( defined($2) ) { $sub_name = $2; }
22869         if ( defined($1) ) {
22870             $package = $1;
22871
22872             # patch: don't allow isolated package name which just ends
22873             # in the old style package separator (single quote).  Example:
22874             #   use CGI':all';
22875             if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
22876                 $pos--;
22877             }
22878
22879             $package =~ s/\'/::/g;
22880             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
22881             $package =~ s/::$//;
22882         }
22883         else {
22884             $package = $current_package;
22885
22886             if ( $is_keyword{$tok} ) {
22887                 $type = 'k';
22888             }
22889         }
22890
22891         # if it is a bareword..
22892         if ( $type eq 'w' ) {
22893
22894             # check for v-string with leading 'v' type character
22895             # (This seems to have presidence over filehandle, type 'Y')
22896             if ( $tok =~ /^v\d+$/ ) {
22897
22898                 # we only have the first part - something like 'v101' -
22899                 # look for more
22900                 if ( $input_line =~ m/\G(\.\d+)+/gc ) {
22901                     $pos  = pos($input_line);
22902                     $numc = $pos - $pos_beg;
22903                     $tok  = substr( $input_line, $pos_beg, $numc );
22904                 }
22905                 $type = 'v';
22906
22907                 # warn if this version can't handle v-strings
22908                 unless ($saw_v_string) { report_v_string($tok) }
22909             }
22910
22911             elsif ( $is_constant{$package}{$sub_name} ) {
22912                 $type = 'C';
22913             }
22914
22915             # bareword after sort has implied empty prototype; for example:
22916             # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
22917             # This has priority over whatever the user has specified.
22918             elsif ($last_nonblank_token eq 'sort'
22919                 && $last_nonblank_type eq 'k' )
22920             {
22921                 $type = 'Z';
22922             }
22923
22924             # Note: strangely, perl does not seem to really let you create
22925             # functions which act like eval and do, in the sense that eval
22926             # and do may have operators following the final }, but any operators
22927             # that you create with prototype (&) apparently do not allow
22928             # trailing operators, only terms.  This seems strange.
22929             # If this ever changes, here is the update
22930             # to make perltidy behave accordingly:
22931
22932             # elsif ( $is_block_function{$package}{$tok} ) {
22933             #    $tok='eval'; # patch to do braces like eval  - doesn't work
22934             #    $type = 'k';
22935             #}
22936             # FIXME: This could become a separate type to allow for different
22937             # future behavior:
22938             elsif ( $is_block_function{$package}{$sub_name} ) {
22939                 $type = 'G';
22940             }
22941
22942             elsif ( $is_block_list_function{$package}{$sub_name} ) {
22943                 $type = 'G';
22944             }
22945             elsif ( $is_user_function{$package}{$sub_name} ) {
22946                 $type      = 'U';
22947                 $prototype = $user_function_prototype{$package}{$sub_name};
22948             }
22949
22950             # check for indirect object
22951             elsif (
22952
22953                 # added 2001-03-27: must not be followed immediately by '('
22954                 # see fhandle.t
22955                 ( $input_line !~ m/\G\(/gc )
22956
22957                 # and
22958                 && (
22959
22960                     # preceded by keyword like 'print', 'printf' and friends
22961                     $is_indirect_object_taker{$last_nonblank_token}
22962
22963                     # or preceded by something like 'print(' or 'printf('
22964                     || (
22965                         ( $last_nonblank_token eq '(' )
22966                         && $is_indirect_object_taker{ $paren_type[$paren_depth]
22967                         }
22968
22969                     )
22970                 )
22971               )
22972             {
22973
22974                 # may not be indirect object unless followed by a space
22975                 if ( $input_line =~ m/\G\s+/gc ) {
22976                     $type = 'Y';
22977
22978                     # Abandon Hope ...
22979                     # Perl's indirect object notation is a very bad
22980                     # thing and can cause subtle bugs, especially for
22981                     # beginning programmers.  And I haven't even been
22982                     # able to figure out a sane warning scheme which
22983                     # doesn't get in the way of good scripts.
22984
22985                     # Complain if a filehandle has any lower case
22986                     # letters.  This is suggested good practice, but the
22987                     # main reason for this warning is that prior to
22988                     # release 20010328, perltidy incorrectly parsed a
22989                     # function call after a print/printf, with the
22990                     # result that a space got added before the opening
22991                     # paren, thereby converting the function name to a
22992                     # filehandle according to perl's weird rules.  This
22993                     # will not usually generate a syntax error, so this
22994                     # is a potentially serious bug.  By warning
22995                     # of filehandles with any lower case letters,
22996                     # followed by opening parens, we will help the user
22997                     # find almost all of these older errors.
22998                     # use 'sub_name' because something like
22999                     # main::MYHANDLE is ok for filehandle
23000                     if ( $sub_name =~ /[a-z]/ ) {
23001
23002                         # could be bug caused by older perltidy if
23003                         # followed by '('
23004                         if ( $input_line =~ m/\G\s*\(/gc ) {
23005                             complain(
23006 "Caution: unknown word '$tok' in indirect object slot\n"
23007                             );
23008                         }
23009                     }
23010                 }
23011
23012                 # bareword not followed by a space -- may not be filehandle
23013                 # (may be function call defined in a 'use' statement)
23014                 else {
23015                     $type = 'Z';
23016                 }
23017             }
23018         }
23019
23020         # Now we must convert back from character position
23021         # to pre_token index.
23022         # I don't think an error flag can occur here ..but who knows
23023         my $error;
23024         ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23025         if ($error) {
23026             warning("scan_bare_identifier: Possibly invalid tokenization\n");
23027         }
23028     }
23029
23030     # no match but line not blank - could be syntax error
23031     # perl will take '::' alone without complaint
23032     else {
23033         $type = 'w';
23034
23035         # change this warning to log message if it becomes annoying
23036         warning("didn't find identifier after leading ::\n");
23037     }
23038     return ( $i, $tok, $type, $prototype );
23039 }
23040
23041 sub scan_id_do {
23042
23043     # This is the new scanner and will eventually replace scan_identifier.
23044     # Only type 'sub' and 'package' are implemented.
23045     # Token types $ * % @ & -> are not yet implemented.
23046     #
23047     # Scan identifier following a type token.
23048     # The type of call depends on $id_scan_state: $id_scan_state = ''
23049     # for starting call, in which case $tok must be the token defining
23050     # the type.
23051     #
23052     # If the type token is the last nonblank token on the line, a value
23053     # of $id_scan_state = $tok is returned, indicating that further
23054     # calls must be made to get the identifier.  If the type token is
23055     # not the last nonblank token on the line, the identifier is
23056     # scanned and handled and a value of '' is returned.
23057
23058     my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
23059     my $type = '';
23060     my ( $i_beg, $pos_beg );
23061
23062     #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
23063     #my ($a,$b,$c) = caller;
23064     #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
23065
23066     # on re-entry, start scanning at first token on the line
23067     if ($id_scan_state) {
23068         $i_beg = $i;
23069         $type  = '';
23070     }
23071
23072     # on initial entry, start scanning just after type token
23073     else {
23074         $i_beg         = $i + 1;
23075         $id_scan_state = $tok;
23076         $type          = 't';
23077     }
23078
23079     # find $i_beg = index of next nonblank token,
23080     # and handle empty lines
23081     my $blank_line          = 0;
23082     my $next_nonblank_token = $$rtokens[$i_beg];
23083     if ( $i_beg > $max_token_index ) {
23084         $blank_line = 1;
23085     }
23086     else {
23087
23088         # only a '#' immediately after a '$' is not a comment
23089         if ( $next_nonblank_token eq '#' ) {
23090             unless ( $tok eq '$' ) {
23091                 $blank_line = 1;
23092             }
23093         }
23094
23095         if ( $next_nonblank_token =~ /^\s/ ) {
23096             ( $next_nonblank_token, $i_beg ) =
23097               find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
23098             if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
23099                 $blank_line = 1;
23100             }
23101         }
23102     }
23103
23104     # handle non-blank line; identifier, if any, must follow
23105     unless ($blank_line) {
23106
23107         if ( $id_scan_state eq 'sub' ) {
23108             ( $i, $tok, $type, $id_scan_state ) =
23109               do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
23110                 $rtoken_map, $id_scan_state );
23111         }
23112
23113         elsif ( $id_scan_state eq 'package' ) {
23114             ( $i, $tok, $type ) =
23115               do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
23116                 $rtoken_map );
23117             $id_scan_state = '';
23118         }
23119
23120         else {
23121             warning("invalid token in scan_id: $tok\n");
23122             $id_scan_state = '';
23123         }
23124     }
23125
23126     if ( $id_scan_state && ( !defined($type) || !$type ) ) {
23127
23128         # shouldn't happen:
23129         warning(
23130 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
23131         );
23132         report_definite_bug();
23133     }
23134
23135     TOKENIZER_DEBUG_FLAG_NSCAN && do {
23136         print
23137           "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
23138     };
23139     return ( $i, $tok, $type, $id_scan_state );
23140 }
23141
23142 {
23143
23144     # saved package and subnames in case prototype is on separate line
23145     my ( $package_saved, $subname_saved );
23146
23147     sub do_scan_sub {
23148
23149         # do_scan_sub parses a sub name and prototype
23150         # it is called with $i_beg equal to the index of the first nonblank
23151         # token following a 'sub' token.
23152
23153         # TODO: add future error checks to be sure we have a valid
23154         # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
23155         # a name is given if and only if a non-anonymous sub is
23156         # appropriate.
23157
23158         my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
23159             $id_scan_state )
23160           = @_;
23161         $id_scan_state = "";    # normally we get everything in one call
23162         my $subname = undef;
23163         my $package = undef;
23164         my $proto   = undef;
23165         my $attrs   = undef;
23166         my $match;
23167
23168         my $pos_beg = $$rtoken_map[$i_beg];
23169         pos($input_line) = $pos_beg;
23170
23171         # sub NAME PROTO ATTRS
23172         if (
23173             $input_line =~ m/\G\s*
23174         ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
23175         (\w+)               # NAME    - required
23176         (\s*\([^){]*\))?    # PROTO   - something in parens
23177         (\s*:)?             # ATTRS   - leading : of attribute list
23178         /gcx
23179           )
23180         {
23181             $match   = 1;
23182             $subname = $2;
23183             $proto   = $3;
23184             $attrs   = $4;
23185
23186             $package = ( defined($1) && $1 ) ? $1 : $current_package;
23187             $package =~ s/\'/::/g;
23188             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
23189             $package =~ s/::$//;
23190             my $pos  = pos($input_line);
23191             my $numc = $pos - $pos_beg;
23192             $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
23193             $type = 'i';
23194         }
23195
23196         # Look for prototype/attributes not preceded on this line by subname;
23197         # This might be an anonymous sub with attributes,
23198         # or a prototype on a separate line from its sub name
23199         elsif (
23200             $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
23201             (\s*:)?                              # ATTRS leading ':'
23202             /gcx
23203             && ( $1 || $2 )
23204           )
23205         {
23206             $match = 1;
23207             $proto = $1;
23208             $attrs = $2;
23209
23210             # Handle prototype on separate line from subname
23211             if ($subname_saved) {
23212                 $package = $package_saved;
23213                 $subname = $subname_saved;
23214                 $tok     = $last_nonblank_token;
23215             }
23216             $type = 'i';
23217         }
23218
23219         if ($match) {
23220
23221             # ATTRS: if there are attributes, back up and let the ':' be
23222             # found later by the scanner.
23223             my $pos = pos($input_line);
23224             if ($attrs) {
23225                 $pos -= length($attrs);
23226             }
23227
23228             my $next_nonblank_token = $tok;
23229
23230             # catch case of line with leading ATTR ':' after anonymous sub
23231             if ( $pos == $pos_beg && $tok eq ':' ) {
23232                 $type = 'A';
23233             }
23234
23235             # We must convert back from character position
23236             # to pre_token index.
23237             else {
23238
23239                 # I don't think an error flag can occur here ..but ?
23240                 my $error;
23241                 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23242                 if ($error) { warning("Possibly invalid sub\n") }
23243
23244                 # check for multiple definitions of a sub
23245                 ( $next_nonblank_token, my $i_next ) =
23246                   find_next_nonblank_token_on_this_line( $i, $rtokens );
23247             }
23248
23249             if ( $next_nonblank_token =~ /^(\s*|#)$/ )
23250             {    # skip blank or side comment
23251                 my ( $rpre_tokens, $rpre_types ) =
23252                   peek_ahead_for_n_nonblank_pre_tokens(1);
23253                 if ( defined($rpre_tokens) && @$rpre_tokens ) {
23254                     $next_nonblank_token = $rpre_tokens->[0];
23255                 }
23256                 else {
23257                     $next_nonblank_token = '}';
23258                 }
23259             }
23260             $package_saved = "";
23261             $subname_saved = "";
23262             if ( $next_nonblank_token eq '{' ) {
23263                 if ($subname) {
23264                     if ( $saw_function_definition{$package}{$subname} ) {
23265                         my $lno = $saw_function_definition{$package}{$subname};
23266                         warning(
23267 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
23268                         );
23269                     }
23270                     $saw_function_definition{$package}{$subname} =
23271                       $input_line_number;
23272                 }
23273             }
23274             elsif ( $next_nonblank_token eq ';' ) {
23275             }
23276             elsif ( $next_nonblank_token eq '}' ) {
23277             }
23278
23279             # ATTRS - if an attribute list follows, remember the name
23280             # of the sub so the next opening brace can be labeled.
23281             # Setting 'statement_type' causes any ':'s to introduce
23282             # attributes.
23283             elsif ( $next_nonblank_token eq ':' ) {
23284                 $statement_type = $tok;
23285             }
23286
23287             # see if PROTO follows on another line:
23288             elsif ( $next_nonblank_token eq '(' ) {
23289                 if ( $attrs || $proto ) {
23290                     warning(
23291 "unexpected '(' after definition or declaration of sub '$subname'\n"
23292                     );
23293                 }
23294                 else {
23295                     $id_scan_state  = 'sub';    # we must come back to get proto
23296                     $statement_type = $tok;
23297                     $package_saved  = $package;
23298                     $subname_saved  = $subname;
23299                 }
23300             }
23301             elsif ($next_nonblank_token) {      # EOF technically ok
23302                 warning(
23303 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
23304                 );
23305             }
23306             check_prototype( $proto, $package, $subname );
23307         }
23308
23309         # no match but line not blank
23310         else {
23311         }
23312         return ( $i, $tok, $type, $id_scan_state );
23313     }
23314 }
23315
23316 sub check_prototype {
23317     my ( $proto, $package, $subname ) = @_;
23318     return unless ( defined($package) && defined($subname) );
23319     if ( defined($proto) ) {
23320         $proto =~ s/^\s*\(\s*//;
23321         $proto =~ s/\s*\)$//;
23322         if ($proto) {
23323             $is_user_function{$package}{$subname}        = 1;
23324             $user_function_prototype{$package}{$subname} = "($proto)";
23325
23326             # prototypes containing '&' must be treated specially..
23327             if ( $proto =~ /\&/ ) {
23328
23329                 # right curly braces of prototypes ending in
23330                 # '&' may be followed by an operator
23331                 if ( $proto =~ /\&$/ ) {
23332                     $is_block_function{$package}{$subname} = 1;
23333                 }
23334
23335                 # right curly braces of prototypes NOT ending in
23336                 # '&' may NOT be followed by an operator
23337                 elsif ( $proto !~ /\&$/ ) {
23338                     $is_block_list_function{$package}{$subname} = 1;
23339                 }
23340             }
23341         }
23342         else {
23343             $is_constant{$package}{$subname} = 1;
23344         }
23345     }
23346     else {
23347         $is_user_function{$package}{$subname} = 1;
23348     }
23349 }
23350
23351 sub do_scan_package {
23352
23353     # do_scan_package parses a package name
23354     # it is called with $i_beg equal to the index of the first nonblank
23355     # token following a 'package' token.
23356
23357     my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
23358     my $package = undef;
23359     my $pos_beg = $$rtoken_map[$i_beg];
23360     pos($input_line) = $pos_beg;
23361
23362     # handle non-blank line; package name, if any, must follow
23363     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
23364         $package = $1;
23365         $package = ( defined($1) && $1 ) ? $1 : 'main';
23366         $package =~ s/\'/::/g;
23367         if ( $package =~ /^\:/ ) { $package = 'main' . $package }
23368         $package =~ s/::$//;
23369         my $pos  = pos($input_line);
23370         my $numc = $pos - $pos_beg;
23371         $tok  = 'package ' . substr( $input_line, $pos_beg, $numc );
23372         $type = 'i';
23373
23374         # Now we must convert back from character position
23375         # to pre_token index.
23376         # I don't think an error flag can occur here ..but ?
23377         my $error;
23378         ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
23379         if ($error) { warning("Possibly invalid package\n") }
23380         $current_package = $package;
23381
23382         # check for error
23383         my ( $next_nonblank_token, $i_next ) =
23384           find_next_nonblank_token( $i, $rtokens );
23385         if ( $next_nonblank_token !~ /^[;\}]$/ ) {
23386             warning(
23387                 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
23388             );
23389         }
23390     }
23391
23392     # no match but line not blank --
23393     # could be a label with name package, like package:  , for example.
23394     else {
23395         $type = 'k';
23396     }
23397
23398     return ( $i, $tok, $type );
23399 }
23400
23401 sub scan_identifier_do {
23402
23403     # This routine assembles tokens into identifiers.  It maintains a
23404     # scan state, id_scan_state.  It updates id_scan_state based upon
23405     # current id_scan_state and token, and returns an updated
23406     # id_scan_state and the next index after the identifier.
23407
23408     my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
23409     my $i_begin   = $i;
23410     my $type      = '';
23411     my $tok_begin = $$rtokens[$i_begin];
23412     if ( $tok_begin eq ':' ) { $tok_begin = '::' }
23413     my $id_scan_state_begin = $id_scan_state;
23414     my $identifier_begin    = $identifier;
23415     my $tok                 = $tok_begin;
23416     my $message             = "";
23417
23418     # these flags will be used to help figure out the type:
23419     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
23420     my $saw_type;
23421
23422     # allow old package separator (') except in 'use' statement
23423     my $allow_tick = ( $last_nonblank_token ne 'use' );
23424
23425     # get started by defining a type and a state if necessary
23426     unless ($id_scan_state) {
23427         $context = UNKNOWN_CONTEXT;
23428
23429         # fixup for digraph
23430         if ( $tok eq '>' ) {
23431             $tok       = '->';
23432             $tok_begin = $tok;
23433         }
23434         $identifier = $tok;
23435
23436         if ( $tok eq '$' || $tok eq '*' ) {
23437             $id_scan_state = '$';
23438             $context       = SCALAR_CONTEXT;
23439         }
23440         elsif ( $tok eq '%' || $tok eq '@' ) {
23441             $id_scan_state = '$';
23442             $context       = LIST_CONTEXT;
23443         }
23444         elsif ( $tok eq '&' ) {
23445             $id_scan_state = '&';
23446         }
23447         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
23448             $saw_alpha     = 0;     # 'sub' is considered type info here
23449             $id_scan_state = '$';
23450             $identifier .= ' ';     # need a space to separate sub from sub name
23451         }
23452         elsif ( $tok eq '::' ) {
23453             $id_scan_state = 'A';
23454         }
23455         elsif ( $tok =~ /^[A-Za-z_]/ ) {
23456             $id_scan_state = ':';
23457         }
23458         elsif ( $tok eq '->' ) {
23459             $id_scan_state = '$';
23460         }
23461         else {
23462
23463             # shouldn't happen
23464             my ( $a, $b, $c ) = caller;
23465             warning("Program Bug: scan_identifier given bad token = $tok \n");
23466             warning("   called from sub $a  line: $c\n");
23467             report_definite_bug();
23468         }
23469         $saw_type = !$saw_alpha;
23470     }
23471     else {
23472         $i--;
23473         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
23474     }
23475
23476     # now loop to gather the identifier
23477     my $i_save = $i;
23478
23479     while ( $i < $max_token_index ) {
23480         $i_save = $i unless ( $tok =~ /^\s*$/ );
23481         $tok    = $$rtokens[ ++$i ];
23482
23483         if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
23484             $tok = '::';
23485             $i++;
23486         }
23487
23488         if ( $id_scan_state eq '$' ) {    # starting variable name
23489
23490             if ( $tok eq '$' ) {
23491
23492                 $identifier .= $tok;
23493
23494                 # we've got a punctuation variable if end of line (punct.t)
23495                 if ( $i == $max_token_index ) {
23496                     $type          = 'i';
23497                     $id_scan_state = '';
23498                     last;
23499                 }
23500             }
23501             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
23502                 $saw_alpha     = 1;
23503                 $id_scan_state = ':';           # now need ::
23504                 $identifier .= $tok;
23505             }
23506             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
23507                 $saw_alpha     = 1;
23508                 $id_scan_state = ':';                 # now need ::
23509                 $identifier .= $tok;
23510
23511                 # Perl will accept leading digits in identifiers,
23512                 # although they may not always produce useful results.
23513                 # Something like $main::0 is ok.  But this also works:
23514                 #
23515                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
23516                 #  howdy::123::bubba();
23517                 #
23518             }
23519             elsif ( $tok =~ /^[0-9]/ ) {              # numeric
23520                 $saw_alpha     = 1;
23521                 $id_scan_state = ':';                 # now need ::
23522                 $identifier .= $tok;
23523             }
23524             elsif ( $tok eq '::' ) {
23525                 $id_scan_state = 'A';
23526                 $identifier .= $tok;
23527             }
23528             elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
23529                 $identifier .= $tok;    # keep same state, a $ could follow
23530             }
23531             elsif ( $tok eq '{' ) {
23532
23533                 # check for something like ${#} or ${©}
23534                 if (   $identifier eq '$'
23535                     && $i + 2 <= $max_token_index
23536                     && $$rtokens[ $i + 2 ] eq '}'
23537                     && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
23538                 {
23539                     my $next2 = $$rtokens[ $i + 2 ];
23540                     my $next1 = $$rtokens[ $i + 1 ];
23541                     $identifier .= $tok . $next1 . $next2;
23542                     $i += 2;
23543                     $id_scan_state = '';
23544                     last;
23545                 }
23546
23547                 # skip something like ${xxx} or ->{
23548                 $id_scan_state = '';
23549
23550                 # if this is the first token of a line, any tokens for this
23551                 # identifier have already been accumulated
23552                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
23553                 $i = $i_save;
23554                 last;
23555             }
23556
23557             # space ok after leading $ % * & @
23558             elsif ( $tok =~ /^\s*$/ ) {
23559
23560                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
23561
23562                     if ( length($identifier) > 1 ) {
23563                         $id_scan_state = '';
23564                         $i             = $i_save;
23565                         $type          = 'i';    # probably punctuation variable
23566                         last;
23567                     }
23568                     else {
23569
23570                         # spaces after $'s are common, and space after @
23571                         # is harmless, so only complain about space
23572                         # after other type characters. Space after $ and
23573                         # @ will be removed in formatting.  Report space
23574                         # after % and * because they might indicate a
23575                         # parsing error.  In other words '% ' might be a
23576                         # modulo operator.  Delete this warning if it
23577                         # gets annoying.
23578                         if ( $identifier !~ /^[\@\$]$/ ) {
23579                             $message =
23580                               "Space in identifier, following $identifier\n";
23581                         }
23582                     }
23583                 }
23584
23585                 # else:
23586                 # space after '->' is ok
23587             }
23588             elsif ( $tok eq '^' ) {
23589
23590                 # check for some special variables like $^W
23591                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
23592                     $identifier .= $tok;
23593                     $id_scan_state = 'A';
23594                 }
23595                 else {
23596                     $id_scan_state = '';
23597                 }
23598             }
23599             else {    # something else
23600
23601                 # check for various punctuation variables
23602                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
23603                     $identifier .= $tok;
23604                 }
23605
23606                 elsif ( $identifier eq '$#' ) {
23607
23608                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
23609
23610                     # perl seems to allow just these: $#: $#- $#+
23611                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
23612                         $type = 'i';
23613                         $identifier .= $tok;
23614                     }
23615                     else {
23616                         $i = $i_save;
23617                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
23618                     }
23619                 }
23620                 elsif ( $identifier eq '$$' ) {
23621
23622                     # perl does not allow references to punctuation
23623                     # variables without braces.  For example, this
23624                     # won't work:
23625                     #  $:=\4;
23626                     #  $a = $$:;
23627                     # You would have to use
23628                     #  $a = ${$:};
23629
23630                     $i = $i_save;
23631                     if ( $tok eq '{' ) { $type = 't' }
23632                     else { $type = 'i' }
23633                 }
23634                 elsif ( $identifier eq '->' ) {
23635                     $i = $i_save;
23636                 }
23637                 else {
23638                     $i = $i_save;
23639                     if ( length($identifier) == 1 ) { $identifier = ''; }
23640                 }
23641                 $id_scan_state = '';
23642                 last;
23643             }
23644         }
23645         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
23646
23647             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
23648                 $id_scan_state = ':';          # now need ::
23649                 $saw_alpha     = 1;
23650                 $identifier .= $tok;
23651             }
23652             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
23653                 $id_scan_state = ':';                 # now need ::
23654                 $saw_alpha     = 1;
23655                 $identifier .= $tok;
23656             }
23657             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
23658                 $id_scan_state = ':';       # now need ::
23659                 $saw_alpha     = 1;
23660                 $identifier .= $tok;
23661             }
23662             elsif ( $tok =~ /^\s*$/ ) {     # allow space
23663             }
23664             elsif ( $tok eq '::' ) {        # leading ::
23665                 $id_scan_state = 'A';       # accept alpha next
23666                 $identifier .= $tok;
23667             }
23668             elsif ( $tok eq '{' ) {
23669                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
23670                 $i             = $i_save;
23671                 $id_scan_state = '';
23672                 last;
23673             }
23674             else {
23675
23676                 # punctuation variable?
23677                 # testfile: cunningham4.pl
23678                 if ( $identifier eq '&' ) {
23679                     $identifier .= $tok;
23680                 }
23681                 else {
23682                     $identifier = '';
23683                     $i          = $i_save;
23684                     $type       = '&';
23685                 }
23686                 $id_scan_state = '';
23687                 last;
23688             }
23689         }
23690         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
23691
23692             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
23693                 $identifier .= $tok;
23694                 $id_scan_state = ':';        # now need ::
23695                 $saw_alpha     = 1;
23696             }
23697             elsif ( $tok eq "'" && $allow_tick ) {
23698                 $identifier .= $tok;
23699                 $id_scan_state = ':';        # now need ::
23700                 $saw_alpha     = 1;
23701             }
23702             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
23703                 $identifier .= $tok;
23704                 $id_scan_state = ':';        # now need ::
23705                 $saw_alpha     = 1;
23706             }
23707             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
23708                 $id_scan_state = '(';
23709                 $identifier .= $tok;
23710             }
23711             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
23712                 $id_scan_state = ')';
23713                 $identifier .= $tok;
23714             }
23715             else {
23716                 $id_scan_state = '';
23717                 $i             = $i_save;
23718                 last;
23719             }
23720         }
23721         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
23722
23723             if ( $tok eq '::' ) {            # got it
23724                 $identifier .= $tok;
23725                 $id_scan_state = 'A';        # now require alpha
23726             }
23727             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
23728                 $identifier .= $tok;
23729                 $id_scan_state = ':';           # now need ::
23730                 $saw_alpha     = 1;
23731             }
23732             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
23733                 $identifier .= $tok;
23734                 $id_scan_state = ':';           # now need ::
23735                 $saw_alpha     = 1;
23736             }
23737             elsif ( $tok eq "'" && $allow_tick ) {    # tick
23738
23739                 if ( $is_keyword{$identifier} ) {
23740                     $id_scan_state = '';              # that's all
23741                     $i             = $i_save;
23742                 }
23743                 else {
23744                     $identifier .= $tok;
23745                 }
23746             }
23747             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
23748                 $id_scan_state = '(';
23749                 $identifier .= $tok;
23750             }
23751             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
23752                 $id_scan_state = ')';
23753                 $identifier .= $tok;
23754             }
23755             else {
23756                 $id_scan_state = '';        # that's all
23757                 $i             = $i_save;
23758                 last;
23759             }
23760         }
23761         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
23762
23763             if ( $tok eq '(' ) {             # got it
23764                 $identifier .= $tok;
23765                 $id_scan_state = ')';        # now find the end of it
23766             }
23767             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
23768                 $identifier .= $tok;
23769             }
23770             else {
23771                 $id_scan_state = '';         # that's all - no prototype
23772                 $i             = $i_save;
23773                 last;
23774             }
23775         }
23776         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
23777
23778             if ( $tok eq ')' ) {             # got it
23779                 $identifier .= $tok;
23780                 $id_scan_state = '';         # all done
23781                 last;
23782             }
23783             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
23784                 $identifier .= $tok;
23785             }
23786             else {    # probable error in script, but keep going
23787                 warning("Unexpected '$tok' while seeking end of prototype\n");
23788                 $identifier .= $tok;
23789             }
23790         }
23791         else {        # can get here due to error in initialization
23792             $id_scan_state = '';
23793             $i             = $i_save;
23794             last;
23795         }
23796     }
23797
23798     if ( $id_scan_state eq ')' ) {
23799         warning("Hit end of line while seeking ) to end prototype\n");
23800     }
23801
23802     # once we enter the actual identifier, it may not extend beyond
23803     # the end of the current line
23804     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
23805         $id_scan_state = '';
23806     }
23807     if ( $i < 0 ) { $i = 0 }
23808
23809     unless ($type) {
23810
23811         if ($saw_type) {
23812
23813             if ($saw_alpha) {
23814                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
23815                     $type = 'w';
23816                 }
23817                 else { $type = 'i' }
23818             }
23819             elsif ( $identifier eq '->' ) {
23820                 $type = '->';
23821             }
23822             elsif (
23823                 ( length($identifier) > 1 )
23824
23825                 # In something like '@$=' we have an identifier '@$'
23826                 # In something like '$${' we have type '$$' (and only
23827                 # part of an identifier)
23828                 && !( $identifier =~ /\$$/ && $tok eq '{' )
23829                 && ( $identifier !~ /^(sub |package )$/ )
23830               )
23831             {
23832                 $type = 'i';
23833             }
23834             else { $type = 't' }
23835         }
23836         elsif ($saw_alpha) {
23837
23838             # type 'w' includes anything without leading type info
23839             # ($,%,@,*) including something like abc::def::ghi
23840             $type = 'w';
23841         }
23842         else {
23843             $type = '';
23844         }    # this can happen on a restart
23845     }
23846
23847     if ($identifier) {
23848         $tok = $identifier;
23849         if ($message) { write_logfile_entry($message) }
23850     }
23851     else {
23852         $tok = $tok_begin;
23853         $i   = $i_begin;
23854     }
23855
23856     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
23857         my ( $a, $b, $c ) = caller;
23858         print
23859 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
23860         print
23861 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
23862     };
23863     return ( $i, $tok, $type, $id_scan_state, $identifier );
23864 }
23865
23866 sub follow_quoted_string {
23867
23868     # scan for a specific token, skipping escaped characters
23869     # if the quote character is blank, use the first non-blank character
23870     # input parameters:
23871     #   $rtokens = reference to the array of tokens
23872     #   $i = the token index of the first character to search
23873     #   $in_quote = number of quoted strings being followed
23874     #   $beginning_tok = the starting quote character
23875     #   $quote_pos = index to check next for alphanumeric delimiter
23876     # output parameters:
23877     #   $i = the token index of the ending quote character
23878     #   $in_quote = decremented if found end, unchanged if not
23879     #   $beginning_tok = the starting quote character
23880     #   $quote_pos = index to check next for alphanumeric delimiter
23881     #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
23882     my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth )
23883       = @_;
23884     my ( $tok, $end_tok );
23885     my $i = $i_beg - 1;
23886
23887     TOKENIZER_DEBUG_FLAG_QUOTE && do {
23888         print
23889 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
23890     };
23891
23892     # get the corresponding end token
23893     if ( $beginning_tok !~ /^\s*$/ ) {
23894         $end_tok = matching_end_token($beginning_tok);
23895     }
23896
23897     # a blank token means we must find and use the first non-blank one
23898     else {
23899         my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
23900
23901         while ( $i < $max_token_index ) {
23902             $tok = $$rtokens[ ++$i ];
23903
23904             if ( $tok !~ /^\s*$/ ) {
23905
23906                 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
23907                     $i = $max_token_index;
23908                 }
23909                 else {
23910
23911                     if ( length($tok) > 1 ) {
23912                         if ( $quote_pos <= 0 ) { $quote_pos = 1 }
23913                         $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
23914                     }
23915                     else {
23916                         $beginning_tok = $tok;
23917                         $quote_pos     = 0;
23918                     }
23919                     $end_tok     = matching_end_token($beginning_tok);
23920                     $quote_depth = 1;
23921                     last;
23922                 }
23923             }
23924             else {
23925                 $allow_quote_comments = 1;
23926             }
23927         }
23928     }
23929
23930     # There are two different loops which search for the ending quote
23931     # character.  In the rare case of an alphanumeric quote delimiter, we
23932     # have to look through alphanumeric tokens character-by-character, since
23933     # the pre-tokenization process combines multiple alphanumeric
23934     # characters, whereas for a non-alphanumeric delimiter, only tokens of
23935     # length 1 can match.
23936
23937     # loop for case of alphanumeric quote delimiter..
23938     # "quote_pos" is the position the current word to begin searching
23939     if ( $beginning_tok =~ /\w/ ) {
23940
23941         # Note this because it is not recommended practice except
23942         # for obfuscated perl contests
23943         if ( $in_quote == 1 ) {
23944             write_logfile_entry(
23945                 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
23946         }
23947
23948         while ( $i < $max_token_index ) {
23949
23950             if ( $quote_pos == 0 || ( $i < 0 ) ) {
23951                 $tok = $$rtokens[ ++$i ];
23952
23953                 if ( $tok eq '\\' ) {
23954
23955                     $quote_pos++;
23956                     last if ( $i >= $max_token_index );
23957                     $tok = $$rtokens[ ++$i ];
23958
23959                 }
23960             }
23961             my $old_pos = $quote_pos;
23962
23963             unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
23964             {
23965
23966             }
23967             $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
23968
23969             if ( $quote_pos > 0 ) {
23970
23971                 $quote_depth--;
23972
23973                 if ( $quote_depth == 0 ) {
23974                     $in_quote--;
23975                     last;
23976                 }
23977             }
23978         }
23979     }
23980
23981     # loop for case of a non-alphanumeric quote delimiter..
23982     else {
23983
23984         while ( $i < $max_token_index ) {
23985             $tok = $$rtokens[ ++$i ];
23986
23987             if ( $tok eq $end_tok ) {
23988                 $quote_depth--;
23989
23990                 if ( $quote_depth == 0 ) {
23991                     $in_quote--;
23992                     last;
23993                 }
23994             }
23995             elsif ( $tok eq $beginning_tok ) {
23996                 $quote_depth++;
23997             }
23998             elsif ( $tok eq '\\' ) {
23999                 $i++;
24000             }
24001         }
24002     }
24003     if ( $i > $max_token_index ) { $i = $max_token_index }
24004     return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth );
24005 }
24006
24007 sub matching_end_token {
24008
24009     # find closing character for a pattern
24010     my $beginning_token = shift;
24011
24012     if ( $beginning_token eq '{' ) {
24013         '}';
24014     }
24015     elsif ( $beginning_token eq '[' ) {
24016         ']';
24017     }
24018     elsif ( $beginning_token eq '<' ) {
24019         '>';
24020     }
24021     elsif ( $beginning_token eq '(' ) {
24022         ')';
24023     }
24024     else {
24025         $beginning_token;
24026     }
24027 }
24028
24029 BEGIN {
24030
24031     # These names are used in error messages
24032     @opening_brace_names = qw# '{' '[' '(' '?' #;
24033     @closing_brace_names = qw# '}' ']' ')' ':' #;
24034
24035     my @digraphs = qw(
24036       .. :: << >> ** && .. ||  -> => += -= .= %= &= |= ^= *= <>
24037       <= >= == =~ !~ != ++ -- /= x=
24038     );
24039     @is_digraph{@digraphs} = (1) x scalar(@digraphs);
24040
24041     my @trigraphs = qw( ... **= <<= >>= &&= ||= <=> );
24042     @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
24043
24044     # make a hash of all valid token types for self-checking the tokenizer
24045     # (adding NEW_TOKENS : select a new character and add to this list)
24046     my @valid_token_types = qw#
24047       A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
24048       { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
24049       #;
24050     push( @valid_token_types, @digraphs );
24051     push( @valid_token_types, @trigraphs );
24052     push( @valid_token_types, '#' );
24053     push( @valid_token_types, ',' );
24054     @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
24055
24056     # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
24057     my @file_test_operators =
24058       qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
24059     @is_file_test_operator{@file_test_operators} =
24060       (1) x scalar(@file_test_operators);
24061
24062     # these functions have prototypes of the form (&), so when they are
24063     # followed by a block, that block MAY BE followed by an operator.
24064     @_ = qw( do eval );
24065     @is_block_operator{@_} = (1) x scalar(@_);
24066
24067     # these functions allow an identifier in the indirect object slot
24068     @_ = qw( print printf sort exec system );
24069     @is_indirect_object_taker{@_} = (1) x scalar(@_);
24070
24071     # These tokens may precede a code block
24072     # patched for SWITCH/CASE
24073     @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
24074       unless do while until eval for foreach map grep sort
24075       switch case given when);
24076     @is_code_block_token{@_} = (1) x scalar(@_);
24077
24078     # I'll build the list of keywords incrementally
24079     my @Keywords = ();
24080
24081     # keywords and tokens after which a value or pattern is expected,
24082     # but not an operator.  In other words, these should consume terms
24083     # to their right, or at least they are not expected to be followed
24084     # immediately by operators.
24085     my @value_requestor = qw(
24086       AUTOLOAD
24087       BEGIN
24088       CHECK
24089       DESTROY
24090       END
24091       EQ
24092       GE
24093       GT
24094       INIT
24095       LE
24096       LT
24097       NE
24098       abs
24099       accept
24100       alarm
24101       and
24102       atan2
24103       bind
24104       binmode
24105       bless
24106       caller
24107       chdir
24108       chmod
24109       chomp
24110       chop
24111       chown
24112       chr
24113       chroot
24114       close
24115       closedir
24116       cmp
24117       connect
24118       continue
24119       cos
24120       crypt
24121       dbmclose
24122       dbmopen
24123       defined
24124       delete
24125       die
24126       dump
24127       each
24128       else
24129       elsif
24130       eof
24131       eq
24132       exec
24133       exists
24134       exit
24135       exp
24136       fcntl
24137       fileno
24138       flock
24139       for
24140       foreach
24141       formline
24142       ge
24143       getc
24144       getgrgid
24145       getgrnam
24146       gethostbyaddr
24147       gethostbyname
24148       getnetbyaddr
24149       getnetbyname
24150       getpeername
24151       getpgrp
24152       getpriority
24153       getprotobyname
24154       getprotobynumber
24155       getpwnam
24156       getpwuid
24157       getservbyname
24158       getservbyport
24159       getsockname
24160       getsockopt
24161       glob
24162       gmtime
24163       goto
24164       grep
24165       gt
24166       hex
24167       if
24168       index
24169       int
24170       ioctl
24171       join
24172       keys
24173       kill
24174       last
24175       lc
24176       lcfirst
24177       le
24178       length
24179       link
24180       listen
24181       local
24182       localtime
24183       lock
24184       log
24185       lstat
24186       lt
24187       map
24188       mkdir
24189       msgctl
24190       msgget
24191       msgrcv
24192       msgsnd
24193       my
24194       ne
24195       next
24196       no
24197       not
24198       oct
24199       open
24200       opendir
24201       or
24202       ord
24203       our
24204       pack
24205       pipe
24206       pop
24207       pos
24208       print
24209       printf
24210       prototype
24211       push
24212       quotemeta
24213       rand
24214       read
24215       readdir
24216       readlink
24217       readline
24218       readpipe
24219       recv
24220       redo
24221       ref
24222       rename
24223       require
24224       reset
24225       return
24226       reverse
24227       rewinddir
24228       rindex
24229       rmdir
24230       scalar
24231       seek
24232       seekdir
24233       select
24234       semctl
24235       semget
24236       semop
24237       send
24238       sethostent
24239       setnetent
24240       setpgrp
24241       setpriority
24242       setprotoent
24243       setservent
24244       setsockopt
24245       shift
24246       shmctl
24247       shmget
24248       shmread
24249       shmwrite
24250       shutdown
24251       sin
24252       sleep
24253       socket
24254       socketpair
24255       sort
24256       splice
24257       split
24258       sprintf
24259       sqrt
24260       srand
24261       stat
24262       study
24263       substr
24264       symlink
24265       syscall
24266       sysopen
24267       sysread
24268       sysseek
24269       system
24270       syswrite
24271       tell
24272       telldir
24273       tie
24274       tied
24275       truncate
24276       uc
24277       ucfirst
24278       umask
24279       undef
24280       unless
24281       unlink
24282       unpack
24283       unshift
24284       untie
24285       until
24286       use
24287       utime
24288       values
24289       vec
24290       waitpid
24291       warn
24292       while
24293       write
24294       xor
24295
24296       switch
24297       case
24298       given
24299       when
24300     );
24301
24302     # patched above for SWITCH/CASE
24303     push( @Keywords, @value_requestor );
24304
24305     # These are treated the same but are not keywords:
24306     my @extra_vr = qw(
24307       constant
24308       vars
24309     );
24310     push( @value_requestor, @extra_vr );
24311
24312     @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
24313
24314     # this list contains keywords which do not look for arguments,
24315     # so that they might be followed by an operator, or at least
24316     # not a term.
24317     my @operator_requestor = qw(
24318       endgrent
24319       endhostent
24320       endnetent
24321       endprotoent
24322       endpwent
24323       endservent
24324       fork
24325       getgrent
24326       gethostent
24327       getlogin
24328       getnetent
24329       getppid
24330       getprotoent
24331       getpwent
24332       getservent
24333       setgrent
24334       setpwent
24335       time
24336       times
24337       wait
24338       wantarray
24339     );
24340
24341     push( @Keywords, @operator_requestor );
24342
24343     # These are treated the same but are not considered keywords:
24344     my @extra_or = qw(
24345       STDERR
24346       STDIN
24347       STDOUT
24348     );
24349
24350     push( @operator_requestor, @extra_or );
24351
24352     @expecting_operator_token{@operator_requestor} =
24353       (1) x scalar(@operator_requestor);
24354
24355     # these token TYPES expect trailing operator but not a term
24356     # note: ++ and -- are post-increment and decrement, 'C' = constant
24357     my @operator_requestor_types = qw( ++ -- C );
24358     @expecting_operator_types{@operator_requestor_types} =
24359       (1) x scalar(@operator_requestor_types);
24360
24361     # these token TYPES consume values (terms)
24362     # note: pp and mm are pre-increment and decrement
24363     # f=semicolon in for,  F=file test operator
24364     my @value_requestor_type = qw#
24365       L { ( [ ~ !~ =~ ; . .. ... A : && ! || = + - x
24366       **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||=
24367       <= >= == != => \ > < % * / ? & | ** <=>
24368       f F pp mm Y p m U J G
24369       #;
24370     push( @value_requestor_type, ',' )
24371       ;    # (perl doesn't like a ',' in a qw block)
24372     @expecting_term_types{@value_requestor_type} =
24373       (1) x scalar(@value_requestor_type);
24374
24375     # For simple syntax checking, it is nice to have a list of operators which
24376     # will really be unhappy if not followed by a term.  This includes most
24377     # of the above...
24378     %really_want_term = %expecting_term_types;
24379
24380     # with these exceptions...
24381     delete $really_want_term{'U'}; # user sub, depends on prototype
24382     delete $really_want_term{'F'}; # file test works on $_ if no following term
24383     delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
24384                                    # let perl do it
24385
24386     @_ = qw(q qq qw qx qr s y tr m);
24387     @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
24388
24389     # These keywords are handled specially in the tokenizer code:
24390     my @special_keywords = qw(
24391       do
24392       eval
24393       format
24394       m
24395       package
24396       q
24397       qq
24398       qr
24399       qw
24400       qx
24401       s
24402       sub
24403       tr
24404       y
24405     );
24406     push( @Keywords, @special_keywords );
24407
24408     # Keywords after which list formatting may be used
24409     # WARNING: do not include |map|grep|eval or perl may die on
24410     # syntax errors (map1.t).
24411     my @keyword_taking_list = qw(
24412       and
24413       chmod
24414       chomp
24415       chop
24416       chown
24417       dbmopen
24418       die
24419       elsif
24420       exec
24421       fcntl
24422       for
24423       foreach
24424       formline
24425       getsockopt
24426       if
24427       index
24428       ioctl
24429       join
24430       kill
24431       local
24432       msgctl
24433       msgrcv
24434       msgsnd
24435       my
24436       open
24437       or
24438       our
24439       pack
24440       print
24441       printf
24442       push
24443       read
24444       readpipe
24445       recv
24446       return
24447       reverse
24448       rindex
24449       seek
24450       select
24451       semctl
24452       semget
24453       send
24454       setpriority
24455       setsockopt
24456       shmctl
24457       shmget
24458       shmread
24459       shmwrite
24460       socket
24461       socketpair
24462       sort
24463       splice
24464       split
24465       sprintf
24466       substr
24467       syscall
24468       sysopen
24469       sysread
24470       sysseek
24471       system
24472       syswrite
24473       tie
24474       unless
24475       unlink
24476       unpack
24477       unshift
24478       until
24479       vec
24480       warn
24481       while
24482     );
24483     @is_keyword_taking_list{@keyword_taking_list} =
24484       (1) x scalar(@keyword_taking_list);
24485
24486     # These are not used in any way yet
24487     #    my @unused_keywords = qw(
24488     #      CORE
24489     #     __FILE__
24490     #     __LINE__
24491     #     __PACKAGE__
24492     #     );
24493
24494     #  The list of keywords was extracted from function 'keyword' in
24495     #  perl file toke.c version 5.005.03, using this utility, plus a
24496     #  little editing: (file getkwd.pl):
24497     #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
24498     #  Add 'get' prefix where necessary, then split into the above lists.
24499     #  This list should be updated as necessary.
24500     #  The list should not contain these special variables:
24501     #  ARGV DATA ENV SIG STDERR STDIN STDOUT
24502     #  __DATA__ __END__
24503
24504     @is_keyword{@Keywords} = (1) x scalar(@Keywords);
24505 }
24506 1;
24507 __END__
24508
24509 =head1 NAME
24510
24511 Perl::Tidy - Parses and beautifies perl source
24512
24513 =head1 SYNOPSIS
24514
24515     use Perl::Tidy;
24516
24517     Perl::Tidy::perltidy(
24518         source      => $source,
24519         destination => $destination,
24520         stderr      => $stderr,
24521         argv        => $argv,
24522         perltidyrc  => $perltidyrc,
24523         logfile     => $logfile,
24524         errorfile   => $errorfile,
24525         formatter   => $formatter,  # callback object (see below)
24526     );
24527
24528 =head1 DESCRIPTION
24529
24530 This module makes the functionality of the perltidy utility available to perl
24531 scripts.  Any or all of the input parameters may be omitted, in which case the
24532 @ARGV array will be used to provide input parameters as described
24533 in the perltidy(1) man page.
24534
24535 For example, the perltidy script is basically just this:
24536
24537     use Perl::Tidy;
24538     Perl::Tidy::perltidy();
24539
24540 The module accepts input and output streams by a variety of methods.
24541 The following list of parameters may be any of a the following: a
24542 filename, an ARRAY reference, a SCALAR reference, or an object with
24543 either a B<getline> or B<print> method, as appropriate.
24544
24545         source          - the source of the script to be formatted
24546         destination     - the destination of the formatted output
24547         stderr          - standard error output
24548         perltidyrc      - the .perltidyrc file
24549         logfile         - the .LOG file stream, if any
24550         errorfile       - the .ERR file stream, if any
24551
24552 The following chart illustrates the logic used to decide how to
24553 treat a parameter.
24554
24555    ref($param)  $param is assumed to be:
24556    -----------  ---------------------
24557    undef        a filename
24558    SCALAR       ref to string
24559    ARRAY        ref to array
24560    (other)      object with getline (if source) or print method
24561
24562 If the parameter is an object, and the object has a B<close> method, that
24563 close method will be called at the end of the stream.
24564
24565 =over 4
24566
24567 =item source
24568
24569 If the B<source> parameter is given, it defines the source of the
24570 input stream.
24571
24572 =item destination
24573
24574 If the B<destination> parameter is given, it will be used to define the
24575 file or memory location to receive output of perltidy.
24576
24577 =item stderr
24578
24579 The B<stderr> parameter allows the calling program to capture the output
24580 to what would otherwise go to the standard error output device.
24581
24582 =item perltidyrc
24583
24584 If the B<perltidyrc> file is given, it will be used instead of any
24585 F<.perltidyrc> configuration file that would otherwise be used.
24586
24587 =item argv
24588
24589 If the B<argv> parameter is given, it will be used instead of the
24590 B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
24591 string, or a reference to an array.  If it is a string or reference to a
24592 string, it will be parsed into an array of items just as if it were a
24593 command line string.
24594
24595 =back
24596
24597 =head1 EXAMPLE
24598
24599 The following example passes perltidy a snippet as a reference
24600 to a string and receives the result back in a reference to
24601 an array.
24602
24603  use Perl::Tidy;
24604
24605  # some messy source code to format
24606  my $source = <<'EOM';
24607  use strict;
24608  my @editors=('Emacs', 'Vi   '); my $rand = rand();
24609  print "A poll of 10 random programmers gave these results:\n";
24610  foreach(0..10) {
24611  my $i=int ($rand+rand());
24612  print " $editors[$i] users are from Venus" . ", " .
24613  "$editors[1-$i] users are from Mars" .
24614  "\n";
24615  }
24616  EOM
24617
24618  # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
24619  my @dest;
24620  perltidy( source => \$source, destination => \@dest );
24621  foreach (@dest) {print}
24622
24623 =head1 Using the B<formatter> Callback Object
24624
24625 The B<formatter> parameter is an optional callback object which allows
24626 the calling program to receive tokenized lines directly from perltidy for
24627 further specialized processing.  When this parameter is used, the two
24628 formatting options which are built into perltidy (beautification or
24629 html) are ignored.  The following diagram illustrates the logical flow:
24630
24631                     |-- (normal route)   -> code beautification
24632   caller->perltidy->|-- (-html flag )    -> create html
24633                     |-- (formatter given)-> callback to write_line
24634
24635 This can be useful for processing perl scripts in some way.  The
24636 parameter C<$formatter> in the perltidy call,
24637
24638         formatter   => $formatter,
24639
24640 is an object created by the caller with a C<write_line> method which
24641 will accept and process tokenized lines, one line per call.  Here is
24642 a simple example of a C<write_line> which merely prints the line number,
24643 the line type (as determined by perltidy), and the text of the line:
24644
24645  sub write_line {
24646
24647      # This is called from perltidy line-by-line
24648      my $self              = shift;
24649      my $line_of_tokens    = shift;
24650      my $line_type         = $line_of_tokens->{_line_type};
24651      my $input_line_number = $line_of_tokens->{_line_number};
24652      my $input_line        = $line_of_tokens->{_line_text};
24653      print "$input_line_number:$line_type:$input_line";
24654  }
24655
24656 The complete program, B<perllinetype>, is contained in the examples section of
24657 the source distribution.  As this example shows, the callback method
24658 receives a parameter B<$line_of_tokens>, which is a reference to a hash
24659 of other useful information.  This example uses these hash entries:
24660
24661  $line_of_tokens->{_line_number} - the line number (1,2,...)
24662  $line_of_tokens->{_line_text}   - the text of the line
24663  $line_of_tokens->{_line_type}   - the type of the line, one of:
24664
24665     SYSTEM         - system-specific code before hash-bang line
24666     CODE           - line of perl code (including comments)
24667     POD_START      - line starting pod, such as '=head'
24668     POD            - pod documentation text
24669     POD_END        - last line of pod section, '=cut'
24670     HERE           - text of here-document
24671     HERE_END       - last line of here-doc (target word)
24672     FORMAT         - format section
24673     FORMAT_END     - last line of format section, '.'
24674     DATA_START     - __DATA__ line
24675     DATA           - unidentified text following __DATA__
24676     END_START      - __END__ line
24677     END            - unidentified text following __END__
24678     ERROR          - we are in big trouble, probably not a perl script
24679
24680 Most applications will be only interested in lines of type B<CODE>.  For
24681 another example, let's write a program which checks for one of the
24682 so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
24683 can slow down processing.  Here is a B<write_line>, from the example
24684 program B<find_naughty.pl>, which does that:
24685
24686  sub write_line {
24687
24688      # This is called back from perltidy line-by-line
24689      # We're looking for $`, $&, and $'
24690      my ( $self, $line_of_tokens ) = @_;
24691
24692      # pull out some stuff we might need
24693      my $line_type         = $line_of_tokens->{_line_type};
24694      my $input_line_number = $line_of_tokens->{_line_number};
24695      my $input_line        = $line_of_tokens->{_line_text};
24696      my $rtoken_type       = $line_of_tokens->{_rtoken_type};
24697      my $rtokens           = $line_of_tokens->{_rtokens};
24698      chomp $input_line;
24699
24700      # skip comments, pod, etc
24701      return if ( $line_type ne 'CODE' );
24702
24703      # loop over tokens looking for $`, $&, and $'
24704      for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
24705
24706          # we only want to examine token types 'i' (identifier)
24707          next unless $$rtoken_type[$j] eq 'i';
24708
24709          # pull out the actual token text
24710          my $token = $$rtokens[$j];
24711
24712          # and check it
24713          if ( $token =~ /^\$[\`\&\']$/ ) {
24714              print STDERR
24715                "$input_line_number: $token\n";
24716          }
24717      }
24718  }
24719
24720 This example pulls out these tokenization variables from the $line_of_tokens
24721 hash reference:
24722
24723      $rtoken_type = $line_of_tokens->{_rtoken_type};
24724      $rtokens     = $line_of_tokens->{_rtokens};
24725
24726 The variable C<$rtoken_type> is a reference to an array of token type codes,
24727 and C<$rtokens> is a reference to a corresponding array of token text.
24728 These are obviously only defined for lines of type B<CODE>.
24729 Perltidy classifies tokens into types, and has a brief code for each type.
24730 You can get a complete list at any time by running perltidy from the
24731 command line with
24732
24733      perltidy --dump-token-types
24734
24735 In the present example, we are only looking for tokens of type B<i>
24736 (identifiers), so the for loop skips past all other types.  When an
24737 identifier is found, its actual text is checked to see if it is one
24738 being sought.  If so, the above write_line prints the token and its
24739 line number.
24740
24741 The B<formatter> feature is relatively new in perltidy, and further
24742 documentation needs to be written to complete its description.  However,
24743 several example programs have been written and can be found in the
24744 B<examples> section of the source distribution.  Probably the best way
24745 to get started is to find one of the examples which most closely matches
24746 your application and start modifying it.
24747
24748 For help with perltidy's pecular way of breaking lines into tokens, you
24749 might run, from the command line,
24750
24751  perltidy -D filename
24752
24753 where F<filename> is a short script of interest.  This will produce
24754 F<filename.DEBUG> with interleaved lines of text and their token types.
24755 The -D flag has been in perltidy from the beginning for this purpose.
24756 If you want to see the code which creates this file, it is
24757 C<write_debug_entry> in Tidy.pm.
24758
24759 =head1 EXPORT
24760
24761   &perltidy
24762
24763 =head1 CREDITS
24764
24765 Thanks to Hugh Myers who developed the initial modular interface
24766 to perltidy.
24767
24768 =head1 VERSION
24769
24770 This man page documents Perl::Tidy version 20031021.
24771
24772 =head1 AUTHOR
24773
24774  Steve Hancock
24775  perltidy at users.sourceforge.net
24776
24777 =head1 SEE ALSO
24778
24779 The perltidy(1) man page describes all of the features of perltidy.  It
24780 can be found at http://perltidy.sourceforge.net.
24781
24782 =cut