lib/Perl/Tidy.pm

   1 ############################################################
   2 #
   3 #    perltidy - a perl script indenter and formatter
   4 #
   5 #    Copyright (c) 2000-2007 by Steve Hancock
   6 #    Distributed under the GPL license agreement; see file COPYING
   7 #
   8 #    This program is free software; you can redistribute it and/or modify
   9 #    it under the terms of the GNU General Public License as published by
  10 #    the Free Software Foundation; either version 2 of the License, or
  11 #    (at your option) any later version.
  12 #
  13 #    This program is distributed in the hope that it will be useful,
  14 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 #    GNU General Public License for more details.
  17 #
  18 #    You should have received a copy of the GNU General Public License
  19 #    along with this program; if not, write to the Free Software
  20 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 #
  22 #    For brief instructions instructions, try 'perltidy -h'.
  23 #    For more complete documentation, try 'man perltidy'
  24 #    or visit http://perltidy.sourceforge.net
  25 #
  26 #    This script is an example of the default style.  It was formatted with:
  27 #
  28 #      perltidy Tidy.pm
  29 #
  30 #    Code Contributions:
  31 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  32 #        v-strings.
  33 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  34 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  35 #      Yves Orton supplied coding to help detect Windows versions.
  36 #      Axel Rose supplied a patch for MacPerl.
  37 #      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
  38 #      Dan Tyrell contributed a patch for binary I/O.
  39 #      Ueli Hugenschmidt contributed a patch for -fpsc
  40 #      Many others have supplied key ideas, suggestions, and bug reports;
  41 #        see the CHANGES file.
  42 #
  43 ############################################################
  44
  45 package Perl::Tidy;
  46 use 5.004;    # need IO::File from 5.004 or later
  47 BEGIN { $^W = 1; }    # turn on warnings
  48
  49 use strict;
  50 use Exporter;
  51 use Carp;
  52 $|++;
  53
  54 use vars qw{
  55   $VERSION
  56   @ISA
  57   @EXPORT
  58   $missing_file_spec
  59 };
  60
  61 @ISA    = qw( Exporter );
  62 @EXPORT = qw( &perltidy );
  63
  64 use IO::File;
  65 use File::Basename;
  66
  67 BEGIN {
  68     ( $VERSION = q($Id: Tidy.pm,v 1.68 2007/08/01 16:22:38 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  69 }
  70
  71 sub streamhandle {
  72
  73     # given filename and mode (r or w), create an object which:
  74     #   has a 'getline' method if mode='r', and
  75     #   has a 'print' method if mode='w'.
  76     # The objects also need a 'close' method.
  77     #
  78     # How the object is made:
  79     #
  80     # if $filename is:     Make object using:
  81     # ----------------     -----------------
  82     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
  83     # string               IO::File
  84     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
  85     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
  86     # object               object
  87     #                      (check for 'print' method for 'w' mode)
  88     #                      (check for 'getline' method for 'r' mode)
  89     my $ref = ref( my $filename = shift );
  90     my $mode = shift;
  91     my $New;
  92     my $fh;
  93
  94     # handle a reference
  95     if ($ref) {
  96         if ( $ref eq 'ARRAY' ) {
  97             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
  98         }
  99         elsif ( $ref eq 'SCALAR' ) {
 100             $New = sub { Perl::Tidy::IOScalar->new(@_) };
 101         }
 102         else {
 103
 104             # Accept an object with a getline method for reading. Note:
 105             # IO::File is built-in and does not respond to the defined
 106             # operator.  If this causes trouble, the check can be
 107             # skipped and we can just let it crash if there is no
 108             # getline.
 109             if ( $mode =~ /[rR]/ ) {
 110                 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 111                     $New = sub { $filename };
 112                 }
 113                 else {
 114                     $New = sub { undef };
 115                     confess <<EOM;
 116 ------------------------------------------------------------------------
 117 No 'getline' method is defined for object of class $ref
 118 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 119 ------------------------------------------------------------------------
 120 EOM
 121                 }
 122             }
 123
 124             # Accept an object with a print method for writing.
 125             # See note above about IO::File
 126             if ( $mode =~ /[wW]/ ) {
 127                 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 128                     $New = sub { $filename };
 129                 }
 130                 else {
 131                     $New = sub { undef };
 132                     confess <<EOM;
 133 ------------------------------------------------------------------------
 134 No 'print' method is defined for object of class $ref
 135 Please check your call to Perl::Tidy::perltidy. Trace follows.
 136 ------------------------------------------------------------------------
 137 EOM
 138                 }
 139             }
 140         }
 141     }
 142
 143     # handle a string
 144     else {
 145         if ( $filename eq '-' ) {
 146             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 147         }
 148         else {
 149             $New = sub { IO::File->new(@_) };
 150         }
 151     }
 152     $fh = $New->( $filename, $mode )
 153       or warn "Couldn't open file:$filename in mode:$mode : $!\n";
 154     return $fh, ( $ref or $filename );
 155 }
 156
 157 sub find_input_line_ending {
 158
 159     # Peek at a file and return first line ending character.
 160     # Quietly return undef in case of any trouble.
 161     my ($input_file) = @_;
 162     my $ending;
 163
 164     # silently ignore input from object or stdin
 165     if ( ref($input_file) || $input_file eq '-' ) {
 166         return $ending;
 167     }
 168     open( INFILE, $input_file ) || return $ending;
 169
 170     binmode INFILE;
 171     my $buf;
 172     read( INFILE, $buf, 1024 );
 173     close INFILE;
 174     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 175         my $test = $1;
 176
 177         # dos
 178         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 179
 180         # mac
 181         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 182
 183         # unix
 184         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 185
 186         # unknown
 187         else { }
 188     }
 189
 190     # no ending seen
 191     else { }
 192
 193     return $ending;
 194 }
 195
 196 sub catfile {
 197
 198     # concatenate a path and file basename
 199     # returns undef in case of error
 200
 201     BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 202
 203     # use File::Spec if we can
 204     unless ($missing_file_spec) {
 205         return File::Spec->catfile(@_);
 206     }
 207
 208     # Perl 5.004 systems may not have File::Spec so we'll make
 209     # a simple try.  We assume File::Basename is available.
 210     # return undef if not successful.
 211     my $name      = pop @_;
 212     my $path      = join '/', @_;
 213     my $test_file = $path . $name;
 214     my ( $test_name, $test_path ) = fileparse($test_file);
 215     return $test_file if ( $test_name eq $name );
 216     return undef      if ( $^O        eq 'VMS' );
 217
 218     # this should work at least for Windows and Unix:
 219     $test_file = $path . '/' . $name;
 220     ( $test_name, $test_path ) = fileparse($test_file);
 221     return $test_file if ( $test_name eq $name );
 222     return undef;
 223 }
 224
 225 sub make_temporary_filename {
 226
 227     # Make a temporary filename.
 228     #
 229     # The POSIX tmpnam() function tends to be unreliable for non-unix
 230     # systems (at least for the win32 systems that I've tested), so use
 231     # a pre-defined name.  A slight disadvantage of this is that two
 232     # perltidy runs in the same working directory may conflict.
 233     # However, the chance of that is small and managable by the user.
 234     # An alternative would be to check for the file's existance and use,
 235     # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
 236     # keep it simple.
 237     my $name = "perltidy.TMP";
 238     if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
 239         return $name;
 240     }
 241     eval "use POSIX qw(tmpnam)";
 242     if ($@) { return $name }
 243     use IO::File;
 244
 245     # just make a couple of tries before giving up and using the default
 246     for ( 0 .. 1 ) {
 247         my $tmpname = tmpnam();
 248         my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
 249         if ($fh) {
 250             $fh->close();
 251             return ($tmpname);
 252             last;
 253         }
 254     }
 255     return ($name);
 256 }
 257
 258 # Here is a map of the flow of data from the input source to the output
 259 # line sink:
 260 #
 261 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 262 #       input                         groups                 output
 263 #       lines   tokens      lines       of          lines    lines
 264 #                                      lines
 265 #
 266 # The names correspond to the package names responsible for the unit processes.
 267 #
 268 # The overall process is controlled by the "main" package.
 269 #
 270 # LineSource is the stream of input lines
 271 #
 272 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 273 # if necessary.  A token is any section of the input line which should be
 274 # manipulated as a single entity during formatting.  For example, a single
 275 # ',' character is a token, and so is an entire side comment.  It handles
 276 # the complexities of Perl syntax, such as distinguishing between '<<' as
 277 # a shift operator and as a here-document, or distinguishing between '/'
 278 # as a divide symbol and as a pattern delimiter.
 279 #
 280 # Formatter inserts and deletes whitespace between tokens, and breaks
 281 # sequences of tokens at appropriate points as output lines.  It bases its
 282 # decisions on the default rules as modified by any command-line options.
 283 #
 284 # VerticalAligner collects groups of lines together and tries to line up
 285 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 286 #
 287 # FileWriter simply writes lines to the output stream.
 288 #
 289 # The Logger package, not shown, records significant events and warning
 290 # messages.  It writes a .LOG file, which may be saved with a
 291 # '-log' or a '-g' flag.
 292
 293 {
 294
 295     # variables needed by interrupt handler:
 296     my $tokenizer;
 297     my $input_file;
 298
 299     # this routine may be called to give a status report if interrupted.  If a
 300     # parameter is given, it will call exit with that parameter.  This is no
 301     # longer used because it works under Unix but not under Windows.
 302     sub interrupt_handler {
 303
 304         my $exit_flag = shift;
 305         print STDERR "perltidy interrupted";
 306         if ($tokenizer) {
 307             my $input_line_number =
 308               Perl::Tidy::Tokenizer::get_input_line_number();
 309             print STDERR " at line $input_line_number";
 310         }
 311         if ($input_file) {
 312
 313             if   ( ref $input_file ) { print STDERR " of reference to:" }
 314             else                     { print STDERR " of file:" }
 315             print STDERR " $input_file";
 316         }
 317         print STDERR "\n";
 318         exit $exit_flag if defined($exit_flag);
 319     }
 320
 321     sub perltidy {
 322
 323         my %defaults = (
 324             argv                  => undef,
 325             destination           => undef,
 326             formatter             => undef,
 327             logfile               => undef,
 328             errorfile             => undef,
 329             perltidyrc            => undef,
 330             source                => undef,
 331             stderr                => undef,
 332             dump_options          => undef,
 333             dump_options_type     => undef,
 334             dump_getopt_flags     => undef,
 335             dump_options_category => undef,
 336             dump_options_range    => undef,
 337             dump_abbreviations    => undef,
 338         );
 339
 340         # don't overwrite callers ARGV
 341         local @ARGV = @ARGV;
 342
 343         my %input_hash = @_;
 344
 345         if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 346             local $" = ')(';
 347             my @good_keys = sort keys %defaults;
 348             @bad_keys = sort @bad_keys;
 349             confess <<EOM;
 350 ------------------------------------------------------------------------
 351 Unknown perltidy parameter : (@bad_keys)
 352 perltidy only understands : (@good_keys)
 353 ------------------------------------------------------------------------
 354
 355 EOM
 356         }
 357
 358         my $get_hash_ref = sub {
 359             my ($key) = @_;
 360             my $hash_ref = $input_hash{$key};
 361             if ( defined($hash_ref) ) {
 362                 unless ( ref($hash_ref) eq 'HASH' ) {
 363                     my $what = ref($hash_ref);
 364                     my $but_is =
 365                       $what ? "but is ref to $what" : "but is not a reference";
 366                     croak <<EOM;
 367 ------------------------------------------------------------------------
 368 error in call to perltidy:
 369 -$key must be reference to HASH $but_is
 370 ------------------------------------------------------------------------
 371 EOM
 372                 }
 373             }
 374             return $hash_ref;
 375         };
 376
 377         %input_hash = ( %defaults, %input_hash );
 378         my $argv               = $input_hash{'argv'};
 379         my $destination_stream = $input_hash{'destination'};
 380         my $errorfile_stream   = $input_hash{'errorfile'};
 381         my $logfile_stream     = $input_hash{'logfile'};
 382         my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 383         my $source_stream      = $input_hash{'source'};
 384         my $stderr_stream      = $input_hash{'stderr'};
 385         my $user_formatter     = $input_hash{'formatter'};
 386
 387         # various dump parameters
 388         my $dump_options_type     = $input_hash{'dump_options_type'};
 389         my $dump_options          = $get_hash_ref->('dump_options');
 390         my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
 391         my $dump_options_category = $get_hash_ref->('dump_options_category');
 392         my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
 393         my $dump_options_range    = $get_hash_ref->('dump_options_range');
 394
 395         # validate dump_options_type
 396         if ( defined($dump_options) ) {
 397             unless ( defined($dump_options_type) ) {
 398                 $dump_options_type = 'perltidyrc';
 399             }
 400             unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
 401                 croak <<EOM;
 402 ------------------------------------------------------------------------
 403 Please check value of -dump_options_type in call to perltidy;
 404 saw: '$dump_options_type'
 405 expecting: 'perltidyrc' or 'full'
 406 ------------------------------------------------------------------------
 407 EOM
 408
 409             }
 410         }
 411         else {
 412             $dump_options_type = "";
 413         }
 414
 415         if ($user_formatter) {
 416
 417             # if the user defines a formatter, there is no output stream,
 418             # but we need a null stream to keep coding simple
 419             $destination_stream = Perl::Tidy::DevNull->new();
 420         }
 421
 422         # see if ARGV is overridden
 423         if ( defined($argv) ) {
 424
 425             my $rargv = ref $argv;
 426             if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
 427
 428             # ref to ARRAY
 429             if ($rargv) {
 430                 if ( $rargv eq 'ARRAY' ) {
 431                     @ARGV = @$argv;
 432                 }
 433                 else {
 434                     croak <<EOM;
 435 ------------------------------------------------------------------------
 436 Please check value of -argv in call to perltidy;
 437 it must be a string or ref to ARRAY but is: $rargv
 438 ------------------------------------------------------------------------
 439 EOM
 440                 }
 441             }
 442
 443             # string
 444             else {
 445                 my ( $rargv, $msg ) = parse_args($argv);
 446                 if ($msg) {
 447                     die <<EOM;
 448 Error parsing this string passed to to perltidy with 'argv':
 449 $msg
 450 EOM
 451                 }
 452                 @ARGV = @{$rargv};
 453             }
 454         }
 455
 456         # redirect STDERR if requested
 457         if ($stderr_stream) {
 458             my ( $fh_stderr, $stderr_file ) =
 459               Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 460             if ($fh_stderr) { *STDERR = $fh_stderr }
 461             else {
 462                 croak <<EOM;
 463 ------------------------------------------------------------------------
 464 Unable to redirect STDERR to $stderr_stream
 465 Please check value of -stderr in call to perltidy
 466 ------------------------------------------------------------------------
 467 EOM
 468             }
 469         }
 470
 471         my $rpending_complaint;
 472         $$rpending_complaint = "";
 473         my $rpending_logfile_message;
 474         $$rpending_logfile_message = "";
 475
 476         my ( $is_Windows, $Windows_type ) =
 477           look_for_Windows($rpending_complaint);
 478
 479         # VMS file names are restricted to a 40.40 format, so we append _tdy
 480         # instead of .tdy, etc. (but see also sub check_vms_filename)
 481         my $dot;
 482         my $dot_pattern;
 483         if ( $^O eq 'VMS' ) {
 484             $dot         = '_';
 485             $dot_pattern = '_';
 486         }
 487         else {
 488             $dot         = '.';
 489             $dot_pattern = '\.';    # must escape for use in regex
 490         }
 491
 492         # handle command line options
 493         my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
 494             $rexpansion, $roption_category, $roption_range )
 495           = process_command_line(
 496             $perltidyrc_stream,  $is_Windows, $Windows_type,
 497             $rpending_complaint, $dump_options_type,
 498           );
 499
 500         # return or exit immediately after all dumps
 501         my $quit_now = 0;
 502
 503         # Getopt parameters and their flags
 504         if ( defined($dump_getopt_flags) ) {
 505             $quit_now = 1;
 506             foreach my $op ( @{$roption_string} ) {
 507                 my $opt  = $op;
 508                 my $flag = "";
 509
 510                 # Examples:
 511                 #  some-option=s
 512                 #  some-option=i
 513                 #  some-option:i
 514                 #  some-option!
 515                 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
 516                     $opt  = $1;
 517                     $flag = $2;
 518                 }
 519                 $dump_getopt_flags->{$opt} = $flag;
 520             }
 521         }
 522
 523         if ( defined($dump_options_category) ) {
 524             $quit_now = 1;
 525             %{$dump_options_category} = %{$roption_category};
 526         }
 527
 528         if ( defined($dump_options_range) ) {
 529             $quit_now = 1;
 530             %{$dump_options_range} = %{$roption_range};
 531         }
 532
 533         if ( defined($dump_abbreviations) ) {
 534             $quit_now = 1;
 535             %{$dump_abbreviations} = %{$rexpansion};
 536         }
 537
 538         if ( defined($dump_options) ) {
 539             $quit_now = 1;
 540             %{$dump_options} = %{$rOpts};
 541         }
 542
 543         return if ($quit_now);
 544
 545         # dump from command line
 546         if ( $rOpts->{'dump-options'} ) {
 547             dump_options( $rOpts, $roption_string );
 548             exit 1;
 549         }
 550
 551         check_options( $rOpts, $is_Windows, $Windows_type,
 552             $rpending_complaint );
 553
 554         if ($user_formatter) {
 555             $rOpts->{'format'} = 'user';
 556         }
 557
 558         # there must be one entry here for every possible format
 559         my %default_file_extension = (
 560             tidy => 'tdy',
 561             html => 'html',
 562             user => '',
 563         );
 564
 565         # be sure we have a valid output format
 566         unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 567             my $formats = join ' ',
 568               sort map { "'" . $_ . "'" } keys %default_file_extension;
 569             my $fmt = $rOpts->{'format'};
 570             die "-format='$fmt' but must be one of: $formats\n";
 571         }
 572
 573         my $output_extension =
 574           make_extension( $rOpts->{'output-file-extension'},
 575             $default_file_extension{ $rOpts->{'format'} }, $dot );
 576
 577         my $backup_extension =
 578           make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 579
 580         my $html_toc_extension =
 581           make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 582
 583         my $html_src_extension =
 584           make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 585
 586         # check for -b option;
 587         my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 588           && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
 589           && @ARGV > 0;    # silently ignore if standard input;
 590                            # this allows -b to be in a .perltidyrc file
 591                            # without error messages when running from an editor
 592
 593         # turn off -b with warnings in case of conflicts with other options
 594         if ($in_place_modify) {
 595             if ( $rOpts->{'standard-output'} ) {
 596                 warn "Ignoring -b; you may not use -b and -st together\n";
 597                 $in_place_modify = 0;
 598             }
 599             if ($destination_stream) {
 600                 warn
 601 "Ignoring -b; you may not specify a destination array and -b together\n";
 602                 $in_place_modify = 0;
 603             }
 604             if ($source_stream) {
 605                 warn
 606 "Ignoring -b; you may not specify a source array and -b together\n";
 607                 $in_place_modify = 0;
 608             }
 609             if ( $rOpts->{'outfile'} ) {
 610                 warn "Ignoring -b; you may not use -b and -o together\n";
 611                 $in_place_modify = 0;
 612             }
 613             if ( defined( $rOpts->{'output-path'} ) ) {
 614                 warn "Ignoring -b; you may not use -b and -opath together\n";
 615                 $in_place_modify = 0;
 616             }
 617         }
 618
 619         Perl::Tidy::Formatter::check_options($rOpts);
 620         if ( $rOpts->{'format'} eq 'html' ) {
 621             Perl::Tidy::HtmlWriter->check_options($rOpts);
 622         }
 623
 624         # make the pattern of file extensions that we shouldn't touch
 625         my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 626         if ($output_extension) {
 627             my $ext = quotemeta($output_extension);
 628             $forbidden_file_extensions .= "|$ext";
 629         }
 630         if ( $in_place_modify && $backup_extension ) {
 631             my $ext = quotemeta($backup_extension);
 632             $forbidden_file_extensions .= "|$ext";
 633         }
 634         $forbidden_file_extensions .= ')$';
 635
 636         # Create a diagnostics object if requested;
 637         # This is only useful for code development
 638         my $diagnostics_object = undef;
 639         if ( $rOpts->{'DIAGNOSTICS'} ) {
 640             $diagnostics_object = Perl::Tidy::Diagnostics->new();
 641         }
 642
 643         # no filenames should be given if input is from an array
 644         if ($source_stream) {
 645             if ( @ARGV > 0 ) {
 646                 die
 647 "You may not specify any filenames when a source array is given\n";
 648             }
 649
 650             # we'll stuff the source array into ARGV
 651             unshift( @ARGV, $source_stream );
 652
 653             # No special treatment for source stream which is a filename.
 654             # This will enable checks for binary files and other bad stuff.
 655             $source_stream = undef unless ref($source_stream);
 656         }
 657
 658         # use stdin by default if no source array and no args
 659         else {
 660             unshift( @ARGV, '-' ) unless @ARGV;
 661         }
 662
 663         # loop to process all files in argument list
 664         my $number_of_files = @ARGV;
 665         my $formatter       = undef;
 666         $tokenizer = undef;
 667         while ( $input_file = shift @ARGV ) {
 668             my $fileroot;
 669             my $input_file_permissions;
 670
 671             #---------------------------------------------------------------
 672             # determine the input file name
 673             #---------------------------------------------------------------
 674             if ($source_stream) {
 675                 $fileroot = "perltidy";
 676             }
 677             elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 678                 $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
 679                 $in_place_modify = 0;
 680             }
 681             else {
 682                 $fileroot = $input_file;
 683                 unless ( -e $input_file ) {
 684
 685                     # file doesn't exist - check for a file glob
 686                     if ( $input_file =~ /([\?\*\[\{])/ ) {
 687
 688                         # Windows shell may not remove quotes, so do it
 689                         my $input_file = $input_file;
 690                         if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 691                         if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 692                         my $pattern = fileglob_to_re($input_file);
 693                         eval "/$pattern/";
 694                         if ( !$@ && opendir( DIR, './' ) ) {
 695                             my @files =
 696                               grep { /$pattern/ && !-d $_ } readdir(DIR);
 697                             closedir(DIR);
 698                             if (@files) {
 699                                 unshift @ARGV, @files;
 700                                 next;
 701                             }
 702                         }
 703                     }
 704                     print "skipping file: '$input_file': no matches found\n";
 705                     next;
 706                 }
 707
 708                 unless ( -f $input_file ) {
 709                     print "skipping file: $input_file: not a regular file\n";
 710                     next;
 711                 }
 712
 713                 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 714                     print
 715 "skipping file: $input_file: Non-text (override with -f)\n";
 716                     next;
 717                 }
 718
 719                 # we should have a valid filename now
 720                 $fileroot               = $input_file;
 721                 $input_file_permissions = ( stat $input_file )[2] & 07777;
 722
 723                 if ( $^O eq 'VMS' ) {
 724                     ( $fileroot, $dot ) = check_vms_filename($fileroot);
 725                 }
 726
 727                 # add option to change path here
 728                 if ( defined( $rOpts->{'output-path'} ) ) {
 729
 730                     my ( $base, $old_path ) = fileparse($fileroot);
 731                     my $new_path = $rOpts->{'output-path'};
 732                     unless ( -d $new_path ) {
 733                         unless ( mkdir $new_path, 0777 ) {
 734                             die "unable to create directory $new_path: $!\n";
 735                         }
 736                     }
 737                     my $path = $new_path;
 738                     $fileroot = catfile( $path, $base );
 739                     unless ($fileroot) {
 740                         die <<EOM;
 741 ------------------------------------------------------------------------
 742 Problem combining $new_path and $base to make a filename; check -opath
 743 ------------------------------------------------------------------------
 744 EOM
 745                     }
 746                 }
 747             }
 748
 749             # Skip files with same extension as the output files because
 750             # this can lead to a messy situation with files like
 751             # script.tdy.tdy.tdy ... or worse problems ...  when you
 752             # rerun perltidy over and over with wildcard input.
 753             if (
 754                 !$source_stream
 755                 && (   $input_file =~ /$forbidden_file_extensions/o
 756                     || $input_file eq 'DIAGNOSTICS' )
 757               )
 758             {
 759                 print "skipping file: $input_file: wrong extension\n";
 760                 next;
 761             }
 762
 763             # the 'source_object' supplies a method to read the input file
 764             my $source_object =
 765               Perl::Tidy::LineSource->new( $input_file, $rOpts,
 766                 $rpending_logfile_message );
 767             next unless ($source_object);
 768
 769             # register this file name with the Diagnostics package
 770             $diagnostics_object->set_input_file($input_file)
 771               if $diagnostics_object;
 772
 773             #---------------------------------------------------------------
 774             # determine the output file name
 775             #---------------------------------------------------------------
 776             my $output_file = undef;
 777             my $actual_output_extension;
 778
 779             if ( $rOpts->{'outfile'} ) {
 780
 781                 if ( $number_of_files <= 1 ) {
 782
 783                     if ( $rOpts->{'standard-output'} ) {
 784                         die "You may not use -o and -st together\n";
 785                     }
 786                     elsif ($destination_stream) {
 787                         die
 788 "You may not specify a destination array and -o together\n";
 789                     }
 790                     elsif ( defined( $rOpts->{'output-path'} ) ) {
 791                         die "You may not specify -o and -opath together\n";
 792                     }
 793                     elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 794                         die "You may not specify -o and -oext together\n";
 795                     }
 796                     $output_file = $rOpts->{outfile};
 797
 798                     # make sure user gives a file name after -o
 799                     if ( $output_file =~ /^-/ ) {
 800                         die "You must specify a valid filename after -o\n";
 801                     }
 802
 803                     # do not overwrite input file with -o
 804                     if ( defined($input_file_permissions)
 805                         && ( $output_file eq $input_file ) )
 806                     {
 807                         die
 808                           "Use 'perltidy -b $input_file' to modify in-place\n";
 809                     }
 810                 }
 811                 else {
 812                     die "You may not use -o with more than one input file\n";
 813                 }
 814             }
 815             elsif ( $rOpts->{'standard-output'} ) {
 816                 if ($destination_stream) {
 817                     die
 818 "You may not specify a destination array and -st together\n";
 819                 }
 820                 $output_file = '-';
 821
 822                 if ( $number_of_files <= 1 ) {
 823                 }
 824                 else {
 825                     die "You may not use -st with more than one input file\n";
 826                 }
 827             }
 828             elsif ($destination_stream) {
 829                 $output_file = $destination_stream;
 830             }
 831             elsif ($source_stream) {  # source but no destination goes to stdout
 832                 $output_file = '-';
 833             }
 834             elsif ( $input_file eq '-' ) {
 835                 $output_file = '-';
 836             }
 837             else {
 838                 if ($in_place_modify) {
 839                     $output_file = IO::File->new_tmpfile()
 840                       or die "cannot open temp file for -b option: $!\n";
 841                 }
 842                 else {
 843                     $actual_output_extension = $output_extension;
 844                     $output_file             = $fileroot . $output_extension;
 845                 }
 846             }
 847
 848             # the 'sink_object' knows how to write the output file
 849             my $tee_file = $fileroot . $dot . "TEE";
 850
 851             my $line_separator = $rOpts->{'output-line-ending'};
 852             if ( $rOpts->{'preserve-line-endings'} ) {
 853                 $line_separator = find_input_line_ending($input_file);
 854             }
 855
 856             # Eventually all I/O may be done with binmode, but for now it is
 857             # only done when a user requests a particular line separator
 858             # through the -ple or -ole flags
 859             my $binmode = 0;
 860             if   ( defined($line_separator) ) { $binmode        = 1 }
 861             else                              { $line_separator = "\n" }
 862
 863             my $sink_object =
 864               Perl::Tidy::LineSink->new( $output_file, $tee_file,
 865                 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
 866
 867             #---------------------------------------------------------------
 868             # initialize the error logger
 869             #---------------------------------------------------------------
 870             my $warning_file = $fileroot . $dot . "ERR";
 871             if ($errorfile_stream) { $warning_file = $errorfile_stream }
 872             my $log_file = $fileroot . $dot . "LOG";
 873             if ($logfile_stream) { $log_file = $logfile_stream }
 874
 875             my $logger_object =
 876               Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 877                 $saw_extrude );
 878             write_logfile_header(
 879                 $rOpts,        $logger_object, $config_file,
 880                 $rraw_options, $Windows_type
 881             );
 882             if ($$rpending_logfile_message) {
 883                 $logger_object->write_logfile_entry($$rpending_logfile_message);
 884             }
 885             if ($$rpending_complaint) {
 886                 $logger_object->complain($$rpending_complaint);
 887             }
 888
 889             #---------------------------------------------------------------
 890             # initialize the debug object, if any
 891             #---------------------------------------------------------------
 892             my $debugger_object = undef;
 893             if ( $rOpts->{DEBUG} ) {
 894                 $debugger_object =
 895                   Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 896             }
 897
 898             #---------------------------------------------------------------
 899             # create a formatter for this file : html writer or pretty printer
 900             #---------------------------------------------------------------
 901
 902             # we have to delete any old formatter because, for safety,
 903             # the formatter will check to see that there is only one.
 904             $formatter = undef;
 905
 906             if ($user_formatter) {
 907                 $formatter = $user_formatter;
 908             }
 909             elsif ( $rOpts->{'format'} eq 'html' ) {
 910                 $formatter =
 911                   Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
 912                     $actual_output_extension, $html_toc_extension,
 913                     $html_src_extension );
 914             }
 915             elsif ( $rOpts->{'format'} eq 'tidy' ) {
 916                 $formatter = Perl::Tidy::Formatter->new(
 917                     logger_object      => $logger_object,
 918                     diagnostics_object => $diagnostics_object,
 919                     sink_object        => $sink_object,
 920                 );
 921             }
 922             else {
 923                 die "I don't know how to do -format=$rOpts->{'format'}\n";
 924             }
 925
 926             unless ($formatter) {
 927                 die "Unable to continue with $rOpts->{'format'} formatting\n";
 928             }
 929
 930             #---------------------------------------------------------------
 931             # create the tokenizer for this file
 932             #---------------------------------------------------------------
 933             $tokenizer = undef;                     # must destroy old tokenizer
 934             $tokenizer = Perl::Tidy::Tokenizer->new(
 935                 source_object       => $source_object,
 936                 logger_object       => $logger_object,
 937                 debugger_object     => $debugger_object,
 938                 diagnostics_object  => $diagnostics_object,
 939                 starting_level      => $rOpts->{'starting-indentation-level'},
 940                 tabs                => $rOpts->{'tabs'},
 941                 indent_columns      => $rOpts->{'indent-columns'},
 942                 look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
 943                 look_for_autoloader => $rOpts->{'look-for-autoloader'},
 944                 look_for_selfloader => $rOpts->{'look-for-selfloader'},
 945                 trim_qw             => $rOpts->{'trim-qw'},
 946             );
 947
 948             #---------------------------------------------------------------
 949             # now we can do it
 950             #---------------------------------------------------------------
 951             process_this_file( $tokenizer, $formatter );
 952
 953             #---------------------------------------------------------------
 954             # close the input source and report errors
 955             #---------------------------------------------------------------
 956             $source_object->close_input_file();
 957
 958             # get file names to use for syntax check
 959             my $ifname = $source_object->get_input_file_copy_name();
 960             my $ofname = $sink_object->get_output_file_copy();
 961
 962             #---------------------------------------------------------------
 963             # handle the -b option (backup and modify in-place)
 964             #---------------------------------------------------------------
 965             if ($in_place_modify) {
 966                 unless ( -f $input_file ) {
 967
 968                     # oh, oh, no real file to backup ..
 969                     # shouldn't happen because of numerous preliminary checks
 970                     die print
 971 "problem with -b backing up input file '$input_file': not a file\n";
 972                 }
 973                 my $backup_name = $input_file . $backup_extension;
 974                 if ( -f $backup_name ) {
 975                     unlink($backup_name)
 976                       or die
 977 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
 978                 }
 979                 rename( $input_file, $backup_name )
 980                   or die
 981 "problem renaming $input_file to $backup_name for -b option: $!\n";
 982                 $ifname = $backup_name;
 983
 984                 seek( $output_file, 0, 0 )
 985                   or die "unable to rewind tmp file for -b option: $!\n";
 986
 987                 my $fout = IO::File->new("> $input_file")
 988                   or die
 989 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
 990                 binmode $fout;
 991                 my $line;
 992                 while ( $line = $output_file->getline() ) {
 993                     $fout->print($line);
 994                 }
 995                 $fout->close();
 996                 $output_file = $input_file;
 997                 $ofname      = $input_file;
 998             }
 999
1000             #---------------------------------------------------------------
1001             # clean up and report errors
1002             #---------------------------------------------------------------
1003             $sink_object->close_output_file()    if $sink_object;
1004             $debugger_object->close_debug_file() if $debugger_object;
1005
1006             my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1007             if ($output_file) {
1008
1009                 if ($input_file_permissions) {
1010
1011                     # give output script same permissions as input script, but
1012                     # make it user-writable or else we can't run perltidy again.
1013                     # Thus we retain whatever executable flags were set.
1014                     if ( $rOpts->{'format'} eq 'tidy' ) {
1015                         chmod( $input_file_permissions | 0600, $output_file );
1016                     }
1017
1018                     # else use default permissions for html and any other format
1019
1020                 }
1021                 if ( $logger_object && $rOpts->{'check-syntax'} ) {
1022                     $infile_syntax_ok =
1023                       check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1024                 }
1025             }
1026
1027             $logger_object->finish( $infile_syntax_ok, $formatter )
1028               if $logger_object;
1029         }    # end of loop to process all files
1030     }    # end of main program
1031 }
1032
1033 sub fileglob_to_re {
1034
1035     # modified (corrected) from version in find2perl
1036     my $x = shift;
1037     $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
1038     $x =~ s#\*#.*#g;               # '*' -> '.*'
1039     $x =~ s#\?#.#g;                # '?' -> '.'
1040     "^$x\\z";                      # match whole word
1041 }
1042
1043 sub make_extension {
1044
1045     # Make a file extension, including any leading '.' if necessary
1046     # The '.' may actually be an '_' under VMS
1047     my ( $extension, $default, $dot ) = @_;
1048
1049     # Use the default if none specified
1050     $extension = $default unless ($extension);
1051
1052     # Only extensions with these leading characters get a '.'
1053     # This rule gives the user some freedom
1054     if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1055         $extension = $dot . $extension;
1056     }
1057     return $extension;
1058 }
1059
1060 sub write_logfile_header {
1061     my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
1062       @_;
1063     $logger_object->write_logfile_entry(
1064 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1065     );
1066     if ($Windows_type) {
1067         $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1068     }
1069     my $options_string = join( ' ', @$rraw_options );
1070
1071     if ($config_file) {
1072         $logger_object->write_logfile_entry(
1073             "Found Configuration File >>> $config_file \n");
1074     }
1075     $logger_object->write_logfile_entry(
1076         "Configuration and command line parameters for this run:\n");
1077     $logger_object->write_logfile_entry("$options_string\n");
1078
1079     if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1080         $rOpts->{'logfile'} = 1;    # force logfile to be saved
1081         $logger_object->write_logfile_entry(
1082             "Final parameter set for this run\n");
1083         $logger_object->write_logfile_entry(
1084             "------------------------------------\n");
1085
1086         foreach ( keys %{$rOpts} ) {
1087             $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
1088         }
1089         $logger_object->write_logfile_entry(
1090             "------------------------------------\n");
1091     }
1092     $logger_object->write_logfile_entry(
1093         "To find error messages search for 'WARNING' with your editor\n");
1094 }
1095
1096 sub generate_options {
1097
1098     ######################################################################
1099     # Generate and return references to:
1100     #  @option_string - the list of options to be passed to Getopt::Long
1101     #  @defaults - the list of default options
1102     #  %expansion - a hash showing how all abbreviations are expanded
1103     #  %category - a hash giving the general category of each option
1104     #  %option_range - a hash giving the valid ranges of certain options
1105
1106     # Note: a few options are not documented in the man page and usage
1107     # message. This is because these are experimental or debug options and
1108     # may or may not be retained in future versions.
1109     #
1110     # Here are the undocumented flags as far as I know.  Any of them
1111     # may disappear at any time.  They are mainly for fine-tuning
1112     # and debugging.
1113     #
1114     # fll --> fuzzy-line-length           # a trivial parameter which gets
1115     #                                       turned off for the extrude option
1116     #                                       which is mainly for debugging
1117     # chk --> check-multiline-quotes      # check for old bug; to be deleted
1118     # scl --> short-concatenation-item-length   # helps break at '.'
1119     # recombine                           # for debugging line breaks
1120     # valign                              # for debugging vertical alignment
1121     # I   --> DIAGNOSTICS                 # for debugging
1122     ######################################################################
1123
1124     # here is a summary of the Getopt codes:
1125     # <none> does not take an argument
1126     # =s takes a mandatory string
1127     # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1128     # =i takes a mandatory integer
1129     # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1130     # ! does not take an argument and may be negated
1131     #  i.e., -foo and -nofoo are allowed
1132     # a double dash signals the end of the options list
1133     #
1134     #---------------------------------------------------------------
1135     # Define the option string passed to GetOptions.
1136     #---------------------------------------------------------------
1137
1138     my @option_string   = ();
1139     my %expansion       = ();
1140     my %option_category = ();
1141     my %option_range    = ();
1142     my $rexpansion      = \%expansion;
1143
1144     # names of categories in manual
1145     # leading integers will allow sorting
1146     my @category_name = (
1147         '0. I/O control',
1148         '1. Basic formatting options',
1149         '2. Code indentation control',
1150         '3. Whitespace control',
1151         '4. Comment controls',
1152         '5. Linebreak controls',
1153         '6. Controlling list formatting',
1154         '7. Retaining or ignoring existing line breaks',
1155         '8. Blank line control',
1156         '9. Other controls',
1157         '10. HTML options',
1158         '11. pod2html options',
1159         '12. Controlling HTML properties',
1160         '13. Debugging',
1161     );
1162
1163     #  These options are parsed directly by perltidy:
1164     #    help h
1165     #    version v
1166     #  However, they are included in the option set so that they will
1167     #  be seen in the options dump.
1168
1169     # These long option names have no abbreviations or are treated specially
1170     @option_string = qw(
1171       html!
1172       noprofile
1173       no-profile
1174       npro
1175       recombine!
1176       valign!
1177     );
1178
1179     my $category = 13;    # Debugging
1180     foreach (@option_string) {
1181         my $opt = $_;     # must avoid changing the actual flag
1182         $opt =~ s/!$//;
1183         $option_category{$opt} = $category_name[$category];
1184     }
1185
1186     $category = 11;                                       # HTML
1187     $option_category{html} = $category_name[$category];
1188
1189     # routine to install and check options
1190     my $add_option = sub {
1191         my ( $long_name, $short_name, $flag ) = @_;
1192         push @option_string, $long_name . $flag;
1193         $option_category{$long_name} = $category_name[$category];
1194         if ($short_name) {
1195             if ( $expansion{$short_name} ) {
1196                 my $existing_name = $expansion{$short_name}[0];
1197                 die
1198 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1199             }
1200             $expansion{$short_name} = [$long_name];
1201             if ( $flag eq '!' ) {
1202                 my $nshort_name = 'n' . $short_name;
1203                 my $nolong_name = 'no' . $long_name;
1204                 if ( $expansion{$nshort_name} ) {
1205                     my $existing_name = $expansion{$nshort_name}[0];
1206                     die
1207 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1208                 }
1209                 $expansion{$nshort_name} = [$nolong_name];
1210             }
1211         }
1212     };
1213
1214     # Install long option names which have a simple abbreviation.
1215     # Options with code '!' get standard negation ('no' for long names,
1216     # 'n' for abbreviations).  Categories follow the manual.
1217
1218     ###########################
1219     $category = 0;    # I/O_Control
1220     ###########################
1221     $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1222     $add_option->( 'backup-file-extension',      'bext',  '=s' );
1223     $add_option->( 'force-read-binary',          'f',     '!' );
1224     $add_option->( 'format',                     'fmt',   '=s' );
1225     $add_option->( 'logfile',                    'log',   '!' );
1226     $add_option->( 'logfile-gap',                'g',     ':i' );
1227     $add_option->( 'outfile',                    'o',     '=s' );
1228     $add_option->( 'output-file-extension',      'oext',  '=s' );
1229     $add_option->( 'output-path',                'opath', '=s' );
1230     $add_option->( 'profile',                    'pro',   '=s' );
1231     $add_option->( 'quiet',                      'q',     '!' );
1232     $add_option->( 'standard-error-output',      'se',    '!' );
1233     $add_option->( 'standard-output',            'st',    '!' );
1234     $add_option->( 'warning-output',             'w',     '!' );
1235
1236     # options which are both toggle switches and values moved here
1237     # to hide from tidyview (which does not show category 0 flags):
1238     # -ole moved here from category 1
1239     # -sil moved here from category 2
1240     $add_option->( 'output-line-ending',         'ole', '=s' );
1241     $add_option->( 'starting-indentation-level', 'sil', '=i' );
1242
1243     ########################################
1244     $category = 1;    # Basic formatting options
1245     ########################################
1246     $add_option->( 'check-syntax',             'syn',  '!' );
1247     $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
1248     $add_option->( 'indent-columns',           'i',    '=i' );
1249     $add_option->( 'maximum-line-length',      'l',    '=i' );
1250     $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
1251     $add_option->( 'preserve-line-endings',    'ple',  '!' );
1252     $add_option->( 'tabs',                     't',    '!' );
1253
1254     ########################################
1255     $category = 2;    # Code indentation control
1256     ########################################
1257     $add_option->( 'continuation-indentation',           'ci',   '=i' );
1258     $add_option->( 'line-up-parentheses',                'lp',   '!' );
1259     $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1260     $add_option->( 'outdent-keywords',                   'okw',  '!' );
1261     $add_option->( 'outdent-labels',                     'ola',  '!' );
1262     $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1263     $add_option->( 'indent-closing-brace',               'icb',  '!' );
1264     $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1265     $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1266     $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1267     $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1268     $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1269     $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1270
1271     ########################################
1272     $category = 3;    # Whitespace control
1273     ########################################
1274     $add_option->( 'add-semicolons',                            'asc',   '!' );
1275     $add_option->( 'add-whitespace',                            'aws',   '!' );
1276     $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1277     $add_option->( 'brace-tightness',                           'bt',    '=i' );
1278     $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1279     $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1280     $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1281     $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1282     $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1283     $add_option->( 'paren-tightness',                           'pt',    '=i' );
1284     $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1285     $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1286     $add_option->( 'space-function-paren',                      'sfp',   '!' );
1287     $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1288     $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1289     $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1290     $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1291     $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1292     $add_option->( 'trim-qw',                                   'tqw',   '!' );
1293     $add_option->( 'want-left-space',                           'wls',   '=s' );
1294     $add_option->( 'want-right-space',                          'wrs',   '=s' );
1295
1296     ########################################
1297     $category = 4;    # Comment controls
1298     ########################################
1299     $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1300     $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1301     $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1302     $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1303     $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1304     $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1305     $add_option->( 'closing-side-comments',             'csc',  '!' );
1306     $add_option->( 'format-skipping',                   'fs',   '!' );
1307     $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1308     $add_option->( 'format-skipping-end',               'fse',  '=s' );
1309     $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1310     $add_option->( 'indent-block-comments',             'ibc',  '!' );
1311     $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1312     $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
1313     $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1314     $add_option->( 'outdent-long-comments',             'olc',  '!' );
1315     $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1316     $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1317     $add_option->( 'static-block-comments',             'sbc',  '!' );
1318     $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1319     $add_option->( 'static-side-comments',              'ssc',  '!' );
1320
1321     ########################################
1322     $category = 5;    # Linebreak controls
1323     ########################################
1324     $add_option->( 'add-newlines',                        'anl',   '!' );
1325     $add_option->( 'block-brace-vertical-tightness',      'bbvt',  '=i' );
1326     $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1327     $add_option->( 'brace-vertical-tightness',            'bvt',   '=i' );
1328     $add_option->( 'brace-vertical-tightness-closing',    'bvtc',  '=i' );
1329     $add_option->( 'cuddled-else',                        'ce',    '!' );
1330     $add_option->( 'delete-old-newlines',                 'dnl',   '!' );
1331     $add_option->( 'opening-brace-always-on-right',       'bar',   '!' );
1332     $add_option->( 'opening-brace-on-new-line',           'bl',    '!' );
1333     $add_option->( 'opening-hash-brace-right',            'ohbr',  '!' );
1334     $add_option->( 'opening-paren-right',                 'opr',   '!' );
1335     $add_option->( 'opening-square-bracket-right',        'osbr',  '!' );
1336     $add_option->( 'opening-sub-brace-on-new-line',       'sbl',   '!' );
1337     $add_option->( 'paren-vertical-tightness',            'pvt',   '=i' );
1338     $add_option->( 'paren-vertical-tightness-closing',    'pvtc',  '=i' );
1339     $add_option->( 'stack-closing-hash-brace',            'schb',  '!' );
1340     $add_option->( 'stack-closing-paren',                 'scp',   '!' );
1341     $add_option->( 'stack-closing-square-bracket',        'scsb',  '!' );
1342     $add_option->( 'stack-opening-hash-brace',            'sohb',  '!' );
1343     $add_option->( 'stack-opening-paren',                 'sop',   '!' );
1344     $add_option->( 'stack-opening-square-bracket',        'sosb',  '!' );
1345     $add_option->( 'vertical-tightness',                  'vt',    '=i' );
1346     $add_option->( 'vertical-tightness-closing',          'vtc',   '=i' );
1347     $add_option->( 'want-break-after',                    'wba',   '=s' );
1348     $add_option->( 'want-break-before',                   'wbb',   '=s' );
1349     $add_option->( 'break-after-all-operators',           'baao',  '!' );
1350     $add_option->( 'break-before-all-operators',          'bbao',  '!' );
1351     $add_option->( 'keep-interior-semicolons',            'kis',   '!' );
1352
1353     ########################################
1354     $category = 6;    # Controlling list formatting
1355     ########################################
1356     $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1357     $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1358     $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1359
1360     ########################################
1361     $category = 7;    # Retaining or ignoring existing line breaks
1362     ########################################
1363     $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1364     $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1365     $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1366     $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
1367
1368     ########################################
1369     $category = 8;    # Blank line control
1370     ########################################
1371     $add_option->( 'blanks-before-blocks',            'bbb', '!' );
1372     $add_option->( 'blanks-before-comments',          'bbc', '!' );
1373     $add_option->( 'blanks-before-subs',              'bbs', '!' );
1374     $add_option->( 'long-block-line-count',           'lbl', '=i' );
1375     $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1376     $add_option->( 'swallow-optional-blank-lines',    'sob', '!' );
1377
1378     ########################################
1379     $category = 9;    # Other controls
1380     ########################################
1381     $add_option->( 'delete-block-comments',        'dbc',  '!' );
1382     $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1383     $add_option->( 'delete-pod',                   'dp',   '!' );
1384     $add_option->( 'delete-side-comments',         'dsc',  '!' );
1385     $add_option->( 'tee-block-comments',           'tbc',  '!' );
1386     $add_option->( 'tee-pod',                      'tp',   '!' );
1387     $add_option->( 'tee-side-comments',            'tsc',  '!' );
1388     $add_option->( 'look-for-autoloader',          'lal',  '!' );
1389     $add_option->( 'look-for-hash-bang',           'x',    '!' );
1390     $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1391     $add_option->( 'pass-version-line',            'pvl',  '!' );
1392
1393     ########################################
1394     $category = 13;    # Debugging
1395     ########################################
1396     $add_option->( 'DEBUG',                           'D',    '!' );
1397     $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1398     $add_option->( 'check-multiline-quotes',          'chk',  '!' );
1399     $add_option->( 'dump-defaults',                   'ddf',  '!' );
1400     $add_option->( 'dump-long-names',                 'dln',  '!' );
1401     $add_option->( 'dump-options',                    'dop',  '!' );
1402     $add_option->( 'dump-profile',                    'dpro', '!' );
1403     $add_option->( 'dump-short-names',                'dsn',  '!' );
1404     $add_option->( 'dump-token-types',                'dtt',  '!' );
1405     $add_option->( 'dump-want-left-space',            'dwls', '!' );
1406     $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1407     $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1408     $add_option->( 'help',                            'h',    '' );
1409     $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1410     $add_option->( 'show-options',                    'opt',  '!' );
1411     $add_option->( 'version',                         'v',    '' );
1412
1413     #---------------------------------------------------------------------
1414
1415     # The Perl::Tidy::HtmlWriter will add its own options to the string
1416     Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1417
1418     ########################################
1419     # Set categories 10, 11, 12
1420     ########################################
1421     # Based on their known order
1422     $category = 12;    # HTML properties
1423     foreach my $opt (@option_string) {
1424         my $long_name = $opt;
1425         $long_name =~ s/(!|=.*|:.*)$//;
1426         unless ( defined( $option_category{$long_name} ) ) {
1427             if ( $long_name =~ /^html-linked/ ) {
1428                 $category = 10;    # HTML options
1429             }
1430             elsif ( $long_name =~ /^pod2html/ ) {
1431                 $category = 11;    # Pod2html
1432             }
1433             $option_category{$long_name} = $category_name[$category];
1434         }
1435     }
1436
1437     #---------------------------------------------------------------
1438     # Assign valid ranges to certain options
1439     #---------------------------------------------------------------
1440     # In the future, these may be used to make preliminary checks
1441     # hash keys are long names
1442     # If key or value is undefined:
1443     #   strings may have any value
1444     #   integer ranges are >=0
1445     # If value is defined:
1446     #   value is [qw(any valid words)] for strings
1447     #   value is [min, max] for integers
1448     #   if min is undefined, there is no lower limit
1449     #   if max is undefined, there is no upper limit
1450     # Parameters not listed here have defaults
1451     %option_range = (
1452         'format'             => [ 'tidy', 'html', 'user' ],
1453         'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
1454
1455         'block-brace-tightness'    => [ 0, 2 ],
1456         'brace-tightness'          => [ 0, 2 ],
1457         'paren-tightness'          => [ 0, 2 ],
1458         'square-bracket-tightness' => [ 0, 2 ],
1459
1460         'block-brace-vertical-tightness'            => [ 0, 2 ],
1461         'brace-vertical-tightness'                  => [ 0, 2 ],
1462         'brace-vertical-tightness-closing'          => [ 0, 2 ],
1463         'paren-vertical-tightness'                  => [ 0, 2 ],
1464         'paren-vertical-tightness-closing'          => [ 0, 2 ],
1465         'square-bracket-vertical-tightness'         => [ 0, 2 ],
1466         'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1467         'vertical-tightness'                        => [ 0, 2 ],
1468         'vertical-tightness-closing'                => [ 0, 2 ],
1469
1470         'closing-brace-indentation'          => [ 0, 3 ],
1471         'closing-paren-indentation'          => [ 0, 3 ],
1472         'closing-square-bracket-indentation' => [ 0, 3 ],
1473         'closing-token-indentation'          => [ 0, 3 ],
1474
1475         'closing-side-comment-else-flag' => [ 0, 2 ],
1476         'comma-arrow-breakpoints'        => [ 0, 3 ],
1477     );
1478
1479     # Note: we could actually allow negative ci if someone really wants it:
1480     # $option_range{'continuation-indentation'} = [ undef, undef ];
1481
1482     #---------------------------------------------------------------
1483     # Assign default values to the above options here, except
1484     # for 'outfile' and 'help'.
1485     # These settings should approximate the perlstyle(1) suggestions.
1486     #---------------------------------------------------------------
1487     my @defaults = qw(
1488       add-newlines
1489       add-semicolons
1490       add-whitespace
1491       blanks-before-blocks
1492       blanks-before-comments
1493       blanks-before-subs
1494       block-brace-tightness=0
1495       block-brace-vertical-tightness=0
1496       brace-tightness=1
1497       brace-vertical-tightness-closing=0
1498       brace-vertical-tightness=0
1499       break-at-old-logical-breakpoints
1500       break-at-old-ternary-breakpoints
1501       break-at-old-keyword-breakpoints
1502       comma-arrow-breakpoints=1
1503       nocheck-syntax
1504       closing-side-comment-interval=6
1505       closing-side-comment-maximum-text=20
1506       closing-side-comment-else-flag=0
1507       closing-paren-indentation=0
1508       closing-brace-indentation=0
1509       closing-square-bracket-indentation=0
1510       continuation-indentation=2
1511       delete-old-newlines
1512       delete-semicolons
1513       fuzzy-line-length
1514       hanging-side-comments
1515       indent-block-comments
1516       indent-columns=4
1517       long-block-line-count=8
1518       look-for-autoloader
1519       look-for-selfloader
1520       maximum-consecutive-blank-lines=1
1521       maximum-fields-per-table=0
1522       maximum-line-length=80
1523       minimum-space-to-comment=4
1524       nobrace-left-and-indent
1525       nocuddled-else
1526       nodelete-old-whitespace
1527       nohtml
1528       nologfile
1529       noquiet
1530       noshow-options
1531       nostatic-side-comments
1532       noswallow-optional-blank-lines
1533       notabs
1534       nowarning-output
1535       outdent-labels
1536       outdent-long-quotes
1537       outdent-long-comments
1538       paren-tightness=1
1539       paren-vertical-tightness-closing=0
1540       paren-vertical-tightness=0
1541       pass-version-line
1542       recombine
1543       valign
1544       short-concatenation-item-length=8
1545       space-for-semicolon
1546       square-bracket-tightness=1
1547       square-bracket-vertical-tightness-closing=0
1548       square-bracket-vertical-tightness=0
1549       static-block-comments
1550       trim-qw
1551       format=tidy
1552       backup-file-extension=bak
1553       format-skipping
1554
1555       pod2html
1556       html-table-of-contents
1557       html-entities
1558     );
1559
1560     push @defaults, "perl-syntax-check-flags=-c -T";
1561
1562     #---------------------------------------------------------------
1563     # Define abbreviations which will be expanded into the above primitives.
1564     # These may be defined recursively.
1565     #---------------------------------------------------------------
1566     %expansion = (
1567         %expansion,
1568         'freeze-newlines'    => [qw(noadd-newlines nodelete-old-newlines)],
1569         'fnl'                => [qw(freeze-newlines)],
1570         'freeze-whitespace'  => [qw(noadd-whitespace nodelete-old-whitespace)],
1571         'fws'                => [qw(freeze-whitespace)],
1572         'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1573         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1574         'nooutdent-long-lines' =>
1575           [qw(nooutdent-long-quotes nooutdent-long-comments)],
1576         'noll' => [qw(nooutdent-long-lines)],
1577         'io'   => [qw(indent-only)],
1578         'delete-all-comments' =>
1579           [qw(delete-block-comments delete-side-comments delete-pod)],
1580         'nodelete-all-comments' =>
1581           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1582         'dac'  => [qw(delete-all-comments)],
1583         'ndac' => [qw(nodelete-all-comments)],
1584         'gnu'  => [qw(gnu-style)],
1585         'pbp'  => [qw(perl-best-practices)],
1586         'tee-all-comments' =>
1587           [qw(tee-block-comments tee-side-comments tee-pod)],
1588         'notee-all-comments' =>
1589           [qw(notee-block-comments notee-side-comments notee-pod)],
1590         'tac'   => [qw(tee-all-comments)],
1591         'ntac'  => [qw(notee-all-comments)],
1592         'html'  => [qw(format=html)],
1593         'nhtml' => [qw(format=tidy)],
1594         'tidy'  => [qw(format=tidy)],
1595
1596         'break-after-comma-arrows'   => [qw(cab=0)],
1597         'nobreak-after-comma-arrows' => [qw(cab=1)],
1598         'baa'                        => [qw(cab=0)],
1599         'nbaa'                       => [qw(cab=1)],
1600
1601         'break-at-old-trinary-breakpoints' => [qw(bot)],
1602
1603         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1604         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1605         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1606         'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1607         'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1608
1609         'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1610         'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1611         'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1612         'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1613         'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1614
1615         'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1616         'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1617         'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1618
1619         'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1620         'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1621         'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1622
1623         'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1624         'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1625         'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1626
1627         'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1628         'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1629         'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1630
1631         'otr'                   => [qw(opr ohbr osbr)],
1632         'opening-token-right'   => [qw(opr ohbr osbr)],
1633         'notr'                  => [qw(nopr nohbr nosbr)],
1634         'noopening-token-right' => [qw(nopr nohbr nosbr)],
1635
1636         'sot'                    => [qw(sop sohb sosb)],
1637         'nsot'                   => [qw(nsop nsohb nsosb)],
1638         'stack-opening-tokens'   => [qw(sop sohb sosb)],
1639         'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1640
1641         'sct'                    => [qw(scp schb scsb)],
1642         'stack-closing-tokens'   => => [qw(scp schb scsb)],
1643         'nsct'                   => [qw(nscp nschb nscsb)],
1644         'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1645
1646         # 'mangle' originally deleted pod and comments, but to keep it
1647         # reversible, it no longer does.  But if you really want to
1648         # delete them, just use:
1649         #   -mangle -dac
1650
1651         # An interesting use for 'mangle' is to do this:
1652         #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1653         # which will form as many one-line blocks as possible
1654
1655         'mangle' => [
1656             qw(
1657               check-syntax
1658               delete-old-newlines
1659               delete-old-whitespace
1660               delete-semicolons
1661               indent-columns=0
1662               maximum-consecutive-blank-lines=0
1663               maximum-line-length=100000
1664               noadd-newlines
1665               noadd-semicolons
1666               noadd-whitespace
1667               noblanks-before-blocks
1668               noblanks-before-subs
1669               notabs
1670               )
1671         ],
1672
1673         # 'extrude' originally deleted pod and comments, but to keep it
1674         # reversible, it no longer does.  But if you really want to
1675         # delete them, just use
1676         #   extrude -dac
1677         #
1678         # An interesting use for 'extrude' is to do this:
1679         #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1680         # which will break up all one-line blocks.
1681
1682         'extrude' => [
1683             qw(
1684               check-syntax
1685               ci=0
1686               delete-old-newlines
1687               delete-old-whitespace
1688               delete-semicolons
1689               indent-columns=0
1690               maximum-consecutive-blank-lines=0
1691               maximum-line-length=1
1692               noadd-semicolons
1693               noadd-whitespace
1694               noblanks-before-blocks
1695               noblanks-before-subs
1696               nofuzzy-line-length
1697               notabs
1698               norecombine
1699               )
1700         ],
1701
1702         # this style tries to follow the GNU Coding Standards (which do
1703         # not really apply to perl but which are followed by some perl
1704         # programmers).
1705         'gnu-style' => [
1706             qw(
1707               lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1708               )
1709         ],
1710
1711         # Style suggested in Damian Conway's Perl Best Practices
1712         'perl-best-practices' => [
1713             qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1714 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
1715         ],
1716
1717         # Additional styles can be added here
1718     );
1719
1720     Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1721
1722     # Uncomment next line to dump all expansions for debugging:
1723     # dump_short_names(\%expansion);
1724     return (
1725         \@option_string,   \@defaults, \%expansion,
1726         \%option_category, \%option_range
1727     );
1728
1729 }    # end of generate_options
1730
1731 sub process_command_line {
1732
1733     my (
1734         $perltidyrc_stream,  $is_Windows, $Windows_type,
1735         $rpending_complaint, $dump_options_type
1736     ) = @_;
1737
1738     use Getopt::Long;
1739
1740     my (
1741         $roption_string,   $rdefaults, $rexpansion,
1742         $roption_category, $roption_range
1743     ) = generate_options();
1744
1745     #---------------------------------------------------------------
1746     # set the defaults by passing the above list through GetOptions
1747     #---------------------------------------------------------------
1748     my %Opts = ();
1749     {
1750         local @ARGV;
1751         my $i;
1752
1753         # do not load the defaults if we are just dumping perltidyrc
1754         unless ( $dump_options_type eq 'perltidyrc' ) {
1755             for $i (@$rdefaults) { push @ARGV, "--" . $i }
1756         }
1757
1758         # Patch to save users Getopt::Long configuration
1759         # and set to Getopt::Long defaults.  Use eval to avoid
1760         # breaking old versions of Perl without these routines.
1761         my $glc;
1762         eval { $glc = Getopt::Long::Configure() };
1763         unless ($@) {
1764             eval { Getopt::Long::ConfigDefaults() };
1765         }
1766         else { $glc = undef }
1767
1768         if ( !GetOptions( \%Opts, @$roption_string ) ) {
1769             die "Programming Bug: error in setting default options";
1770         }
1771
1772         # Patch to put the previous Getopt::Long configuration back
1773         eval { Getopt::Long::Configure($glc) } if defined $glc;
1774     }
1775
1776     my $word;
1777     my @raw_options        = ();
1778     my $config_file        = "";
1779     my $saw_ignore_profile = 0;
1780     my $saw_extrude        = 0;
1781     my $saw_dump_profile   = 0;
1782     my $i;
1783
1784     #---------------------------------------------------------------
1785     # Take a first look at the command-line parameters.  Do as many
1786     # immediate dumps as possible, which can avoid confusion if the
1787     # perltidyrc file has an error.
1788     #---------------------------------------------------------------
1789     foreach $i (@ARGV) {
1790
1791         $i =~ s/^--/-/;
1792         if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1793             $saw_ignore_profile = 1;
1794         }
1795
1796         # note: this must come before -pro and -profile, below:
1797         elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1798             $saw_dump_profile = 1;
1799         }
1800         elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1801             if ($config_file) {
1802                 warn
1803 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1804             }
1805             $config_file = $2;
1806             unless ( -e $config_file ) {
1807                 warn "cannot find file given with -pro=$config_file: $!\n";
1808                 $config_file = "";
1809             }
1810         }
1811         elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1812             die "usage: -pro=filename or --profile=filename, no spaces\n";
1813         }
1814         elsif ( $i =~ /^-extrude$/ ) {
1815             $saw_extrude = 1;
1816         }
1817         elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1818             usage();
1819             exit 1;
1820         }
1821         elsif ( $i =~ /^-(version|v)$/ ) {
1822             show_version();
1823             exit 1;
1824         }
1825         elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1826             dump_defaults(@$rdefaults);
1827             exit 1;
1828         }
1829         elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1830             dump_long_names(@$roption_string);
1831             exit 1;
1832         }
1833         elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1834             dump_short_names($rexpansion);
1835             exit 1;
1836         }
1837         elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1838             Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1839             exit 1;
1840         }
1841     }
1842
1843     if ( $saw_dump_profile && $saw_ignore_profile ) {
1844         warn "No profile to dump because of -npro\n";
1845         exit 1;
1846     }
1847
1848     #---------------------------------------------------------------
1849     # read any .perltidyrc configuration file
1850     #---------------------------------------------------------------
1851     unless ($saw_ignore_profile) {
1852
1853         # resolve possible conflict between $perltidyrc_stream passed
1854         # as call parameter to perltidy and -pro=filename on command
1855         # line.
1856         if ($perltidyrc_stream) {
1857             if ($config_file) {
1858                 warn <<EOM;
1859  Conflict: a perltidyrc configuration file was specified both as this
1860  perltidy call parameter: $perltidyrc_stream
1861  and with this -profile=$config_file.
1862  Using -profile=$config_file.
1863 EOM
1864             }
1865             else {
1866                 $config_file = $perltidyrc_stream;
1867             }
1868         }
1869
1870         # look for a config file if we don't have one yet
1871         my $rconfig_file_chatter;
1872         $$rconfig_file_chatter = "";
1873         $config_file =
1874           find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1875             $rpending_complaint )
1876           unless $config_file;
1877
1878         # open any config file
1879         my $fh_config;
1880         if ($config_file) {
1881             ( $fh_config, $config_file ) =
1882               Perl::Tidy::streamhandle( $config_file, 'r' );
1883             unless ($fh_config) {
1884                 $$rconfig_file_chatter .=
1885                   "# $config_file exists but cannot be opened\n";
1886             }
1887         }
1888
1889         if ($saw_dump_profile) {
1890             if ($saw_dump_profile) {
1891                 dump_config_file( $fh_config, $config_file,
1892                     $rconfig_file_chatter );
1893                 exit 1;
1894             }
1895         }
1896
1897         if ($fh_config) {
1898
1899             my ( $rconfig_list, $death_message ) =
1900               read_config_file( $fh_config, $config_file, $rexpansion );
1901             die $death_message if ($death_message);
1902
1903             # process any .perltidyrc parameters right now so we can
1904             # localize errors
1905             if (@$rconfig_list) {
1906                 local @ARGV = @$rconfig_list;
1907
1908                 expand_command_abbreviations( $rexpansion, \@raw_options,
1909                     $config_file );
1910
1911                 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1912                     die
1913 "Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
1914                 }
1915
1916                 # Anything left in this local @ARGV is an error and must be
1917                 # invalid bare words from the configuration file.  We cannot
1918                 # check this earlier because bare words may have been valid
1919                 # values for parameters.  We had to wait for GetOptions to have
1920                 # a look at @ARGV.
1921                 if (@ARGV) {
1922                     my $count = @ARGV;
1923                     my $str   = "\'" . pop(@ARGV) . "\'";
1924                     while ( my $param = pop(@ARGV) ) {
1925                         if ( length($str) < 70 ) {
1926                             $str .= ", '$param'";
1927                         }
1928                         else {
1929                             $str .= ", ...";
1930                             last;
1931                         }
1932                     }
1933                     die <<EOM;
1934 There are $count unrecognized values in the configuration file '$config_file':
1935 $str
1936 Use leading dashes for parameters.  Use -npro to ignore this file.
1937 EOM
1938                 }
1939
1940                 # Undo any options which cause premature exit.  They are not
1941                 # appropriate for a config file, and it could be hard to
1942                 # diagnose the cause of the premature exit.
1943                 foreach (
1944                     qw{
1945                     dump-defaults
1946                     dump-long-names
1947                     dump-options
1948                     dump-profile
1949                     dump-short-names
1950                     dump-token-types
1951                     dump-want-left-space
1952                     dump-want-right-space
1953                     help
1954                     stylesheet
1955                     version
1956                     }
1957                   )
1958                 {
1959
1960                     if ( defined( $Opts{$_} ) ) {
1961                         delete $Opts{$_};
1962                         warn "ignoring --$_ in config file: $config_file\n";
1963                     }
1964                 }
1965             }
1966         }
1967     }
1968
1969     #---------------------------------------------------------------
1970     # now process the command line parameters
1971     #---------------------------------------------------------------
1972     expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
1973
1974     if ( !GetOptions( \%Opts, @$roption_string ) ) {
1975         die "Error on command line; for help try 'perltidy -h'\n";
1976     }
1977
1978     return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
1979         $rexpansion, $roption_category, $roption_range );
1980 }    # end of process_command_line
1981
1982 sub check_options {
1983
1984     my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
1985
1986     #---------------------------------------------------------------
1987     # check and handle any interactions among the basic options..
1988     #---------------------------------------------------------------
1989
1990     # Since -vt, -vtc, and -cti are abbreviations, but under
1991     # msdos, an unquoted input parameter like vtc=1 will be
1992     # seen as 2 parameters, vtc and 1, so the abbreviations
1993     # won't be seen.  Therefore, we will catch them here if
1994     # they get through.
1995
1996     if ( defined $rOpts->{'vertical-tightness'} ) {
1997         my $vt = $rOpts->{'vertical-tightness'};
1998         $rOpts->{'paren-vertical-tightness'}          = $vt;
1999         $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2000         $rOpts->{'brace-vertical-tightness'}          = $vt;
2001     }
2002
2003     if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2004         my $vtc = $rOpts->{'vertical-tightness-closing'};
2005         $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
2006         $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2007         $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
2008     }
2009
2010     if ( defined $rOpts->{'closing-token-indentation'} ) {
2011         my $cti = $rOpts->{'closing-token-indentation'};
2012         $rOpts->{'closing-square-bracket-indentation'} = $cti;
2013         $rOpts->{'closing-brace-indentation'}          = $cti;
2014         $rOpts->{'closing-paren-indentation'}          = $cti;
2015     }
2016
2017     # In quiet mode, there is no log file and hence no way to report
2018     # results of syntax check, so don't do it.
2019     if ( $rOpts->{'quiet'} ) {
2020         $rOpts->{'check-syntax'} = 0;
2021     }
2022
2023     # can't check syntax if no output
2024     if ( $rOpts->{'format'} ne 'tidy' ) {
2025         $rOpts->{'check-syntax'} = 0;
2026     }
2027
2028     # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2029     # wide variety of nasty problems on these systems, because they cannot
2030     # reliably run backticks.  Don't even think about changing this!
2031     if (   $rOpts->{'check-syntax'}
2032         && $is_Windows
2033         && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2034     {
2035         $rOpts->{'check-syntax'} = 0;
2036     }
2037
2038     # It's really a bad idea to check syntax as root unless you wrote
2039     # the script yourself.  FIXME: not sure if this works with VMS
2040     unless ($is_Windows) {
2041
2042         if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2043             $rOpts->{'check-syntax'} = 0;
2044             $$rpending_complaint .=
2045 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2046         }
2047     }
2048
2049     # see if user set a non-negative logfile-gap
2050     if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2051
2052         # a zero gap will be taken as a 1
2053         if ( $rOpts->{'logfile-gap'} == 0 ) {
2054             $rOpts->{'logfile-gap'} = 1;
2055         }
2056
2057         # setting a non-negative logfile gap causes logfile to be saved
2058         $rOpts->{'logfile'} = 1;
2059     }
2060
2061     # not setting logfile gap, or setting it negative, causes default of 50
2062     else {
2063         $rOpts->{'logfile-gap'} = 50;
2064     }
2065
2066     # set short-cut flag when only indentation is to be done.
2067     # Note that the user may or may not have already set the
2068     # indent-only flag.
2069     if (   !$rOpts->{'add-whitespace'}
2070         && !$rOpts->{'delete-old-whitespace'}
2071         && !$rOpts->{'add-newlines'}
2072         && !$rOpts->{'delete-old-newlines'} )
2073     {
2074         $rOpts->{'indent-only'} = 1;
2075     }
2076
2077     # -isbc implies -ibc
2078     if ( $rOpts->{'indent-spaced-block-comments'} ) {
2079         $rOpts->{'indent-block-comments'} = 1;
2080     }
2081
2082     # -bli flag implies -bl
2083     if ( $rOpts->{'brace-left-and-indent'} ) {
2084         $rOpts->{'opening-brace-on-new-line'} = 1;
2085     }
2086
2087     if (   $rOpts->{'opening-brace-always-on-right'}
2088         && $rOpts->{'opening-brace-on-new-line'} )
2089     {
2090         warn <<EOM;
2091  Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2092   'opening-brace-on-new-line' (-bl).  Ignoring -bl.
2093 EOM
2094         $rOpts->{'opening-brace-on-new-line'} = 0;
2095     }
2096
2097     # it simplifies things if -bl is 0 rather than undefined
2098     if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2099         $rOpts->{'opening-brace-on-new-line'} = 0;
2100     }
2101
2102     # -sbl defaults to -bl if not defined
2103     if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2104         $rOpts->{'opening-sub-brace-on-new-line'} =
2105           $rOpts->{'opening-brace-on-new-line'};
2106     }
2107
2108     # set shortcut flag if no blanks to be written
2109     unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
2110         $rOpts->{'swallow-optional-blank-lines'} = 1;
2111     }
2112
2113     if ( $rOpts->{'entab-leading-whitespace'} ) {
2114         if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2115             warn "-et=n must use a positive integer; ignoring -et\n";
2116             $rOpts->{'entab-leading-whitespace'} = undef;
2117         }
2118
2119         # entab leading whitespace has priority over the older 'tabs' option
2120         if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2121     }
2122 }
2123
2124 sub expand_command_abbreviations {
2125
2126     # go through @ARGV and expand any abbreviations
2127
2128     my ( $rexpansion, $rraw_options, $config_file ) = @_;
2129     my ($word);
2130
2131     # set a pass limit to prevent an infinite loop;
2132     # 10 should be plenty, but it may be increased to allow deeply
2133     # nested expansions.
2134     my $max_passes = 10;
2135     my @new_argv   = ();
2136
2137     # keep looping until all expansions have been converted into actual
2138     # dash parameters..
2139     for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2140         my @new_argv     = ();
2141         my $abbrev_count = 0;
2142
2143         # loop over each item in @ARGV..
2144         foreach $word (@ARGV) {
2145
2146             # convert any leading 'no-' to just 'no'
2147             if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2148
2149             # if it is a dash flag (instead of a file name)..
2150             if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2151
2152                 my $abr   = $1;
2153                 my $flags = $2;
2154
2155                 # save the raw input for debug output in case of circular refs
2156                 if ( $pass_count == 0 ) {
2157                     push( @$rraw_options, $word );
2158                 }
2159
2160                 # recombine abbreviation and flag, if necessary,
2161                 # to allow abbreviations with arguments such as '-vt=1'
2162                 if ( $rexpansion->{ $abr . $flags } ) {
2163                     $abr   = $abr . $flags;
2164                     $flags = "";
2165                 }
2166
2167                 # if we see this dash item in the expansion hash..
2168                 if ( $rexpansion->{$abr} ) {
2169                     $abbrev_count++;
2170
2171                     # stuff all of the words that it expands to into the
2172                     # new arg list for the next pass
2173                     foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2174                         next unless $abbrev;    # for safety; shouldn't happen
2175                         push( @new_argv, '--' . $abbrev . $flags );
2176                     }
2177                 }
2178
2179                 # not in expansion hash, must be actual long name
2180                 else {
2181                     push( @new_argv, $word );
2182                 }
2183             }
2184
2185             # not a dash item, so just save it for the next pass
2186             else {
2187                 push( @new_argv, $word );
2188             }
2189         }    # end of this pass
2190
2191         # update parameter list @ARGV to the new one
2192         @ARGV = @new_argv;
2193         last unless ( $abbrev_count > 0 );
2194
2195         # make sure we are not in an infinite loop
2196         if ( $pass_count == $max_passes ) {
2197             print STDERR
2198 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2199             print STDERR "Here are the raw options\n";
2200             local $" = ')(';
2201             print STDERR "(@$rraw_options)\n";
2202             my $num = @new_argv;
2203
2204             if ( $num < 50 ) {
2205                 print STDERR "After $max_passes passes here is ARGV\n";
2206                 print STDERR "(@new_argv)\n";
2207             }
2208             else {
2209                 print STDERR "After $max_passes passes ARGV has $num entries\n";
2210             }
2211
2212             if ($config_file) {
2213                 die <<"DIE";
2214 Please check your configuration file $config_file for circular-references.
2215 To deactivate it, use -npro.
2216 DIE
2217             }
2218             else {
2219                 die <<'DIE';
2220 Program bug - circular-references in the %expansion hash, probably due to
2221 a recent program change.
2222 DIE
2223             }
2224         }    # end of check for circular references
2225     }    # end of loop over all passes
2226 }
2227
2228 # Debug routine -- this will dump the expansion hash
2229 sub dump_short_names {
2230     my $rexpansion = shift;
2231     print STDOUT <<EOM;
2232 List of short names.  This list shows how all abbreviations are
2233 translated into other abbreviations and, eventually, into long names.
2234 New abbreviations may be defined in a .perltidyrc file.
2235 For a list of all long names, use perltidy --dump-long-names (-dln).
2236 --------------------------------------------------------------------------
2237 EOM
2238     foreach my $abbrev ( sort keys %$rexpansion ) {
2239         my @list = @{ $$rexpansion{$abbrev} };
2240         print STDOUT "$abbrev --> @list\n";
2241     }
2242 }
2243
2244 sub check_vms_filename {
2245
2246     # given a valid filename (the perltidy input file)
2247     # create a modified filename and separator character
2248     # suitable for VMS.
2249     #
2250     # Contributed by Michael Cartmell
2251     #
2252     my ( $base, $path ) = fileparse( $_[0] );
2253
2254     # remove explicit ; version
2255     $base =~ s/;-?\d*$//
2256
2257       # remove explicit . version ie two dots in filename NB ^ escapes a dot
2258       or $base =~ s/(          # begin capture $1
2259                   (?:^|[^^])\. # match a dot not preceded by a caret
2260                   (?:          # followed by nothing
2261                     |          # or
2262                     .*[^^]     # anything ending in a non caret
2263                   )
2264                 )              # end capture $1
2265                 \.-?\d*$       # match . version number
2266               /$1/x;
2267
2268     # normalise filename, if there are no unescaped dots then append one
2269     $base .= '.' unless $base =~ /(?:^|[^^])\./;
2270
2271     # if we don't already have an extension then we just append the extention
2272     my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2273     return ( $path . $base, $separator );
2274 }
2275
2276 sub Win_OS_Type {
2277
2278     # TODO: are these more standard names?
2279     # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2280
2281     # Returns a string that determines what MS OS we are on.
2282     # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2283     # Returns blank string if not an MS system.
2284     # Original code contributed by: Yves Orton
2285     # We need to know this to decide where to look for config files
2286
2287     my $rpending_complaint = shift;
2288     my $os                 = "";
2289     return $os unless $^O =~ /win32|dos/i;    # is it a MS box?
2290
2291     # Systems built from Perl source may not have Win32.pm
2292     # But probably have Win32::GetOSVersion() anyway so the
2293     # following line is not 'required':
2294     # return $os unless eval('require Win32');
2295
2296     # Use the standard API call to determine the version
2297     my ( $undef, $major, $minor, $build, $id );
2298     eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2299
2300     #
2301     #    NAME                   ID   MAJOR  MINOR
2302     #    Windows NT 4           2      4       0
2303     #    Windows 2000           2      5       0
2304     #    Windows XP             2      5       1
2305     #    Windows Server 2003    2      5       2
2306
2307     return "win32s" unless $id;    # If id==0 then its a win32s box.
2308     $os = {                        # Magic numbers from MSDN
2309                                    # documentation of GetOSVersion
2310         1 => {
2311             0  => "95",
2312             10 => "98",
2313             90 => "Me"
2314         },
2315         2 => {
2316             0  => "2000",          # or NT 4, see below
2317             1  => "XP/.Net",
2318             2  => "Win2003",
2319             51 => "NT3.51"
2320         }
2321     }->{$id}->{$minor};
2322
2323     # If $os is undefined, the above code is out of date.  Suggested updates
2324     # are welcome.
2325     unless ( defined $os ) {
2326         $os = "";
2327         $$rpending_complaint .= <<EOS;
2328 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2329 We won't be able to look for a system-wide config file.
2330 EOS
2331     }
2332
2333     # Unfortunately the logic used for the various versions isnt so clever..
2334     # so we have to handle an outside case.
2335     return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2336 }
2337
2338 sub is_unix {
2339     return ( $^O !~ /win32|dos/i )
2340       && ( $^O ne 'VMS' )
2341       && ( $^O ne 'OS2' )
2342       && ( $^O ne 'MacOS' );
2343 }
2344
2345 sub look_for_Windows {
2346
2347     # determine Windows sub-type and location of
2348     # system-wide configuration files
2349     my $rpending_complaint = shift;
2350     my $is_Windows         = ( $^O =~ /win32|dos/i );
2351     my $Windows_type       = Win_OS_Type($rpending_complaint) if $is_Windows;
2352     return ( $is_Windows, $Windows_type );
2353 }
2354
2355 sub find_config_file {
2356
2357     # look for a .perltidyrc configuration file
2358     my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2359         $rpending_complaint ) = @_;
2360
2361     $$rconfig_file_chatter .= "# Config file search...system reported as:";
2362     if ($is_Windows) {
2363         $$rconfig_file_chatter .= "Windows $Windows_type\n";
2364     }
2365     else {
2366         $$rconfig_file_chatter .= " $^O\n";
2367     }
2368
2369     # sub to check file existance and record all tests
2370     my $exists_config_file = sub {
2371         my $config_file = shift;
2372         return 0 unless $config_file;
2373         $$rconfig_file_chatter .= "# Testing: $config_file\n";
2374         return -f $config_file;
2375     };
2376
2377     my $config_file;
2378
2379     # look in current directory first
2380     $config_file = ".perltidyrc";
2381     return $config_file if $exists_config_file->($config_file);
2382
2383     # Default environment vars.
2384     my @envs = qw(PERLTIDY HOME);
2385
2386     # Check the NT/2k/XP locations, first a local machine def, then a
2387     # network def
2388     push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2389
2390     # Now go through the enviornment ...
2391     foreach my $var (@envs) {
2392         $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2393         if ( defined( $ENV{$var} ) ) {
2394             $$rconfig_file_chatter .= " = $ENV{$var}\n";
2395
2396             # test ENV{ PERLTIDY } as file:
2397             if ( $var eq 'PERLTIDY' ) {
2398                 $config_file = "$ENV{$var}";
2399                 return $config_file if $exists_config_file->($config_file);
2400             }
2401
2402             # test ENV as directory:
2403             $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2404             return $config_file if $exists_config_file->($config_file);
2405         }
2406         else {
2407             $$rconfig_file_chatter .= "\n";
2408         }
2409     }
2410
2411     # then look for a system-wide definition
2412     # where to look varies with OS
2413     if ($is_Windows) {
2414
2415         if ($Windows_type) {
2416             my ( $os, $system, $allusers ) =
2417               Win_Config_Locs( $rpending_complaint, $Windows_type );
2418
2419             # Check All Users directory, if there is one.
2420             if ($allusers) {
2421                 $config_file = catfile( $allusers, ".perltidyrc" );
2422                 return $config_file if $exists_config_file->($config_file);
2423             }
2424
2425             # Check system directory.
2426             $config_file = catfile( $system, ".perltidyrc" );
2427             return $config_file if $exists_config_file->($config_file);
2428         }
2429     }
2430
2431     # Place to add customization code for other systems
2432     elsif ( $^O eq 'OS2' ) {
2433     }
2434     elsif ( $^O eq 'MacOS' ) {
2435     }
2436     elsif ( $^O eq 'VMS' ) {
2437     }
2438
2439     # Assume some kind of Unix
2440     else {
2441
2442         $config_file = "/usr/local/etc/perltidyrc";
2443         return $config_file if $exists_config_file->($config_file);
2444
2445         $config_file = "/etc/perltidyrc";
2446         return $config_file if $exists_config_file->($config_file);
2447     }
2448
2449     # Couldn't find a config file
2450     return;
2451 }
2452
2453 sub Win_Config_Locs {
2454
2455     # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2456     # or undef if its not a win32 OS.  In list context returns OS, System
2457     # Directory, and All Users Directory.  All Users will be empty on a
2458     # 9x/Me box.  Contributed by: Yves Orton.
2459
2460     my $rpending_complaint = shift;
2461     my $os = (@_) ? shift : Win_OS_Type();
2462     return unless $os;
2463
2464     my $system   = "";
2465     my $allusers = "";
2466
2467     if ( $os =~ /9[58]|Me/ ) {
2468         $system = "C:/Windows";
2469     }
2470     elsif ( $os =~ /NT|XP|200?/ ) {
2471         $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2472         $allusers =
2473           ( $os =~ /NT/ )
2474           ? "C:/WinNT/profiles/All Users/"
2475           : "C:/Documents and Settings/All Users/";
2476     }
2477     else {
2478
2479         # This currently would only happen on a win32s computer.  I dont have
2480         # one to test, so I am unsure how to proceed.  Suggestions welcome!
2481         $$rpending_complaint .=
2482 "I dont know a sensible place to look for config files on an $os system.\n";
2483         return;
2484     }
2485     return wantarray ? ( $os, $system, $allusers ) : $os;
2486 }
2487
2488 sub dump_config_file {
2489     my $fh                   = shift;
2490     my $config_file          = shift;
2491     my $rconfig_file_chatter = shift;
2492     print STDOUT "$$rconfig_file_chatter";
2493     if ($fh) {
2494         print STDOUT "# Dump of file: '$config_file'\n";
2495         while ( my $line = $fh->getline() ) { print STDOUT $line }
2496         eval { $fh->close() };
2497     }
2498     else {
2499         print STDOUT "# ...no config file found\n";
2500     }
2501 }
2502
2503 sub read_config_file {
2504
2505     my ( $fh, $config_file, $rexpansion ) = @_;
2506     my @config_list = ();
2507
2508     # file is bad if non-empty $death_message is returned
2509     my $death_message = "";
2510
2511     my $name = undef;
2512     my $line_no;
2513     while ( my $line = $fh->getline() ) {
2514         $line_no++;
2515         chomp $line;
2516         next if $line =~ /^\s*#/;    # skip full-line comment
2517         ( $line, $death_message ) =
2518           strip_comment( $line, $config_file, $line_no );
2519         last if ($death_message);
2520         $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
2521         next unless $line;
2522
2523         # look for something of the general form
2524         #    newname { body }
2525         # or just
2526         #    body
2527
2528         if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2529             my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2530
2531             # handle a new alias definition
2532             if ($newname) {
2533                 if ($name) {
2534                     $death_message =
2535 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2536                     last;
2537                 }
2538                 $name = $newname;
2539
2540                 if ( ${$rexpansion}{$name} ) {
2541                     local $" = ')(';
2542                     my @names = sort keys %$rexpansion;
2543                     $death_message =
2544                         "Here is a list of all installed aliases\n(@names)\n"
2545                       . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2546                     last;
2547                 }
2548                 ${$rexpansion}{$name} = [];
2549             }
2550
2551             # now do the body
2552             if ($body) {
2553
2554                 my ( $rbody_parts, $msg ) = parse_args($body);
2555                 if ($msg) {
2556                     $death_message = <<EOM;
2557 Error reading file '$config_file' at line number $line_no.
2558 $msg
2559 Please fix this line or use -npro to avoid reading this file
2560 EOM
2561                     last;
2562                 }
2563
2564                 if ($name) {
2565
2566                     # remove leading dashes if this is an alias
2567                     foreach (@$rbody_parts) { s/^\-+//; }
2568                     push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2569                 }
2570                 else {
2571                     push( @config_list, @$rbody_parts );
2572                 }
2573             }
2574
2575             if ($curly) {
2576                 unless ($name) {
2577                     $death_message =
2578 "Unexpected '}' seen in config file $config_file line $.\n";
2579                     last;
2580                 }
2581                 $name = undef;
2582             }
2583         }
2584     }
2585     eval { $fh->close() };
2586     return ( \@config_list, $death_message );
2587 }
2588
2589 sub strip_comment {
2590
2591     my ( $instr, $config_file, $line_no ) = @_;
2592     my $msg = "";
2593
2594     # nothing to do if no comments
2595     if ( $instr !~ /#/ ) {
2596         return ( $instr, $msg );
2597     }
2598
2599     # use simple method of no quotes
2600     elsif ( $instr !~ /['"]/ ) {
2601         $instr =~ s/\s*\#.*$//;    # simple trim
2602         return ( $instr, $msg );
2603     }
2604
2605     # handle comments and quotes
2606     my $outstr     = "";
2607     my $quote_char = "";
2608     while (1) {
2609
2610         # looking for ending quote character
2611         if ($quote_char) {
2612             if ( $instr =~ /\G($quote_char)/gc ) {
2613                 $quote_char = "";
2614                 $outstr .= $1;
2615             }
2616             elsif ( $instr =~ /\G(.)/gc ) {
2617                 $outstr .= $1;
2618             }
2619
2620             # error..we reached the end without seeing the ending quote char
2621             else {
2622                 $msg = <<EOM;
2623 Error reading file $config_file at line number $line_no.
2624 Did not see ending quote character <$quote_char> in this text:
2625 $instr
2626 Please fix this line or use -npro to avoid reading this file
2627 EOM
2628                 last;
2629             }
2630         }
2631
2632         # accumulating characters and looking for start of a quoted string
2633         else {
2634             if ( $instr =~ /\G([\"\'])/gc ) {
2635                 $outstr .= $1;
2636                 $quote_char = $1;
2637             }
2638             elsif ( $instr =~ /\G#/gc ) {
2639                 last;
2640             }
2641             elsif ( $instr =~ /\G(.)/gc ) {
2642                 $outstr .= $1;
2643             }
2644             else {
2645                 last;
2646             }
2647         }
2648     }
2649     return ( $outstr, $msg );
2650 }
2651
2652 sub parse_args {
2653
2654     # Parse a command string containing multiple string with possible
2655     # quotes, into individual commands.  It might look like this, for example:
2656     #
2657     #    -wba=" + - "  -some-thing -wbb='. && ||'
2658     #
2659     # There is no need, at present, to handle escaped quote characters.
2660     # (They are not perltidy tokens, so needn't be in strings).
2661
2662     my ($body)     = @_;
2663     my @body_parts = ();
2664     my $quote_char = "";
2665     my $part       = "";
2666     my $msg        = "";
2667     while (1) {
2668
2669         # looking for ending quote character
2670         if ($quote_char) {
2671             if ( $body =~ /\G($quote_char)/gc ) {
2672                 $quote_char = "";
2673             }
2674             elsif ( $body =~ /\G(.)/gc ) {
2675                 $part .= $1;
2676             }
2677
2678             # error..we reached the end without seeing the ending quote char
2679             else {
2680                 if ( length($part) ) { push @body_parts, $part; }
2681                 $msg = <<EOM;
2682 Did not see ending quote character <$quote_char> in this text:
2683 $body
2684 EOM
2685                 last;
2686             }
2687         }
2688
2689         # accumulating characters and looking for start of a quoted string
2690         else {
2691             if ( $body =~ /\G([\"\'])/gc ) {
2692                 $quote_char = $1;
2693             }
2694             elsif ( $body =~ /\G(\s+)/gc ) {
2695                 if ( length($part) ) { push @body_parts, $part; }
2696                 $part = "";
2697             }
2698             elsif ( $body =~ /\G(.)/gc ) {
2699                 $part .= $1;
2700             }
2701             else {
2702                 if ( length($part) ) { push @body_parts, $part; }
2703                 last;
2704             }
2705         }
2706     }
2707     return ( \@body_parts, $msg );
2708 }
2709
2710 sub dump_long_names {
2711
2712     my @names = sort @_;
2713     print STDOUT <<EOM;
2714 # Command line long names (passed to GetOptions)
2715 #---------------------------------------------------------------
2716 # here is a summary of the Getopt codes:
2717 # <none> does not take an argument
2718 # =s takes a mandatory string
2719 # :s takes an optional string
2720 # =i takes a mandatory integer
2721 # :i takes an optional integer
2722 # ! does not take an argument and may be negated
2723 #  i.e., -foo and -nofoo are allowed
2724 # a double dash signals the end of the options list
2725 #
2726 #---------------------------------------------------------------
2727 EOM
2728
2729     foreach (@names) { print STDOUT "$_\n" }
2730 }
2731
2732 sub dump_defaults {
2733     my @defaults = sort @_;
2734     print STDOUT "Default command line options:\n";
2735     foreach (@_) { print STDOUT "$_\n" }
2736 }
2737
2738 sub dump_options {
2739
2740     # write the options back out as a valid .perltidyrc file
2741     my ( $rOpts, $roption_string ) = @_;
2742     my %Getopt_flags;
2743     my $rGetopt_flags = \%Getopt_flags;
2744     foreach my $opt ( @{$roption_string} ) {
2745         my $flag = "";
2746         if ( $opt =~ /(.*)(!|=.*)$/ ) {
2747             $opt  = $1;
2748             $flag = $2;
2749         }
2750         if ( defined( $rOpts->{$opt} ) ) {
2751             $rGetopt_flags->{$opt} = $flag;
2752         }
2753     }
2754     print STDOUT "# Final parameter set for this run:\n";
2755     foreach my $key ( sort keys %{$rOpts} ) {
2756         my $flag   = $rGetopt_flags->{$key};
2757         my $value  = $rOpts->{$key};
2758         my $prefix = '--';
2759         my $suffix = "";
2760         if ($flag) {
2761             if ( $flag =~ /^=/ ) {
2762                 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2763                 $suffix = "=" . $value;
2764             }
2765             elsif ( $flag =~ /^!/ ) {
2766                 $prefix .= "no" unless ($value);
2767             }
2768             else {
2769
2770                 # shouldn't happen
2771                 print
2772                   "# ERROR in dump_options: unrecognized flag $flag for $key\n";
2773             }
2774         }
2775         print STDOUT $prefix . $key . $suffix . "\n";
2776     }
2777 }
2778
2779 sub show_version {
2780     print <<"EOM";
2781 This is perltidy, v$VERSION
2782
2783 Copyright 2000-2007, Steve Hancock
2784
2785 Perltidy is free software and may be copied under the terms of the GNU
2786 General Public License, which is included in the distribution files.
2787
2788 Complete documentation for perltidy can be found using 'man perltidy'
2789 or on the internet at http://perltidy.sourceforge.net.
2790 EOM
2791 }
2792
2793 sub usage {
2794
2795     print STDOUT <<EOF;
2796 This is perltidy version $VERSION, a perl script indenter.  Usage:
2797
2798     perltidy [ options ] file1 file2 file3 ...
2799             (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2800     perltidy [ options ] file1 -o outfile
2801     perltidy [ options ] file1 -st >outfile
2802     perltidy [ options ] <infile >outfile
2803
2804 Options have short and long forms. Short forms are shown; see
2805 man pages for long forms.  Note: '=s' indicates a required string,
2806 and '=n' indicates a required integer.
2807
2808 I/O control
2809  -h      show this help
2810  -o=file name of the output file (only if single input file)
2811  -oext=s change output extension from 'tdy' to s
2812  -opath=path  change path to be 'path' for output files
2813  -b      backup original to .bak and modify file in-place
2814  -bext=s change default backup extension from 'bak' to s
2815  -q      deactivate error messages (for running under editor)
2816  -w      include non-critical warning messages in the .ERR error output
2817  -syn    run perl -c to check syntax (default under unix systems)
2818  -log    save .LOG file, which has useful diagnostics
2819  -f      force perltidy to read a binary file
2820  -g      like -log but writes more detailed .LOG file, for debugging scripts
2821  -opt    write the set of options actually used to a .LOG file
2822  -npro   ignore .perltidyrc configuration command file
2823  -pro=file   read configuration commands from file instead of .perltidyrc
2824  -st     send output to standard output, STDOUT
2825  -se     send error output to standard error output, STDERR
2826  -v      display version number to standard output and quit
2827
2828 Basic Options:
2829  -i=n    use n columns per indentation level (default n=4)
2830  -t      tabs: use one tab character per indentation level, not recommeded
2831  -nt     no tabs: use n spaces per indentation level (default)
2832  -et=n   entab leading whitespace n spaces per tab; not recommended
2833  -io     "indent only": just do indentation, no other formatting.
2834  -sil=n  set starting indentation level to n;  use if auto detection fails
2835  -ole=s  specify output line ending (s=dos or win, mac, unix)
2836  -ple    keep output line endings same as input (input must be filename)
2837
2838 Whitespace Control
2839  -fws    freeze whitespace; this disables all whitespace changes
2840            and disables the following switches:
2841  -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
2842  -bbt    same as -bt but for code block braces; same as -bt if not given
2843  -bbvt   block braces vertically tight; use with -bl or -bli
2844  -bbvtl=s  make -bbvt to apply to selected list of block types
2845  -pt=n   paren tightness (n=0, 1 or 2)
2846  -sbt=n  square bracket tightness (n=0, 1, or 2)
2847  -bvt=n  brace vertical tightness,
2848          n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2849  -pvt=n  paren vertical tightness (see -bvt for n)
2850  -sbvt=n square bracket vertical tightness (see -bvt for n)
2851  -bvtc=n closing brace vertical tightness:
2852          n=(0=open, 1=sometimes close, 2=always close)
2853  -pvtc=n closing paren vertical tightness, see -bvtc for n.
2854  -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2855  -ci=n   sets continuation indentation=n,  default is n=2 spaces
2856  -lp     line up parentheses, brackets, and non-BLOCK braces
2857  -sfs    add space before semicolon in for( ; ; )
2858  -aws    allow perltidy to add whitespace (default)
2859  -dws    delete all old non-essential whitespace
2860  -icb    indent closing brace of a code block
2861  -cti=n  closing indentation of paren, square bracket, or non-block brace:
2862          n=0 none, =1 align with opening, =2 one full indentation level
2863  -icp    equivalent to -cti=2
2864  -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
2865  -wrs=s  want space right of tokens in string;
2866  -sts    put space before terminal semicolon of a statement
2867  -sak=s  put space between keywords given in s and '(';
2868  -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2869
2870 Line Break Control
2871  -fnl    freeze newlines; this disables all line break changes
2872             and disables the following switches:
2873  -anl    add newlines;  ok to introduce new line breaks
2874  -bbs    add blank line before subs and packages
2875  -bbc    add blank line before block comments
2876  -bbb    add blank line between major blocks
2877  -sob    swallow optional blank lines
2878  -ce     cuddled else; use this style: '} else {'
2879  -dnl    delete old newlines (default)
2880  -mbl=n  maximum consecutive blank lines (default=1)
2881  -l=n    maximum line length;  default n=80
2882  -bl     opening brace on new line
2883  -sbl    opening sub brace on new line.  value of -bl is used if not given.
2884  -bli    opening brace on new line and indented
2885  -bar    opening brace always on right, even for long clauses
2886  -vt=n   vertical tightness (requires -lp); n controls break after opening
2887          token: 0=never  1=no break if next line balanced   2=no break
2888  -vtc=n  vertical tightness of closing container; n controls if closing
2889          token starts new line: 0=always  1=not unless list  1=never
2890  -wba=s  want break after tokens in string; i.e. wba=': .'
2891  -wbb=s  want break before tokens in string
2892
2893 Following Old Breakpoints
2894  -kis    keep interior semicolons.  Allows multiple statements per line.
2895  -boc    break at old comma breaks: turns off all automatic list formatting
2896  -bol    break at old logical breakpoints: or, and, ||, && (default)
2897  -bok    break at old list keyword breakpoints such as map, sort (default)
2898  -bot    break at old conditional (ternary ?:) operator breakpoints (default)
2899  -cab=n  break at commas after a comma-arrow (=>):
2900          n=0 break at all commas after =>
2901          n=1 stable: break unless this breaks an existing one-line container
2902          n=2 break only if a one-line container cannot be formed
2903          n=3 do not treat commas after => specially at all
2904
2905 Comment controls
2906  -ibc    indent block comments (default)
2907  -isbc   indent spaced block comments; may indent unless no leading space
2908  -msc=n  minimum desired spaces to side comment, default 4
2909  -fpsc=n fix position for side comments; default 0;
2910  -csc    add or update closing side comments after closing BLOCK brace
2911  -dcsc   delete closing side comments created by a -csc command
2912  -cscp=s change closing side comment prefix to be other than '## end'
2913  -cscl=s change closing side comment to apply to selected list of blocks
2914  -csci=n minimum number of lines needed to apply a -csc tag, default n=6
2915  -csct=n maximum number of columns of appended text, default n=20
2916  -cscw   causes warning if old side comment is overwritten with -csc
2917
2918  -sbc    use 'static block comments' identified by leading '##' (default)
2919  -sbcp=s change static block comment identifier to be other than '##'
2920  -osbc   outdent static block comments
2921
2922  -ssc    use 'static side comments' identified by leading '##' (default)
2923  -sscp=s change static side comment identifier to be other than '##'
2924
2925 Delete selected text
2926  -dac    delete all comments AND pod
2927  -dbc    delete block comments
2928  -dsc    delete side comments
2929  -dp     delete pod
2930
2931 Send selected text to a '.TEE' file
2932  -tac    tee all comments AND pod
2933  -tbc    tee block comments
2934  -tsc    tee side comments
2935  -tp     tee pod
2936
2937 Outdenting
2938  -olq    outdent long quoted strings (default)
2939  -olc    outdent a long block comment line
2940  -ola    outdent statement labels
2941  -okw    outdent control keywords (redo, next, last, goto, return)
2942  -okwl=s specify alternative keywords for -okw command
2943
2944 Other controls
2945  -mft=n  maximum fields per table; default n=40
2946  -x      do not format lines before hash-bang line (i.e., for VMS)
2947  -asc    allows perltidy to add a ';' when missing (default)
2948  -dsm    allows perltidy to delete an unnecessary ';'  (default)
2949
2950 Combinations of other parameters
2951  -gnu     attempt to follow GNU Coding Standards as applied to perl
2952  -mangle  remove as many newlines as possible (but keep comments and pods)
2953  -extrude  insert as many newlines as possible
2954
2955 Dump and die, debugging
2956  -dop    dump options used in this run to standard output and quit
2957  -ddf    dump default options to standard output and quit
2958  -dsn    dump all option short names to standard output and quit
2959  -dln    dump option long names to standard output and quit
2960  -dpro   dump whatever configuration file is in effect to standard output
2961  -dtt    dump all token types to standard output and quit
2962
2963 HTML
2964  -html write an html file (see 'man perl2web' for many options)
2965        Note: when -html is used, no indentation or formatting are done.
2966        Hint: try perltidy -html -css=mystyle.css filename.pl
2967        and edit mystyle.css to change the appearance of filename.html.
2968        -nnn gives line numbers
2969        -pre only writes out <pre>..</pre> code section
2970        -toc places a table of contents to subs at the top (default)
2971        -pod passes pod text through pod2html (default)
2972        -frm write html as a frame (3 files)
2973        -text=s extra extension for table of contents if -frm, default='toc'
2974        -sext=s extra extension for file content if -frm, default='src'
2975
2976 A prefix of "n" negates short form toggle switches, and a prefix of "no"
2977 negates the long forms.  For example, -nasc means don't add missing
2978 semicolons.
2979
2980 If you are unable to see this entire text, try "perltidy -h | more"
2981 For more detailed information, and additional options, try "man perltidy",
2982 or go to the perltidy home page at http://perltidy.sourceforge.net
2983 EOF
2984
2985 }
2986
2987 sub process_this_file {
2988
2989     my ( $truth, $beauty ) = @_;
2990
2991     # loop to process each line of this file
2992     while ( my $line_of_tokens = $truth->get_line() ) {
2993         $beauty->write_line($line_of_tokens);
2994     }
2995
2996     # finish up
2997     eval { $beauty->finish_formatting() };
2998     $truth->report_tokenization_errors();
2999 }
3000
3001 sub check_syntax {
3002
3003     # Use 'perl -c' to make sure that we did not create bad syntax
3004     # This is a very good independent check for programming errors
3005     #
3006     # Given names of the input and output files, ($ifname, $ofname),
3007     # we do the following:
3008     # - check syntax of the input file
3009     # - if bad, all done (could be an incomplete code snippet)
3010     # - if infile syntax ok, then check syntax of the output file;
3011     #   - if outfile syntax bad, issue warning; this implies a code bug!
3012     # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3013
3014     my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
3015     my $infile_syntax_ok = 0;
3016     my $line_of_dashes   = '-' x 42 . "\n";
3017
3018     my $flags = $rOpts->{'perl-syntax-check-flags'};
3019
3020     # be sure we invoke perl with -c
3021     # note: perl will accept repeated flags like '-c -c'.  It is safest
3022     # to append another -c than try to find an interior bundled c, as
3023     # in -Tc, because such a 'c' might be in a quoted string, for example.
3024     if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3025
3026     # be sure we invoke perl with -x if requested
3027     # same comments about repeated parameters applies
3028     if ( $rOpts->{'look-for-hash-bang'} ) {
3029         if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3030     }
3031
3032     # this shouldn't happen unless a termporary file couldn't be made
3033     if ( $ifname eq '-' ) {
3034         $logger_object->write_logfile_entry(
3035             "Cannot run perl -c on STDIN and STDOUT\n");
3036         return $infile_syntax_ok;
3037     }
3038
3039     $logger_object->write_logfile_entry(
3040         "checking input file syntax with perl $flags\n");
3041     $logger_object->write_logfile_entry($line_of_dashes);
3042
3043     # Not all operating systems/shells support redirection of the standard
3044     # error output.
3045     my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3046
3047     my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3048     $logger_object->write_logfile_entry("$perl_output\n");
3049
3050     if ( $perl_output =~ /syntax\s*OK/ ) {
3051         $infile_syntax_ok = 1;
3052         $logger_object->write_logfile_entry($line_of_dashes);
3053         $logger_object->write_logfile_entry(
3054             "checking output file syntax with perl $flags ...\n");
3055         $logger_object->write_logfile_entry($line_of_dashes);
3056
3057         my $perl_output =
3058           do_syntax_check( $ofname, $flags, $error_redirection );
3059         $logger_object->write_logfile_entry("$perl_output\n");
3060
3061         unless ( $perl_output =~ /syntax\s*OK/ ) {
3062             $logger_object->write_logfile_entry($line_of_dashes);
3063             $logger_object->warning(
3064 "The output file has a syntax error when tested with perl $flags $ofname !\n"
3065             );
3066             $logger_object->warning(
3067                 "This implies an error in perltidy; the file $ofname is bad\n");
3068             $logger_object->report_definite_bug();
3069
3070             # the perl version number will be helpful for diagnosing the problem
3071             $logger_object->write_logfile_entry(
3072                 qx/perl -v $error_redirection/ . "\n" );
3073         }
3074     }
3075     else {
3076
3077         # Only warn of perl -c syntax errors.  Other messages,
3078         # such as missing modules, are too common.  They can be
3079         # seen by running with perltidy -w
3080         $logger_object->complain("A syntax check using perl $flags gives: \n");
3081         $logger_object->complain($line_of_dashes);
3082         $logger_object->complain("$perl_output\n");
3083         $logger_object->complain($line_of_dashes);
3084         $infile_syntax_ok = -1;
3085         $logger_object->write_logfile_entry($line_of_dashes);
3086         $logger_object->write_logfile_entry(
3087 "The output file will not be checked because of input file problems\n"
3088         );
3089     }
3090     return $infile_syntax_ok;
3091 }
3092
3093 sub do_syntax_check {
3094     my ( $fname, $flags, $error_redirection ) = @_;
3095
3096     # We have to quote the filename in case it has unusual characters
3097     # or spaces.  Example: this filename #CM11.pm# gives trouble.
3098     $fname = '"' . $fname . '"';
3099
3100     # Under VMS something like -T will become -t (and an error) so we
3101     # will put quotes around the flags.  Double quotes seem to work on
3102     # Unix/Windows/VMS, but this may not work on all systems.  (Single
3103     # quotes do not work under Windows).  It could become necessary to
3104     # put double quotes around each flag, such as:  -"c"  -"T"
3105     # We may eventually need some system-dependent coding here.
3106     $flags = '"' . $flags . '"';
3107
3108     # now wish for luck...
3109     return qx/perl $flags $fname $error_redirection/;
3110 }
3111
3112 #####################################################################
3113 #
3114 # This is a stripped down version of IO::Scalar
3115 # Given a reference to a scalar, it supplies either:
3116 # a getline method which reads lines (mode='r'), or
3117 # a print method which reads lines (mode='w')
3118 #
3119 #####################################################################
3120 package Perl::Tidy::IOScalar;
3121 use Carp;
3122
3123 sub new {
3124     my ( $package, $rscalar, $mode ) = @_;
3125     my $ref = ref $rscalar;
3126     if ( $ref ne 'SCALAR' ) {
3127         confess <<EOM;
3128 ------------------------------------------------------------------------
3129 expecting ref to SCALAR but got ref to ($ref); trace follows:
3130 ------------------------------------------------------------------------
3131 EOM
3132
3133     }
3134     if ( $mode eq 'w' ) {
3135         $$rscalar = "";
3136         return bless [ $rscalar, $mode ], $package;
3137     }
3138     elsif ( $mode eq 'r' ) {
3139
3140         # Convert a scalar to an array.
3141         # This avoids looking for "\n" on each call to getline
3142         my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3143         my $i_next = 0;
3144         return bless [ \@array, $mode, $i_next ], $package;
3145     }
3146     else {
3147         confess <<EOM;
3148 ------------------------------------------------------------------------
3149 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3150 ------------------------------------------------------------------------
3151 EOM
3152     }
3153 }
3154
3155 sub getline {
3156     my $self = shift;
3157     my $mode = $self->[1];
3158     if ( $mode ne 'r' ) {
3159         confess <<EOM;
3160 ------------------------------------------------------------------------
3161 getline call requires mode = 'r' but mode = ($mode); trace follows:
3162 ------------------------------------------------------------------------
3163 EOM
3164     }
3165     my $i = $self->[2]++;
3166     ##my $line = $self->[0]->[$i];
3167     return $self->[0]->[$i];
3168 }
3169
3170 sub print {
3171     my $self = shift;
3172     my $mode = $self->[1];
3173     if ( $mode ne 'w' ) {
3174         confess <<EOM;
3175 ------------------------------------------------------------------------
3176 print call requires mode = 'w' but mode = ($mode); trace follows:
3177 ------------------------------------------------------------------------
3178 EOM
3179     }
3180     ${ $self->[0] } .= $_[0];
3181 }
3182 sub close { return }
3183
3184 #####################################################################
3185 #
3186 # This is a stripped down version of IO::ScalarArray
3187 # Given a reference to an array, it supplies either:
3188 # a getline method which reads lines (mode='r'), or
3189 # a print method which reads lines (mode='w')
3190 #
3191 # NOTE: this routine assumes that that there aren't any embedded
3192 # newlines within any of the array elements.  There are no checks
3193 # for that.
3194 #
3195 #####################################################################
3196 package Perl::Tidy::IOScalarArray;
3197 use Carp;
3198
3199 sub new {
3200     my ( $package, $rarray, $mode ) = @_;
3201     my $ref = ref $rarray;
3202     if ( $ref ne 'ARRAY' ) {
3203         confess <<EOM;
3204 ------------------------------------------------------------------------
3205 expecting ref to ARRAY but got ref to ($ref); trace follows:
3206 ------------------------------------------------------------------------
3207 EOM
3208
3209     }
3210     if ( $mode eq 'w' ) {
3211         @$rarray = ();
3212         return bless [ $rarray, $mode ], $package;
3213     }
3214     elsif ( $mode eq 'r' ) {
3215         my $i_next = 0;
3216         return bless [ $rarray, $mode, $i_next ], $package;
3217     }
3218     else {
3219         confess <<EOM;
3220 ------------------------------------------------------------------------
3221 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3222 ------------------------------------------------------------------------
3223 EOM
3224     }
3225 }
3226
3227 sub getline {
3228     my $self = shift;
3229     my $mode = $self->[1];
3230     if ( $mode ne 'r' ) {
3231         confess <<EOM;
3232 ------------------------------------------------------------------------
3233 getline requires mode = 'r' but mode = ($mode); trace follows:
3234 ------------------------------------------------------------------------
3235 EOM
3236     }
3237     my $i = $self->[2]++;
3238     ##my $line = $self->[0]->[$i];
3239     return $self->[0]->[$i];
3240 }
3241
3242 sub print {
3243     my $self = shift;
3244     my $mode = $self->[1];
3245     if ( $mode ne 'w' ) {
3246         confess <<EOM;
3247 ------------------------------------------------------------------------
3248 print requires mode = 'w' but mode = ($mode); trace follows:
3249 ------------------------------------------------------------------------
3250 EOM
3251     }
3252     push @{ $self->[0] }, $_[0];
3253 }
3254 sub close { return }
3255
3256 #####################################################################
3257 #
3258 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3259 # which returns the next line to be parsed
3260 #
3261 #####################################################################
3262
3263 package Perl::Tidy::LineSource;
3264
3265 sub new {
3266
3267     my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3268     my $input_file_copy = undef;
3269     my $fh_copy;
3270
3271     my $input_line_ending;
3272     if ( $rOpts->{'preserve-line-endings'} ) {
3273         $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3274     }
3275
3276     ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3277     return undef unless $fh;
3278
3279     # in order to check output syntax when standard output is used,
3280     # or when it is an object, we have to make a copy of the file
3281     if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3282     {
3283
3284         # Turning off syntax check when input output is used.
3285         # The reason is that temporary files cause problems on
3286         # on many systems.
3287         $rOpts->{'check-syntax'} = 0;
3288         $input_file_copy = '-';
3289
3290         $$rpending_logfile_message .= <<EOM;
3291 Note: --syntax check will be skipped because standard input is used
3292 EOM
3293
3294     }
3295
3296     return bless {
3297         _fh                => $fh,
3298         _fh_copy           => $fh_copy,
3299         _filename          => $input_file,
3300         _input_file_copy   => $input_file_copy,
3301         _input_line_ending => $input_line_ending,
3302         _rinput_buffer     => [],
3303         _started           => 0,
3304     }, $class;
3305 }
3306
3307 sub get_input_file_copy_name {
3308     my $self   = shift;
3309     my $ifname = $self->{_input_file_copy};
3310     unless ($ifname) {
3311         $ifname = $self->{_filename};
3312     }
3313     return $ifname;
3314 }
3315
3316 sub close_input_file {
3317     my $self = shift;
3318     eval { $self->{_fh}->close() };
3319     eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3320 }
3321
3322 sub get_line {
3323     my $self          = shift;
3324     my $line          = undef;
3325     my $fh            = $self->{_fh};
3326     my $fh_copy       = $self->{_fh_copy};
3327     my $rinput_buffer = $self->{_rinput_buffer};
3328
3329     if ( scalar(@$rinput_buffer) ) {
3330         $line = shift @$rinput_buffer;
3331     }
3332     else {
3333         $line = $fh->getline();
3334
3335         # patch to read raw mac files under unix, dos
3336         # see if the first line has embedded \r's
3337         if ( $line && !$self->{_started} ) {
3338             if ( $line =~ /[\015][^\015\012]/ ) {
3339
3340                 # found one -- break the line up and store in a buffer
3341                 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3342                 my $count = @$rinput_buffer;
3343                 $line = shift @$rinput_buffer;
3344             }
3345             $self->{_started}++;
3346         }
3347     }
3348     if ( $line && $fh_copy ) { $fh_copy->print($line); }
3349     return $line;
3350 }
3351
3352 sub old_get_line {
3353     my $self    = shift;
3354     my $line    = undef;
3355     my $fh      = $self->{_fh};
3356     my $fh_copy = $self->{_fh_copy};
3357     $line = $fh->getline();
3358     if ( $line && $fh_copy ) { $fh_copy->print($line); }
3359     return $line;
3360 }
3361
3362 #####################################################################
3363 #
3364 # the Perl::Tidy::LineSink class supplies a write_line method for
3365 # actual file writing
3366 #
3367 #####################################################################
3368
3369 package Perl::Tidy::LineSink;
3370
3371 sub new {
3372
3373     my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3374         $rpending_logfile_message, $binmode )
3375       = @_;
3376     my $fh               = undef;
3377     my $fh_copy          = undef;
3378     my $fh_tee           = undef;
3379     my $output_file_copy = "";
3380     my $output_file_open = 0;
3381
3382     if ( $rOpts->{'format'} eq 'tidy' ) {
3383         ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3384         unless ($fh) { die "Cannot write to output stream\n"; }
3385         $output_file_open = 1;
3386         if ($binmode) {
3387             if ( ref($fh) eq 'IO::File' ) {
3388                 binmode $fh;
3389             }
3390             if ( $output_file eq '-' ) { binmode STDOUT }
3391         }
3392     }
3393
3394     # in order to check output syntax when standard output is used,
3395     # or when it is an object, we have to make a copy of the file
3396     if ( $output_file eq '-' || ref $output_file ) {
3397         if ( $rOpts->{'check-syntax'} ) {
3398
3399             # Turning off syntax check when standard output is used.
3400             # The reason is that temporary files cause problems on
3401             # on many systems.
3402             $rOpts->{'check-syntax'} = 0;
3403             $output_file_copy = '-';
3404             $$rpending_logfile_message .= <<EOM;
3405 Note: --syntax check will be skipped because standard output is used
3406 EOM
3407
3408         }
3409     }
3410
3411     bless {
3412         _fh               => $fh,
3413         _fh_copy          => $fh_copy,
3414         _fh_tee           => $fh_tee,
3415         _output_file      => $output_file,
3416         _output_file_open => $output_file_open,
3417         _output_file_copy => $output_file_copy,
3418         _tee_flag         => 0,
3419         _tee_file         => $tee_file,
3420         _tee_file_opened  => 0,
3421         _line_separator   => $line_separator,
3422         _binmode          => $binmode,
3423     }, $class;
3424 }
3425
3426 sub write_line {
3427
3428     my $self    = shift;
3429     my $fh      = $self->{_fh};
3430     my $fh_copy = $self->{_fh_copy};
3431
3432     my $output_file_open = $self->{_output_file_open};
3433     chomp $_[0];
3434     $_[0] .= $self->{_line_separator};
3435
3436     $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3437     print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3438
3439     if ( $self->{_tee_flag} ) {
3440         unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3441         my $fh_tee = $self->{_fh_tee};
3442         print $fh_tee $_[0];
3443     }
3444 }
3445
3446 sub get_output_file_copy {
3447     my $self   = shift;
3448     my $ofname = $self->{_output_file_copy};
3449     unless ($ofname) {
3450         $ofname = $self->{_output_file};
3451     }
3452     return $ofname;
3453 }
3454
3455 sub tee_on {
3456     my $self = shift;
3457     $self->{_tee_flag} = 1;
3458 }
3459
3460 sub tee_off {
3461     my $self = shift;
3462     $self->{_tee_flag} = 0;
3463 }
3464
3465 sub really_open_tee_file {
3466     my $self     = shift;
3467     my $tee_file = $self->{_tee_file};
3468     my $fh_tee;
3469     $fh_tee = IO::File->new(">$tee_file")
3470       or die("couldn't open TEE file $tee_file: $!\n");
3471     binmode $fh_tee if $self->{_binmode};
3472     $self->{_tee_file_opened} = 1;
3473     $self->{_fh_tee}          = $fh_tee;
3474 }
3475
3476 sub close_output_file {
3477     my $self = shift;
3478     eval { $self->{_fh}->close() }      if $self->{_output_file_open};
3479     eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3480     $self->close_tee_file();
3481 }
3482
3483 sub close_tee_file {
3484     my $self = shift;
3485
3486     if ( $self->{_tee_file_opened} ) {
3487         eval { $self->{_fh_tee}->close() };
3488         $self->{_tee_file_opened} = 0;
3489     }
3490 }
3491
3492 #####################################################################
3493 #
3494 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3495 # useful for program development.
3496 #
3497 # Only one such file is created regardless of the number of input
3498 # files processed.  This allows the results of processing many files
3499 # to be summarized in a single file.
3500 #
3501 #####################################################################
3502
3503 package Perl::Tidy::Diagnostics;
3504
3505 sub new {
3506
3507     my $class = shift;
3508     bless {
3509         _write_diagnostics_count => 0,
3510         _last_diagnostic_file    => "",
3511         _input_file              => "",
3512         _fh                      => undef,
3513     }, $class;
3514 }
3515
3516 sub set_input_file {
3517     my $self = shift;
3518     $self->{_input_file} = $_[0];
3519 }
3520
3521 # This is a diagnostic routine which is useful for program development.
3522 # Output from debug messages go to a file named DIAGNOSTICS, where
3523 # they are labeled by file and line.  This allows many files to be
3524 # scanned at once for some particular condition of interest.
3525 sub write_diagnostics {
3526     my $self = shift;
3527
3528     unless ( $self->{_write_diagnostics_count} ) {
3529         open DIAGNOSTICS, ">DIAGNOSTICS"
3530           or death("couldn't open DIAGNOSTICS: $!\n");
3531     }
3532
3533     my $last_diagnostic_file = $self->{_last_diagnostic_file};
3534     my $input_file           = $self->{_input_file};
3535     if ( $last_diagnostic_file ne $input_file ) {
3536         print DIAGNOSTICS "\nFILE:$input_file\n";
3537     }
3538     $self->{_last_diagnostic_file} = $input_file;
3539     my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3540     print DIAGNOSTICS "$input_line_number:\t@_";
3541     $self->{_write_diagnostics_count}++;
3542 }
3543
3544 #####################################################################
3545 #
3546 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3547 #
3548 #####################################################################
3549
3550 package Perl::Tidy::Logger;
3551
3552 sub new {
3553     my $class = shift;
3554     my $fh;
3555     my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3556
3557     # remove any old error output file
3558     unless ( ref($warning_file) ) {
3559         if ( -e $warning_file ) { unlink($warning_file) }
3560     }
3561
3562     bless {
3563         _log_file                      => $log_file,
3564         _fh_warnings                   => undef,
3565         _rOpts                         => $rOpts,
3566         _fh_warnings                   => undef,
3567         _last_input_line_written       => 0,
3568         _at_end_of_file                => 0,
3569         _use_prefix                    => 1,
3570         _block_log_output              => 0,
3571         _line_of_tokens                => undef,
3572         _output_line_number            => undef,
3573         _wrote_line_information_string => 0,
3574         _wrote_column_headings         => 0,
3575         _warning_file                  => $warning_file,
3576         _warning_count                 => 0,
3577         _complaint_count               => 0,
3578         _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
3579         _saw_brace_error => 0,
3580         _saw_extrude     => $saw_extrude,
3581         _output_array    => [],
3582     }, $class;
3583 }
3584
3585 sub close_log_file {
3586
3587     my $self = shift;
3588     if ( $self->{_fh_warnings} ) {
3589         eval { $self->{_fh_warnings}->close() };
3590         $self->{_fh_warnings} = undef;
3591     }
3592 }
3593
3594 sub get_warning_count {
3595     my $self = shift;
3596     return $self->{_warning_count};
3597 }
3598
3599 sub get_use_prefix {
3600     my $self = shift;
3601     return $self->{_use_prefix};
3602 }
3603
3604 sub block_log_output {
3605     my $self = shift;
3606     $self->{_block_log_output} = 1;
3607 }
3608
3609 sub unblock_log_output {
3610     my $self = shift;
3611     $self->{_block_log_output} = 0;
3612 }
3613
3614 sub interrupt_logfile {
3615     my $self = shift;
3616     $self->{_use_prefix} = 0;
3617     $self->warning("\n");
3618     $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
3619 }
3620
3621 sub resume_logfile {
3622     my $self = shift;
3623     $self->write_logfile_entry( '#' x 60 . "\n" );
3624     $self->{_use_prefix} = 1;
3625 }
3626
3627 sub we_are_at_the_last_line {
3628     my $self = shift;
3629     unless ( $self->{_wrote_line_information_string} ) {
3630         $self->write_logfile_entry("Last line\n\n");
3631     }
3632     $self->{_at_end_of_file} = 1;
3633 }
3634
3635 # record some stuff in case we go down in flames
3636 sub black_box {
3637     my $self = shift;
3638     my ( $line_of_tokens, $output_line_number ) = @_;
3639     my $input_line        = $line_of_tokens->{_line_text};
3640     my $input_line_number = $line_of_tokens->{_line_number};
3641
3642     # save line information in case we have to write a logfile message
3643     $self->{_line_of_tokens}                = $line_of_tokens;
3644     $self->{_output_line_number}            = $output_line_number;
3645     $self->{_wrote_line_information_string} = 0;
3646
3647     my $last_input_line_written = $self->{_last_input_line_written};
3648     my $rOpts                   = $self->{_rOpts};
3649     if (
3650         (
3651             ( $input_line_number - $last_input_line_written ) >=
3652             $rOpts->{'logfile-gap'}
3653         )
3654         || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3655       )
3656     {
3657         my $rlevels                      = $line_of_tokens->{_rlevels};
3658         my $structural_indentation_level = $$rlevels[0];
3659         $self->{_last_input_line_written} = $input_line_number;
3660         ( my $out_str = $input_line ) =~ s/^\s*//;
3661         chomp $out_str;
3662
3663         $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3664
3665         if ( length($out_str) > 35 ) {
3666             $out_str = substr( $out_str, 0, 35 ) . " ....";
3667         }
3668         $self->logfile_output( "", "$out_str\n" );
3669     }
3670 }
3671
3672 sub write_logfile_entry {
3673     my $self = shift;
3674
3675     # add leading >>> to avoid confusing error mesages and code
3676     $self->logfile_output( ">>>", "@_" );
3677 }
3678
3679 sub write_column_headings {
3680     my $self = shift;
3681
3682     $self->{_wrote_column_headings} = 1;
3683     my $routput_array = $self->{_output_array};
3684     push @{$routput_array}, <<EOM;
3685 The nesting depths in the table below are at the start of the lines.
3686 The indicated output line numbers are not always exact.
3687 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3688
3689 in:out indent c b  nesting   code + messages; (messages begin with >>>)
3690 lines  levels i k            (code begins with one '.' per indent level)
3691 ------  ----- - - --------   -------------------------------------------
3692 EOM
3693 }
3694
3695 sub make_line_information_string {
3696
3697     # make columns of information when a logfile message needs to go out
3698     my $self                    = shift;
3699     my $line_of_tokens          = $self->{_line_of_tokens};
3700     my $input_line_number       = $line_of_tokens->{_line_number};
3701     my $line_information_string = "";
3702     if ($input_line_number) {
3703
3704         my $output_line_number   = $self->{_output_line_number};
3705         my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
3706         my $paren_depth          = $line_of_tokens->{_paren_depth};
3707         my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3708         my $python_indentation_level =
3709           $line_of_tokens->{_python_indentation_level};
3710         my $rlevels         = $line_of_tokens->{_rlevels};
3711         my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3712         my $rci_levels      = $line_of_tokens->{_rci_levels};
3713         my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3714
3715         my $structural_indentation_level = $$rlevels[0];
3716
3717         $self->write_column_headings() unless $self->{_wrote_column_headings};
3718
3719         # keep logfile columns aligned for scripts up to 999 lines;
3720         # for longer scripts it doesn't really matter
3721         my $extra_space = "";
3722         $extra_space .=
3723             ( $input_line_number < 10 )  ? "  "
3724           : ( $input_line_number < 100 ) ? " "
3725           :                                "";
3726         $extra_space .=
3727             ( $output_line_number < 10 )  ? "  "
3728           : ( $output_line_number < 100 ) ? " "
3729           :                                 "";
3730
3731         # there are 2 possible nesting strings:
3732         # the original which looks like this:  (0 [1 {2
3733         # the new one, which looks like this:  {{[
3734         # the new one is easier to read, and shows the order, but
3735         # could be arbitrarily long, so we use it unless it is too long
3736         my $nesting_string =
3737           "($paren_depth [$square_bracket_depth {$brace_depth";
3738         my $nesting_string_new = $$rnesting_tokens[0];
3739
3740         my $ci_level = $$rci_levels[0];
3741         if ( $ci_level > 9 ) { $ci_level = '*' }
3742         my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3743
3744         if ( length($nesting_string_new) <= 8 ) {
3745             $nesting_string =
3746               $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3747         }
3748         if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3749         $line_information_string =
3750 "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3751     }
3752     return $line_information_string;
3753 }
3754
3755 sub logfile_output {
3756     my $self = shift;
3757     my ( $prompt, $msg ) = @_;
3758     return if ( $self->{_block_log_output} );
3759
3760     my $routput_array = $self->{_output_array};
3761     if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3762         push @{$routput_array}, "$msg";
3763     }
3764     else {
3765         my $line_information_string = $self->make_line_information_string();
3766         $self->{_wrote_line_information_string} = 1;
3767
3768         if ($line_information_string) {
3769             push @{$routput_array}, "$line_information_string   $prompt$msg";
3770         }
3771         else {
3772             push @{$routput_array}, "$msg";
3773         }
3774     }
3775 }
3776
3777 sub get_saw_brace_error {
3778     my $self = shift;
3779     return $self->{_saw_brace_error};
3780 }
3781
3782 sub increment_brace_error {
3783     my $self = shift;
3784     $self->{_saw_brace_error}++;
3785 }
3786
3787 sub brace_warning {
3788     my $self = shift;
3789     use constant BRACE_WARNING_LIMIT => 10;
3790     my $saw_brace_error = $self->{_saw_brace_error};
3791
3792     if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3793         $self->warning(@_);
3794     }
3795     $saw_brace_error++;
3796     $self->{_saw_brace_error} = $saw_brace_error;
3797
3798     if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3799         $self->warning("No further warnings of this type will be given\n");
3800     }
3801 }
3802
3803 sub complain {
3804
3805     # handle non-critical warning messages based on input flag
3806     my $self  = shift;
3807     my $rOpts = $self->{_rOpts};
3808
3809     # these appear in .ERR output only if -w flag is used
3810     if ( $rOpts->{'warning-output'} ) {
3811         $self->warning(@_);
3812     }
3813
3814     # otherwise, they go to the .LOG file
3815     else {
3816         $self->{_complaint_count}++;
3817         $self->write_logfile_entry(@_);
3818     }
3819 }
3820
3821 sub warning {
3822
3823     # report errors to .ERR file (or stdout)
3824     my $self = shift;
3825     use constant WARNING_LIMIT => 50;
3826
3827     my $rOpts = $self->{_rOpts};
3828     unless ( $rOpts->{'quiet'} ) {
3829
3830         my $warning_count = $self->{_warning_count};
3831         unless ($warning_count) {
3832             my $warning_file = $self->{_warning_file};
3833             my $fh_warnings;
3834             if ( $rOpts->{'standard-error-output'} ) {
3835                 $fh_warnings = *STDERR;
3836             }
3837             else {
3838                 ( $fh_warnings, my $filename ) =
3839                   Perl::Tidy::streamhandle( $warning_file, 'w' );
3840                 $fh_warnings or die("couldn't open $filename $!\n");
3841                 warn "## Please see file $filename\n";
3842             }
3843             $self->{_fh_warnings} = $fh_warnings;
3844         }
3845
3846         my $fh_warnings = $self->{_fh_warnings};
3847         if ( $warning_count < WARNING_LIMIT ) {
3848             if ( $self->get_use_prefix() > 0 ) {
3849                 my $input_line_number =
3850                   Perl::Tidy::Tokenizer::get_input_line_number();
3851                 $fh_warnings->print("$input_line_number:\t@_");
3852                 $self->write_logfile_entry("WARNING: @_");
3853             }
3854             else {
3855                 $fh_warnings->print(@_);
3856                 $self->write_logfile_entry(@_);
3857             }
3858         }
3859         $warning_count++;
3860         $self->{_warning_count} = $warning_count;
3861
3862         if ( $warning_count == WARNING_LIMIT ) {
3863             $fh_warnings->print("No further warnings will be given\n");
3864         }
3865     }
3866 }
3867
3868 # programming bug codes:
3869 #   -1 = no bug
3870 #    0 = maybe, not sure.
3871 #    1 = definitely
3872 sub report_possible_bug {
3873     my $self         = shift;
3874     my $saw_code_bug = $self->{_saw_code_bug};
3875     $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3876 }
3877
3878 sub report_definite_bug {
3879     my $self = shift;
3880     $self->{_saw_code_bug} = 1;
3881 }
3882
3883 sub ask_user_for_bug_report {
3884     my $self = shift;
3885
3886     my ( $infile_syntax_ok, $formatter ) = @_;
3887     my $saw_code_bug = $self->{_saw_code_bug};
3888     if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3889         $self->warning(<<EOM);
3890
3891 You may have encountered a code bug in perltidy.  If you think so, and
3892 the problem is not listed in the BUGS file at
3893 http://perltidy.sourceforge.net, please report it so that it can be
3894 corrected.  Include the smallest possible script which has the problem,
3895 along with the .LOG file. See the manual pages for contact information.
3896 Thank you!
3897 EOM
3898
3899     }
3900     elsif ( $saw_code_bug == 1 ) {
3901         if ( $self->{_saw_extrude} ) {
3902             $self->warning(<<EOM);
3903
3904 You may have encountered a bug in perltidy.  However, since you are using the
3905 -extrude option, the problem may be with perl or one of its modules, which have
3906 occasional problems with this type of file.  If you believe that the
3907 problem is with perltidy, and the problem is not listed in the BUGS file at
3908 http://perltidy.sourceforge.net, please report it so that it can be corrected.
3909 Include the smallest possible script which has the problem, along with the .LOG
3910 file. See the manual pages for contact information.
3911 Thank you!
3912 EOM
3913         }
3914         else {
3915             $self->warning(<<EOM);
3916
3917 Oops, you seem to have encountered a bug in perltidy.  Please check the
3918 BUGS file at http://perltidy.sourceforge.net.  If the problem is not
3919 listed there, please report it so that it can be corrected.  Include the
3920 smallest possible script which produces this message, along with the
3921 .LOG file if appropriate.  See the manual pages for contact information.
3922 Your efforts are appreciated.
3923 Thank you!
3924 EOM
3925             my $added_semicolon_count = 0;
3926             eval {
3927                 $added_semicolon_count =
3928                   $formatter->get_added_semicolon_count();
3929             };
3930             if ( $added_semicolon_count > 0 ) {
3931                 $self->warning(<<EOM);
3932
3933 The log file shows that perltidy added $added_semicolon_count semicolons.
3934 Please rerun with -nasc to see if that is the cause of the syntax error.  Even
3935 if that is the problem, please report it so that it can be fixed.
3936 EOM
3937
3938             }
3939         }
3940     }
3941 }
3942
3943 sub finish {
3944
3945     # called after all formatting to summarize errors
3946     my $self = shift;
3947     my ( $infile_syntax_ok, $formatter ) = @_;
3948
3949     my $rOpts         = $self->{_rOpts};
3950     my $warning_count = $self->{_warning_count};
3951     my $saw_code_bug  = $self->{_saw_code_bug};
3952
3953     my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3954       || $saw_code_bug == 1
3955       || $rOpts->{'logfile'};
3956     my $log_file = $self->{_log_file};
3957     if ($warning_count) {
3958         if ($save_logfile) {
3959             $self->block_log_output();    # avoid echoing this to the logfile
3960             $self->warning(
3961                 "The logfile $log_file may contain useful information\n");
3962             $self->unblock_log_output();
3963         }
3964
3965         if ( $self->{_complaint_count} > 0 ) {
3966             $self->warning(
3967 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3968             );
3969         }
3970
3971         if ( $self->{_saw_brace_error}
3972             && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3973         {
3974             $self->warning("To save a full .LOG file rerun with -g\n");
3975         }
3976     }
3977     $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
3978
3979     if ($save_logfile) {
3980         my $log_file = $self->{_log_file};
3981         my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
3982         if ($fh) {
3983             my $routput_array = $self->{_output_array};
3984             foreach ( @{$routput_array} ) { $fh->print($_) }
3985             eval                          { $fh->close() };
3986         }
3987     }
3988 }
3989
3990 #####################################################################
3991 #
3992 # The Perl::Tidy::DevNull class supplies a dummy print method
3993 #
3994 #####################################################################
3995
3996 package Perl::Tidy::DevNull;
3997 sub new { return bless {}, $_[0] }
3998 sub print { return }
3999 sub close { return }
4000
4001 #####################################################################
4002 #
4003 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4004 #
4005 #####################################################################
4006
4007 package Perl::Tidy::HtmlWriter;
4008
4009 use File::Basename;
4010
4011 # class variables
4012 use vars qw{
4013   %html_color
4014   %html_bold
4015   %html_italic
4016   %token_short_names
4017   %short_to_long_names
4018   $rOpts
4019   $css_filename
4020   $css_linkname
4021   $missing_html_entities
4022 };
4023
4024 # replace unsafe characters with HTML entity representation if HTML::Entities
4025 # is available
4026 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4027
4028 sub new {
4029
4030     my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4031         $html_src_extension )
4032       = @_;
4033
4034     my $html_file_opened = 0;
4035     my $html_fh;
4036     ( $html_fh, my $html_filename ) =
4037       Perl::Tidy::streamhandle( $html_file, 'w' );
4038     unless ($html_fh) {
4039         warn("can't open $html_file: $!\n");
4040         return undef;
4041     }
4042     $html_file_opened = 1;
4043
4044     if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4045         $input_file = "NONAME";
4046     }
4047
4048     # write the table of contents to a string
4049     my $toc_string;
4050     my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4051
4052     my $html_pre_fh;
4053     my @pre_string_stack;
4054     if ( $rOpts->{'html-pre-only'} ) {
4055
4056         # pre section goes directly to the output stream
4057         $html_pre_fh = $html_fh;
4058         $html_pre_fh->print( <<"PRE_END");
4059 <pre>
4060 PRE_END
4061     }
4062     else {
4063
4064         # pre section go out to a temporary string
4065         my $pre_string;
4066         $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4067         push @pre_string_stack, \$pre_string;
4068     }
4069
4070     # pod text gets diverted if the 'pod2html' is used
4071     my $html_pod_fh;
4072     my $pod_string;
4073     if ( $rOpts->{'pod2html'} ) {
4074         if ( $rOpts->{'html-pre-only'} ) {
4075             undef $rOpts->{'pod2html'};
4076         }
4077         else {
4078             eval "use Pod::Html";
4079             if ($@) {
4080                 warn
4081 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4082                 undef $rOpts->{'pod2html'};
4083             }
4084             else {
4085                 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4086             }
4087         }
4088     }
4089
4090     my $toc_filename;
4091     my $src_filename;
4092     if ( $rOpts->{'frames'} ) {
4093         unless ($extension) {
4094             warn
4095 "cannot use frames without a specified output extension; ignoring -frm\n";
4096             undef $rOpts->{'frames'};
4097         }
4098         else {
4099             $toc_filename = $input_file . $html_toc_extension . $extension;
4100             $src_filename = $input_file . $html_src_extension . $extension;
4101         }
4102     }
4103
4104     # ----------------------------------------------------------
4105     # Output is now directed as follows:
4106     # html_toc_fh <-- table of contents items
4107     # html_pre_fh <-- the <pre> section of formatted code, except:
4108     # html_pod_fh <-- pod goes here with the pod2html option
4109     # ----------------------------------------------------------
4110
4111     my $title = $rOpts->{'title'};
4112     unless ($title) {
4113         ( $title, my $path ) = fileparse($input_file);
4114     }
4115     my $toc_item_count = 0;
4116     my $in_toc_package = "";
4117     my $last_level     = 0;
4118     bless {
4119         _input_file        => $input_file,          # name of input file
4120         _title             => $title,               # title, unescaped
4121         _html_file         => $html_file,           # name of .html output file
4122         _toc_filename      => $toc_filename,        # for frames option
4123         _src_filename      => $src_filename,        # for frames option
4124         _html_file_opened  => $html_file_opened,    # a flag
4125         _html_fh           => $html_fh,             # the output stream
4126         _html_pre_fh       => $html_pre_fh,         # pre section goes here
4127         _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
4128         _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
4129         _rpod_string       => \$pod_string,         # string holding pod
4130         _pod_cut_count     => 0,                    # how many =cut's?
4131         _html_toc_fh       => $html_toc_fh,         # fh for table of contents
4132         _rtoc_string       => \$toc_string,         # string holding toc
4133         _rtoc_item_count   => \$toc_item_count,     # how many toc items
4134         _rin_toc_package   => \$in_toc_package,     # package name
4135         _rtoc_name_count   => {},                   # hash to track unique names
4136         _rpackage_stack    => [],                   # stack to check for package
4137                                                     # name changes
4138         _rlast_level       => \$last_level,         # brace indentation level
4139     }, $class;
4140 }
4141
4142 sub add_toc_item {
4143
4144     # Add an item to the html table of contents.
4145     # This is called even if no table of contents is written,
4146     # because we still want to put the anchors in the <pre> text.
4147     # We are given an anchor name and its type; types are:
4148     #      'package', 'sub', '__END__', '__DATA__', 'EOF'
4149     # There must be an 'EOF' call at the end to wrap things up.
4150     my $self = shift;
4151     my ( $name, $type ) = @_;
4152     my $html_toc_fh     = $self->{_html_toc_fh};
4153     my $html_pre_fh     = $self->{_html_pre_fh};
4154     my $rtoc_name_count = $self->{_rtoc_name_count};
4155     my $rtoc_item_count = $self->{_rtoc_item_count};
4156     my $rlast_level     = $self->{_rlast_level};
4157     my $rin_toc_package = $self->{_rin_toc_package};
4158     my $rpackage_stack  = $self->{_rpackage_stack};
4159
4160     # packages contain sublists of subs, so to avoid errors all package
4161     # items are written and finished with the following routines
4162     my $end_package_list = sub {
4163         if ($$rin_toc_package) {
4164             $html_toc_fh->print("</ul>\n</li>\n");
4165             $$rin_toc_package = "";
4166         }
4167     };
4168
4169     my $start_package_list = sub {
4170         my ( $unique_name, $package ) = @_;
4171         if ($$rin_toc_package) { $end_package_list->() }
4172         $html_toc_fh->print(<<EOM);
4173 <li><a href=\"#$unique_name\">package $package</a>
4174 <ul>
4175 EOM
4176         $$rin_toc_package = $package;
4177     };
4178
4179     # start the table of contents on the first item
4180     unless ($$rtoc_item_count) {
4181
4182         # but just quit if we hit EOF without any other entries
4183         # in this case, there will be no toc
4184         return if ( $type eq 'EOF' );
4185         $html_toc_fh->print( <<"TOC_END");
4186 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4187 <ul>
4188 TOC_END
4189     }
4190     $$rtoc_item_count++;
4191
4192     # make a unique anchor name for this location:
4193     #   - packages get a 'package-' prefix
4194     #   - subs use their names
4195     my $unique_name = $name;
4196     if ( $type eq 'package' ) { $unique_name = "package-$name" }
4197
4198     # append '-1', '-2', etc if necessary to make unique; this will
4199     # be unique because subs and packages cannot have a '-'
4200     if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4201         $unique_name .= "-$count";
4202     }
4203
4204     #   - all names get terminal '-' if pod2html is used, to avoid
4205     #     conflicts with anchor names created by pod2html
4206     if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4207
4208     # start/stop lists of subs
4209     if ( $type eq 'sub' ) {
4210         my $package = $rpackage_stack->[$$rlast_level];
4211         unless ($package) { $package = 'main' }
4212
4213         # if we're already in a package/sub list, be sure its the right
4214         # package or else close it
4215         if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4216             $end_package_list->();
4217         }
4218
4219         # start a package/sub list if necessary
4220         unless ($$rin_toc_package) {
4221             $start_package_list->( $unique_name, $package );
4222         }
4223     }
4224
4225     # now write an entry in the toc for this item
4226     if ( $type eq 'package' ) {
4227         $start_package_list->( $unique_name, $name );
4228     }
4229     elsif ( $type eq 'sub' ) {
4230         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4231     }
4232     else {
4233         $end_package_list->();
4234         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4235     }
4236
4237     # write the anchor in the <pre> section
4238     $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4239
4240     # end the table of contents, if any, on the end of file
4241     if ( $type eq 'EOF' ) {
4242         $html_toc_fh->print( <<"TOC_END");
4243 </ul>
4244 <!-- END CODE INDEX -->
4245 TOC_END
4246     }
4247 }
4248
4249 BEGIN {
4250
4251     # This is the official list of tokens which may be identified by the
4252     # user.  Long names are used as getopt keys.  Short names are
4253     # convenient short abbreviations for specifying input.  Short names
4254     # somewhat resemble token type characters, but are often different
4255     # because they may only be alphanumeric, to allow command line
4256     # input.  Also, note that because of case insensitivity of html,
4257     # this table must be in a single case only (I've chosen to use all
4258     # lower case).
4259     # When adding NEW_TOKENS: update this hash table
4260     # short names => long names
4261     %short_to_long_names = (
4262         'n'  => 'numeric',
4263         'p'  => 'paren',
4264         'q'  => 'quote',
4265         's'  => 'structure',
4266         'c'  => 'comment',
4267         'v'  => 'v-string',
4268         'cm' => 'comma',
4269         'w'  => 'bareword',
4270         'co' => 'colon',
4271         'pu' => 'punctuation',
4272         'i'  => 'identifier',
4273         'j'  => 'label',
4274         'h'  => 'here-doc-target',
4275         'hh' => 'here-doc-text',
4276         'k'  => 'keyword',
4277         'sc' => 'semicolon',
4278         'm'  => 'subroutine',
4279         'pd' => 'pod-text',
4280     );
4281
4282     # Now we have to map actual token types into one of the above short
4283     # names; any token types not mapped will get 'punctuation'
4284     # properties.
4285
4286     # The values of this hash table correspond to the keys of the
4287     # previous hash table.
4288     # The keys of this hash table are token types and can be seen
4289     # by running with --dump-token-types (-dtt).
4290
4291     # When adding NEW_TOKENS: update this hash table
4292     # $type => $short_name
4293     %token_short_names = (
4294         '#'  => 'c',
4295         'n'  => 'n',
4296         'v'  => 'v',
4297         'k'  => 'k',
4298         'F'  => 'k',
4299         'Q'  => 'q',
4300         'q'  => 'q',
4301         'J'  => 'j',
4302         'j'  => 'j',
4303         'h'  => 'h',
4304         'H'  => 'hh',
4305         'w'  => 'w',
4306         ','  => 'cm',
4307         '=>' => 'cm',
4308         ';'  => 'sc',
4309         ':'  => 'co',
4310         'f'  => 'sc',
4311         '('  => 'p',
4312         ')'  => 'p',
4313         'M'  => 'm',
4314         'P'  => 'pd',
4315         'A'  => 'co',
4316     );
4317
4318     # These token types will all be called identifiers for now
4319     # FIXME: need to separate user defined modules as separate type
4320     my @identifier = qw" i t U C Y Z G :: ";
4321     @token_short_names{@identifier} = ('i') x scalar(@identifier);
4322
4323     # These token types will be called 'structure'
4324     my @structure = qw" { } ";
4325     @token_short_names{@structure} = ('s') x scalar(@structure);
4326
4327     # OLD NOTES: save for reference
4328     # Any of these could be added later if it would be useful.
4329     # For now, they will by default become punctuation
4330     #    my @list = qw" L R [ ] ";
4331     #    @token_long_names{@list} = ('non-structure') x scalar(@list);
4332     #
4333     #    my @list = qw"
4334     #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4335     #      ";
4336     #    @token_long_names{@list} = ('math') x scalar(@list);
4337     #
4338     #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
4339     #    @token_long_names{@list} = ('bit') x scalar(@list);
4340     #
4341     #    my @list = qw" == != < > <= <=> ";
4342     #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4343     #
4344     #    my @list = qw" && || ! &&= ||= //= ";
4345     #    @token_long_names{@list} = ('logical') x scalar(@list);
4346     #
4347     #    my @list = qw" . .= =~ !~ x x= ";
4348     #    @token_long_names{@list} = ('string-operators') x scalar(@list);
4349     #
4350     #    # Incomplete..
4351     #    my @list = qw" .. -> <> ... \ ? ";
4352     #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
4353
4354 }
4355
4356 sub make_getopt_long_names {
4357     my $class = shift;
4358     my ($rgetopt_names) = @_;
4359     while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4360         push @$rgetopt_names, "html-color-$name=s";
4361         push @$rgetopt_names, "html-italic-$name!";
4362         push @$rgetopt_names, "html-bold-$name!";
4363     }
4364     push @$rgetopt_names, "html-color-background=s";
4365     push @$rgetopt_names, "html-linked-style-sheet=s";
4366     push @$rgetopt_names, "nohtml-style-sheets";
4367     push @$rgetopt_names, "html-pre-only";
4368     push @$rgetopt_names, "html-line-numbers";
4369     push @$rgetopt_names, "html-entities!";
4370     push @$rgetopt_names, "stylesheet";
4371     push @$rgetopt_names, "html-table-of-contents!";
4372     push @$rgetopt_names, "pod2html!";
4373     push @$rgetopt_names, "frames!";
4374     push @$rgetopt_names, "html-toc-extension=s";
4375     push @$rgetopt_names, "html-src-extension=s";
4376
4377     # Pod::Html parameters:
4378     push @$rgetopt_names, "backlink=s";
4379     push @$rgetopt_names, "cachedir=s";
4380     push @$rgetopt_names, "htmlroot=s";
4381     push @$rgetopt_names, "libpods=s";
4382     push @$rgetopt_names, "podpath=s";
4383     push @$rgetopt_names, "podroot=s";
4384     push @$rgetopt_names, "title=s";
4385
4386     # Pod::Html parameters with leading 'pod' which will be removed
4387     # before the call to Pod::Html
4388     push @$rgetopt_names, "podquiet!";
4389     push @$rgetopt_names, "podverbose!";
4390     push @$rgetopt_names, "podrecurse!";
4391     push @$rgetopt_names, "podflush";
4392     push @$rgetopt_names, "podheader!";
4393     push @$rgetopt_names, "podindex!";
4394 }
4395
4396 sub make_abbreviated_names {
4397
4398     # We're appending things like this to the expansion list:
4399     #      'hcc'    => [qw(html-color-comment)],
4400     #      'hck'    => [qw(html-color-keyword)],
4401     #  etc
4402     my $class = shift;
4403     my ($rexpansion) = @_;
4404
4405     # abbreviations for color/bold/italic properties
4406     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4407         ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
4408         ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
4409         ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
4410         ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4411         ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4412     }
4413
4414     # abbreviations for all other html options
4415     ${$rexpansion}{"hcbg"}  = ["html-color-background"];
4416     ${$rexpansion}{"pre"}   = ["html-pre-only"];
4417     ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
4418     ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
4419     ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
4420     ${$rexpansion}{"hent"}  = ["html-entities"];
4421     ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4422     ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
4423     ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
4424     ${$rexpansion}{"ss"}    = ["stylesheet"];
4425     ${$rexpansion}{"pod"}   = ["pod2html"];
4426     ${$rexpansion}{"npod"}  = ["nopod2html"];
4427     ${$rexpansion}{"frm"}   = ["frames"];
4428     ${$rexpansion}{"nfrm"}  = ["noframes"];
4429     ${$rexpansion}{"text"}  = ["html-toc-extension"];
4430     ${$rexpansion}{"sext"}  = ["html-src-extension"];
4431 }
4432
4433 sub check_options {
4434
4435     # This will be called once after options have been parsed
4436     my $class = shift;
4437     $rOpts = shift;
4438
4439     # X11 color names for default settings that seemed to look ok
4440     # (these color names are only used for programming clarity; the hex
4441     # numbers are actually written)
4442     use constant ForestGreen   => "#228B22";
4443     use constant SaddleBrown   => "#8B4513";
4444     use constant magenta4      => "#8B008B";
4445     use constant IndianRed3    => "#CD5555";
4446     use constant DeepSkyBlue4  => "#00688B";
4447     use constant MediumOrchid3 => "#B452CD";
4448     use constant black         => "#000000";
4449     use constant white         => "#FFFFFF";
4450     use constant red           => "#FF0000";
4451
4452     # set default color, bold, italic properties
4453     # anything not listed here will be given the default (punctuation) color --
4454     # these types currently not listed and get default: ws pu s sc cm co p
4455     # When adding NEW_TOKENS: add an entry here if you don't want defaults
4456
4457     # set_default_properties( $short_name, default_color, bold?, italic? );
4458     set_default_properties( 'c',  ForestGreen,   0, 0 );
4459     set_default_properties( 'pd', ForestGreen,   0, 1 );
4460     set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
4461     set_default_properties( 'q',  IndianRed3,    0, 0 );
4462     set_default_properties( 'hh', IndianRed3,    0, 1 );
4463     set_default_properties( 'h',  IndianRed3,    1, 0 );
4464     set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
4465     set_default_properties( 'w',  black,         0, 0 );
4466     set_default_properties( 'n',  MediumOrchid3, 0, 0 );
4467     set_default_properties( 'v',  MediumOrchid3, 0, 0 );
4468     set_default_properties( 'j',  IndianRed3,    1, 0 );
4469     set_default_properties( 'm',  red,           1, 0 );
4470
4471     set_default_color( 'html-color-background',  white );
4472     set_default_color( 'html-color-punctuation', black );
4473
4474     # setup property lookup tables for tokens based on their short names
4475     # every token type has a short name, and will use these tables
4476     # to do the html markup
4477     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4478         $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
4479         $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
4480         $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4481     }
4482
4483     # write style sheet to STDOUT and die if requested
4484     if ( defined( $rOpts->{'stylesheet'} ) ) {
4485         write_style_sheet_file('-');
4486         exit 1;
4487     }
4488
4489     # make sure user gives a file name after -css
4490     if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4491         $css_linkname = $rOpts->{'html-linked-style-sheet'};
4492         if ( $css_linkname =~ /^-/ ) {
4493             die "You must specify a valid filename after -css\n";
4494         }
4495     }
4496
4497     # check for conflict
4498     if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4499         $rOpts->{'nohtml-style-sheets'} = 0;
4500         warning("You can't specify both -css and -nss; -nss ignored\n");
4501     }
4502
4503     # write a style sheet file if necessary
4504     if ($css_linkname) {
4505
4506         # if the selected filename exists, don't write, because user may
4507         # have done some work by hand to create it; use backup name instead
4508         # Also, this will avoid a potential disaster in which the user
4509         # forgets to specify the style sheet, like this:
4510         #    perltidy -html -css myfile1.pl myfile2.pl
4511         # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4512         my $css_filename = $css_linkname;
4513         unless ( -e $css_filename ) {
4514             write_style_sheet_file($css_filename);
4515         }
4516     }
4517     $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4518 }
4519
4520 sub write_style_sheet_file {
4521
4522     my $css_filename = shift;
4523     my $fh;
4524     unless ( $fh = IO::File->new("> $css_filename") ) {
4525         die "can't open $css_filename: $!\n";
4526     }
4527     write_style_sheet_data($fh);
4528     eval { $fh->close };
4529 }
4530
4531 sub write_style_sheet_data {
4532
4533     # write the style sheet data to an open file handle
4534     my $fh = shift;
4535
4536     my $bg_color   = $rOpts->{'html-color-background'};
4537     my $text_color = $rOpts->{'html-color-punctuation'};
4538
4539     # pre-bgcolor is new, and may not be defined
4540     my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4541     $pre_bg_color = $bg_color unless $pre_bg_color;
4542
4543     $fh->print(<<"EOM");
4544 /* default style sheet generated by perltidy */
4545 body {background: $bg_color; color: $text_color}
4546 pre { color: $text_color;
4547       background: $pre_bg_color;
4548       font-family: courier;
4549     }
4550
4551 EOM
4552
4553     foreach my $short_name ( sort keys %short_to_long_names ) {
4554         my $long_name = $short_to_long_names{$short_name};
4555
4556         my $abbrev = '.' . $short_name;
4557         if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
4558         my $color = $html_color{$short_name};
4559         if ( !defined($color) ) { $color = $text_color }
4560         $fh->print("$abbrev \{ color: $color;");
4561
4562         if ( $html_bold{$short_name} ) {
4563             $fh->print(" font-weight:bold;");
4564         }
4565
4566         if ( $html_italic{$short_name} ) {
4567             $fh->print(" font-style:italic;");
4568         }
4569         $fh->print("} /* $long_name */\n");
4570     }
4571 }
4572
4573 sub set_default_color {
4574
4575     # make sure that options hash $rOpts->{$key} contains a valid color
4576     my ( $key, $color ) = @_;
4577     if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4578     $rOpts->{$key} = check_RGB($color);
4579 }
4580
4581 sub check_RGB {
4582
4583     # if color is a 6 digit hex RGB value, prepend a #, otherwise
4584     # assume that it is a valid ascii color name
4585     my ($color) = @_;
4586     if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4587     return $color;
4588 }
4589
4590 sub set_default_properties {
4591     my ( $short_name, $color, $bold, $italic ) = @_;
4592
4593     set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4594     my $key;
4595     $key = "html-bold-$short_to_long_names{$short_name}";
4596     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4597     $key = "html-italic-$short_to_long_names{$short_name}";
4598     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4599 }
4600
4601 sub pod_to_html {
4602
4603     # Use Pod::Html to process the pod and make the page
4604     # then merge the perltidy code sections into it.
4605     # return 1 if success, 0 otherwise
4606     my $self = shift;
4607     my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4608     my $input_file   = $self->{_input_file};
4609     my $title        = $self->{_title};
4610     my $success_flag = 0;
4611
4612     # don't try to use pod2html if no pod
4613     unless ($pod_string) {
4614         return $success_flag;
4615     }
4616
4617     # Pod::Html requires a real temporary filename
4618     # If we are making a frame, we have a name available
4619     # Otherwise, we have to fine one
4620     my $tmpfile;
4621     if ( $rOpts->{'frames'} ) {
4622         $tmpfile = $self->{_toc_filename};
4623     }
4624     else {
4625         $tmpfile = Perl::Tidy::make_temporary_filename();
4626     }
4627     my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4628     unless ($fh_tmp) {
4629         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4630         return $success_flag;
4631     }
4632
4633     #------------------------------------------------------------------
4634     # Warning: a temporary file is open; we have to clean up if
4635     # things go bad.  From here on all returns should be by going to
4636     # RETURN so that the temporary file gets unlinked.
4637     #------------------------------------------------------------------
4638
4639     # write the pod text to the temporary file
4640     $fh_tmp->print($pod_string);
4641     $fh_tmp->close();
4642
4643     # Hand off the pod to pod2html.
4644     # Note that we can use the same temporary filename for input and output
4645     # because of the way pod2html works.
4646     {
4647
4648         my @args;
4649         push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4650         my $kw;
4651
4652         # Flags with string args:
4653         # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4654         # "podpath=s", "podroot=s"
4655         # Note: -css=s is handled by perltidy itself
4656         foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4657             if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4658         }
4659
4660         # Toggle switches; these have extra leading 'pod'
4661         # "header!", "index!", "recurse!", "quiet!", "verbose!"
4662         foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4663             my $kwd = $kw;    # allows us to strip 'pod'
4664             if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4665             elsif ( defined( $rOpts->{$kw} ) ) {
4666                 $kwd =~ s/^pod//;
4667                 push @args, "--no$kwd";
4668             }
4669         }
4670
4671         # "flush",
4672         $kw = 'podflush';
4673         if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4674
4675         # Must clean up if pod2html dies (it can);
4676         # Be careful not to overwrite callers __DIE__ routine
4677         local $SIG{__DIE__} = sub {
4678             print $_[0];
4679             unlink $tmpfile if -e $tmpfile;
4680             exit 1;
4681         };
4682
4683         pod2html(@args);
4684     }
4685     $fh_tmp = IO::File->new( $tmpfile, 'r' );
4686     unless ($fh_tmp) {
4687
4688         # this error shouldn't happen ... we just used this filename
4689         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4690         goto RETURN;
4691     }
4692
4693     my $html_fh = $self->{_html_fh};
4694     my @toc;
4695     my $in_toc;
4696     my $no_print;
4697
4698     # This routine will write the html selectively and store the toc
4699     my $html_print = sub {
4700         foreach (@_) {
4701             $html_fh->print($_) unless ($no_print);
4702             if ($in_toc) { push @toc, $_ }
4703         }
4704     };
4705
4706     # loop over lines of html output from pod2html and merge in
4707     # the necessary perltidy html sections
4708     my ( $saw_body, $saw_index, $saw_body_end );
4709     while ( my $line = $fh_tmp->getline() ) {
4710
4711         if ( $line =~ /^\s*<html>\s*$/i ) {
4712             my $date = localtime;
4713             $html_print->("<!-- Generated by perltidy on $date -->\n");
4714             $html_print->($line);
4715         }
4716
4717         # Copy the perltidy css, if any, after <body> tag
4718         elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4719             $saw_body = 1;
4720             $html_print->($css_string) if $css_string;
4721             $html_print->($line);
4722
4723             # add a top anchor and heading
4724             $html_print->("<a name=\"-top-\"></a>\n");
4725             $title = escape_html($title);
4726             $html_print->("<h1>$title</h1>\n");
4727         }
4728         elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4729             $in_toc = 1;
4730
4731             # when frames are used, an extra table of contents in the
4732             # contents panel is confusing, so don't print it
4733             $no_print = $rOpts->{'frames'}
4734               || !$rOpts->{'html-table-of-contents'};
4735             $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4736             $html_print->($line);
4737         }
4738
4739         # Copy the perltidy toc, if any, after the Pod::Html toc
4740         elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4741             $saw_index = 1;
4742             $html_print->($line);
4743             if ($toc_string) {
4744                 $html_print->("<hr />\n") if $rOpts->{'frames'};
4745                 $html_print->("<h2>Code Index:</h2>\n");
4746                 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4747                 $html_print->(@toc);
4748             }
4749             $in_toc   = 0;
4750             $no_print = 0;
4751         }
4752
4753         # Copy one perltidy section after each marker
4754         elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4755             $line = $2;
4756             $html_print->($1) if $1;
4757
4758             # Intermingle code and pod sections if we saw multiple =cut's.
4759             if ( $self->{_pod_cut_count} > 1 ) {
4760                 my $rpre_string = shift(@$rpre_string_stack);
4761                 if ($$rpre_string) {
4762                     $html_print->('<pre>');
4763                     $html_print->($$rpre_string);
4764                     $html_print->('</pre>');
4765                 }
4766                 else {
4767
4768                     # shouldn't happen: we stored a string before writing
4769                     # each marker.
4770                     warn
4771 "Problem merging html stream with pod2html; order may be wrong\n";
4772                 }
4773                 $html_print->($line);
4774             }
4775
4776             # If didn't see multiple =cut lines, we'll put the pod out first
4777             # and then the code, because it's less confusing.
4778             else {
4779
4780                 # since we are not intermixing code and pod, we don't need
4781                 # or want any <hr> lines which separated pod and code
4782                 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4783             }
4784         }
4785
4786         # Copy any remaining code section before the </body> tag
4787         elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4788             $saw_body_end = 1;
4789             if (@$rpre_string_stack) {
4790                 unless ( $self->{_pod_cut_count} > 1 ) {
4791                     $html_print->('<hr />');
4792                 }
4793                 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4794                     $html_print->('<pre>');
4795                     $html_print->($$rpre_string);
4796                     $html_print->('</pre>');
4797                 }
4798             }
4799             $html_print->($line);
4800         }
4801         else {
4802             $html_print->($line);
4803         }
4804     }
4805
4806     $success_flag = 1;
4807     unless ($saw_body) {
4808         warn "Did not see <body> in pod2html output\n";
4809         $success_flag = 0;
4810     }
4811     unless ($saw_body_end) {
4812         warn "Did not see </body> in pod2html output\n";
4813         $success_flag = 0;
4814     }
4815     unless ($saw_index) {
4816         warn "Did not find INDEX END in pod2html output\n";
4817         $success_flag = 0;
4818     }
4819
4820   RETURN:
4821     eval { $html_fh->close() };
4822
4823     # note that we have to unlink tmpfile before making frames
4824     # because the tmpfile may be one of the names used for frames
4825     unlink $tmpfile if -e $tmpfile;
4826     if ( $success_flag && $rOpts->{'frames'} ) {
4827         $self->make_frame( \@toc );
4828     }
4829     return $success_flag;
4830 }
4831
4832 sub make_frame {
4833
4834     # Make a frame with table of contents in the left panel
4835     # and the text in the right panel.
4836     # On entry:
4837     #  $html_filename contains the no-frames html output
4838     #  $rtoc is a reference to an array with the table of contents
4839     my $self          = shift;
4840     my ($rtoc)        = @_;
4841     my $input_file    = $self->{_input_file};
4842     my $html_filename = $self->{_html_file};
4843     my $toc_filename  = $self->{_toc_filename};
4844     my $src_filename  = $self->{_src_filename};
4845     my $title         = $self->{_title};
4846     $title = escape_html($title);
4847
4848     # FUTURE input parameter:
4849     my $top_basename = "";
4850
4851     # We need to produce 3 html files:
4852     # 1. - the table of contents
4853     # 2. - the contents (source code) itself
4854     # 3. - the frame which contains them
4855
4856     # get basenames for relative links
4857     my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4858     my ( $src_basename, $src_path ) = fileparse($src_filename);
4859
4860     # 1. Make the table of contents panel, with appropriate changes
4861     # to the anchor names
4862     my $src_frame_name = 'SRC';
4863     my $first_anchor =
4864       write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4865         $src_frame_name );
4866
4867     # 2. The current .html filename is renamed to be the contents panel
4868     rename( $html_filename, $src_filename )
4869       or die "Cannot rename $html_filename to $src_filename:$!\n";
4870
4871     # 3. Then use the original html filename for the frame
4872     write_frame_html(
4873         $title,        $html_filename, $top_basename,
4874         $toc_basename, $src_basename,  $src_frame_name
4875     );
4876 }
4877
4878 sub write_toc_html {
4879
4880     # write a separate html table of contents file for frames
4881     my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4882     my $fh = IO::File->new( $toc_filename, 'w' )
4883       or die "Cannot open $toc_filename:$!\n";
4884     $fh->print(<<EOM);
4885 <html>
4886 <head>
4887 <title>$title</title>
4888 </head>
4889 <body>
4890 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4891 EOM
4892
4893     my $first_anchor =
4894       change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4895     $fh->print( join "", @$rtoc );
4896
4897     $fh->print(<<EOM);
4898 </body>
4899 </html>
4900 EOM
4901
4902 }
4903
4904 sub write_frame_html {
4905
4906     # write an html file to be the table of contents frame
4907     my (
4908         $title,        $frame_filename, $top_basename,
4909         $toc_basename, $src_basename,   $src_frame_name
4910     ) = @_;
4911
4912     my $fh = IO::File->new( $frame_filename, 'w' )
4913       or die "Cannot open $toc_basename:$!\n";
4914
4915     $fh->print(<<EOM);
4916 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4917     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4918 <?xml version="1.0" encoding="iso-8859-1" ?>
4919 <html xmlns="http://www.w3.org/1999/xhtml">
4920 <head>
4921 <title>$title</title>
4922 </head>
4923 EOM
4924
4925     # two left panels, one right, if master index file
4926     if ($top_basename) {
4927         $fh->print(<<EOM);
4928 <frameset cols="20%,80%">
4929 <frameset rows="30%,70%">
4930 <frame src = "$top_basename" />
4931 <frame src = "$toc_basename" />
4932 </frameset>
4933 EOM
4934     }
4935
4936     # one left panels, one right, if no master index file
4937     else {
4938         $fh->print(<<EOM);
4939 <frameset cols="20%,*">
4940 <frame src = "$toc_basename" />
4941 EOM
4942     }
4943     $fh->print(<<EOM);
4944 <frame src = "$src_basename" name = "$src_frame_name" />
4945 <noframes>
4946 <body>
4947 <p>If you see this message, you are using a non-frame-capable web client.</p>
4948 <p>This document contains:</p>
4949 <ul>
4950 <li><a href="$toc_basename">A table of contents</a></li>
4951 <li><a href="$src_basename">The source code</a></li>
4952 </ul>
4953 </body>
4954 </noframes>
4955 </frameset>
4956 </html>
4957 EOM
4958 }
4959
4960 sub change_anchor_names {
4961
4962     # add a filename and target to anchors
4963     # also return the first anchor
4964     my ( $rlines, $filename, $target ) = @_;
4965     my $first_anchor;
4966     foreach my $line (@$rlines) {
4967
4968         #  We're looking for lines like this:
4969         #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4970         #  ----  -       --------  -----------------
4971         #  $1              $4            $5
4972         if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4973             my $pre  = $1;
4974             my $name = $4;
4975             my $post = $5;
4976             my $href = "$filename#$name";
4977             $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
4978             unless ($first_anchor) { $first_anchor = $href }
4979         }
4980     }
4981     return $first_anchor;
4982 }
4983
4984 sub close_html_file {
4985     my $self = shift;
4986     return unless $self->{_html_file_opened};
4987
4988     my $html_fh     = $self->{_html_fh};
4989     my $rtoc_string = $self->{_rtoc_string};
4990
4991     # There are 3 basic paths to html output...
4992
4993     # ---------------------------------
4994     # Path 1: finish up if in -pre mode
4995     # ---------------------------------
4996     if ( $rOpts->{'html-pre-only'} ) {
4997         $html_fh->print( <<"PRE_END");
4998 </pre>
4999 PRE_END
5000         eval { $html_fh->close() };
5001         return;
5002     }
5003
5004     # Finish the index
5005     $self->add_toc_item( 'EOF', 'EOF' );
5006
5007     my $rpre_string_stack = $self->{_rpre_string_stack};
5008
5009     # Patch to darken the <pre> background color in case of pod2html and
5010     # interleaved code/documentation.  Otherwise, the distinction
5011     # between code and documentation is blurred.
5012     if (   $rOpts->{pod2html}
5013         && $self->{_pod_cut_count} >= 1
5014         && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5015     {
5016         $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5017     }
5018
5019     # put the css or its link into a string, if used
5020     my $css_string;
5021     my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5022
5023     # use css linked to another file
5024     if ( $rOpts->{'html-linked-style-sheet'} ) {
5025         $fh_css->print(
5026             qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5027         );
5028     }
5029
5030     # use css embedded in this file
5031     elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5032         $fh_css->print( <<'ENDCSS');
5033 <style type="text/css">
5034 <!--
5035 ENDCSS
5036         write_style_sheet_data($fh_css);
5037         $fh_css->print( <<"ENDCSS");
5038 -->
5039 </style>
5040 ENDCSS
5041     }
5042
5043     # -----------------------------------------------------------
5044     # path 2: use pod2html if requested
5045     #         If we fail for some reason, continue on to path 3
5046     # -----------------------------------------------------------
5047     if ( $rOpts->{'pod2html'} ) {
5048         my $rpod_string = $self->{_rpod_string};
5049         $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5050             $rpre_string_stack )
5051           && return;
5052     }
5053
5054     # --------------------------------------------------
5055     # path 3: write code in html, with pod only in italics
5056     # --------------------------------------------------
5057     my $input_file = $self->{_input_file};
5058     my $title      = escape_html($input_file);
5059     my $date       = localtime;
5060     $html_fh->print( <<"HTML_START");
5061 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5062    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5063 <!-- Generated by perltidy on $date -->
5064 <html xmlns="http://www.w3.org/1999/xhtml">
5065 <head>
5066 <title>$title</title>
5067 HTML_START
5068
5069     # output the css, if used
5070     if ($css_string) {
5071         $html_fh->print($css_string);
5072         $html_fh->print( <<"ENDCSS");
5073 </head>
5074 <body>
5075 ENDCSS
5076     }
5077     else {
5078
5079         $html_fh->print( <<"HTML_START");
5080 </head>
5081 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5082 HTML_START
5083     }
5084
5085     $html_fh->print("<a name=\"-top-\"></a>\n");
5086     $html_fh->print( <<"EOM");
5087 <h1>$title</h1>
5088 EOM
5089
5090     # copy the table of contents
5091     if (   $$rtoc_string
5092         && !$rOpts->{'frames'}
5093         && $rOpts->{'html-table-of-contents'} )
5094     {
5095         $html_fh->print($$rtoc_string);
5096     }
5097
5098     # copy the pre section(s)
5099     my $fname_comment = $input_file;
5100     $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
5101     $html_fh->print( <<"END_PRE");
5102 <hr />
5103 <!-- contents of filename: $fname_comment -->
5104 <pre>
5105 END_PRE
5106
5107     foreach my $rpre_string (@$rpre_string_stack) {
5108         $html_fh->print($$rpre_string);
5109     }
5110
5111     # and finish the html page
5112     $html_fh->print( <<"HTML_END");
5113 </pre>
5114 </body>
5115 </html>
5116 HTML_END
5117     eval { $html_fh->close() };    # could be object without close method
5118
5119     if ( $rOpts->{'frames'} ) {
5120         my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5121         $self->make_frame( \@toc );
5122     }
5123 }
5124
5125 sub markup_tokens {
5126     my $self = shift;
5127     my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5128     my ( @colored_tokens, $j, $string, $type, $token, $level );
5129     my $rlast_level    = $self->{_rlast_level};
5130     my $rpackage_stack = $self->{_rpackage_stack};
5131
5132     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5133         $type  = $$rtoken_type[$j];
5134         $token = $$rtokens[$j];
5135         $level = $$rlevels[$j];
5136         $level = 0 if ( $level < 0 );
5137
5138         #-------------------------------------------------------
5139         # Update the package stack.  The package stack is needed to keep
5140         # the toc correct because some packages may be declared within
5141         # blocks and go out of scope when we leave the block.
5142         #-------------------------------------------------------
5143         if ( $level > $$rlast_level ) {
5144             unless ( $rpackage_stack->[ $level - 1 ] ) {
5145                 $rpackage_stack->[ $level - 1 ] = 'main';
5146             }
5147             $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5148         }
5149         elsif ( $level < $$rlast_level ) {
5150             my $package = $rpackage_stack->[$level];
5151             unless ($package) { $package = 'main' }
5152
5153             # if we change packages due to a nesting change, we
5154             # have to make an entry in the toc
5155             if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5156                 $self->add_toc_item( $package, 'package' );
5157             }
5158         }
5159         $$rlast_level = $level;
5160
5161         #-------------------------------------------------------
5162         # Intercept a sub name here; split it
5163         # into keyword 'sub' and sub name; and add an
5164         # entry in the toc
5165         #-------------------------------------------------------
5166         if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5167             $token = $self->markup_html_element( $1, 'k' );
5168             push @colored_tokens, $token;
5169             $token = $2;
5170             $type  = 'M';
5171
5172             # but don't include sub declarations in the toc;
5173             # these wlll have leading token types 'i;'
5174             my $signature = join "", @$rtoken_type;
5175             unless ( $signature =~ /^i;/ ) {
5176                 my $subname = $token;
5177                 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5178                 $self->add_toc_item( $subname, 'sub' );
5179             }
5180         }
5181
5182         #-------------------------------------------------------
5183         # Intercept a package name here; split it
5184         # into keyword 'package' and name; add to the toc,
5185         # and update the package stack
5186         #-------------------------------------------------------
5187         if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5188             $token = $self->markup_html_element( $1, 'k' );
5189             push @colored_tokens, $token;
5190             $token = $2;
5191             $type  = 'i';
5192             $self->add_toc_item( "$token", 'package' );
5193             $rpackage_stack->[$level] = $token;
5194         }
5195
5196         $token = $self->markup_html_element( $token, $type );
5197         push @colored_tokens, $token;
5198     }
5199     return ( \@colored_tokens );
5200 }
5201
5202 sub markup_html_element {
5203     my $self = shift;
5204     my ( $token, $type ) = @_;
5205
5206     return $token if ( $type eq 'b' );    # skip a blank token
5207     return $token if ( $token =~ /^\s*$/ );    # skip a blank line
5208     $token = escape_html($token);
5209
5210     # get the short abbreviation for this token type
5211     my $short_name = $token_short_names{$type};
5212     if ( !defined($short_name) ) {
5213         $short_name = "pu";                    # punctuation is default
5214     }
5215
5216     # handle style sheets..
5217     if ( !$rOpts->{'nohtml-style-sheets'} ) {
5218         if ( $short_name ne 'pu' ) {
5219             $token = qq(<span class="$short_name">) . $token . "</span>";
5220         }
5221     }
5222
5223     # handle no style sheets..
5224     else {
5225         my $color = $html_color{$short_name};
5226
5227         if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5228             $token = qq(<font color="$color">) . $token . "</font>";
5229         }
5230         if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5231         if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
5232     }
5233     return $token;
5234 }
5235
5236 sub escape_html {
5237
5238     my $token = shift;
5239     if ($missing_html_entities) {
5240         $token =~ s/\&/&amp;/g;
5241         $token =~ s/\</&lt;/g;
5242         $token =~ s/\>/&gt;/g;
5243         $token =~ s/\"/&quot;/g;
5244     }
5245     else {
5246         HTML::Entities::encode_entities($token);
5247     }
5248     return $token;
5249 }
5250
5251 sub finish_formatting {
5252
5253     # called after last line
5254     my $self = shift;
5255     $self->close_html_file();
5256     return;
5257 }
5258
5259 sub write_line {
5260
5261     my $self = shift;
5262     return unless $self->{_html_file_opened};
5263     my $html_pre_fh      = $self->{_html_pre_fh};
5264     my ($line_of_tokens) = @_;
5265     my $line_type        = $line_of_tokens->{_line_type};
5266     my $input_line       = $line_of_tokens->{_line_text};
5267     my $line_number      = $line_of_tokens->{_line_number};
5268     chomp $input_line;
5269
5270     # markup line of code..
5271     my $html_line;
5272     if ( $line_type eq 'CODE' ) {
5273         my $rtoken_type = $line_of_tokens->{_rtoken_type};
5274         my $rtokens     = $line_of_tokens->{_rtokens};
5275         my $rlevels     = $line_of_tokens->{_rlevels};
5276
5277         if ( $input_line =~ /(^\s*)/ ) {
5278             $html_line = $1;
5279         }
5280         else {
5281             $html_line = "";
5282         }
5283         my ($rcolored_tokens) =
5284           $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5285         $html_line .= join '', @$rcolored_tokens;
5286     }
5287
5288     # markup line of non-code..
5289     else {
5290         my $line_character;
5291         if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
5292         elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
5293         elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
5294         elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5295         elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
5296         elsif ( $line_type eq 'END_START' ) {
5297             $line_character = 'k';
5298             $self->add_toc_item( '__END__', '__END__' );
5299         }
5300         elsif ( $line_type eq 'DATA_START' ) {
5301             $line_character = 'k';
5302             $self->add_toc_item( '__DATA__', '__DATA__' );
5303         }
5304         elsif ( $line_type =~ /^POD/ ) {
5305             $line_character = 'P';
5306             if ( $rOpts->{'pod2html'} ) {
5307                 my $html_pod_fh = $self->{_html_pod_fh};
5308                 if ( $line_type eq 'POD_START' ) {
5309
5310                     my $rpre_string_stack = $self->{_rpre_string_stack};
5311                     my $rpre_string       = $rpre_string_stack->[-1];
5312
5313                     # if we have written any non-blank lines to the
5314                     # current pre section, start writing to a new output
5315                     # string
5316                     if ( $$rpre_string =~ /\S/ ) {
5317                         my $pre_string;
5318                         $html_pre_fh =
5319                           Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5320                         $self->{_html_pre_fh} = $html_pre_fh;
5321                         push @$rpre_string_stack, \$pre_string;
5322
5323                         # leave a marker in the pod stream so we know
5324                         # where to put the pre section we just
5325                         # finished.
5326                         my $for_html = '=for html';    # don't confuse pod utils
5327                         $html_pod_fh->print(<<EOM);
5328
5329 $for_html
5330 <!-- pERLTIDY sECTION -->
5331
5332 EOM
5333                     }
5334
5335                     # otherwise, just clear the current string and start
5336                     # over
5337                     else {
5338                         $$rpre_string = "";
5339                         $html_pod_fh->print("\n");
5340                     }
5341                 }
5342                 $html_pod_fh->print( $input_line . "\n" );
5343                 if ( $line_type eq 'POD_END' ) {
5344                     $self->{_pod_cut_count}++;
5345                     $html_pod_fh->print("\n");
5346                 }
5347                 return;
5348             }
5349         }
5350         else { $line_character = 'Q' }
5351         $html_line = $self->markup_html_element( $input_line, $line_character );
5352     }
5353
5354     # add the line number if requested
5355     if ( $rOpts->{'html-line-numbers'} ) {
5356         my $extra_space .=
5357             ( $line_number < 10 )   ? "   "
5358           : ( $line_number < 100 )  ? "  "
5359           : ( $line_number < 1000 ) ? " "
5360           :                           "";
5361         $html_line = $extra_space . $line_number . " " . $html_line;
5362     }
5363
5364     # write the line
5365     $html_pre_fh->print("$html_line\n");
5366 }
5367
5368 #####################################################################
5369 #
5370 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5371 # line breaks to the token stream
5372 #
5373 # WARNING: This is not a real class for speed reasons.  Only one
5374 # Formatter may be used.
5375 #
5376 #####################################################################
5377
5378 package Perl::Tidy::Formatter;
5379
5380 BEGIN {
5381
5382     # Caution: these debug flags produce a lot of output
5383     # They should all be 0 except when debugging small scripts
5384     use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
5385     use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
5386     use constant FORMATTER_DEBUG_FLAG_CI      => 0;
5387     use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
5388     use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
5389     use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
5390     use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5391     use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
5392     use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
5393     use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
5394     use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
5395     use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;
5396
5397     my $debug_warning = sub {
5398         print "FORMATTER_DEBUGGING with key $_[0]\n";
5399     };
5400
5401     FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
5402     FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
5403     FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
5404     FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
5405     FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
5406     FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
5407     FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5408     FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
5409     FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
5410     FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
5411     FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
5412     FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
5413 }
5414
5415 use Carp;
5416 use vars qw{
5417
5418   @gnu_stack
5419   $max_gnu_stack_index
5420   $gnu_position_predictor
5421   $line_start_index_to_go
5422   $last_indentation_written
5423   $last_unadjusted_indentation
5424   $last_leading_token
5425
5426   $saw_VERSION_in_this_file
5427   $saw_END_or_DATA_
5428
5429   @gnu_item_list
5430   $max_gnu_item_index
5431   $gnu_sequence_number
5432   $last_output_indentation
5433   %last_gnu_equals
5434   %gnu_comma_count
5435   %gnu_arrow_count
5436
5437   @block_type_to_go
5438   @type_sequence_to_go
5439   @container_environment_to_go
5440   @bond_strength_to_go
5441   @forced_breakpoint_to_go
5442   @lengths_to_go
5443   @levels_to_go
5444   @leading_spaces_to_go
5445   @reduced_spaces_to_go
5446   @matching_token_to_go
5447   @mate_index_to_go
5448   @nesting_blocks_to_go
5449   @ci_levels_to_go
5450   @nesting_depth_to_go
5451   @nobreak_to_go
5452   @old_breakpoint_to_go
5453   @tokens_to_go
5454   @types_to_go
5455
5456   %saved_opening_indentation
5457
5458   $max_index_to_go
5459   $comma_count_in_batch
5460   $old_line_count_in_batch
5461   $last_nonblank_index_to_go
5462   $last_nonblank_type_to_go
5463   $last_nonblank_token_to_go
5464   $last_last_nonblank_index_to_go
5465   $last_last_nonblank_type_to_go
5466   $last_last_nonblank_token_to_go
5467   @nonblank_lines_at_depth
5468   $starting_in_quote
5469   $ending_in_quote
5470
5471   $in_format_skipping_section
5472   $format_skipping_pattern_begin
5473   $format_skipping_pattern_end
5474
5475   $forced_breakpoint_count
5476   $forced_breakpoint_undo_count
5477   @forced_breakpoint_undo_stack
5478   %postponed_breakpoint
5479
5480   $tabbing
5481   $embedded_tab_count
5482   $first_embedded_tab_at
5483   $last_embedded_tab_at
5484   $deleted_semicolon_count
5485   $first_deleted_semicolon_at
5486   $last_deleted_semicolon_at
5487   $added_semicolon_count
5488   $first_added_semicolon_at
5489   $last_added_semicolon_at
5490   $first_tabbing_disagreement
5491   $last_tabbing_disagreement
5492   $in_tabbing_disagreement
5493   $tabbing_disagreement_count
5494   $input_line_tabbing
5495
5496   $last_line_type
5497   $last_line_leading_type
5498   $last_line_leading_level
5499   $last_last_line_leading_level
5500
5501   %block_leading_text
5502   %block_opening_line_number
5503   $csc_new_statement_ok
5504   $accumulating_text_for_block
5505   $leading_block_text
5506   $rleading_block_if_elsif_text
5507   $leading_block_text_level
5508   $leading_block_text_length_exceeded
5509   $leading_block_text_line_length
5510   $leading_block_text_line_number
5511   $closing_side_comment_prefix_pattern
5512   $closing_side_comment_list_pattern
5513
5514   $last_nonblank_token
5515   $last_nonblank_type
5516   $last_last_nonblank_token
5517   $last_last_nonblank_type
5518   $last_nonblank_block_type
5519   $last_output_level
5520   %is_do_follower
5521   %is_if_brace_follower
5522   %space_after_keyword
5523   $rbrace_follower
5524   $looking_for_else
5525   %is_last_next_redo_return
5526   %is_other_brace_follower
5527   %is_else_brace_follower
5528   %is_anon_sub_brace_follower
5529   %is_anon_sub_1_brace_follower
5530   %is_sort_map_grep
5531   %is_sort_map_grep_eval
5532   %is_sort_map_grep_eval_do
5533   %is_block_without_semicolon
5534   %is_if_unless
5535   %is_and_or
5536   %is_assignment
5537   %is_chain_operator
5538   %is_if_unless_and_or_last_next_redo_return
5539   %is_until_while_for_if_elsif_else
5540
5541   @has_broken_sublist
5542   @dont_align
5543   @want_comma_break
5544
5545   $is_static_block_comment
5546   $index_start_one_line_block
5547   $semicolons_before_block_self_destruct
5548   $index_max_forced_break
5549   $input_line_number
5550   $diagnostics_object
5551   $vertical_aligner_object
5552   $logger_object
5553   $file_writer_object
5554   $formatter_self
5555   @ci_stack
5556   $last_line_had_side_comment
5557   %want_break_before
5558   %outdent_keyword
5559   $static_block_comment_pattern
5560   $static_side_comment_pattern
5561   %opening_vertical_tightness
5562   %closing_vertical_tightness
5563   %closing_token_indentation
5564
5565   %opening_token_right
5566   %stack_opening_token
5567   %stack_closing_token
5568
5569   $block_brace_vertical_tightness_pattern
5570
5571   $rOpts_add_newlines
5572   $rOpts_add_whitespace
5573   $rOpts_block_brace_tightness
5574   $rOpts_block_brace_vertical_tightness
5575   $rOpts_brace_left_and_indent
5576   $rOpts_comma_arrow_breakpoints
5577   $rOpts_break_at_old_keyword_breakpoints
5578   $rOpts_break_at_old_comma_breakpoints
5579   $rOpts_break_at_old_logical_breakpoints
5580   $rOpts_break_at_old_ternary_breakpoints
5581   $rOpts_closing_side_comment_else_flag
5582   $rOpts_closing_side_comment_maximum_text
5583   $rOpts_continuation_indentation
5584   $rOpts_cuddled_else
5585   $rOpts_delete_old_whitespace
5586   $rOpts_fuzzy_line_length
5587   $rOpts_indent_columns
5588   $rOpts_line_up_parentheses
5589   $rOpts_maximum_fields_per_table
5590   $rOpts_maximum_line_length
5591   $rOpts_short_concatenation_item_length
5592   $rOpts_swallow_optional_blank_lines
5593   $rOpts_ignore_old_breakpoints
5594   $rOpts_format_skipping
5595   $rOpts_space_function_paren
5596   $rOpts_space_keyword_paren
5597   $rOpts_keep_interior_semicolons
5598
5599   $half_maximum_line_length
5600
5601   %is_opening_type
5602   %is_closing_type
5603   %is_keyword_returning_list
5604   %tightness
5605   %matching_token
5606   $rOpts
5607   %right_bond_strength
5608   %left_bond_strength
5609   %binary_ws_rules
5610   %want_left_space
5611   %want_right_space
5612   %is_digraph
5613   %is_trigraph
5614   $bli_pattern
5615   $bli_list_string
5616   %is_closing_type
5617   %is_opening_type
5618   %is_closing_token
5619   %is_opening_token
5620 };
5621
5622 BEGIN {
5623
5624     # default list of block types for which -bli would apply
5625     $bli_list_string = 'if else elsif unless while for foreach do : sub';
5626
5627     @_ = qw(
5628       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5629       <= >= == =~ !~ != ++ -- /= x=
5630     );
5631     @is_digraph{@_} = (1) x scalar(@_);
5632
5633     @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5634     @is_trigraph{@_} = (1) x scalar(@_);
5635
5636     @_ = qw(
5637       = **= += *= &= <<= &&=
5638       -= /= |= >>= ||= //=
5639       .= %= ^=
5640       x=
5641     );
5642     @is_assignment{@_} = (1) x scalar(@_);
5643
5644     @_ = qw(
5645       grep
5646       keys
5647       map
5648       reverse
5649       sort
5650       split
5651     );
5652     @is_keyword_returning_list{@_} = (1) x scalar(@_);
5653
5654     @_ = qw(is if unless and or err last next redo return);
5655     @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5656
5657     # always break after a closing curly of these block types:
5658     @_ = qw(until while for if elsif else);
5659     @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
5660
5661     @_ = qw(last next redo return);
5662     @is_last_next_redo_return{@_} = (1) x scalar(@_);
5663
5664     @_ = qw(sort map grep);
5665     @is_sort_map_grep{@_} = (1) x scalar(@_);
5666
5667     @_ = qw(sort map grep eval);
5668     @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5669
5670     @_ = qw(sort map grep eval do);
5671     @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5672
5673     @_ = qw(if unless);
5674     @is_if_unless{@_} = (1) x scalar(@_);
5675
5676     @_ = qw(and or err);
5677     @is_and_or{@_} = (1) x scalar(@_);
5678
5679     # Identify certain operators which often occur in chains.
5680     # Note: the minus (-) causes a side effect of padding of the first line in
5681     # something like this (by sub set_logical_padding):
5682     #    Checkbutton => 'Transmission checked',
5683     #   -variable    => \$TRANS
5684     # This usually improves appearance so it seems ok.
5685     @_ = qw(&& || and or : ? . + - * /);
5686     @is_chain_operator{@_} = (1) x scalar(@_);
5687
5688     # We can remove semicolons after blocks preceded by these keywords
5689     @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
5690       unless while until for foreach);
5691     @is_block_without_semicolon{@_} = (1) x scalar(@_);
5692
5693     # 'L' is token for opening { at hash key
5694     @_ = qw" L { ( [ ";
5695     @is_opening_type{@_} = (1) x scalar(@_);
5696
5697     # 'R' is token for closing } at hash key
5698     @_ = qw" R } ) ] ";
5699     @is_closing_type{@_} = (1) x scalar(@_);
5700
5701     @_ = qw" { ( [ ";
5702     @is_opening_token{@_} = (1) x scalar(@_);
5703
5704     @_ = qw" } ) ] ";
5705     @is_closing_token{@_} = (1) x scalar(@_);
5706 }
5707
5708 # whitespace codes
5709 use constant WS_YES      => 1;
5710 use constant WS_OPTIONAL => 0;
5711 use constant WS_NO       => -1;
5712
5713 # Token bond strengths.
5714 use constant NO_BREAK    => 10000;
5715 use constant VERY_STRONG => 100;
5716 use constant STRONG      => 2.1;
5717 use constant NOMINAL     => 1.1;
5718 use constant WEAK        => 0.8;
5719 use constant VERY_WEAK   => 0.55;
5720
5721 # values for testing indexes in output array
5722 use constant UNDEFINED_INDEX => -1;
5723
5724 # Maximum number of little messages; probably need not be changed.
5725 use constant MAX_NAG_MESSAGES => 6;
5726
5727 # increment between sequence numbers for each type
5728 # For example, ?: pairs might have numbers 7,11,15,...
5729 use constant TYPE_SEQUENCE_INCREMENT => 4;
5730
5731 {
5732
5733     # methods to count instances
5734     my $_count = 0;
5735     sub get_count        { $_count; }
5736     sub _increment_count { ++$_count }
5737     sub _decrement_count { --$_count }
5738 }
5739
5740 sub trim {
5741
5742     # trim leading and trailing whitespace from a string
5743     $_[0] =~ s/\s+$//;
5744     $_[0] =~ s/^\s+//;
5745     return $_[0];
5746 }
5747
5748 sub split_words {
5749
5750     # given a string containing words separated by whitespace,
5751     # return the list of words
5752     my ($str) = @_;
5753     return unless $str;
5754     $str =~ s/\s+$//;
5755     $str =~ s/^\s+//;
5756     return split( /\s+/, $str );
5757 }
5758
5759 # interface to Perl::Tidy::Logger routines
5760 sub warning {
5761     if ($logger_object) {
5762         $logger_object->warning(@_);
5763     }
5764 }
5765
5766 sub complain {
5767     if ($logger_object) {
5768         $logger_object->complain(@_);
5769     }
5770 }
5771
5772 sub write_logfile_entry {
5773     if ($logger_object) {
5774         $logger_object->write_logfile_entry(@_);
5775     }
5776 }
5777
5778 sub black_box {
5779     if ($logger_object) {
5780         $logger_object->black_box(@_);
5781     }
5782 }
5783
5784 sub report_definite_bug {
5785     if ($logger_object) {
5786         $logger_object->report_definite_bug();
5787     }
5788 }
5789
5790 sub get_saw_brace_error {
5791     if ($logger_object) {
5792         $logger_object->get_saw_brace_error();
5793     }
5794 }
5795
5796 sub we_are_at_the_last_line {
5797     if ($logger_object) {
5798         $logger_object->we_are_at_the_last_line();
5799     }
5800 }
5801
5802 # interface to Perl::Tidy::Diagnostics routine
5803 sub write_diagnostics {
5804
5805     if ($diagnostics_object) {
5806         $diagnostics_object->write_diagnostics(@_);
5807     }
5808 }
5809
5810 sub get_added_semicolon_count {
5811     my $self = shift;
5812     return $added_semicolon_count;
5813 }
5814
5815 sub DESTROY {
5816     $_[0]->_decrement_count();
5817 }
5818
5819 sub new {
5820
5821     my $class = shift;
5822
5823     # we are given an object with a write_line() method to take lines
5824     my %defaults = (
5825         sink_object        => undef,
5826         diagnostics_object => undef,
5827         logger_object      => undef,
5828     );
5829     my %args = ( %defaults, @_ );
5830
5831     $logger_object      = $args{logger_object};
5832     $diagnostics_object = $args{diagnostics_object};
5833
5834     # we create another object with a get_line() and peek_ahead() method
5835     my $sink_object = $args{sink_object};
5836     $file_writer_object =
5837       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5838
5839     # initialize the leading whitespace stack to negative levels
5840     # so that we can never run off the end of the stack
5841     $gnu_position_predictor = 0;    # where the current token is predicted to be
5842     $max_gnu_stack_index    = 0;
5843     $max_gnu_item_index     = -1;
5844     $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5845     @gnu_item_list               = ();
5846     $last_output_indentation     = 0;
5847     $last_indentation_written    = 0;
5848     $last_unadjusted_indentation = 0;
5849     $last_leading_token          = "";
5850
5851     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5852     $saw_END_or_DATA_         = 0;
5853
5854     @block_type_to_go            = ();
5855     @type_sequence_to_go         = ();
5856     @container_environment_to_go = ();
5857     @bond_strength_to_go         = ();
5858     @forced_breakpoint_to_go     = ();
5859     @lengths_to_go               = ();    # line length to start of ith token
5860     @levels_to_go                = ();
5861     @matching_token_to_go        = ();
5862     @mate_index_to_go            = ();
5863     @nesting_blocks_to_go        = ();
5864     @ci_levels_to_go             = ();
5865     @nesting_depth_to_go         = (0);
5866     @nobreak_to_go               = ();
5867     @old_breakpoint_to_go        = ();
5868     @tokens_to_go                = ();
5869     @types_to_go                 = ();
5870     @leading_spaces_to_go        = ();
5871     @reduced_spaces_to_go        = ();
5872
5873     @dont_align         = ();
5874     @has_broken_sublist = ();
5875     @want_comma_break   = ();
5876
5877     @ci_stack                   = ("");
5878     $first_tabbing_disagreement = 0;
5879     $last_tabbing_disagreement  = 0;
5880     $tabbing_disagreement_count = 0;
5881     $in_tabbing_disagreement    = 0;
5882     $input_line_tabbing         = undef;
5883
5884     $last_line_type               = "";
5885     $last_last_line_leading_level = 0;
5886     $last_line_leading_level      = 0;
5887     $last_line_leading_type       = '#';
5888
5889     $last_nonblank_token        = ';';
5890     $last_nonblank_type         = ';';
5891     $last_last_nonblank_token   = ';';
5892     $last_last_nonblank_type    = ';';
5893     $last_nonblank_block_type   = "";
5894     $last_output_level          = 0;
5895     $looking_for_else           = 0;
5896     $embedded_tab_count         = 0;
5897     $first_embedded_tab_at      = 0;
5898     $last_embedded_tab_at       = 0;
5899     $deleted_semicolon_count    = 0;
5900     $first_deleted_semicolon_at = 0;
5901     $last_deleted_semicolon_at  = 0;
5902     $added_semicolon_count      = 0;
5903     $first_added_semicolon_at   = 0;
5904     $last_added_semicolon_at    = 0;
5905     $last_line_had_side_comment = 0;
5906     $is_static_block_comment    = 0;
5907     %postponed_breakpoint       = ();
5908
5909     # variables for adding side comments
5910     %block_leading_text        = ();
5911     %block_opening_line_number = ();
5912     $csc_new_statement_ok      = 1;
5913
5914     %saved_opening_indentation  = ();
5915     $in_format_skipping_section = 0;
5916
5917     reset_block_text_accumulator();
5918
5919     prepare_for_new_input_lines();
5920
5921     $vertical_aligner_object =
5922       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5923         $logger_object, $diagnostics_object );
5924
5925     if ( $rOpts->{'entab-leading-whitespace'} ) {
5926         write_logfile_entry(
5927 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5928         );
5929     }
5930     elsif ( $rOpts->{'tabs'} ) {
5931         write_logfile_entry("Indentation will be with a tab character\n");
5932     }
5933     else {
5934         write_logfile_entry(
5935             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5936     }
5937
5938     # This was the start of a formatter referent, but object-oriented
5939     # coding has turned out to be too slow here.
5940     $formatter_self = {};
5941
5942     bless $formatter_self, $class;
5943
5944     # Safety check..this is not a class yet
5945     if ( _increment_count() > 1 ) {
5946         confess
5947 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
5948     }
5949     return $formatter_self;
5950 }
5951
5952 sub prepare_for_new_input_lines {
5953
5954     $gnu_sequence_number++;    # increment output batch counter
5955     %last_gnu_equals                = ();
5956     %gnu_comma_count                = ();
5957     %gnu_arrow_count                = ();
5958     $line_start_index_to_go         = 0;
5959     $max_gnu_item_index             = UNDEFINED_INDEX;
5960     $index_max_forced_break         = UNDEFINED_INDEX;
5961     $max_index_to_go                = UNDEFINED_INDEX;
5962     $last_nonblank_index_to_go      = UNDEFINED_INDEX;
5963     $last_nonblank_type_to_go       = '';
5964     $last_nonblank_token_to_go      = '';
5965     $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5966     $last_last_nonblank_type_to_go  = '';
5967     $last_last_nonblank_token_to_go = '';
5968     $forced_breakpoint_count        = 0;
5969     $forced_breakpoint_undo_count   = 0;
5970     $rbrace_follower                = undef;
5971     $lengths_to_go[0]               = 0;
5972     $old_line_count_in_batch        = 1;
5973     $comma_count_in_batch           = 0;
5974     $starting_in_quote              = 0;
5975
5976     destroy_one_line_block();
5977 }
5978
5979 sub write_line {
5980
5981     my $self = shift;
5982     my ($line_of_tokens) = @_;
5983
5984     my $line_type  = $line_of_tokens->{_line_type};
5985     my $input_line = $line_of_tokens->{_line_text};
5986
5987     # _line_type codes are:
5988     #   SYSTEM         - system-specific code before hash-bang line
5989     #   CODE           - line of perl code (including comments)
5990     #   POD_START      - line starting pod, such as '=head'
5991     #   POD            - pod documentation text
5992     #   POD_END        - last line of pod section, '=cut'
5993     #   HERE           - text of here-document
5994     #   HERE_END       - last line of here-doc (target word)
5995     #   FORMAT         - format section
5996     #   FORMAT_END     - last line of format section, '.'
5997     #   DATA_START     - __DATA__ line
5998     #   DATA           - unidentified text following __DATA__
5999     #   END_START      - __END__ line
6000     #   END            - unidentified text following __END__
6001     #   ERROR          - we are in big trouble, probably not a perl script
6002
6003     # put a blank line after an =cut which comes before __END__ and __DATA__
6004     # (required by podchecker)
6005     if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6006         $file_writer_object->reset_consecutive_blank_lines();
6007         if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6008     }
6009
6010     # handle line of code..
6011     if ( $line_type eq 'CODE' ) {
6012
6013         # let logger see all non-blank lines of code
6014         if ( $input_line !~ /^\s*$/ ) {
6015             my $output_line_number =
6016               $vertical_aligner_object->get_output_line_number();
6017             black_box( $line_of_tokens, $output_line_number );
6018         }
6019         print_line_of_tokens($line_of_tokens);
6020     }
6021
6022     # handle line of non-code..
6023     else {
6024
6025         # set special flags
6026         my $skip_line = 0;
6027         my $tee_line  = 0;
6028         if ( $line_type =~ /^POD/ ) {
6029
6030             # Pod docs should have a preceding blank line.  But be
6031             # very careful in __END__ and __DATA__ sections, because:
6032             #   1. the user may be using this section for any purpose whatsoever
6033             #   2. the blank counters are not active there
6034             # It should be safe to request a blank line between an
6035             # __END__ or __DATA__ and an immediately following '=head'
6036             # type line, (types END_START and DATA_START), but not for
6037             # any other lines of type END or DATA.
6038             if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6039             if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
6040             if (   !$skip_line
6041                 && $line_type eq 'POD_START'
6042                 && $last_line_type !~ /^(END|DATA)$/ )
6043             {
6044                 want_blank_line();
6045             }
6046         }
6047
6048         # leave the blank counters in a predictable state
6049         # after __END__ or __DATA__
6050         elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6051             $file_writer_object->reset_consecutive_blank_lines();
6052             $saw_END_or_DATA_ = 1;
6053         }
6054
6055         # write unindented non-code line
6056         if ( !$skip_line ) {
6057             if ($tee_line) { $file_writer_object->tee_on() }
6058             write_unindented_line($input_line);
6059             if ($tee_line) { $file_writer_object->tee_off() }
6060         }
6061     }
6062     $last_line_type = $line_type;
6063 }
6064
6065 sub create_one_line_block {
6066     $index_start_one_line_block            = $_[0];
6067     $semicolons_before_block_self_destruct = $_[1];
6068 }
6069
6070 sub destroy_one_line_block {
6071     $index_start_one_line_block            = UNDEFINED_INDEX;
6072     $semicolons_before_block_self_destruct = 0;
6073 }
6074
6075 sub leading_spaces_to_go {
6076
6077     # return the number of indentation spaces for a token in the output stream;
6078     # these were previously stored by 'set_leading_whitespace'.
6079
6080     return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6081
6082 }
6083
6084 sub get_SPACES {
6085
6086     # return the number of leading spaces associated with an indentation
6087     # variable $indentation is either a constant number of spaces or an object
6088     # with a get_SPACES method.
6089     my $indentation = shift;
6090     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6091 }
6092
6093 sub get_RECOVERABLE_SPACES {
6094
6095     # return the number of spaces (+ means shift right, - means shift left)
6096     # that we would like to shift a group of lines with the same indentation
6097     # to get them to line up with their opening parens
6098     my $indentation = shift;
6099     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6100 }
6101
6102 sub get_AVAILABLE_SPACES_to_go {
6103
6104     my $item = $leading_spaces_to_go[ $_[0] ];
6105
6106     # return the number of available leading spaces associated with an
6107     # indentation variable.  $indentation is either a constant number of
6108     # spaces or an object with a get_AVAILABLE_SPACES method.
6109     return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6110 }
6111
6112 sub new_lp_indentation_item {
6113
6114     # this is an interface to the IndentationItem class
6115     my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6116
6117     # A negative level implies not to store the item in the item_list
6118     my $index = 0;
6119     if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6120
6121     my $item = Perl::Tidy::IndentationItem->new(
6122         $spaces,      $level,
6123         $ci_level,    $available_spaces,
6124         $index,       $gnu_sequence_number,
6125         $align_paren, $max_gnu_stack_index,
6126         $line_start_index_to_go,
6127     );
6128
6129     if ( $level >= 0 ) {
6130         $gnu_item_list[$max_gnu_item_index] = $item;
6131     }
6132
6133     return $item;
6134 }
6135
6136 sub set_leading_whitespace {
6137
6138     # This routine defines leading whitespace
6139     # given: the level and continuation_level of a token,
6140     # define: space count of leading string which would apply if it
6141     # were the first token of a new line.
6142
6143     my ( $level, $ci_level, $in_continued_quote ) = @_;
6144
6145     # modify for -bli, which adds one continuation indentation for
6146     # opening braces
6147     if (   $rOpts_brace_left_and_indent
6148         && $max_index_to_go == 0
6149         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6150     {
6151         $ci_level++;
6152     }
6153
6154     # patch to avoid trouble when input file has negative indentation.
6155     # other logic should catch this error.
6156     if ( $level < 0 ) { $level = 0 }
6157
6158     #-------------------------------------------
6159     # handle the standard indentation scheme
6160     #-------------------------------------------
6161     unless ($rOpts_line_up_parentheses) {
6162         my $space_count =
6163           $ci_level * $rOpts_continuation_indentation +
6164           $level * $rOpts_indent_columns;
6165         my $ci_spaces =
6166           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6167
6168         if ($in_continued_quote) {
6169             $space_count = 0;
6170             $ci_spaces   = 0;
6171         }
6172         $leading_spaces_to_go[$max_index_to_go] = $space_count;
6173         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6174         return;
6175     }
6176
6177     #-------------------------------------------------------------
6178     # handle case of -lp indentation..
6179     #-------------------------------------------------------------
6180
6181     # The continued_quote flag means that this is the first token of a
6182     # line, and it is the continuation of some kind of multi-line quote
6183     # or pattern.  It requires special treatment because it must have no
6184     # added leading whitespace. So we create a special indentation item
6185     # which is not in the stack.
6186     if ($in_continued_quote) {
6187         my $space_count     = 0;
6188         my $available_space = 0;
6189         $level = -1;    # flag to prevent storing in item_list
6190         $leading_spaces_to_go[$max_index_to_go] =
6191           $reduced_spaces_to_go[$max_index_to_go] =
6192           new_lp_indentation_item( $space_count, $level, $ci_level,
6193             $available_space, 0 );
6194         return;
6195     }
6196
6197     # get the top state from the stack
6198     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6199     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6200     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6201
6202     my $type        = $types_to_go[$max_index_to_go];
6203     my $token       = $tokens_to_go[$max_index_to_go];
6204     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6205
6206     if ( $type eq '{' || $type eq '(' ) {
6207
6208         $gnu_comma_count{ $total_depth + 1 } = 0;
6209         $gnu_arrow_count{ $total_depth + 1 } = 0;
6210
6211         # If we come to an opening token after an '=' token of some type,
6212         # see if it would be helpful to 'break' after the '=' to save space
6213         my $last_equals = $last_gnu_equals{$total_depth};
6214         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6215
6216             # find the position if we break at the '='
6217             my $i_test = $last_equals;
6218             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6219
6220             # TESTING
6221             ##my $too_close = ($i_test==$max_index_to_go-1);
6222
6223             my $test_position = total_line_length( $i_test, $max_index_to_go );
6224
6225             if (
6226
6227                 # the equals is not just before an open paren (testing)
6228                 ##!$too_close &&
6229
6230                 # if we are beyond the midpoint
6231                 $gnu_position_predictor > $half_maximum_line_length
6232
6233                 # or we are beyont the 1/4 point and there was an old
6234                 # break at the equals
6235                 || (
6236                     $gnu_position_predictor > $half_maximum_line_length / 2
6237                     && (
6238                         $old_breakpoint_to_go[$last_equals]
6239                         || (   $last_equals > 0
6240                             && $old_breakpoint_to_go[ $last_equals - 1 ] )
6241                         || (   $last_equals > 1
6242                             && $types_to_go[ $last_equals - 1 ] eq 'b'
6243                             && $old_breakpoint_to_go[ $last_equals - 2 ] )
6244                     )
6245                 )
6246               )
6247             {
6248
6249                 # then make the switch -- note that we do not set a real
6250                 # breakpoint here because we may not really need one; sub
6251                 # scan_list will do that if necessary
6252                 $line_start_index_to_go = $i_test + 1;
6253                 $gnu_position_predictor = $test_position;
6254             }
6255         }
6256     }
6257
6258     # Check for decreasing depth ..
6259     # Note that one token may have both decreasing and then increasing
6260     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
6261     # in this example we would first go back to (1,0) then up to (2,0)
6262     # in a single call.
6263     if ( $level < $current_level || $ci_level < $current_ci_level ) {
6264
6265         # loop to find the first entry at or completely below this level
6266         my ( $lev, $ci_lev );
6267         while (1) {
6268             if ($max_gnu_stack_index) {
6269
6270                 # save index of token which closes this level
6271                 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6272
6273                 # Undo any extra indentation if we saw no commas
6274                 my $available_spaces =
6275                   $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6276
6277                 my $comma_count = 0;
6278                 my $arrow_count = 0;
6279                 if ( $type eq '}' || $type eq ')' ) {
6280                     $comma_count = $gnu_comma_count{$total_depth};
6281                     $arrow_count = $gnu_arrow_count{$total_depth};
6282                     $comma_count = 0 unless $comma_count;
6283                     $arrow_count = 0 unless $arrow_count;
6284                 }
6285                 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6286                 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6287
6288                 if ( $available_spaces > 0 ) {
6289
6290                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
6291
6292                         my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6293                         my $seqno =
6294                           $gnu_stack[$max_gnu_stack_index]
6295                           ->get_SEQUENCE_NUMBER();
6296
6297                         # Be sure this item was created in this batch.  This
6298                         # should be true because we delete any available
6299                         # space from open items at the end of each batch.
6300                         if (   $gnu_sequence_number != $seqno
6301                             || $i > $max_gnu_item_index )
6302                         {
6303                             warning(
6304 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6305                             );
6306                             report_definite_bug();
6307                         }
6308
6309                         else {
6310                             if ( $arrow_count == 0 ) {
6311                                 $gnu_item_list[$i]
6312                                   ->permanently_decrease_AVAILABLE_SPACES(
6313                                     $available_spaces);
6314                             }
6315                             else {
6316                                 $gnu_item_list[$i]
6317                                   ->tentatively_decrease_AVAILABLE_SPACES(
6318                                     $available_spaces);
6319                             }
6320
6321                             my $j;
6322                             for (
6323                                 $j = $i + 1 ;
6324                                 $j <= $max_gnu_item_index ;
6325                                 $j++
6326                               )
6327                             {
6328                                 $gnu_item_list[$j]
6329                                   ->decrease_SPACES($available_spaces);
6330                             }
6331                         }
6332                     }
6333                 }
6334
6335                 # go down one level
6336                 --$max_gnu_stack_index;
6337                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6338                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6339
6340                 # stop when we reach a level at or below the current level
6341                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6342                     $space_count =
6343                       $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6344                     $current_level    = $lev;
6345                     $current_ci_level = $ci_lev;
6346                     last;
6347                 }
6348             }
6349
6350             # reached bottom of stack .. should never happen because
6351             # only negative levels can get here, and $level was forced
6352             # to be positive above.
6353             else {
6354                 warning(
6355 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6356                 );
6357                 report_definite_bug();
6358                 last;
6359             }
6360         }
6361     }
6362
6363     # handle increasing depth
6364     if ( $level > $current_level || $ci_level > $current_ci_level ) {
6365
6366         # Compute the standard incremental whitespace.  This will be
6367         # the minimum incremental whitespace that will be used.  This
6368         # choice results in a smooth transition between the gnu-style
6369         # and the standard style.
6370         my $standard_increment =
6371           ( $level - $current_level ) * $rOpts_indent_columns +
6372           ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6373
6374         # Now we have to define how much extra incremental space
6375         # ("$available_space") we want.  This extra space will be
6376         # reduced as necessary when long lines are encountered or when
6377         # it becomes clear that we do not have a good list.
6378         my $available_space = 0;
6379         my $align_paren     = 0;
6380         my $excess          = 0;
6381
6382         # initialization on empty stack..
6383         if ( $max_gnu_stack_index == 0 ) {
6384             $space_count = $level * $rOpts_indent_columns;
6385         }
6386
6387         # if this is a BLOCK, add the standard increment
6388         elsif ($last_nonblank_block_type) {
6389             $space_count += $standard_increment;
6390         }
6391
6392         # if last nonblank token was not structural indentation,
6393         # just use standard increment
6394         elsif ( $last_nonblank_type ne '{' ) {
6395             $space_count += $standard_increment;
6396         }
6397
6398         # otherwise use the space to the first non-blank level change token
6399         else {
6400
6401             $space_count = $gnu_position_predictor;
6402
6403             my $min_gnu_indentation =
6404               $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6405
6406             $available_space = $space_count - $min_gnu_indentation;
6407             if ( $available_space >= $standard_increment ) {
6408                 $min_gnu_indentation += $standard_increment;
6409             }
6410             elsif ( $available_space > 1 ) {
6411                 $min_gnu_indentation += $available_space + 1;
6412             }
6413             elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6414                 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6415                     $min_gnu_indentation += 2;
6416                 }
6417                 else {
6418                     $min_gnu_indentation += 1;
6419                 }
6420             }
6421             else {
6422                 $min_gnu_indentation += $standard_increment;
6423             }
6424             $available_space = $space_count - $min_gnu_indentation;
6425
6426             if ( $available_space < 0 ) {
6427                 $space_count     = $min_gnu_indentation;
6428                 $available_space = 0;
6429             }
6430             $align_paren = 1;
6431         }
6432
6433         # update state, but not on a blank token
6434         if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6435
6436             $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6437
6438             ++$max_gnu_stack_index;
6439             $gnu_stack[$max_gnu_stack_index] =
6440               new_lp_indentation_item( $space_count, $level, $ci_level,
6441                 $available_space, $align_paren );
6442
6443             # If the opening paren is beyond the half-line length, then
6444             # we will use the minimum (standard) indentation.  This will
6445             # help avoid problems associated with running out of space
6446             # near the end of a line.  As a result, in deeply nested
6447             # lists, there will be some indentations which are limited
6448             # to this minimum standard indentation. But the most deeply
6449             # nested container will still probably be able to shift its
6450             # parameters to the right for proper alignment, so in most
6451             # cases this will not be noticable.
6452             if (   $available_space > 0
6453                 && $space_count > $half_maximum_line_length )
6454             {
6455                 $gnu_stack[$max_gnu_stack_index]
6456                   ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6457             }
6458         }
6459     }
6460
6461     # Count commas and look for non-list characters.  Once we see a
6462     # non-list character, we give up and don't look for any more commas.
6463     if ( $type eq '=>' ) {
6464         $gnu_arrow_count{$total_depth}++;
6465
6466         # tentatively treating '=>' like '=' for estimating breaks
6467         # TODO: this could use some experimentation
6468         $last_gnu_equals{$total_depth} = $max_index_to_go;
6469     }
6470
6471     elsif ( $type eq ',' ) {
6472         $gnu_comma_count{$total_depth}++;
6473     }
6474
6475     elsif ( $is_assignment{$type} ) {
6476         $last_gnu_equals{$total_depth} = $max_index_to_go;
6477     }
6478
6479     # this token might start a new line
6480     # if this is a non-blank..
6481     if ( $type ne 'b' ) {
6482
6483         # and if ..
6484         if (
6485
6486             # this is the first nonblank token of the line
6487             $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6488
6489             # or previous character was one of these:
6490             || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6491
6492             # or previous character was opening and this does not close it
6493             || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6494             || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6495
6496             # or this token is one of these:
6497             || $type =~ /^([\.]|\|\||\&\&)$/
6498
6499             # or this is a closing structure
6500             || (   $last_nonblank_type_to_go eq '}'
6501                 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6502
6503             # or previous token was keyword 'return'
6504             || ( $last_nonblank_type_to_go eq 'k'
6505                 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6506
6507             # or starting a new line at certain keywords is fine
6508             || (   $type eq 'k'
6509                 && $is_if_unless_and_or_last_next_redo_return{$token} )
6510
6511             # or this is after an assignment after a closing structure
6512             || (
6513                 $is_assignment{$last_nonblank_type_to_go}
6514                 && (
6515                     $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6516
6517                     # and it is significantly to the right
6518                     || $gnu_position_predictor > $half_maximum_line_length
6519                 )
6520             )
6521           )
6522         {
6523             check_for_long_gnu_style_lines();
6524             $line_start_index_to_go = $max_index_to_go;
6525
6526             # back up 1 token if we want to break before that type
6527             # otherwise, we may strand tokens like '?' or ':' on a line
6528             if ( $line_start_index_to_go > 0 ) {
6529                 if ( $last_nonblank_type_to_go eq 'k' ) {
6530
6531                     if ( $want_break_before{$last_nonblank_token_to_go} ) {
6532                         $line_start_index_to_go--;
6533                     }
6534                 }
6535                 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6536                     $line_start_index_to_go--;
6537                 }
6538             }
6539         }
6540     }
6541
6542     # remember the predicted position of this token on the output line
6543     if ( $max_index_to_go > $line_start_index_to_go ) {
6544         $gnu_position_predictor =
6545           total_line_length( $line_start_index_to_go, $max_index_to_go );
6546     }
6547     else {
6548         $gnu_position_predictor = $space_count +
6549           token_sequence_length( $max_index_to_go, $max_index_to_go );
6550     }
6551
6552     # store the indentation object for this token
6553     # this allows us to manipulate the leading whitespace
6554     # (in case we have to reduce indentation to fit a line) without
6555     # having to change any token values
6556     $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6557     $reduced_spaces_to_go[$max_index_to_go] =
6558       ( $max_gnu_stack_index > 0 && $ci_level )
6559       ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6560       : $gnu_stack[$max_gnu_stack_index];
6561     return;
6562 }
6563
6564 sub check_for_long_gnu_style_lines {
6565
6566     # look at the current estimated maximum line length, and
6567     # remove some whitespace if it exceeds the desired maximum
6568
6569     # this is only for the '-lp' style
6570     return unless ($rOpts_line_up_parentheses);
6571
6572     # nothing can be done if no stack items defined for this line
6573     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6574
6575     # see if we have exceeded the maximum desired line length
6576     # keep 2 extra free because they are needed in some cases
6577     # (result of trial-and-error testing)
6578     my $spaces_needed =
6579       $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6580
6581     return if ( $spaces_needed < 0 );
6582
6583     # We are over the limit, so try to remove a requested number of
6584     # spaces from leading whitespace.  We are only allowed to remove
6585     # from whitespace items created on this batch, since others have
6586     # already been used and cannot be undone.
6587     my @candidates = ();
6588     my $i;
6589
6590     # loop over all whitespace items created for the current batch
6591     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6592         my $item = $gnu_item_list[$i];
6593
6594         # item must still be open to be a candidate (otherwise it
6595         # cannot influence the current token)
6596         next if ( $item->get_CLOSED() >= 0 );
6597
6598         my $available_spaces = $item->get_AVAILABLE_SPACES();
6599
6600         if ( $available_spaces > 0 ) {
6601             push( @candidates, [ $i, $available_spaces ] );
6602         }
6603     }
6604
6605     return unless (@candidates);
6606
6607     # sort by available whitespace so that we can remove whitespace
6608     # from the maximum available first
6609     @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6610
6611     # keep removing whitespace until we are done or have no more
6612     my $candidate;
6613     foreach $candidate (@candidates) {
6614         my ( $i, $available_spaces ) = @{$candidate};
6615         my $deleted_spaces =
6616           ( $available_spaces > $spaces_needed )
6617           ? $spaces_needed
6618           : $available_spaces;
6619
6620         # remove the incremental space from this item
6621         $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6622
6623         my $i_debug = $i;
6624
6625         # update the leading whitespace of this item and all items
6626         # that came after it
6627         for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6628
6629             my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6630             if ( $old_spaces > $deleted_spaces ) {
6631                 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6632             }
6633
6634             # shouldn't happen except for code bug:
6635             else {
6636                 my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
6637                 my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6638                 my $old_level    = $gnu_item_list[$i]->get_LEVEL();
6639                 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6640                 warning(
6641 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
6642                 );
6643                 report_definite_bug();
6644             }
6645         }
6646         $gnu_position_predictor -= $deleted_spaces;
6647         $spaces_needed          -= $deleted_spaces;
6648         last unless ( $spaces_needed > 0 );
6649     }
6650 }
6651
6652 sub finish_lp_batch {
6653
6654     # This routine is called once after each each output stream batch is
6655     # finished to undo indentation for all incomplete -lp
6656     # indentation levels.  It is too risky to leave a level open,
6657     # because then we can't backtrack in case of a long line to follow.
6658     # This means that comments and blank lines will disrupt this
6659     # indentation style.  But the vertical aligner may be able to
6660     # get the space back if there are side comments.
6661
6662     # this is only for the 'lp' style
6663     return unless ($rOpts_line_up_parentheses);
6664
6665     # nothing can be done if no stack items defined for this line
6666     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6667
6668     # loop over all whitespace items created for the current batch
6669     my $i;
6670     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6671         my $item = $gnu_item_list[$i];
6672
6673         # only look for open items
6674         next if ( $item->get_CLOSED() >= 0 );
6675
6676         # Tentatively remove all of the available space
6677         # (The vertical aligner will try to get it back later)
6678         my $available_spaces = $item->get_AVAILABLE_SPACES();
6679         if ( $available_spaces > 0 ) {
6680
6681             # delete incremental space for this item
6682             $gnu_item_list[$i]
6683               ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6684
6685             # Reduce the total indentation space of any nodes that follow
6686             # Note that any such nodes must necessarily be dependents
6687             # of this node.
6688             foreach ( $i + 1 .. $max_gnu_item_index ) {
6689                 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6690             }
6691         }
6692     }
6693     return;
6694 }
6695
6696 sub reduce_lp_indentation {
6697
6698     # reduce the leading whitespace at token $i if possible by $spaces_needed
6699     # (a large value of $spaces_needed will remove all excess space)
6700     # NOTE: to be called from scan_list only for a sequence of tokens
6701     # contained between opening and closing parens/braces/brackets
6702
6703     my ( $i, $spaces_wanted ) = @_;
6704     my $deleted_spaces = 0;
6705
6706     my $item             = $leading_spaces_to_go[$i];
6707     my $available_spaces = $item->get_AVAILABLE_SPACES();
6708
6709     if (
6710         $available_spaces > 0
6711         && ( ( $spaces_wanted <= $available_spaces )
6712             || !$item->get_HAVE_CHILD() )
6713       )
6714     {
6715
6716         # we'll remove these spaces, but mark them as recoverable
6717         $deleted_spaces =
6718           $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6719     }
6720
6721     return $deleted_spaces;
6722 }
6723
6724 sub token_sequence_length {
6725
6726     # return length of tokens ($ifirst .. $ilast) including first & last
6727     # returns 0 if $ifirst > $ilast
6728     my $ifirst = shift;
6729     my $ilast  = shift;
6730     return 0 if ( $ilast < 0 || $ifirst > $ilast );
6731     return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6732     return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6733 }
6734
6735 sub total_line_length {
6736
6737     # return length of a line of tokens ($ifirst .. $ilast)
6738     my $ifirst = shift;
6739     my $ilast  = shift;
6740     if ( $ifirst < 0 ) { $ifirst = 0 }
6741
6742     return leading_spaces_to_go($ifirst) +
6743       token_sequence_length( $ifirst, $ilast );
6744 }
6745
6746 sub excess_line_length {
6747
6748     # return number of characters by which a line of tokens ($ifirst..$ilast)
6749     # exceeds the allowable line length.
6750     my $ifirst = shift;
6751     my $ilast  = shift;
6752     if ( $ifirst < 0 ) { $ifirst = 0 }
6753     return leading_spaces_to_go($ifirst) +
6754       token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6755 }
6756
6757 sub finish_formatting {
6758
6759     # flush buffer and write any informative messages
6760     my $self = shift;
6761
6762     flush();
6763     $file_writer_object->decrement_output_line_number()
6764       ;    # fix up line number since it was incremented
6765     we_are_at_the_last_line();
6766     if ( $added_semicolon_count > 0 ) {
6767         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6768         my $what =
6769           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6770         write_logfile_entry("$added_semicolon_count $what added:\n");
6771         write_logfile_entry(
6772             "  $first at input line $first_added_semicolon_at\n");
6773
6774         if ( $added_semicolon_count > 1 ) {
6775             write_logfile_entry(
6776                 "   Last at input line $last_added_semicolon_at\n");
6777         }
6778         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
6779         write_logfile_entry("\n");
6780     }
6781
6782     if ( $deleted_semicolon_count > 0 ) {
6783         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6784         my $what =
6785           ( $deleted_semicolon_count > 1 )
6786           ? "semicolons were"
6787           : "semicolon was";
6788         write_logfile_entry(
6789             "$deleted_semicolon_count unnecessary $what deleted:\n");
6790         write_logfile_entry(
6791             "  $first at input line $first_deleted_semicolon_at\n");
6792
6793         if ( $deleted_semicolon_count > 1 ) {
6794             write_logfile_entry(
6795                 "   Last at input line $last_deleted_semicolon_at\n");
6796         }
6797         write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
6798         write_logfile_entry("\n");
6799     }
6800
6801     if ( $embedded_tab_count > 0 ) {
6802         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6803         my $what =
6804           ( $embedded_tab_count > 1 )
6805           ? "quotes or patterns"
6806           : "quote or pattern";
6807         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6808         write_logfile_entry(
6809 "This means the display of this script could vary with device or software\n"
6810         );
6811         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
6812
6813         if ( $embedded_tab_count > 1 ) {
6814             write_logfile_entry(
6815                 "   Last at input line $last_embedded_tab_at\n");
6816         }
6817         write_logfile_entry("\n");
6818     }
6819
6820     if ($first_tabbing_disagreement) {
6821         write_logfile_entry(
6822 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
6823         );
6824     }
6825
6826     if ($in_tabbing_disagreement) {
6827         write_logfile_entry(
6828 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
6829         );
6830     }
6831     else {
6832
6833         if ($last_tabbing_disagreement) {
6834
6835             write_logfile_entry(
6836 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
6837             );
6838         }
6839         else {
6840             write_logfile_entry("No indentation disagreement seen\n");
6841         }
6842     }
6843     write_logfile_entry("\n");
6844
6845     $vertical_aligner_object->report_anything_unusual();
6846
6847     $file_writer_object->report_line_length_errors();
6848 }
6849
6850 sub check_options {
6851
6852     # This routine is called to check the Opts hash after it is defined
6853
6854     ($rOpts) = @_;
6855     my ( $tabbing_string, $tab_msg );
6856
6857     make_static_block_comment_pattern();
6858     make_static_side_comment_pattern();
6859     make_closing_side_comment_prefix();
6860     make_closing_side_comment_list_pattern();
6861     $format_skipping_pattern_begin =
6862       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
6863     $format_skipping_pattern_end =
6864       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
6865
6866     # If closing side comments ARE selected, then we can safely
6867     # delete old closing side comments unless closing side comment
6868     # warnings are requested.  This is a good idea because it will
6869     # eliminate any old csc's which fall below the line count threshold.
6870     # We cannot do this if warnings are turned on, though, because we
6871     # might delete some text which has been added.  So that must
6872     # be handled when comments are created.
6873     if ( $rOpts->{'closing-side-comments'} ) {
6874         if ( !$rOpts->{'closing-side-comment-warnings'} ) {
6875             $rOpts->{'delete-closing-side-comments'} = 1;
6876         }
6877     }
6878
6879     # If closing side comments ARE NOT selected, but warnings ARE
6880     # selected and we ARE DELETING csc's, then we will pretend to be
6881     # adding with a huge interval.  This will force the comments to be
6882     # generated for comparison with the old comments, but not added.
6883     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
6884         if ( $rOpts->{'delete-closing-side-comments'} ) {
6885             $rOpts->{'delete-closing-side-comments'}  = 0;
6886             $rOpts->{'closing-side-comments'}         = 1;
6887             $rOpts->{'closing-side-comment-interval'} = 100000000;
6888         }
6889     }
6890
6891     make_bli_pattern();
6892     make_block_brace_vertical_tightness_pattern();
6893
6894     if ( $rOpts->{'line-up-parentheses'} ) {
6895
6896         if (   $rOpts->{'indent-only'}
6897             || !$rOpts->{'add-newlines'}
6898             || !$rOpts->{'delete-old-newlines'} )
6899         {
6900             warn <<EOM;
6901 -----------------------------------------------------------------------
6902 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
6903
6904 The -lp indentation logic requires that perltidy be able to coordinate
6905 arbitrarily large numbers of line breakpoints.  This isn't possible
6906 with these flags. Sometimes an acceptable workaround is to use -wocb=3
6907 -----------------------------------------------------------------------
6908 EOM
6909             $rOpts->{'line-up-parentheses'} = 0;
6910         }
6911     }
6912
6913     # At present, tabs are not compatable with the line-up-parentheses style
6914     # (it would be possible to entab the total leading whitespace
6915     # just prior to writing the line, if desired).
6916     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
6917         warn <<EOM;
6918 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
6919 EOM
6920         $rOpts->{'tabs'} = 0;
6921     }
6922
6923     # Likewise, tabs are not compatable with outdenting..
6924     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
6925         warn <<EOM;
6926 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
6927 EOM
6928         $rOpts->{'tabs'} = 0;
6929     }
6930
6931     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
6932         warn <<EOM;
6933 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
6934 EOM
6935         $rOpts->{'tabs'} = 0;
6936     }
6937
6938     if ( !$rOpts->{'space-for-semicolon'} ) {
6939         $want_left_space{'f'} = -1;
6940     }
6941
6942     if ( $rOpts->{'space-terminal-semicolon'} ) {
6943         $want_left_space{';'} = 1;
6944     }
6945
6946     # implement outdenting preferences for keywords
6947     %outdent_keyword = ();
6948     unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
6949         @_ = qw(next last redo goto return);    # defaults
6950     }
6951
6952     # FUTURE: if not a keyword, assume that it is an identifier
6953     foreach (@_) {
6954         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
6955             $outdent_keyword{$_} = 1;
6956         }
6957         else {
6958             warn "ignoring '$_' in -okwl list; not a perl keyword";
6959         }
6960     }
6961
6962     # implement user whitespace preferences
6963     if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
6964         @want_left_space{@_} = (1) x scalar(@_);
6965     }
6966
6967     if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
6968         @want_right_space{@_} = (1) x scalar(@_);
6969     }
6970
6971     if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
6972         @want_left_space{@_} = (-1) x scalar(@_);
6973     }
6974
6975     if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
6976         @want_right_space{@_} = (-1) x scalar(@_);
6977     }
6978     if ( $rOpts->{'dump-want-left-space'} ) {
6979         dump_want_left_space(*STDOUT);
6980         exit 1;
6981     }
6982
6983     if ( $rOpts->{'dump-want-right-space'} ) {
6984         dump_want_right_space(*STDOUT);
6985         exit 1;
6986     }
6987
6988     # default keywords for which space is introduced before an opening paren
6989     # (at present, including them messes up vertical alignment)
6990     @_ = qw(my local our and or err eq ne if else elsif until
6991       unless while for foreach return switch case given when);
6992     @space_after_keyword{@_} = (1) x scalar(@_);
6993
6994     # allow user to modify these defaults
6995     if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
6996         @space_after_keyword{@_} = (1) x scalar(@_);
6997     }
6998
6999     if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7000         @space_after_keyword{@_} = (0) x scalar(@_);
7001     }
7002
7003     # implement user break preferences
7004     my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7005       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=);
7006
7007     my $break_after = sub {
7008         foreach my $tok (@_) {
7009             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
7010             my $lbs = $left_bond_strength{$tok};
7011             my $rbs = $right_bond_strength{$tok};
7012             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7013                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7014                   ( $lbs, $rbs );
7015             }
7016         }
7017     };
7018
7019     my $break_before = sub {
7020         foreach my $tok (@_) {
7021             my $lbs = $left_bond_strength{$tok};
7022             my $rbs = $right_bond_strength{$tok};
7023             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7024                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7025                   ( $lbs, $rbs );
7026             }
7027         }
7028     };
7029
7030     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7031     $break_before->(@all_operators)
7032       if ( $rOpts->{'break-before-all-operators'} );
7033
7034     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7035     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7036
7037     # make note if breaks are before certain key types
7038     %want_break_before = ();
7039     foreach my $tok (
7040         '=',  '.',   ',',   ':', '?', '&&', '||', 'and',
7041         'or', 'err', 'xor', '+', '-', '*',  '/',
7042       )
7043     {
7044         $want_break_before{$tok} =
7045           $left_bond_strength{$tok} < $right_bond_strength{$tok};
7046     }
7047
7048     # Coordinate ?/: breaks, which must be similar
7049     if ( !$want_break_before{':'} ) {
7050         $want_break_before{'?'}   = $want_break_before{':'};
7051         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7052         $left_bond_strength{'?'}  = NO_BREAK;
7053     }
7054
7055     # Define here tokens which may follow the closing brace of a do statement
7056     # on the same line, as in:
7057     #   } while ( $something);
7058     @_ = qw(until while unless if ; : );
7059     push @_, ',';
7060     @is_do_follower{@_} = (1) x scalar(@_);
7061
7062     # These tokens may follow the closing brace of an if or elsif block.
7063     # In other words, for cuddled else we want code to look like:
7064     #   } elsif ( $something) {
7065     #   } else {
7066     if ( $rOpts->{'cuddled-else'} ) {
7067         @_ = qw(else elsif);
7068         @is_if_brace_follower{@_} = (1) x scalar(@_);
7069     }
7070     else {
7071         %is_if_brace_follower = ();
7072     }
7073
7074     # nothing can follow the closing curly of an else { } block:
7075     %is_else_brace_follower = ();
7076
7077     # what can follow a multi-line anonymous sub definition closing curly:
7078     @_ = qw# ; : => or and  && || ~~ !~~ ) #;
7079     push @_, ',';
7080     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7081
7082     # what can follow a one-line anonynomous sub closing curly:
7083     # one-line anonumous subs also have ']' here...
7084     # see tk3.t and PP.pm
7085     @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
7086     push @_, ',';
7087     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7088
7089     # What can follow a closing curly of a block
7090     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7091     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7092     @_ = qw#  ; : => or and  && || ) #;
7093     push @_, ',';
7094
7095     # allow cuddled continue if cuddled else is specified
7096     if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7097
7098     @is_other_brace_follower{@_} = (1) x scalar(@_);
7099
7100     $right_bond_strength{'{'} = WEAK;
7101     $left_bond_strength{'{'}  = VERY_STRONG;
7102
7103     # make -l=0  equal to -l=infinite
7104     if ( !$rOpts->{'maximum-line-length'} ) {
7105         $rOpts->{'maximum-line-length'} = 1000000;
7106     }
7107
7108     # make -lbl=0  equal to -lbl=infinite
7109     if ( !$rOpts->{'long-block-line-count'} ) {
7110         $rOpts->{'long-block-line-count'} = 1000000;
7111     }
7112
7113     my $ole = $rOpts->{'output-line-ending'};
7114     if ($ole) {
7115         my %endings = (
7116             dos  => "\015\012",
7117             win  => "\015\012",
7118             mac  => "\015",
7119             unix => "\012",
7120         );
7121         $ole = lc $ole;
7122         unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7123             my $str = join " ", keys %endings;
7124             die <<EOM;
7125 Unrecognized line ending '$ole'; expecting one of: $str
7126 EOM
7127         }
7128         if ( $rOpts->{'preserve-line-endings'} ) {
7129             warn "Ignoring -ple; conflicts with -ole\n";
7130             $rOpts->{'preserve-line-endings'} = undef;
7131         }
7132     }
7133
7134     # hashes used to simplify setting whitespace
7135     %tightness = (
7136         '{' => $rOpts->{'brace-tightness'},
7137         '}' => $rOpts->{'brace-tightness'},
7138         '(' => $rOpts->{'paren-tightness'},
7139         ')' => $rOpts->{'paren-tightness'},
7140         '[' => $rOpts->{'square-bracket-tightness'},
7141         ']' => $rOpts->{'square-bracket-tightness'},
7142     );
7143     %matching_token = (
7144         '{' => '}',
7145         '(' => ')',
7146         '[' => ']',
7147         '?' => ':',
7148     );
7149
7150     # frequently used parameters
7151     $rOpts_add_newlines          = $rOpts->{'add-newlines'};
7152     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
7153     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7154     $rOpts_block_brace_vertical_tightness =
7155       $rOpts->{'block-brace-vertical-tightness'};
7156     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
7157     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7158     $rOpts_break_at_old_ternary_breakpoints =
7159       $rOpts->{'break-at-old-ternary-breakpoints'};
7160     $rOpts_break_at_old_comma_breakpoints =
7161       $rOpts->{'break-at-old-comma-breakpoints'};
7162     $rOpts_break_at_old_keyword_breakpoints =
7163       $rOpts->{'break-at-old-keyword-breakpoints'};
7164     $rOpts_break_at_old_logical_breakpoints =
7165       $rOpts->{'break-at-old-logical-breakpoints'};
7166     $rOpts_closing_side_comment_else_flag =
7167       $rOpts->{'closing-side-comment-else-flag'};
7168     $rOpts_closing_side_comment_maximum_text =
7169       $rOpts->{'closing-side-comment-maximum-text'};
7170     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7171     $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
7172     $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
7173     $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
7174     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
7175     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
7176     $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7177     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
7178     $rOpts_short_concatenation_item_length =
7179       $rOpts->{'short-concatenation-item-length'};
7180     $rOpts_swallow_optional_blank_lines =
7181       $rOpts->{'swallow-optional-blank-lines'};
7182     $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
7183     $rOpts_format_skipping          = $rOpts->{'format-skipping'};
7184     $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
7185     $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
7186     $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7187     $half_maximum_line_length       = $rOpts_maximum_line_length / 2;
7188
7189     # Note that both opening and closing tokens can access the opening
7190     # and closing flags of their container types.
7191     %opening_vertical_tightness = (
7192         '(' => $rOpts->{'paren-vertical-tightness'},
7193         '{' => $rOpts->{'brace-vertical-tightness'},
7194         '[' => $rOpts->{'square-bracket-vertical-tightness'},
7195         ')' => $rOpts->{'paren-vertical-tightness'},
7196         '}' => $rOpts->{'brace-vertical-tightness'},
7197         ']' => $rOpts->{'square-bracket-vertical-tightness'},
7198     );
7199
7200     %closing_vertical_tightness = (
7201         '(' => $rOpts->{'paren-vertical-tightness-closing'},
7202         '{' => $rOpts->{'brace-vertical-tightness-closing'},
7203         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7204         ')' => $rOpts->{'paren-vertical-tightness-closing'},
7205         '}' => $rOpts->{'brace-vertical-tightness-closing'},
7206         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7207     );
7208
7209     # assume flag for '>' same as ')' for closing qw quotes
7210     %closing_token_indentation = (
7211         ')' => $rOpts->{'closing-paren-indentation'},
7212         '}' => $rOpts->{'closing-brace-indentation'},
7213         ']' => $rOpts->{'closing-square-bracket-indentation'},
7214         '>' => $rOpts->{'closing-paren-indentation'},
7215     );
7216
7217     %opening_token_right = (
7218         '(' => $rOpts->{'opening-paren-right'},
7219         '{' => $rOpts->{'opening-hash-brace-right'},
7220         '[' => $rOpts->{'opening-square-bracket-right'},
7221     );
7222
7223     %stack_opening_token = (
7224         '(' => $rOpts->{'stack-opening-paren'},
7225         '{' => $rOpts->{'stack-opening-hash-brace'},
7226         '[' => $rOpts->{'stack-opening-square-bracket'},
7227     );
7228
7229     %stack_closing_token = (
7230         ')' => $rOpts->{'stack-closing-paren'},
7231         '}' => $rOpts->{'stack-closing-hash-brace'},
7232         ']' => $rOpts->{'stack-closing-square-bracket'},
7233     );
7234 }
7235
7236 sub make_static_block_comment_pattern {
7237
7238     # create the pattern used to identify static block comments
7239     $static_block_comment_pattern = '^\s*##';
7240
7241     # allow the user to change it
7242     if ( $rOpts->{'static-block-comment-prefix'} ) {
7243         my $prefix = $rOpts->{'static-block-comment-prefix'};
7244         $prefix =~ s/^\s*//;
7245         my $pattern = $prefix;
7246
7247         # user may give leading caret to force matching left comments only
7248         if ( $prefix !~ /^\^#/ ) {
7249             if ( $prefix !~ /^#/ ) {
7250                 die
7251 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7252             }
7253             $pattern = '^\s*' . $prefix;
7254         }
7255         eval "'##'=~/$pattern/";
7256         if ($@) {
7257             die
7258 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7259         }
7260         $static_block_comment_pattern = $pattern;
7261     }
7262 }
7263
7264 sub make_format_skipping_pattern {
7265     my ( $opt_name, $default ) = @_;
7266     my $param = $rOpts->{$opt_name};
7267     unless ($param) { $param = $default }
7268     $param =~ s/^\s*//;
7269     if ( $param !~ /^#/ ) {
7270         die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7271     }
7272     my $pattern = '^' . $param . '\s';
7273     eval "'#'=~/$pattern/";
7274     if ($@) {
7275         die
7276 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7277     }
7278     return $pattern;
7279 }
7280
7281 sub make_closing_side_comment_list_pattern {
7282
7283     # turn any input list into a regex for recognizing selected block types
7284     $closing_side_comment_list_pattern = '^\w+';
7285     if ( defined( $rOpts->{'closing-side-comment-list'} )
7286         && $rOpts->{'closing-side-comment-list'} )
7287     {
7288         $closing_side_comment_list_pattern =
7289           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7290     }
7291 }
7292
7293 sub make_bli_pattern {
7294
7295     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7296         && $rOpts->{'brace-left-and-indent-list'} )
7297     {
7298         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7299     }
7300
7301     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7302 }
7303
7304 sub make_block_brace_vertical_tightness_pattern {
7305
7306     # turn any input list into a regex for recognizing selected block types
7307     $block_brace_vertical_tightness_pattern =
7308       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7309
7310     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7311         && $rOpts->{'block-brace-vertical-tightness-list'} )
7312     {
7313         $block_brace_vertical_tightness_pattern =
7314           make_block_pattern( '-bbvtl',
7315             $rOpts->{'block-brace-vertical-tightness-list'} );
7316     }
7317 }
7318
7319 sub make_block_pattern {
7320
7321     #  given a string of block-type keywords, return a regex to match them
7322     #  The only tricky part is that labels are indicated with a single ':'
7323     #  and the 'sub' token text may have additional text after it (name of
7324     #  sub).
7325     #
7326     #  Example:
7327     #
7328     #   input string: "if else elsif unless while for foreach do : sub";
7329     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7330
7331     my ( $abbrev, $string ) = @_;
7332     my @list  = split_words($string);
7333     my @words = ();
7334     my %seen;
7335     for my $i (@list) {
7336         next if $seen{$i};
7337         $seen{$i} = 1;
7338         if ( $i eq 'sub' ) {
7339         }
7340         elsif ( $i eq ':' ) {
7341             push @words, '\w+:';
7342         }
7343         elsif ( $i =~ /^\w/ ) {
7344             push @words, $i;
7345         }
7346         else {
7347             warn "unrecognized block type $i after $abbrev, ignoring\n";
7348         }
7349     }
7350     my $pattern = '(' . join( '|', @words ) . ')$';
7351     if ( $seen{'sub'} ) {
7352         $pattern = '(' . $pattern . '|sub)';
7353     }
7354     $pattern = '^' . $pattern;
7355     return $pattern;
7356 }
7357
7358 sub make_static_side_comment_pattern {
7359
7360     # create the pattern used to identify static side comments
7361     $static_side_comment_pattern = '^##';
7362
7363     # allow the user to change it
7364     if ( $rOpts->{'static-side-comment-prefix'} ) {
7365         my $prefix = $rOpts->{'static-side-comment-prefix'};
7366         $prefix =~ s/^\s*//;
7367         my $pattern = '^' . $prefix;
7368         eval "'##'=~/$pattern/";
7369         if ($@) {
7370             die
7371 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7372         }
7373         $static_side_comment_pattern = $pattern;
7374     }
7375 }
7376
7377 sub make_closing_side_comment_prefix {
7378
7379     # Be sure we have a valid closing side comment prefix
7380     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
7381     my $csc_prefix_pattern;
7382     if ( !defined($csc_prefix) ) {
7383         $csc_prefix         = '## end';
7384         $csc_prefix_pattern = '^##\s+end';
7385     }
7386     else {
7387         my $test_csc_prefix = $csc_prefix;
7388         if ( $test_csc_prefix !~ /^#/ ) {
7389             $test_csc_prefix = '#' . $test_csc_prefix;
7390         }
7391
7392         # make a regex to recognize the prefix
7393         my $test_csc_prefix_pattern = $test_csc_prefix;
7394
7395         # escape any special characters
7396         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
7397
7398         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
7399
7400         # allow exact number of intermediate spaces to vary
7401         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
7402
7403         # make sure we have a good pattern
7404         # if we fail this we probably have an error in escaping
7405         # characters.
7406         eval "'##'=~/$test_csc_prefix_pattern/";
7407         if ($@) {
7408
7409             # shouldn't happen..must have screwed up escaping, above
7410             report_definite_bug();
7411             warn
7412 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
7413
7414             # just warn and keep going with defaults
7415             warn "Please consider using a simpler -cscp prefix\n";
7416             warn "Using default -cscp instead; please check output\n";
7417         }
7418         else {
7419             $csc_prefix         = $test_csc_prefix;
7420             $csc_prefix_pattern = $test_csc_prefix_pattern;
7421         }
7422     }
7423     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
7424     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
7425 }
7426
7427 sub dump_want_left_space {
7428     my $fh = shift;
7429     local $" = "\n";
7430     print $fh <<EOM;
7431 These values are the main control of whitespace to the left of a token type;
7432 They may be altered with the -wls parameter.
7433 For a list of token types, use perltidy --dump-token-types (-dtt)
7434  1 means the token wants a space to its left
7435 -1 means the token does not want a space to its left
7436 ------------------------------------------------------------------------
7437 EOM
7438     foreach ( sort keys %want_left_space ) {
7439         print $fh "$_\t$want_left_space{$_}\n";
7440     }
7441 }
7442
7443 sub dump_want_right_space {
7444     my $fh = shift;
7445     local $" = "\n";
7446     print $fh <<EOM;
7447 These values are the main control of whitespace to the right of a token type;
7448 They may be altered with the -wrs parameter.
7449 For a list of token types, use perltidy --dump-token-types (-dtt)
7450  1 means the token wants a space to its right
7451 -1 means the token does not want a space to its right
7452 ------------------------------------------------------------------------
7453 EOM
7454     foreach ( sort keys %want_right_space ) {
7455         print $fh "$_\t$want_right_space{$_}\n";
7456     }
7457 }
7458
7459 {    # begin is_essential_whitespace
7460
7461     my %is_sort_grep_map;
7462     my %is_for_foreach;
7463
7464     BEGIN {
7465
7466         @_ = qw(sort grep map);
7467         @is_sort_grep_map{@_} = (1) x scalar(@_);
7468
7469         @_ = qw(for foreach);
7470         @is_for_foreach{@_} = (1) x scalar(@_);
7471
7472     }
7473
7474     sub is_essential_whitespace {
7475
7476         # Essential whitespace means whitespace which cannot be safely deleted
7477         # without risking the introduction of a syntax error.
7478         # We are given three tokens and their types:
7479         # ($tokenl, $typel) is the token to the left of the space in question
7480         # ($tokenr, $typer) is the token to the right of the space in question
7481         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7482         #
7483         # This is a slow routine but is not needed too often except when -mangle
7484         # is used.
7485         #
7486         # Note: This routine should almost never need to be changed.  It is
7487         # for avoiding syntax problems rather than for formatting.
7488         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7489
7490         my $result =
7491
7492           # never combine two bare words or numbers
7493           # examples:  and ::ok(1)
7494           #            return ::spw(...)
7495           #            for bla::bla:: abc
7496           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7497           #            $input eq"quit" to make $inputeq"quit"
7498           #            my $size=-s::SINK if $file;  <==OK but we won't do it
7499           # don't join something like: for bla::bla:: abc
7500           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7501           ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7502
7503           # do not combine a number with a concatination dot
7504           # example: pom.caputo:
7505           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7506           || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7507           || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7508
7509           # do not join a minus with a bare word, because you might form
7510           # a file test operator.  Example from Complex.pm:
7511           # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7512           || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7513
7514           # and something like this could become ambiguous without space
7515           # after the '-':
7516           #   use constant III=>1;
7517           #   $a = $b - III;
7518           # and even this:
7519           #   $a = - III;
7520           || ( ( $tokenl eq '-' )
7521             && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7522
7523           # '= -' should not become =- or you will get a warning
7524           # about reversed -=
7525           # || ($tokenr eq '-')
7526
7527           # keep a space between a quote and a bareword to prevent the
7528           # bareword from becomming a quote modifier.
7529           || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7530
7531           # keep a space between a token ending in '$' and any word;
7532           # this caused trouble:  "die @$ if $@"
7533           || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7534             && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7535
7536           # perl is very fussy about spaces before <<
7537           || ( $tokenr =~ /^\<\</ )
7538
7539           # avoid combining tokens to create new meanings. Example:
7540           #     $a+ +$b must not become $a++$b
7541           || ( $is_digraph{ $tokenl . $tokenr } )
7542           || ( $is_trigraph{ $tokenl . $tokenr } )
7543
7544           # another example: do not combine these two &'s:
7545           #     allow_options & &OPT_EXECCGI
7546           || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7547
7548           # don't combine $$ or $# with any alphanumeric
7549           # (testfile mangle.t with --mangle)
7550           || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7551
7552           # retain any space after possible filehandle
7553           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7554           || ( $typel eq 'Z' )
7555
7556           # Perl is sensitive to whitespace after the + here:
7557           #  $b = xvals $a + 0.1 * yvals $a;
7558           || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
7559
7560           # keep paren separate in 'use Foo::Bar ()'
7561           || ( $tokenr eq '('
7562             && $typel   eq 'w'
7563             && $typell  eq 'k'
7564             && $tokenll eq 'use' )
7565
7566           # keep any space between filehandle and paren:
7567           # file mangle.t with --mangle:
7568           || ( $typel eq 'Y' && $tokenr eq '(' )
7569
7570           # retain any space after here doc operator ( hereerr.t)
7571           || ( $typel eq 'h' )
7572
7573           # FIXME: this needs some further work; extrude.t has test cases
7574           # it is safest to retain any space after start of ? : operator
7575           # because of perl's quirky parser.
7576           # ie, this line will fail if you remove the space after the '?':
7577           #    $b=join $comma ? ',' : ':', @_;   # ok
7578           #    $b=join $comma ?',' : ':', @_;   # error!
7579           # but this is ok :)
7580           #    $b=join $comma?',' : ':', @_;   # not a problem!
7581           ## || ($typel eq '?')
7582
7583           # be careful with a space around ++ and --, to avoid ambiguity as to
7584           # which token it applies
7585           || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7586           || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7587
7588           # need space after foreach my; for example, this will fail in
7589           # older versions of Perl:
7590           # foreach my$ft(@filetypes)...
7591           || (
7592             $tokenl eq 'my'
7593
7594             #  /^(for|foreach)$/
7595             && $is_for_foreach{$tokenll}
7596             && $tokenr =~ /^\$/
7597           )
7598
7599           # must have space between grep and left paren; "grep(" will fail
7600           || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7601
7602           # don't stick numbers next to left parens, as in:
7603           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7604           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7605
7606           # do not remove space between ? and a quote or perl
7607           # may guess that the ? begins a pattern [Loca.pm, lockarea]
7608           || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
7609
7610           # do not remove space between an '&' and a bare word because
7611           # it may turn into a function evaluation, like here
7612           # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
7613           #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
7614           || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7615
7616           ;    # the value of this long logic sequence is the result we want
7617         return $result;
7618     }
7619 }
7620
7621 sub set_white_space_flag {
7622
7623     #    This routine examines each pair of nonblank tokens and
7624     #    sets values for array @white_space_flag.
7625     #
7626     #    $white_space_flag[$j] is a flag indicating whether a white space
7627     #    BEFORE token $j is needed, with the following values:
7628     #
7629     #            -1 do not want a space before token $j
7630     #             0 optional space or $j is a whitespace
7631     #             1 want a space before token $j
7632     #
7633     #
7634     #   The values for the first token will be defined based
7635     #   upon the contents of the "to_go" output array.
7636     #
7637     #   Note: retain debug print statements because they are usually
7638     #   required after adding new token types.
7639
7640     BEGIN {
7641
7642         # initialize these global hashes, which control the use of
7643         # whitespace around tokens:
7644         #
7645         # %binary_ws_rules
7646         # %want_left_space
7647         # %want_right_space
7648         # %space_after_keyword
7649         #
7650         # Many token types are identical to the tokens themselves.
7651         # See the tokenizer for a complete list. Here are some special types:
7652         #   k = perl keyword
7653         #   f = semicolon in for statement
7654         #   m = unary minus
7655         #   p = unary plus
7656         # Note that :: is excluded since it should be contained in an identifier
7657         # Note that '->' is excluded because it never gets space
7658         # parentheses and brackets are excluded since they are handled specially
7659         # curly braces are included but may be overridden by logic, such as
7660         # newline logic.
7661
7662         # NEW_TOKENS: create a whitespace rule here.  This can be as
7663         # simple as adding your new letter to @spaces_both_sides, for
7664         # example.
7665
7666         @_ = qw" L { ( [ ";
7667         @is_opening_type{@_} = (1) x scalar(@_);
7668
7669         @_ = qw" R } ) ] ";
7670         @is_closing_type{@_} = (1) x scalar(@_);
7671
7672         my @spaces_both_sides = qw"
7673           + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7674           .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7675           &&= ||= //= <=> A k f w F n C Y U G v
7676           ";
7677
7678         my @spaces_left_side = qw"
7679           t ! ~ m p { \ h pp mm Z j
7680           ";
7681         push( @spaces_left_side, '#' );    # avoids warning message
7682
7683         my @spaces_right_side = qw"
7684           ; } ) ] R J ++ -- **=
7685           ";
7686         push( @spaces_right_side, ',' );    # avoids warning message
7687         @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7688         @want_right_space{@spaces_both_sides} =
7689           (1) x scalar(@spaces_both_sides);
7690         @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
7691         @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7692         @want_left_space{@spaces_right_side} =
7693           (-1) x scalar(@spaces_right_side);
7694         @want_right_space{@spaces_right_side} =
7695           (1) x scalar(@spaces_right_side);
7696         $want_left_space{'L'}   = WS_NO;
7697         $want_left_space{'->'}  = WS_NO;
7698         $want_right_space{'->'} = WS_NO;
7699         $want_left_space{'**'}  = WS_NO;
7700         $want_right_space{'**'} = WS_NO;
7701
7702         # hash type information must stay tightly bound
7703         # as in :  ${xxxx}
7704         $binary_ws_rules{'i'}{'L'} = WS_NO;
7705         $binary_ws_rules{'i'}{'{'} = WS_YES;
7706         $binary_ws_rules{'k'}{'{'} = WS_YES;
7707         $binary_ws_rules{'U'}{'{'} = WS_YES;
7708         $binary_ws_rules{'i'}{'['} = WS_NO;
7709         $binary_ws_rules{'R'}{'L'} = WS_NO;
7710         $binary_ws_rules{'R'}{'{'} = WS_NO;
7711         $binary_ws_rules{'t'}{'L'} = WS_NO;
7712         $binary_ws_rules{'t'}{'{'} = WS_NO;
7713         $binary_ws_rules{'}'}{'L'} = WS_NO;
7714         $binary_ws_rules{'}'}{'{'} = WS_NO;
7715         $binary_ws_rules{'$'}{'L'} = WS_NO;
7716         $binary_ws_rules{'$'}{'{'} = WS_NO;
7717         $binary_ws_rules{'@'}{'L'} = WS_NO;
7718         $binary_ws_rules{'@'}{'{'} = WS_NO;
7719         $binary_ws_rules{'='}{'L'} = WS_YES;
7720
7721         # the following includes ') {'
7722         # as in :    if ( xxx ) { yyy }
7723         $binary_ws_rules{']'}{'L'} = WS_NO;
7724         $binary_ws_rules{']'}{'{'} = WS_NO;
7725         $binary_ws_rules{')'}{'{'} = WS_YES;
7726         $binary_ws_rules{')'}{'['} = WS_NO;
7727         $binary_ws_rules{']'}{'['} = WS_NO;
7728         $binary_ws_rules{']'}{'{'} = WS_NO;
7729         $binary_ws_rules{'}'}{'['} = WS_NO;
7730         $binary_ws_rules{'R'}{'['} = WS_NO;
7731
7732         $binary_ws_rules{']'}{'++'} = WS_NO;
7733         $binary_ws_rules{']'}{'--'} = WS_NO;
7734         $binary_ws_rules{')'}{'++'} = WS_NO;
7735         $binary_ws_rules{')'}{'--'} = WS_NO;
7736
7737         $binary_ws_rules{'R'}{'++'} = WS_NO;
7738         $binary_ws_rules{'R'}{'--'} = WS_NO;
7739
7740         ########################################################
7741         # should no longer be necessary (see niek.pl)
7742         ##$binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
7743         ##$binary_ws_rules{'w'}{':'} = WS_NO;
7744         ########################################################
7745         $binary_ws_rules{'i'}{'Q'} = WS_YES;
7746         $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7747
7748         # FIXME: we need to split 'i' into variables and functions
7749         # and have no space for functions but space for variables.  For now,
7750         # I have a special patch in the special rules below
7751         $binary_ws_rules{'i'}{'('} = WS_NO;
7752
7753         $binary_ws_rules{'w'}{'('} = WS_NO;
7754         $binary_ws_rules{'w'}{'{'} = WS_YES;
7755     }
7756     my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7757     my ( $last_token, $last_type, $last_block_type, $token, $type,
7758         $block_type );
7759     my (@white_space_flag);
7760     my $j_tight_closing_paren = -1;
7761
7762     if ( $max_index_to_go >= 0 ) {
7763         $token      = $tokens_to_go[$max_index_to_go];
7764         $type       = $types_to_go[$max_index_to_go];
7765         $block_type = $block_type_to_go[$max_index_to_go];
7766     }
7767     else {
7768         $token      = ' ';
7769         $type       = 'b';
7770         $block_type = '';
7771     }
7772
7773     # loop over all tokens
7774     my ( $j, $ws );
7775
7776     for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7777
7778         if ( $$rtoken_type[$j] eq 'b' ) {
7779             $white_space_flag[$j] = WS_OPTIONAL;
7780             next;
7781         }
7782
7783         # set a default value, to be changed as needed
7784         $ws              = undef;
7785         $last_token      = $token;
7786         $last_type       = $type;
7787         $last_block_type = $block_type;
7788         $token           = $$rtokens[$j];
7789         $type            = $$rtoken_type[$j];
7790         $block_type      = $$rblock_type[$j];
7791
7792         #---------------------------------------------------------------
7793         # section 1:
7794         # handle space on the inside of opening braces
7795         #---------------------------------------------------------------
7796
7797         #    /^[L\{\(\[]$/
7798         if ( $is_opening_type{$last_type} ) {
7799
7800             $j_tight_closing_paren = -1;
7801
7802             # let's keep empty matched braces together: () {} []
7803             # except for BLOCKS
7804             if ( $token eq $matching_token{$last_token} ) {
7805                 if ($block_type) {
7806                     $ws = WS_YES;
7807                 }
7808                 else {
7809                     $ws = WS_NO;
7810                 }
7811             }
7812             else {
7813
7814                 # we're considering the right of an opening brace
7815                 # tightness = 0 means always pad inside with space
7816                 # tightness = 1 means pad inside if "complex"
7817                 # tightness = 2 means never pad inside with space
7818
7819                 my $tightness;
7820                 if (   $last_type eq '{'
7821                     && $last_token eq '{'
7822                     && $last_block_type )
7823                 {
7824                     $tightness = $rOpts_block_brace_tightness;
7825                 }
7826                 else { $tightness = $tightness{$last_token} }
7827
7828                 if ( $tightness <= 0 ) {
7829                     $ws = WS_YES;
7830                 }
7831                 elsif ( $tightness > 1 ) {
7832                     $ws = WS_NO;
7833                 }
7834                 else {
7835
7836                     # Patch to count '-foo' as single token so that
7837                     # each of  $a{-foo} and $a{foo} and $a{'foo'} do
7838                     # not get spaces with default formatting.
7839                     my $j_here = $j;
7840                     ++$j_here
7841                       if ( $token eq '-'
7842                         && $last_token             eq '{'
7843                         && $$rtoken_type[ $j + 1 ] eq 'w' );
7844
7845                     # $j_next is where a closing token should be if
7846                     # the container has a single token
7847                     my $j_next =
7848                       ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7849                       ? $j_here + 2
7850                       : $j_here + 1;
7851                     my $tok_next  = $$rtokens[$j_next];
7852                     my $type_next = $$rtoken_type[$j_next];
7853
7854                     # for tightness = 1, if there is just one token
7855                     # within the matching pair, we will keep it tight
7856                     if (
7857                         $tok_next eq $matching_token{$last_token}
7858
7859                         # but watch out for this: [ [ ]    (misc.t)
7860                         && $last_token ne $token
7861                       )
7862                     {
7863
7864                         # remember where to put the space for the closing paren
7865                         $j_tight_closing_paren = $j_next;
7866                         $ws                    = WS_NO;
7867                     }
7868                     else {
7869                         $ws = WS_YES;
7870                     }
7871                 }
7872             }
7873         }    # done with opening braces and brackets
7874         my $ws_1 = $ws
7875           if FORMATTER_DEBUG_FLAG_WHITE;
7876
7877         #---------------------------------------------------------------
7878         # section 2:
7879         # handle space on inside of closing brace pairs
7880         #---------------------------------------------------------------
7881
7882         #   /[\}\)\]R]/
7883         if ( $is_closing_type{$type} ) {
7884
7885             if ( $j == $j_tight_closing_paren ) {
7886
7887                 $j_tight_closing_paren = -1;
7888                 $ws                    = WS_NO;
7889             }
7890             else {
7891
7892                 if ( !defined($ws) ) {
7893
7894                     my $tightness;
7895                     if ( $type eq '}' && $token eq '}' && $block_type ) {
7896                         $tightness = $rOpts_block_brace_tightness;
7897                     }
7898                     else { $tightness = $tightness{$token} }
7899
7900                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7901                 }
7902             }
7903         }
7904
7905         my $ws_2 = $ws
7906           if FORMATTER_DEBUG_FLAG_WHITE;
7907
7908         #---------------------------------------------------------------
7909         # section 3:
7910         # use the binary table
7911         #---------------------------------------------------------------
7912         if ( !defined($ws) ) {
7913             $ws = $binary_ws_rules{$last_type}{$type};
7914         }
7915         my $ws_3 = $ws
7916           if FORMATTER_DEBUG_FLAG_WHITE;
7917
7918         #---------------------------------------------------------------
7919         # section 4:
7920         # some special cases
7921         #---------------------------------------------------------------
7922         if ( $token eq '(' ) {
7923
7924             # This will have to be tweaked as tokenization changes.
7925             # We usually want a space at '} (', for example:
7926             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7927             #
7928             # But not others:
7929             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7930             # At present, the above & block is marked as type L/R so this case
7931             # won't go through here.
7932             if ( $last_type eq '}' ) { $ws = WS_YES }
7933
7934             # NOTE: some older versions of Perl had occasional problems if
7935             # spaces are introduced between keywords or functions and opening
7936             # parens.  So the default is not to do this except is certain
7937             # cases.  The current Perl seems to tolerate spaces.
7938
7939             # Space between keyword and '('
7940             elsif ( $last_type eq 'k' ) {
7941                 $ws = WS_NO
7942                   unless ( $rOpts_space_keyword_paren
7943                     || $space_after_keyword{$last_token} );
7944             }
7945
7946             # Space between function and '('
7947             # -----------------------------------------------------
7948             # 'w' and 'i' checks for something like:
7949             #   myfun(    &myfun(   ->myfun(
7950             # -----------------------------------------------------
7951             elsif (( $last_type =~ /^[wU]$/ )
7952                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7953             {
7954                 $ws = WS_NO unless ($rOpts_space_function_paren);
7955             }
7956
7957             # space between something like $i and ( in
7958             # for $i ( 0 .. 20 ) {
7959             # FIXME: eventually, type 'i' needs to be split into multiple
7960             # token types so this can be a hardwired rule.
7961             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7962                 $ws = WS_YES;
7963             }
7964
7965             # allow constant function followed by '()' to retain no space
7966             elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7967                 $ws = WS_NO;
7968             }
7969         }
7970
7971         # patch for SWITCH/CASE: make space at ']{' optional
7972         # since the '{' might begin a case or when block
7973         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
7974             $ws = WS_OPTIONAL;
7975         }
7976
7977         # keep space between 'sub' and '{' for anonymous sub definition
7978         if ( $type eq '{' ) {
7979             if ( $last_token eq 'sub' ) {
7980                 $ws = WS_YES;
7981             }
7982
7983             # this is needed to avoid no space in '){'
7984             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7985
7986             # avoid any space before the brace or bracket in something like
7987             #  @opts{'a','b',...}
7988             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7989                 $ws = WS_NO;
7990             }
7991         }
7992
7993         elsif ( $type eq 'i' ) {
7994
7995             # never a space before ->
7996             if ( $token =~ /^\-\>/ ) {
7997                 $ws = WS_NO;
7998             }
7999         }
8000
8001         # retain any space between '-' and bare word
8002         elsif ( $type eq 'w' || $type eq 'C' ) {
8003             $ws = WS_OPTIONAL if $last_type eq '-';
8004
8005             # never a space before ->
8006             if ( $token =~ /^\-\>/ ) {
8007                 $ws = WS_NO;
8008             }
8009         }
8010
8011         # retain any space between '-' and bare word
8012         # example: avoid space between 'USER' and '-' here:
8013         #   $myhash{USER-NAME}='steve';
8014         elsif ( $type eq 'm' || $type eq '-' ) {
8015             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8016         }
8017
8018         # always space before side comment
8019         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8020
8021         # always preserver whatever space was used after a possible
8022         # filehandle (except _) or here doc operator
8023         if (
8024             $type ne '#'
8025             && ( ( $last_type eq 'Z' && $last_token ne '_' )
8026                 || $last_type eq 'h' )
8027           )
8028         {
8029             $ws = WS_OPTIONAL;
8030         }
8031
8032         my $ws_4 = $ws
8033           if FORMATTER_DEBUG_FLAG_WHITE;
8034
8035         #---------------------------------------------------------------
8036         # section 5:
8037         # default rules not covered above
8038         #---------------------------------------------------------------
8039         # if we fall through to here,
8040         # look at the pre-defined hash tables for the two tokens, and
8041         # if (they are equal) use the common value
8042         # if (either is zero or undef) use the other
8043         # if (either is -1) use it
8044         # That is,
8045         # left  vs right
8046         #  1    vs    1     -->  1
8047         #  0    vs    0     -->  0
8048         # -1    vs   -1     --> -1
8049         #
8050         #  0    vs   -1     --> -1
8051         #  0    vs    1     -->  1
8052         #  1    vs    0     -->  1
8053         # -1    vs    0     --> -1
8054         #
8055         # -1    vs    1     --> -1
8056         #  1    vs   -1     --> -1
8057         if ( !defined($ws) ) {
8058             my $wl = $want_left_space{$type};
8059             my $wr = $want_right_space{$last_type};
8060             if ( !defined($wl) ) { $wl = 0 }
8061             if ( !defined($wr) ) { $wr = 0 }
8062             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8063         }
8064
8065         if ( !defined($ws) ) {
8066             $ws = 0;
8067             write_diagnostics(
8068                 "WS flag is undefined for tokens $last_token $token\n");
8069         }
8070
8071         # Treat newline as a whitespace. Otherwise, we might combine
8072         # 'Send' and '-recipients' here according to the above rules:
8073         #    my $msg = new Fax::Send
8074         #      -recipients => $to,
8075         #      -data => $data;
8076         if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8077
8078         if (   ( $ws == 0 )
8079             && $j > 0
8080             && $j < $jmax
8081             && ( $last_type !~ /^[Zh]$/ ) )
8082         {
8083
8084             # If this happens, we have a non-fatal but undesirable
8085             # hole in the above rules which should be patched.
8086             write_diagnostics(
8087                 "WS flag is zero for tokens $last_token $token\n");
8088         }
8089         $white_space_flag[$j] = $ws;
8090
8091         FORMATTER_DEBUG_FLAG_WHITE && do {
8092             my $str = substr( $last_token, 0, 15 );
8093             $str .= ' ' x ( 16 - length($str) );
8094             if ( !defined($ws_1) ) { $ws_1 = "*" }
8095             if ( !defined($ws_2) ) { $ws_2 = "*" }
8096             if ( !defined($ws_3) ) { $ws_3 = "*" }
8097             if ( !defined($ws_4) ) { $ws_4 = "*" }
8098             print
8099 "WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8100         };
8101     }
8102     return \@white_space_flag;
8103 }
8104
8105 {    # begin print_line_of_tokens
8106
8107     my $rtoken_type;
8108     my $rtokens;
8109     my $rlevels;
8110     my $rslevels;
8111     my $rblock_type;
8112     my $rcontainer_type;
8113     my $rcontainer_environment;
8114     my $rtype_sequence;
8115     my $input_line;
8116     my $rnesting_tokens;
8117     my $rci_levels;
8118     my $rnesting_blocks;
8119
8120     my $in_quote;
8121     my $python_indentation_level;
8122
8123     # These local token variables are stored by store_token_to_go:
8124     my $block_type;
8125     my $ci_level;
8126     my $container_environment;
8127     my $container_type;
8128     my $in_continued_quote;
8129     my $level;
8130     my $nesting_blocks;
8131     my $no_internal_newlines;
8132     my $slevel;
8133     my $token;
8134     my $type;
8135     my $type_sequence;
8136
8137     # routine to pull the jth token from the line of tokens
8138     sub extract_token {
8139         my $j = shift;
8140         $token                 = $$rtokens[$j];
8141         $type                  = $$rtoken_type[$j];
8142         $block_type            = $$rblock_type[$j];
8143         $container_type        = $$rcontainer_type[$j];
8144         $container_environment = $$rcontainer_environment[$j];
8145         $type_sequence         = $$rtype_sequence[$j];
8146         $level                 = $$rlevels[$j];
8147         $slevel                = $$rslevels[$j];
8148         $nesting_blocks        = $$rnesting_blocks[$j];
8149         $ci_level              = $$rci_levels[$j];
8150     }
8151
8152     {
8153         my @saved_token;
8154
8155         sub save_current_token {
8156
8157             @saved_token = (
8158                 $block_type,            $ci_level,
8159                 $container_environment, $container_type,
8160                 $in_continued_quote,    $level,
8161                 $nesting_blocks,        $no_internal_newlines,
8162                 $slevel,                $token,
8163                 $type,                  $type_sequence,
8164             );
8165         }
8166
8167         sub restore_current_token {
8168             (
8169                 $block_type,            $ci_level,
8170                 $container_environment, $container_type,
8171                 $in_continued_quote,    $level,
8172                 $nesting_blocks,        $no_internal_newlines,
8173                 $slevel,                $token,
8174                 $type,                  $type_sequence,
8175             ) = @saved_token;
8176         }
8177     }
8178
8179     # Routine to place the current token into the output stream.
8180     # Called once per output token.
8181     sub store_token_to_go {
8182
8183         my $flag = $no_internal_newlines;
8184         if ( $_[0] ) { $flag = 1 }
8185
8186         $tokens_to_go[ ++$max_index_to_go ]            = $token;
8187         $types_to_go[$max_index_to_go]                 = $type;
8188         $nobreak_to_go[$max_index_to_go]               = $flag;
8189         $old_breakpoint_to_go[$max_index_to_go]        = 0;
8190         $forced_breakpoint_to_go[$max_index_to_go]     = 0;
8191         $block_type_to_go[$max_index_to_go]            = $block_type;
8192         $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
8193         $container_environment_to_go[$max_index_to_go] = $container_environment;
8194         $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
8195         $ci_levels_to_go[$max_index_to_go]             = $ci_level;
8196         $mate_index_to_go[$max_index_to_go]            = -1;
8197         $matching_token_to_go[$max_index_to_go]        = '';
8198
8199         # Note: negative levels are currently retained as a diagnostic so that
8200         # the 'final indentation level' is correctly reported for bad scripts.
8201         # But this means that every use of $level as an index must be checked.
8202         # If this becomes too much of a problem, we might give up and just clip
8203         # them at zero.
8204         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8205         $levels_to_go[$max_index_to_go] = $level;
8206         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8207         $lengths_to_go[ $max_index_to_go + 1 ] =
8208           $lengths_to_go[$max_index_to_go] + length($token);
8209
8210         # Define the indentation that this token would have if it started
8211         # a new line.  We have to do this now because we need to know this
8212         # when considering one-line blocks.
8213         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8214
8215         if ( $type ne 'b' ) {
8216             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8217             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
8218             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8219             $last_nonblank_index_to_go      = $max_index_to_go;
8220             $last_nonblank_type_to_go       = $type;
8221             $last_nonblank_token_to_go      = $token;
8222             if ( $type eq ',' ) {
8223                 $comma_count_in_batch++;
8224             }
8225         }
8226
8227         FORMATTER_DEBUG_FLAG_STORE && do {
8228             my ( $a, $b, $c ) = caller();
8229             print
8230 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8231         };
8232     }
8233
8234     sub insert_new_token_to_go {
8235
8236         # insert a new token into the output stream.  use same level as
8237         # previous token; assumes a character at max_index_to_go.
8238         save_current_token();
8239         ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8240
8241         if ( $max_index_to_go == UNDEFINED_INDEX ) {
8242             warning("code bug: bad call to insert_new_token_to_go\n");
8243         }
8244         $level = $levels_to_go[$max_index_to_go];
8245
8246         # FIXME: it seems to be necessary to use the next, rather than
8247         # previous, value of this variable when creating a new blank (align.t)
8248         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
8249         $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
8250         $ci_level              = $ci_levels_to_go[$max_index_to_go];
8251         $container_environment = $container_environment_to_go[$max_index_to_go];
8252         $in_continued_quote    = 0;
8253         $block_type            = "";
8254         $type_sequence         = "";
8255         store_token_to_go();
8256         restore_current_token();
8257         return;
8258     }
8259
8260     sub print_line_of_tokens {
8261
8262         my $line_of_tokens = shift;
8263
8264         # This routine is called once per input line to process all of
8265         # the tokens on that line.  This is the first stage of
8266         # beautification.
8267         #
8268         # Full-line comments and blank lines may be processed immediately.
8269         #
8270         # For normal lines of code, the tokens are stored one-by-one,
8271         # via calls to 'sub store_token_to_go', until a known line break
8272         # point is reached.  Then, the batch of collected tokens is
8273         # passed along to 'sub output_line_to_go' for further
8274         # processing.  This routine decides if there should be
8275         # whitespace between each pair of non-white tokens, so later
8276         # routines only need to decide on any additional line breaks.
8277         # Any whitespace is initally a single space character.  Later,
8278         # the vertical aligner may expand that to be multiple space
8279         # characters if necessary for alignment.
8280
8281         # extract input line number for error messages
8282         $input_line_number = $line_of_tokens->{_line_number};
8283
8284         $rtoken_type            = $line_of_tokens->{_rtoken_type};
8285         $rtokens                = $line_of_tokens->{_rtokens};
8286         $rlevels                = $line_of_tokens->{_rlevels};
8287         $rslevels               = $line_of_tokens->{_rslevels};
8288         $rblock_type            = $line_of_tokens->{_rblock_type};
8289         $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
8290         $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8291         $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
8292         $input_line             = $line_of_tokens->{_line_text};
8293         $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
8294         $rci_levels             = $line_of_tokens->{_rci_levels};
8295         $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
8296
8297         $in_continued_quote = $starting_in_quote =
8298           $line_of_tokens->{_starting_in_quote};
8299         $in_quote        = $line_of_tokens->{_ending_in_quote};
8300         $ending_in_quote = $in_quote;
8301         $python_indentation_level =
8302           $line_of_tokens->{_python_indentation_level};
8303
8304         my $j;
8305         my $j_next;
8306         my $jmax;
8307         my $next_nonblank_token;
8308         my $next_nonblank_token_type;
8309         my $rwhite_space_flag;
8310
8311         $jmax                    = @$rtokens - 1;
8312         $block_type              = "";
8313         $container_type          = "";
8314         $container_environment   = "";
8315         $type_sequence           = "";
8316         $no_internal_newlines    = 1 - $rOpts_add_newlines;
8317         $is_static_block_comment = 0;
8318
8319         # Handle a continued quote..
8320         if ($in_continued_quote) {
8321
8322             # A line which is entirely a quote or pattern must go out
8323             # verbatim.  Note: the \n is contained in $input_line.
8324             if ( $jmax <= 0 ) {
8325                 if ( ( $input_line =~ "\t" ) ) {
8326                     note_embedded_tab();
8327                 }
8328                 write_unindented_line("$input_line");
8329                 $last_line_had_side_comment = 0;
8330                 return;
8331             }
8332
8333             # prior to version 20010406, perltidy had a bug which placed
8334             # continuation indentation before the last line of some multiline
8335             # quotes and patterns -- exactly the lines passing this way.
8336             # To help find affected lines in scripts run with these
8337             # versions, run with '-chk', and it will warn of any quotes or
8338             # patterns which might have been modified by these early
8339             # versions.
8340             if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8341                 warning(
8342 "-chk: please check this line for extra leading whitespace\n"
8343                 );
8344             }
8345         }
8346
8347         # Write line verbatim if we are in a formatting skip section
8348         if ($in_format_skipping_section) {
8349             write_unindented_line("$input_line");
8350             $last_line_had_side_comment = 0;
8351
8352             # Note: extra space appended to comment simplifies pattern matching
8353             if (   $jmax == 0
8354                 && $$rtoken_type[0] eq '#'
8355                 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8356             {
8357                 $in_format_skipping_section = 0;
8358                 write_logfile_entry("Exiting formatting skip section\n");
8359             }
8360             return;
8361         }
8362
8363         # See if we are entering a formatting skip section
8364         if (   $rOpts_format_skipping
8365             && $jmax == 0
8366             && $$rtoken_type[0] eq '#'
8367             && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8368         {
8369             flush();
8370             $in_format_skipping_section = 1;
8371             write_logfile_entry("Entering formatting skip section\n");
8372             write_unindented_line("$input_line");
8373             $last_line_had_side_comment = 0;
8374             return;
8375         }
8376
8377         # delete trailing blank tokens
8378         if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8379
8380         # Handle a blank line..
8381         if ( $jmax < 0 ) {
8382
8383             # For the 'swallow-optional-blank-lines' option, we delete all
8384             # old blank lines and let the blank line rules generate any
8385             # needed blanks.
8386             if ( !$rOpts_swallow_optional_blank_lines ) {
8387                 flush();
8388                 $file_writer_object->write_blank_code_line();
8389                 $last_line_leading_type = 'b';
8390             }
8391             $last_line_had_side_comment = 0;
8392             return;
8393         }
8394
8395         # see if this is a static block comment (starts with ## by default)
8396         my $is_static_block_comment_without_leading_space = 0;
8397         if (   $jmax == 0
8398             && $$rtoken_type[0] eq '#'
8399             && $rOpts->{'static-block-comments'}
8400             && $input_line =~ /$static_block_comment_pattern/o )
8401         {
8402             $is_static_block_comment = 1;
8403             $is_static_block_comment_without_leading_space =
8404               substr( $input_line, 0, 1 ) eq '#';
8405         }
8406
8407         # Check for comments which are line directives
8408         # Treat exactly as static block comments without leading space
8409         # reference: perlsyn, near end, section Plain Old Comments (Not!)
8410         # example: '# line 42 "new_filename.plx"'
8411         if (
8412                $jmax == 0
8413             && $$rtoken_type[0] eq '#'
8414             && $input_line =~ /^\#   \s*
8415                                line \s+ (\d+)   \s*
8416                                (?:\s("?)([^"]+)\2)? \s*
8417                                $/x
8418           )
8419         {
8420             $is_static_block_comment                       = 1;
8421             $is_static_block_comment_without_leading_space = 1;
8422         }
8423
8424         # create a hanging side comment if appropriate
8425         if (
8426                $jmax == 0
8427             && $$rtoken_type[0] eq '#'    # only token is a comment
8428             && $last_line_had_side_comment    # last line had side comment
8429             && $input_line =~ /^\s/           # there is some leading space
8430             && !$is_static_block_comment    # do not make static comment hanging
8431             && $rOpts->{'hanging-side-comments'}    # user is allowing this
8432           )
8433         {
8434
8435             # We will insert an empty qw string at the start of the token list
8436             # to force this comment to be a side comment. The vertical aligner
8437             # should then line it up with the previous side comment.
8438             unshift @$rtoken_type,            'q';
8439             unshift @$rtokens,                '';
8440             unshift @$rlevels,                $$rlevels[0];
8441             unshift @$rslevels,               $$rslevels[0];
8442             unshift @$rblock_type,            '';
8443             unshift @$rcontainer_type,        '';
8444             unshift @$rcontainer_environment, '';
8445             unshift @$rtype_sequence,         '';
8446             unshift @$rnesting_tokens,        $$rnesting_tokens[0];
8447             unshift @$rci_levels,             $$rci_levels[0];
8448             unshift @$rnesting_blocks,        $$rnesting_blocks[0];
8449             $jmax = 1;
8450         }
8451
8452         # remember if this line has a side comment
8453         $last_line_had_side_comment =
8454           ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8455
8456         # Handle a block (full-line) comment..
8457         if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8458
8459             if ( $rOpts->{'delete-block-comments'} ) { return }
8460
8461             if ( $rOpts->{'tee-block-comments'} ) {
8462                 $file_writer_object->tee_on();
8463             }
8464
8465             destroy_one_line_block();
8466             output_line_to_go();
8467
8468             # output a blank line before block comments
8469             if (
8470                    $last_line_leading_type !~ /^[#b]$/
8471                 && $rOpts->{'blanks-before-comments'}    # only if allowed
8472                 && !
8473                 $is_static_block_comment    # never before static block comments
8474               )
8475             {
8476                 flush();                    # switching to new output stream
8477                 $file_writer_object->write_blank_code_line();
8478                 $last_line_leading_type = 'b';
8479             }
8480
8481             # TRIM COMMENTS -- This could be turned off as a option
8482             $$rtokens[0] =~ s/\s*$//;       # trim right end
8483
8484             if (
8485                 $rOpts->{'indent-block-comments'}
8486                 && ( !$rOpts->{'indent-spaced-block-comments'}
8487                     || $input_line =~ /^\s+/ )
8488                 && !$is_static_block_comment_without_leading_space
8489               )
8490             {
8491                 extract_token(0);
8492                 store_token_to_go();
8493                 output_line_to_go();
8494             }
8495             else {
8496                 flush();    # switching to new output stream
8497                 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8498                 $last_line_leading_type = '#';
8499             }
8500             if ( $rOpts->{'tee-block-comments'} ) {
8501                 $file_writer_object->tee_off();
8502             }
8503             return;
8504         }
8505
8506         # compare input/output indentation except for continuation lines
8507         # (because they have an unknown amount of initial blank space)
8508         # and lines which are quotes (because they may have been outdented)
8509         # Note: this test is placed here because we know the continuation flag
8510         # at this point, which allows us to avoid non-meaningful checks.
8511         my $structural_indentation_level = $$rlevels[0];
8512         compare_indentation_levels( $python_indentation_level,
8513             $structural_indentation_level )
8514           unless ( $python_indentation_level < 0
8515             || ( $$rci_levels[0] > 0 )
8516             || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8517           );
8518
8519         #   Patch needed for MakeMaker.  Do not break a statement
8520         #   in which $VERSION may be calculated.  See MakeMaker.pm;
8521         #   this is based on the coding in it.
8522         #   The first line of a file that matches this will be eval'd:
8523         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8524         #   Examples:
8525         #     *VERSION = \'1.01';
8526         #     ( $VERSION ) = '$Revision: 1.68 $ ' =~ /\$Revision:\s+([^\s]+)/;
8527         #   We will pass such a line straight through without breaking
8528         #   it unless -npvl is used
8529
8530         my $is_VERSION_statement = 0;
8531
8532         if (
8533             !$saw_VERSION_in_this_file
8534             && $input_line =~ /VERSION/    # quick check to reject most lines
8535             && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8536           )
8537         {
8538             $saw_VERSION_in_this_file = 1;
8539             $is_VERSION_statement     = 1;
8540             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8541             $no_internal_newlines = 1;
8542         }
8543
8544         # take care of indentation-only
8545         # NOTE: In previous versions we sent all qw lines out immediately here.
8546         # No longer doing this: also write a line which is entirely a 'qw' list
8547         # to allow stacking of opening and closing tokens.  Note that interior
8548         # qw lines will still go out at the end of this routine.
8549         if ( $rOpts->{'indent-only'} ) {
8550             flush();
8551             trim($input_line);
8552
8553             extract_token(0);
8554             $token                 = $input_line;
8555             $type                  = 'q';
8556             $block_type            = "";
8557             $container_type        = "";
8558             $container_environment = "";
8559             $type_sequence         = "";
8560             store_token_to_go();
8561             output_line_to_go();
8562             return;
8563         }
8564
8565         push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
8566         push( @$rtoken_type, 'b', 'b' );
8567         ($rwhite_space_flag) =
8568           set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8569
8570         # find input tabbing to allow checks for tabbing disagreement
8571         ## not used for now
8572         ##$input_line_tabbing = "";
8573         ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8574
8575         # if the buffer hasn't been flushed, add a leading space if
8576         # necessary to keep essential whitespace. This is really only
8577         # necessary if we are squeezing out all ws.
8578         if ( $max_index_to_go >= 0 ) {
8579
8580             $old_line_count_in_batch++;
8581
8582             if (
8583                 is_essential_whitespace(
8584                     $last_last_nonblank_token,
8585                     $last_last_nonblank_type,
8586                     $tokens_to_go[$max_index_to_go],
8587                     $types_to_go[$max_index_to_go],
8588                     $$rtokens[0],
8589                     $$rtoken_type[0]
8590                 )
8591               )
8592             {
8593                 my $slevel = $$rslevels[0];
8594                 insert_new_token_to_go( ' ', 'b', $slevel,
8595                     $no_internal_newlines );
8596             }
8597         }
8598
8599         # If we just saw the end of an elsif block, write nag message
8600         # if we do not see another elseif or an else.
8601         if ($looking_for_else) {
8602
8603             unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8604                 write_logfile_entry("(No else block)\n");
8605             }
8606             $looking_for_else = 0;
8607         }
8608
8609         # This is a good place to kill incomplete one-line blocks
8610         if (   ( $semicolons_before_block_self_destruct == 0 )
8611             && ( $max_index_to_go >= 0 )
8612             && ( $types_to_go[$max_index_to_go] eq ';' )
8613             && ( $$rtokens[0] ne '}' ) )
8614         {
8615             destroy_one_line_block();
8616             output_line_to_go();
8617         }
8618
8619         # loop to process the tokens one-by-one
8620         $type  = 'b';
8621         $token = "";
8622
8623         foreach $j ( 0 .. $jmax ) {
8624
8625             # pull out the local values for this token
8626             extract_token($j);
8627
8628             if ( $type eq '#' ) {
8629
8630                 # trim trailing whitespace
8631                 # (there is no option at present to prevent this)
8632                 $token =~ s/\s*$//;
8633
8634                 if (
8635                     $rOpts->{'delete-side-comments'}
8636
8637                     # delete closing side comments if necessary
8638                     || (   $rOpts->{'delete-closing-side-comments'}
8639                         && $token =~ /$closing_side_comment_prefix_pattern/o
8640                         && $last_nonblank_block_type =~
8641                         /$closing_side_comment_list_pattern/o )
8642                   )
8643                 {
8644                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8645                         unstore_token_to_go();
8646                     }
8647                     last;
8648                 }
8649             }
8650
8651             # If we are continuing after seeing a right curly brace, flush
8652             # buffer unless we see what we are looking for, as in
8653             #   } else ...
8654             if ( $rbrace_follower && $type ne 'b' ) {
8655
8656                 unless ( $rbrace_follower->{$token} ) {
8657                     output_line_to_go();
8658                 }
8659                 $rbrace_follower = undef;
8660             }
8661
8662             $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8663             $next_nonblank_token      = $$rtokens[$j_next];
8664             $next_nonblank_token_type = $$rtoken_type[$j_next];
8665
8666             #--------------------------------------------------------
8667             # Start of section to patch token text
8668             #--------------------------------------------------------
8669
8670             # Modify certain tokens here for whitespace
8671             # The following is not yet done, but could be:
8672             #   sub (x x x)
8673             if ( $type =~ /^[wit]$/ ) {
8674
8675                 # Examples:
8676                 # change '$  var'  to '$var' etc
8677                 #        '-> new'  to '->new'
8678                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8679                     $token =~ s/\s*//g;
8680                 }
8681
8682                 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8683             }
8684
8685             # change 'LABEL   :'   to 'LABEL:'
8686             elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8687
8688             # patch to add space to something like "x10"
8689             # This avoids having to split this token in the pre-tokenizer
8690             elsif ( $type eq 'n' ) {
8691                 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8692             }
8693
8694             elsif ( $type eq 'Q' ) {
8695                 note_embedded_tab() if ( $token =~ "\t" );
8696
8697                 # make note of something like '$var = s/xxx/yyy/;'
8698                 # in case it should have been '$var =~ s/xxx/yyy/;'
8699                 if (
8700                        $token               =~ /^(s|tr|y|m|\/)/
8701                     && $last_nonblank_token =~ /^(=|==|!=)$/
8702
8703                     # precededed by simple scalar
8704                     && $last_last_nonblank_type eq 'i'
8705                     && $last_last_nonblank_token =~ /^\$/
8706
8707                     # followed by some kind of termination
8708                     # (but give complaint if we can's see far enough ahead)
8709                     && $next_nonblank_token =~ /^[; \)\}]$/
8710
8711                     # scalar is not decleared
8712                     && !(
8713                            $types_to_go[0] eq 'k'
8714                         && $tokens_to_go[0] =~ /^(my|our|local)$/
8715                     )
8716                   )
8717                 {
8718                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8719                     complain(
8720 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8721                     );
8722                 }
8723             }
8724
8725            # trim blanks from right of qw quotes
8726            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8727             elsif ( $type eq 'q' ) {
8728                 $token =~ s/\s*$//;
8729                 note_embedded_tab() if ( $token =~ "\t" );
8730             }
8731
8732             #--------------------------------------------------------
8733             # End of section to patch token text
8734             #--------------------------------------------------------
8735
8736             # insert any needed whitespace
8737             if (   ( $type ne 'b' )
8738                 && ( $max_index_to_go >= 0 )
8739                 && ( $types_to_go[$max_index_to_go] ne 'b' )
8740                 && $rOpts_add_whitespace )
8741             {
8742                 my $ws = $$rwhite_space_flag[$j];
8743
8744                 if ( $ws == 1 ) {
8745                     insert_new_token_to_go( ' ', 'b', $slevel,
8746                         $no_internal_newlines );
8747                 }
8748             }
8749
8750             # Do not allow breaks which would promote a side comment to a
8751             # block comment.  In order to allow a break before an opening
8752             # or closing BLOCK, followed by a side comment, those sections
8753             # of code will handle this flag separately.
8754             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8755             my $is_opening_BLOCK =
8756               (      $type eq '{'
8757                   && $token eq '{'
8758                   && $block_type
8759                   && $block_type ne 't' );
8760             my $is_closing_BLOCK =
8761               (      $type eq '}'
8762                   && $token eq '}'
8763                   && $block_type
8764                   && $block_type ne 't' );
8765
8766             if (   $side_comment_follows
8767                 && !$is_opening_BLOCK
8768                 && !$is_closing_BLOCK )
8769             {
8770                 $no_internal_newlines = 1;
8771             }
8772
8773             # We're only going to handle breaking for code BLOCKS at this
8774             # (top) level.  Other indentation breaks will be handled by
8775             # sub scan_list, which is better suited to dealing with them.
8776             if ($is_opening_BLOCK) {
8777
8778                 # Tentatively output this token.  This is required before
8779                 # calling starting_one_line_block.  We may have to unstore
8780                 # it, though, if we have to break before it.
8781                 store_token_to_go($side_comment_follows);
8782
8783                 # Look ahead to see if we might form a one-line block
8784                 my $too_long =
8785                   starting_one_line_block( $j, $jmax, $level, $slevel,
8786                     $ci_level, $rtokens, $rtoken_type, $rblock_type );
8787                 clear_breakpoint_undo_stack();
8788
8789                 # to simplify the logic below, set a flag to indicate if
8790                 # this opening brace is far from the keyword which introduces it
8791                 my $keyword_on_same_line = 1;
8792                 if (   ( $max_index_to_go >= 0 )
8793                     && ( $last_nonblank_type eq ')' ) )
8794                 {
8795                     if (   $block_type =~ /^(if|else|elsif)$/
8796                         && ( $tokens_to_go[0] eq '}' )
8797                         && $rOpts_cuddled_else )
8798                     {
8799                         $keyword_on_same_line = 1;
8800                     }
8801                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8802                     {
8803                         $keyword_on_same_line = 0;
8804                     }
8805                 }
8806
8807                 # decide if user requested break before '{'
8808                 my $want_break =
8809
8810                   # use -bl flag if not a sub block of any type
8811                   $block_type !~ /^sub/
8812                   ? $rOpts->{'opening-brace-on-new-line'}
8813
8814                   # use -sbl flag unless this is an anonymous sub block
8815                   : $block_type !~ /^sub\W*$/
8816                   ? $rOpts->{'opening-sub-brace-on-new-line'}
8817
8818                   # do not break for anonymous subs
8819                   : 0;
8820
8821                 # Break before an opening '{' ...
8822                 if (
8823
8824                     # if requested
8825                     $want_break
8826
8827                     # and we were unable to start looking for a block,
8828                     && $index_start_one_line_block == UNDEFINED_INDEX
8829
8830                     # or if it will not be on same line as its keyword, so that
8831                     # it will be outdented (eval.t, overload.t), and the user
8832                     # has not insisted on keeping it on the right
8833                     || (   !$keyword_on_same_line
8834                         && !$rOpts->{'opening-brace-always-on-right'} )
8835
8836                   )
8837                 {
8838
8839                     # but only if allowed
8840                     unless ($no_internal_newlines) {
8841
8842                         # since we already stored this token, we must unstore it
8843                         unstore_token_to_go();
8844
8845                         # then output the line
8846                         output_line_to_go();
8847
8848                         # and now store this token at the start of a new line
8849                         store_token_to_go($side_comment_follows);
8850                     }
8851                 }
8852
8853                 # Now update for side comment
8854                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8855
8856                 # now output this line
8857                 unless ($no_internal_newlines) {
8858                     output_line_to_go();
8859                 }
8860             }
8861
8862             elsif ($is_closing_BLOCK) {
8863
8864                 # If there is a pending one-line block ..
8865                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8866
8867                     # we have to terminate it if..
8868                     if (
8869
8870                     # it is too long (final length may be different from
8871                     # initial estimate). note: must allow 1 space for this token
8872                         excess_line_length( $index_start_one_line_block,
8873                             $max_index_to_go ) >= 0
8874
8875                         # or if it has too many semicolons
8876                         || (   $semicolons_before_block_self_destruct == 0
8877                             && $last_nonblank_type ne ';' )
8878                       )
8879                     {
8880                         destroy_one_line_block();
8881                     }
8882                 }
8883
8884                 # put a break before this closing curly brace if appropriate
8885                 unless ( $no_internal_newlines
8886                     || $index_start_one_line_block != UNDEFINED_INDEX )
8887                 {
8888
8889                     # add missing semicolon if ...
8890                     # there are some tokens
8891                     if (
8892                         ( $max_index_to_go > 0 )
8893
8894                         # and we don't have one
8895                         && ( $last_nonblank_type ne ';' )
8896
8897                         # patch until some block type issues are fixed:
8898                         # Do not add semi-colon for block types '{',
8899                         # '}', and ';' because we cannot be sure yet
8900                         # that this is a block and not an anonomyous
8901                         # hash (blktype.t, blktype1.t)
8902                         && ( $block_type !~ /^[\{\};]$/ )
8903
8904                         # it seems best not to add semicolons in these
8905                         # special block types: sort|map|grep
8906                         && ( !$is_sort_map_grep{$block_type} )
8907
8908                         # and we are allowed to do so.
8909                         && $rOpts->{'add-semicolons'}
8910                       )
8911                     {
8912
8913                         save_current_token();
8914                         $token  = ';';
8915                         $type   = ';';
8916                         $level  = $levels_to_go[$max_index_to_go];
8917                         $slevel = $nesting_depth_to_go[$max_index_to_go];
8918                         $nesting_blocks =
8919                           $nesting_blocks_to_go[$max_index_to_go];
8920                         $ci_level       = $ci_levels_to_go[$max_index_to_go];
8921                         $block_type     = "";
8922                         $container_type = "";
8923                         $container_environment = "";
8924                         $type_sequence         = "";
8925
8926                         # Note - we remove any blank AFTER extracting its
8927                         # parameters such as level, etc, above
8928                         if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8929                             unstore_token_to_go();
8930                         }
8931                         store_token_to_go();
8932
8933                         note_added_semicolon();
8934                         restore_current_token();
8935                     }
8936
8937                     # then write out everything before this closing curly brace
8938                     output_line_to_go();
8939
8940                 }
8941
8942                 # Now update for side comment
8943                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8944
8945                 # store the closing curly brace
8946                 store_token_to_go();
8947
8948                 # ok, we just stored a closing curly brace.  Often, but
8949                 # not always, we want to end the line immediately.
8950                 # So now we have to check for special cases.
8951
8952                 # if this '}' successfully ends a one-line block..
8953                 my $is_one_line_block = 0;
8954                 my $keep_going        = 0;
8955                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8956
8957                     # Remember the type of token just before the
8958                     # opening brace.  It would be more general to use
8959                     # a stack, but this will work for one-line blocks.
8960                     $is_one_line_block =
8961                       $types_to_go[$index_start_one_line_block];
8962
8963                     # we have to actually make it by removing tentative
8964                     # breaks that were set within it
8965                     undo_forced_breakpoint_stack(0);
8966                     set_nobreaks( $index_start_one_line_block,
8967                         $max_index_to_go - 1 );
8968
8969                     # then re-initialize for the next one-line block
8970                     destroy_one_line_block();
8971
8972                     # then decide if we want to break after the '}' ..
8973                     # We will keep going to allow certain brace followers as in:
8974                     #   do { $ifclosed = 1; last } unless $losing;
8975                     #
8976                     # But make a line break if the curly ends a
8977                     # significant block:
8978                     if (
8979                         $is_block_without_semicolon{$block_type}
8980
8981                         # if needless semicolon follows we handle it later
8982                         && $next_nonblank_token ne ';'
8983                       )
8984                     {
8985                         output_line_to_go() unless ($no_internal_newlines);
8986                     }
8987                 }
8988
8989                 # set string indicating what we need to look for brace follower
8990                 # tokens
8991                 if ( $block_type eq 'do' ) {
8992                     $rbrace_follower = \%is_do_follower;
8993                 }
8994                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
8995                     $rbrace_follower = \%is_if_brace_follower;
8996                 }
8997                 elsif ( $block_type eq 'else' ) {
8998                     $rbrace_follower = \%is_else_brace_follower;
8999                 }
9000
9001                 # added eval for borris.t
9002                 elsif ($is_sort_map_grep_eval{$block_type}
9003                     || $is_one_line_block eq 'G' )
9004                 {
9005                     $rbrace_follower = undef;
9006                     $keep_going      = 1;
9007                 }
9008
9009                 # anonymous sub
9010                 elsif ( $block_type =~ /^sub\W*$/ ) {
9011
9012                     if ($is_one_line_block) {
9013                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
9014                     }
9015                     else {
9016                         $rbrace_follower = \%is_anon_sub_brace_follower;
9017                     }
9018                 }
9019
9020                 # None of the above: specify what can follow a closing
9021                 # brace of a block which is not an
9022                 # if/elsif/else/do/sort/map/grep/eval
9023                 # Testfiles:
9024                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9025                 else {
9026                     $rbrace_follower = \%is_other_brace_follower;
9027                 }
9028
9029                 # See if an elsif block is followed by another elsif or else;
9030                 # complain if not.
9031                 if ( $block_type eq 'elsif' ) {
9032
9033                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
9034                         $looking_for_else = 1;    # ok, check on next line
9035                     }
9036                     else {
9037
9038                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9039                             write_logfile_entry("No else block :(\n");
9040                         }
9041                     }
9042                 }
9043
9044                 # keep going after certain block types (map,sort,grep,eval)
9045                 # added eval for borris.t
9046                 if ($keep_going) {
9047
9048                     # keep going
9049                 }
9050
9051                 # if no more tokens, postpone decision until re-entring
9052                 elsif ( ( $next_nonblank_token_type eq 'b' )
9053                     && $rOpts_add_newlines )
9054                 {
9055                     unless ($rbrace_follower) {
9056                         output_line_to_go() unless ($no_internal_newlines);
9057                     }
9058                 }
9059
9060                 elsif ($rbrace_follower) {
9061
9062                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
9063                         output_line_to_go() unless ($no_internal_newlines);
9064                     }
9065                     $rbrace_follower = undef;
9066                 }
9067
9068                 else {
9069                     output_line_to_go() unless ($no_internal_newlines);
9070                 }
9071
9072             }    # end treatment of closing block token
9073
9074             # handle semicolon
9075             elsif ( $type eq ';' ) {
9076
9077                 # kill one-line blocks with too many semicolons
9078                 $semicolons_before_block_self_destruct--;
9079                 if (
9080                     ( $semicolons_before_block_self_destruct < 0 )
9081                     || (   $semicolons_before_block_self_destruct == 0
9082                         && $next_nonblank_token_type !~ /^[b\}]$/ )
9083                   )
9084                 {
9085                     destroy_one_line_block();
9086                 }
9087
9088                 # Remove unnecessary semicolons, but not after bare
9089                 # blocks, where it could be unsafe if the brace is
9090                 # mistokenized.
9091                 if (
9092                     (
9093                         $last_nonblank_token eq '}'
9094                         && (
9095                             $is_block_without_semicolon{
9096                                 $last_nonblank_block_type}
9097                             || $last_nonblank_block_type =~ /^sub\s+\w/
9098                             || $last_nonblank_block_type =~ /^\w+:$/ )
9099                     )
9100                     || $last_nonblank_type eq ';'
9101                   )
9102                 {
9103
9104                     if (
9105                         $rOpts->{'delete-semicolons'}
9106
9107                         # don't delete ; before a # because it would promote it
9108                         # to a block comment
9109                         && ( $next_nonblank_token_type ne '#' )
9110                       )
9111                     {
9112                         note_deleted_semicolon();
9113                         output_line_to_go()
9114                           unless ( $no_internal_newlines
9115                             || $index_start_one_line_block != UNDEFINED_INDEX );
9116                         next;
9117                     }
9118                     else {
9119                         write_logfile_entry("Extra ';'\n");
9120                     }
9121                 }
9122                 store_token_to_go();
9123
9124                 output_line_to_go()
9125                   unless ( $no_internal_newlines
9126                     || ( $rOpts_keep_interior_semicolons && $j < $jmax )
9127                     || ( $next_nonblank_token eq '}' ) );
9128
9129             }
9130
9131             # handle here_doc target string
9132             elsif ( $type eq 'h' ) {
9133                 $no_internal_newlines =
9134                   1;    # no newlines after seeing here-target
9135                 destroy_one_line_block();
9136                 store_token_to_go();
9137             }
9138
9139             # handle all other token types
9140             else {
9141
9142                 # if this is a blank...
9143                 if ( $type eq 'b' ) {
9144
9145                     # make it just one character
9146                     $token = ' ' if $rOpts_add_whitespace;
9147
9148                     # delete it if unwanted by whitespace rules
9149                     # or we are deleting all whitespace
9150                     my $ws = $$rwhite_space_flag[ $j + 1 ];
9151                     if ( ( defined($ws) && $ws == -1 )
9152                         || $rOpts_delete_old_whitespace )
9153                     {
9154
9155                         # unless it might make a syntax error
9156                         next
9157                           unless is_essential_whitespace(
9158                             $last_last_nonblank_token,
9159                             $last_last_nonblank_type,
9160                             $tokens_to_go[$max_index_to_go],
9161                             $types_to_go[$max_index_to_go],
9162                             $$rtokens[ $j + 1 ],
9163                             $$rtoken_type[ $j + 1 ]
9164                           );
9165                     }
9166                 }
9167                 store_token_to_go();
9168             }
9169
9170             # remember two previous nonblank OUTPUT tokens
9171             if ( $type ne '#' && $type ne 'b' ) {
9172                 $last_last_nonblank_token = $last_nonblank_token;
9173                 $last_last_nonblank_type  = $last_nonblank_type;
9174                 $last_nonblank_token      = $token;
9175                 $last_nonblank_type       = $type;
9176                 $last_nonblank_block_type = $block_type;
9177             }
9178
9179             # unset the continued-quote flag since it only applies to the
9180             # first token, and we want to resume normal formatting if
9181             # there are additional tokens on the line
9182             $in_continued_quote = 0;
9183
9184         }    # end of loop over all tokens in this 'line_of_tokens'
9185
9186         # we have to flush ..
9187         if (
9188
9189             # if there is a side comment
9190             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9191
9192             # if this line ends in a quote
9193             # NOTE: This is critically important for insuring that quoted lines
9194             # do not get processed by things like -sot and -sct
9195             || $in_quote
9196
9197             # if this is a VERSION statement
9198             || $is_VERSION_statement
9199
9200             # to keep a label on one line if that is how it is now
9201             || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9202
9203             # if we are instructed to keep all old line breaks
9204             || !$rOpts->{'delete-old-newlines'}
9205           )
9206         {
9207             destroy_one_line_block();
9208             output_line_to_go();
9209         }
9210
9211         # mark old line breakpoints in current output stream
9212         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9213             $old_breakpoint_to_go[$max_index_to_go] = 1;
9214         }
9215     }    # end sub print_line_of_tokens
9216 }    # end print_line_of_tokens
9217
9218 # sub output_line_to_go sends one logical line of tokens on down the
9219 # pipeline to the VerticalAligner package, breaking the line into continuation
9220 # lines as necessary.  The line of tokens is ready to go in the "to_go"
9221 # arrays.
9222 sub output_line_to_go {
9223
9224     # debug stuff; this routine can be called from many points
9225     FORMATTER_DEBUG_FLAG_OUTPUT && do {
9226         my ( $a, $b, $c ) = caller;
9227         write_diagnostics(
9228 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9229         );
9230         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9231         write_diagnostics("$output_str\n");
9232     };
9233
9234     # just set a tentative breakpoint if we might be in a one-line block
9235     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9236         set_forced_breakpoint($max_index_to_go);
9237         return;
9238     }
9239
9240     my $cscw_block_comment;
9241     $cscw_block_comment = add_closing_side_comment()
9242       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9243
9244     match_opening_and_closing_tokens();
9245
9246     # tell the -lp option we are outputting a batch so it can close
9247     # any unfinished items in its stack
9248     finish_lp_batch();
9249
9250     # If this line ends in a code block brace, set breaks at any
9251     # previous closing code block braces to breakup a chain of code
9252     # blocks on one line.  This is very rare but can happen for
9253     # user-defined subs.  For example we might be looking at this:
9254     #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9255     my $saw_good_break = 0;    # flag to force breaks even if short line
9256     if (
9257
9258         # looking for opening or closing block brace
9259         $block_type_to_go[$max_index_to_go]
9260
9261         # but not one of these which are never duplicated on a line:
9262         # until|while|for|if|elsif|else
9263         && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9264       )
9265     {
9266         my $lev = $nesting_depth_to_go[$max_index_to_go];
9267
9268         # Walk backwards from the end and
9269         # set break at any closing block braces at the same level.
9270         # But quit if we are not in a chain of blocks.
9271         for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9272             last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
9273             next if ( $levels_to_go[$i] > $lev );    # skip past higher level
9274
9275             if ( $block_type_to_go[$i] ) {
9276                 if ( $tokens_to_go[$i] eq '}' ) {
9277                     set_forced_breakpoint($i);
9278                     $saw_good_break = 1;
9279                 }
9280             }
9281
9282             # quit if we see anything besides words, function, blanks
9283             # at this level
9284             elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
9285         }
9286     }
9287
9288     my $imin = 0;
9289     my $imax = $max_index_to_go;
9290
9291     # trim any blank tokens
9292     if ( $max_index_to_go >= 0 ) {
9293         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9294         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9295     }
9296
9297     # anything left to write?
9298     if ( $imin <= $imax ) {
9299
9300         # add a blank line before certain key types
9301         if ( $last_line_leading_type !~ /^[#b]/ ) {
9302             my $want_blank    = 0;
9303             my $leading_token = $tokens_to_go[$imin];
9304             my $leading_type  = $types_to_go[$imin];
9305
9306             # blank lines before subs except declarations and one-liners
9307             # MCONVERSION LOCATION - for sub tokenization change
9308             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9309                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9310                   && (
9311                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9312                         $imax ) !~ /^[\;\}]$/
9313                   );
9314             }
9315
9316             # break before all package declarations
9317             # MCONVERSION LOCATION - for tokenizaton change
9318             elsif ($leading_token =~ /^(package\s)/
9319                 && $leading_type eq 'i' )
9320             {
9321                 $want_blank = ( $rOpts->{'blanks-before-subs'} );
9322             }
9323
9324             # break before certain key blocks except one-liners
9325             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9326                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9327                   && (
9328                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9329                         $imax ) ne '}'
9330                   );
9331             }
9332
9333             # Break before certain block types if we haven't had a
9334             # break at this level for a while.  This is the
9335             # difficult decision..
9336             elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9337                 && $leading_type eq 'k' )
9338             {
9339                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9340                 if ( !defined($lc) ) { $lc = 0 }
9341
9342                 $want_blank = $rOpts->{'blanks-before-blocks'}
9343                   && $lc >= $rOpts->{'long-block-line-count'}
9344                   && $file_writer_object->get_consecutive_nonblank_lines() >=
9345                   $rOpts->{'long-block-line-count'}
9346                   && (
9347                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9348                         $imax ) ne '}'
9349                   );
9350             }
9351
9352             if ($want_blank) {
9353
9354                 # future: send blank line down normal path to VerticalAligner
9355                 Perl::Tidy::VerticalAligner::flush();
9356                 $file_writer_object->write_blank_code_line();
9357             }
9358         }
9359
9360         # update blank line variables and count number of consecutive
9361         # non-blank, non-comment lines at this level
9362         $last_last_line_leading_level = $last_line_leading_level;
9363         $last_line_leading_level      = $levels_to_go[$imin];
9364         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9365         $last_line_leading_type = $types_to_go[$imin];
9366         if (   $last_line_leading_level == $last_last_line_leading_level
9367             && $last_line_leading_type ne 'b'
9368             && $last_line_leading_type ne '#'
9369             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9370         {
9371             $nonblank_lines_at_depth[$last_line_leading_level]++;
9372         }
9373         else {
9374             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9375         }
9376
9377         FORMATTER_DEBUG_FLAG_FLUSH && do {
9378             my ( $package, $file, $line ) = caller;
9379             print
9380 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9381         };
9382
9383         # add a couple of extra terminal blank tokens
9384         pad_array_to_go();
9385
9386         # set all forced breakpoints for good list formatting
9387         my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9388
9389         if (
9390             $max_index_to_go > 0
9391             && (
9392                    $is_long_line
9393                 || $old_line_count_in_batch > 1
9394                 || is_unbalanced_batch()
9395                 || (
9396                     $comma_count_in_batch
9397                     && (   $rOpts_maximum_fields_per_table > 0
9398                         || $rOpts_comma_arrow_breakpoints == 0 )
9399                 )
9400             )
9401           )
9402         {
9403             $saw_good_break ||= scan_list();
9404         }
9405
9406         # let $ri_first and $ri_last be references to lists of
9407         # first and last tokens of line fragments to output..
9408         my ( $ri_first, $ri_last );
9409
9410         # write a single line if..
9411         if (
9412
9413             # we aren't allowed to add any newlines
9414             !$rOpts_add_newlines
9415
9416             # or, we don't already have an interior breakpoint
9417             # and we didn't see a good breakpoint
9418             || (
9419                    !$forced_breakpoint_count
9420                 && !$saw_good_break
9421
9422                 # and this line is 'short'
9423                 && !$is_long_line
9424             )
9425           )
9426         {
9427             @$ri_first = ($imin);
9428             @$ri_last  = ($imax);
9429         }
9430
9431         # otherwise use multiple lines
9432         else {
9433
9434             ( $ri_first, $ri_last, my $colon_count ) =
9435               set_continuation_breaks($saw_good_break);
9436
9437             break_all_chain_tokens( $ri_first, $ri_last );
9438
9439             # now we do a correction step to clean this up a bit
9440             # (The only time we would not do this is for debugging)
9441             if ( $rOpts->{'recombine'} ) {
9442                 ( $ri_first, $ri_last ) =
9443                   recombine_breakpoints( $ri_first, $ri_last );
9444             }
9445
9446             insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9447         }
9448
9449         # do corrector step if -lp option is used
9450         my $do_not_pad = 0;
9451         if ($rOpts_line_up_parentheses) {
9452             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9453         }
9454         send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9455     }
9456     prepare_for_new_input_lines();
9457
9458     # output any new -cscw block comment
9459     if ($cscw_block_comment) {
9460         flush();
9461         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9462     }
9463 }
9464
9465 sub note_added_semicolon {
9466     $last_added_semicolon_at = $input_line_number;
9467     if ( $added_semicolon_count == 0 ) {
9468         $first_added_semicolon_at = $last_added_semicolon_at;
9469     }
9470     $added_semicolon_count++;
9471     write_logfile_entry("Added ';' here\n");
9472 }
9473
9474 sub note_deleted_semicolon {
9475     $last_deleted_semicolon_at = $input_line_number;
9476     if ( $deleted_semicolon_count == 0 ) {
9477         $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9478     }
9479     $deleted_semicolon_count++;
9480     write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
9481 }
9482
9483 sub note_embedded_tab {
9484     $embedded_tab_count++;
9485     $last_embedded_tab_at = $input_line_number;
9486     if ( !$first_embedded_tab_at ) {
9487         $first_embedded_tab_at = $last_embedded_tab_at;
9488     }
9489
9490     if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
9491         write_logfile_entry("Embedded tabs in quote or pattern\n");
9492     }
9493 }
9494
9495 sub starting_one_line_block {
9496
9497     # after seeing an opening curly brace, look for the closing brace
9498     # and see if the entire block will fit on a line.  This routine is
9499     # not always right because it uses the old whitespace, so a check
9500     # is made later (at the closing brace) to make sure we really
9501     # have a one-line block.  We have to do this preliminary check,
9502     # though, because otherwise we would always break at a semicolon
9503     # within a one-line block if the block contains multiple statements.
9504
9505     my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9506         $rblock_type )
9507       = @_;
9508
9509     # kill any current block - we can only go 1 deep
9510     destroy_one_line_block();
9511
9512     # return value:
9513     #  1=distance from start of block to opening brace exceeds line length
9514     #  0=otherwise
9515
9516     my $i_start = 0;
9517
9518     # shouldn't happen: there must have been a prior call to
9519     # store_token_to_go to put the opening brace in the output stream
9520     if ( $max_index_to_go < 0 ) {
9521         warning("program bug: store_token_to_go called incorrectly\n");
9522         report_definite_bug();
9523     }
9524     else {
9525
9526         # cannot use one-line blocks with cuddled else else/elsif lines
9527         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9528             return 0;
9529         }
9530     }
9531
9532     my $block_type = $$rblock_type[$j];
9533
9534     # find the starting keyword for this block (such as 'if', 'else', ...)
9535
9536     if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9537         $i_start = $max_index_to_go;
9538     }
9539
9540     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9541
9542         # For something like "if (xxx) {", the keyword "if" will be
9543         # just after the most recent break. This will be 0 unless
9544         # we have just killed a one-line block and are starting another.
9545         # (doif.t)
9546         $i_start = $index_max_forced_break + 1;
9547         if ( $types_to_go[$i_start] eq 'b' ) {
9548             $i_start++;
9549         }
9550
9551         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9552             return 0;
9553         }
9554     }
9555
9556     # the previous nonblank token should start these block types
9557     elsif (
9558         ( $last_last_nonblank_token_to_go eq $block_type )
9559         || (   $block_type =~ /^sub/
9560             && $last_last_nonblank_token_to_go =~ /^sub/ )
9561       )
9562     {
9563         $i_start = $last_last_nonblank_index_to_go;
9564     }
9565
9566     # patch for SWITCH/CASE to retain one-line case/when blocks
9567     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9568         $i_start = $index_max_forced_break + 1;
9569         if ( $types_to_go[$i_start] eq 'b' ) {
9570             $i_start++;
9571         }
9572         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9573             return 0;
9574         }
9575     }
9576
9577     else {
9578         return 1;
9579     }
9580
9581     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9582
9583     my $i;
9584
9585     # see if length is too long to even start
9586     if ( $pos > $rOpts_maximum_line_length ) {
9587         return 1;
9588     }
9589
9590     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9591
9592         # old whitespace could be arbitrarily large, so don't use it
9593         if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9594         else                              { $pos += length( $$rtokens[$i] ) }
9595
9596         # Return false result if we exceed the maximum line length,
9597         if ( $pos > $rOpts_maximum_line_length ) {
9598             return 0;
9599         }
9600
9601         # or encounter another opening brace before finding the closing brace.
9602         elsif ($$rtokens[$i] eq '{'
9603             && $$rtoken_type[$i] eq '{'
9604             && $$rblock_type[$i] )
9605         {
9606             return 0;
9607         }
9608
9609         # if we find our closing brace..
9610         elsif ($$rtokens[$i] eq '}'
9611             && $$rtoken_type[$i] eq '}'
9612             && $$rblock_type[$i] )
9613         {
9614
9615             # be sure any trailing comment also fits on the line
9616             my $i_nonblank =
9617               ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9618
9619             if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9620                 $pos += length( $$rtokens[$i_nonblank] );
9621
9622                 if ( $i_nonblank > $i + 1 ) {
9623                     $pos += length( $$rtokens[ $i + 1 ] );
9624                 }
9625
9626                 if ( $pos > $rOpts_maximum_line_length ) {
9627                     return 0;
9628                 }
9629             }
9630
9631             # ok, it's a one-line block
9632             create_one_line_block( $i_start, 20 );
9633             return 0;
9634         }
9635
9636         # just keep going for other characters
9637         else {
9638         }
9639     }
9640
9641     # Allow certain types of new one-line blocks to form by joining
9642     # input lines.  These can be safely done, but for other block types,
9643     # we keep old one-line blocks but do not form new ones. It is not
9644     # always a good idea to make as many one-line blocks as possible,
9645     # so other types are not done.  The user can always use -mangle.
9646     if ( $is_sort_map_grep_eval{$block_type} ) {
9647         create_one_line_block( $i_start, 1 );
9648     }
9649
9650     return 0;
9651 }
9652
9653 sub unstore_token_to_go {
9654
9655     # remove most recent token from output stream
9656     if ( $max_index_to_go > 0 ) {
9657         $max_index_to_go--;
9658     }
9659     else {
9660         $max_index_to_go = UNDEFINED_INDEX;
9661     }
9662
9663 }
9664
9665 sub want_blank_line {
9666     flush();
9667     $file_writer_object->want_blank_line();
9668 }
9669
9670 sub write_unindented_line {
9671     flush();
9672     $file_writer_object->write_line( $_[0] );
9673 }
9674
9675 sub undo_lp_ci {
9676
9677     # If there is a single, long parameter within parens, like this:
9678     #
9679     #  $self->command( "/msg "
9680     #        . $infoline->chan
9681     #        . " You said $1, but did you know that it's square was "
9682     #        . $1 * $1 . " ?" );
9683     #
9684     # we can remove the continuation indentation of the 2nd and higher lines
9685     # to achieve this effect, which is more pleasing:
9686     #
9687     #  $self->command("/msg "
9688     #                 . $infoline->chan
9689     #                 . " You said $1, but did you know that it's square was "
9690     #                 . $1 * $1 . " ?");
9691
9692     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9693     my $max_line = @$ri_first - 1;
9694
9695     # must be multiple lines
9696     return unless $max_line > $line_open;
9697
9698     my $lev_start     = $levels_to_go[$i_start];
9699     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9700
9701     # see if all additional lines in this container have continuation
9702     # indentation
9703     my $n;
9704     my $line_1 = 1 + $line_open;
9705     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9706         my $ibeg = $$ri_first[$n];
9707         my $iend = $$ri_last[$n];
9708         if ( $ibeg eq $closing_index ) { $n--; last }
9709         return if ( $lev_start != $levels_to_go[$ibeg] );
9710         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
9711         last   if ( $closing_index <= $iend );
9712     }
9713
9714     # we can reduce the indentation of all continuation lines
9715     my $continuation_line_count = $n - $line_open;
9716     @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9717       (0) x ($continuation_line_count);
9718     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9719       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
9720 }
9721
9722 sub set_logical_padding {
9723
9724     # Look at a batch of lines and see if extra padding can improve the
9725     # alignment when there are certain leading operators. Here is an
9726     # example, in which some extra space is introduced before
9727     # '( $year' to make it line up with the subsequent lines:
9728     #
9729     #       if (   ( $Year < 1601 )
9730     #           || ( $Year > 2899 )
9731     #           || ( $EndYear < 1601 )
9732     #           || ( $EndYear > 2899 ) )
9733     #       {
9734     #           &Error_OutOfRange;
9735     #       }
9736     #
9737     my ( $ri_first, $ri_last ) = @_;
9738     my $max_line = @$ri_first - 1;
9739
9740     my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
9741         $tok_next, $has_leading_op_next, $has_leading_op );
9742
9743     # looking at each line of this batch..
9744     foreach $line ( 0 .. $max_line - 1 ) {
9745
9746         # see if the next line begins with a logical operator
9747         $ibeg                = $$ri_first[$line];
9748         $iend                = $$ri_last[$line];
9749         $ibeg_next           = $$ri_first[ $line + 1 ];
9750         $tok_next            = $tokens_to_go[$ibeg_next];
9751         $has_leading_op_next = $is_chain_operator{$tok_next};
9752         next unless ($has_leading_op_next);
9753
9754         # next line must not be at lesser depth
9755         next
9756           if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
9757
9758         # identify the token in this line to be padded on the left
9759         $ipad = undef;
9760
9761         # handle lines at same depth...
9762         if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
9763
9764             # if this is not first line of the batch ...
9765             if ( $line > 0 ) {
9766
9767                 # and we have leading operator
9768                 next if $has_leading_op;
9769
9770                 # and ..
9771                 # 1. the previous line is at lesser depth, or
9772                 # 2. the previous line ends in an assignment
9773                 # 3. the previous line ends in a 'return'
9774                 #
9775                 # Example 1: previous line at lesser depth
9776                 #       if (   ( $Year < 1601 )      # <- we are here but
9777                 #           || ( $Year > 2899 )      #  list has not yet
9778                 #           || ( $EndYear < 1601 )   # collapsed vertically
9779                 #           || ( $EndYear > 2899 ) )
9780                 #       {
9781                 #
9782                 # Example 2: previous line ending in assignment:
9783                 #    $leapyear =
9784                 #        $year % 4   ? 0     # <- We are here
9785                 #      : $year % 100 ? 1
9786                 #      : $year % 400 ? 0
9787                 #      : 1;
9788
9789                 # be sure levels agree (do not indent after an indented 'if')
9790                 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
9791                 next
9792                   unless (
9793                     $is_assignment{ $types_to_go[$iendm] }
9794                     || ( $nesting_depth_to_go[$ibegm] <
9795                         $nesting_depth_to_go[$ibeg] )
9796                     || (   $types_to_go[$iendm] eq 'k'
9797                         && $tokens_to_go[$iendm] eq 'return' )
9798                   );
9799
9800                 # we will add padding before the first token
9801                 $ipad = $ibeg;
9802             }
9803
9804             # for first line of the batch..
9805             else {
9806
9807                 # WARNING: Never indent if first line is starting in a
9808                 # continued quote, which would change the quote.
9809                 next if $starting_in_quote;
9810
9811                 # if this is text after closing '}'
9812                 # then look for an interior token to pad
9813                 if ( $types_to_go[$ibeg] eq '}' ) {
9814
9815                 }
9816
9817                 # otherwise, we might pad if it looks really good
9818                 else {
9819
9820                     # we might pad token $ibeg, so be sure that it
9821                     # is at the same depth as the next line.
9822                     next
9823                       if ( $nesting_depth_to_go[$ibeg] !=
9824                         $nesting_depth_to_go[$ibeg_next] );
9825
9826                     # We can pad on line 1 of a statement if at least 3
9827                     # lines will be aligned. Otherwise, it
9828                     # can look very confusing.
9829
9830                  # We have to be careful not to pad if there are too few
9831                  # lines.  The current rule is:
9832                  # (1) in general we require at least 3 consecutive lines
9833                  # with the same leading chain operator token,
9834                  # (2) but an exception is that we only require two lines
9835                  # with leading colons if there are no more lines.  For example,
9836                  # the first $i in the following snippet would get padding
9837                  # by the second rule:
9838                  #
9839                  #   $i == 1 ? ( "First", "Color" )
9840                  # : $i == 2 ? ( "Then",  "Rarity" )
9841                  # :           ( "Then",  "Name" );
9842
9843                     if ( $max_line > 1 ) {
9844                         my $leading_token = $tokens_to_go[$ibeg_next];
9845                         my $tokens_differ;
9846
9847                         # never indent line 1 of a '.' series because
9848                         # previous line is most likely at same level.
9849                         # TODO: we should also look at the leasing_spaces
9850                         # of the last output line and skip if it is same
9851                         # as this line.
9852                         next if ( $leading_token eq '.' );
9853
9854                         my $count = 1;
9855                         foreach my $l ( 2 .. 3 ) {
9856                             last if ( $line + $l > $max_line );
9857                             my $ibeg_next_next = $$ri_first[ $line + $l ];
9858                             if ( $tokens_to_go[$ibeg_next_next] ne
9859                                 $leading_token )
9860                             {
9861                                 $tokens_differ = 1;
9862                                 last;
9863                             }
9864                             $count++;
9865                         }
9866                         next if ($tokens_differ);
9867                         next if ( $count < 3 && $leading_token ne ':' );
9868                         $ipad = $ibeg;
9869                     }
9870                     else {
9871                         next;
9872                     }
9873                 }
9874             }
9875         }
9876
9877         # find interior token to pad if necessary
9878         if ( !defined($ipad) ) {
9879
9880             for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
9881
9882                 # find any unclosed container
9883                 next
9884                   unless ( $type_sequence_to_go[$i]
9885                     && $mate_index_to_go[$i] > $iend );
9886
9887                 # find next nonblank token to pad
9888                 $ipad = $i + 1;
9889                 if ( $types_to_go[$ipad] eq 'b' ) {
9890                     $ipad++;
9891                     last if ( $ipad > $iend );
9892                 }
9893             }
9894             last unless $ipad;
9895         }
9896
9897         # next line must not be at greater depth
9898         my $iend_next = $$ri_last[ $line + 1 ];
9899         next
9900           if ( $nesting_depth_to_go[ $iend_next + 1 ] >
9901             $nesting_depth_to_go[$ipad] );
9902
9903         # lines must be somewhat similar to be padded..
9904         my $inext_next = $ibeg_next + 1;
9905         if ( $types_to_go[$inext_next] eq 'b' ) {
9906             $inext_next++;
9907         }
9908         my $type = $types_to_go[$ipad];
9909
9910         # see if there are multiple continuation lines
9911         my $logical_continuation_lines = 1;
9912         if ( $line + 2 <= $max_line ) {
9913             my $leading_token  = $tokens_to_go[$ibeg_next];
9914             my $ibeg_next_next = $$ri_first[ $line + 2 ];
9915             if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
9916                 && $nesting_depth_to_go[$ibeg_next] eq
9917                 $nesting_depth_to_go[$ibeg_next_next] )
9918             {
9919                 $logical_continuation_lines++;
9920             }
9921         }
9922         if (
9923
9924             # either we have multiple continuation lines to follow
9925             # and we are not padding the first token
9926             ( $logical_continuation_lines > 1 && $ipad > 0 )
9927
9928             # or..
9929             || (
9930
9931                 # types must match
9932                 $types_to_go[$inext_next] eq $type
9933
9934                 # and keywords must match if keyword
9935                 && !(
9936                        $type eq 'k'
9937                     && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
9938                 )
9939             )
9940           )
9941         {
9942
9943             #----------------------begin special checks--------------
9944             #
9945             # SPECIAL CHECK 1:
9946             # A check is needed before we can make the pad.
9947             # If we are in a list with some long items, we want each
9948             # item to stand out.  So in the following example, the
9949             # first line begining with '$casefold->' would look good
9950             # padded to align with the next line, but then it
9951             # would be indented more than the last line, so we
9952             # won't do it.
9953             #
9954             #  ok(
9955             #      $casefold->{code}         eq '0041'
9956             #        && $casefold->{status}  eq 'C'
9957             #        && $casefold->{mapping} eq '0061',
9958             #      'casefold 0x41'
9959             #  );
9960             #
9961             # Note:
9962             # It would be faster, and almost as good, to use a comma
9963             # count, and not pad if comma_count > 1 and the previous
9964             # line did not end with a comma.
9965             #
9966             my $ok_to_pad = 1;
9967
9968             my $ibg   = $$ri_first[ $line + 1 ];
9969             my $depth = $nesting_depth_to_go[ $ibg + 1 ];
9970
9971             # just use simplified formula for leading spaces to avoid
9972             # needless sub calls
9973             my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
9974
9975             # look at each line beyond the next ..
9976             my $l = $line + 1;
9977             foreach $l ( $line + 2 .. $max_line ) {
9978                 my $ibg = $$ri_first[$l];
9979
9980                 # quit looking at the end of this container
9981                 last
9982                   if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
9983                   || ( $nesting_depth_to_go[$ibg] < $depth );
9984
9985                 # cannot do the pad if a later line would be
9986                 # outdented more
9987                 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
9988                     $ok_to_pad = 0;
9989                     last;
9990                 }
9991             }
9992
9993             # don't pad if we end in a broken list
9994             if ( $l == $max_line ) {
9995                 my $i2 = $$ri_last[$l];
9996                 if ( $types_to_go[$i2] eq '#' ) {
9997                     my $i1 = $$ri_first[$l];
9998                     next
9999                       if (
10000                         terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10001                             $i2 ) eq ','
10002                       );
10003                 }
10004             }
10005
10006             # SPECIAL CHECK 2:
10007             # a minus may introduce a quoted variable, and we will
10008             # add the pad only if this line begins with a bare word,
10009             # such as for the word 'Button' here:
10010             #    [
10011             #         Button      => "Print letter \"~$_\"",
10012             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10013             #        -accelerator => "Meta+$_"
10014             #    ];
10015             #
10016             #  On the other hand, if 'Button' is quoted, it looks best
10017             #  not to pad:
10018             #    [
10019             #        'Button'     => "Print letter \"~$_\"",
10020             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10021             #        -accelerator => "Meta+$_"
10022             #    ];
10023             if ( $types_to_go[$ibeg_next] eq 'm' ) {
10024                 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10025             }
10026
10027             next unless $ok_to_pad;
10028
10029             #----------------------end special check---------------
10030
10031             my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
10032             my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10033             $pad_spaces = $length_2 - $length_1;
10034
10035             # make sure this won't change if -lp is used
10036             my $indentation_1 = $leading_spaces_to_go[$ibeg];
10037             if ( ref($indentation_1) ) {
10038                 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10039                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10040                     unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10041                         $pad_spaces = 0;
10042                     }
10043                 }
10044             }
10045
10046             # we might be able to handle a pad of -1 by removing a blank
10047             # token
10048             if ( $pad_spaces < 0 ) {
10049                 if ( $pad_spaces == -1 ) {
10050                     if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10051                         $tokens_to_go[ $ipad - 1 ] = '';
10052                     }
10053                 }
10054                 $pad_spaces = 0;
10055             }
10056
10057             # now apply any padding for alignment
10058             if ( $ipad >= 0 && $pad_spaces ) {
10059                 my $length_t = total_line_length( $ibeg, $iend );
10060                 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10061                     $tokens_to_go[$ipad] =
10062                       ' ' x $pad_spaces . $tokens_to_go[$ipad];
10063                 }
10064             }
10065         }
10066     }
10067     continue {
10068         $iendm          = $iend;
10069         $ibegm          = $ibeg;
10070         $has_leading_op = $has_leading_op_next;
10071     }    # end of loop over lines
10072     return;
10073 }
10074
10075 sub correct_lp_indentation {
10076
10077     # When the -lp option is used, we need to make a last pass through
10078     # each line to correct the indentation positions in case they differ
10079     # from the predictions.  This is necessary because perltidy uses a
10080     # predictor/corrector method for aligning with opening parens.  The
10081     # predictor is usually good, but sometimes stumbles.  The corrector
10082     # tries to patch things up once the actual opening paren locations
10083     # are known.
10084     my ( $ri_first, $ri_last ) = @_;
10085     my $do_not_pad = 0;
10086
10087     #  Note on flag '$do_not_pad':
10088     #  We want to avoid a situation like this, where the aligner inserts
10089     #  whitespace before the '=' to align it with a previous '=', because
10090     #  otherwise the parens might become mis-aligned in a situation like
10091     #  this, where the '=' has become aligned with the previous line,
10092     #  pushing the opening '(' forward beyond where we want it.
10093     #
10094     #  $mkFloor::currentRoom = '';
10095     #  $mkFloor::c_entry     = $c->Entry(
10096     #                                 -width        => '10',
10097     #                                 -relief       => 'sunken',
10098     #                                 ...
10099     #                                 );
10100     #
10101     #  We leave it to the aligner to decide how to do this.
10102
10103     # first remove continuation indentation if appropriate
10104     my $max_line = @$ri_first - 1;
10105
10106     # looking at each line of this batch..
10107     my ( $ibeg, $iend );
10108     my $line;
10109     foreach $line ( 0 .. $max_line ) {
10110         $ibeg = $$ri_first[$line];
10111         $iend = $$ri_last[$line];
10112
10113         # looking at each token in this output line..
10114         my $i;
10115         foreach $i ( $ibeg .. $iend ) {
10116
10117             # How many space characters to place before this token
10118             # for special alignment.  Actual padding is done in the
10119             # continue block.
10120
10121             # looking for next unvisited indentation item
10122             my $indentation = $leading_spaces_to_go[$i];
10123             if ( !$indentation->get_MARKED() ) {
10124                 $indentation->set_MARKED(1);
10125
10126                 # looking for indentation item for which we are aligning
10127                 # with parens, braces, and brackets
10128                 next unless ( $indentation->get_ALIGN_PAREN() );
10129
10130                 # skip closed container on this line
10131                 if ( $i > $ibeg ) {
10132                     my $im = $i - 1;
10133                     if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
10134                     if (   $type_sequence_to_go[$im]
10135                         && $mate_index_to_go[$im] <= $iend )
10136                     {
10137                         next;
10138                     }
10139                 }
10140
10141                 if ( $line == 1 && $i == $ibeg ) {
10142                     $do_not_pad = 1;
10143                 }
10144
10145                 # Ok, let's see what the error is and try to fix it
10146                 my $actual_pos;
10147                 my $predicted_pos = $indentation->get_SPACES();
10148                 if ( $i > $ibeg ) {
10149
10150                     # token is mid-line - use length to previous token
10151                     $actual_pos = total_line_length( $ibeg, $i - 1 );
10152
10153                     # for mid-line token, we must check to see if all
10154                     # additional lines have continuation indentation,
10155                     # and remove it if so.  Otherwise, we do not get
10156                     # good alignment.
10157                     my $closing_index = $indentation->get_CLOSED();
10158                     if ( $closing_index > $iend ) {
10159                         my $ibeg_next = $$ri_first[ $line + 1 ];
10160                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
10161                             undo_lp_ci( $line, $i, $closing_index, $ri_first,
10162                                 $ri_last );
10163                         }
10164                     }
10165                 }
10166                 elsif ( $line > 0 ) {
10167
10168                     # handle case where token starts a new line;
10169                     # use length of previous line
10170                     my $ibegm = $$ri_first[ $line - 1 ];
10171                     my $iendm = $$ri_last[ $line - 1 ];
10172                     $actual_pos = total_line_length( $ibegm, $iendm );
10173
10174                     # follow -pt style
10175                     ++$actual_pos
10176                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
10177                 }
10178                 else {
10179
10180                     # token is first character of first line of batch
10181                     $actual_pos = $predicted_pos;
10182                 }
10183
10184                 my $move_right = $actual_pos - $predicted_pos;
10185
10186                 # done if no error to correct (gnu2.t)
10187                 if ( $move_right == 0 ) {
10188                     $indentation->set_RECOVERABLE_SPACES($move_right);
10189                     next;
10190                 }
10191
10192                 # if we have not seen closure for this indentation in
10193                 # this batch, we can only pass on a request to the
10194                 # vertical aligner
10195                 my $closing_index = $indentation->get_CLOSED();
10196
10197                 if ( $closing_index < 0 ) {
10198                     $indentation->set_RECOVERABLE_SPACES($move_right);
10199                     next;
10200                 }
10201
10202                 # If necessary, look ahead to see if there is really any
10203                 # leading whitespace dependent on this whitespace, and
10204                 # also find the longest line using this whitespace.
10205                 # Since it is always safe to move left if there are no
10206                 # dependents, we only need to do this if we may have
10207                 # dependent nodes or need to move right.
10208
10209                 my $right_margin = 0;
10210                 my $have_child   = $indentation->get_HAVE_CHILD();
10211
10212                 my %saw_indentation;
10213                 my $line_count = 1;
10214                 $saw_indentation{$indentation} = $indentation;
10215
10216                 if ( $have_child || $move_right > 0 ) {
10217                     $have_child = 0;
10218                     my $max_length = 0;
10219                     if ( $i == $ibeg ) {
10220                         $max_length = total_line_length( $ibeg, $iend );
10221                     }
10222
10223                     # look ahead at the rest of the lines of this batch..
10224                     my $line_t;
10225                     foreach $line_t ( $line + 1 .. $max_line ) {
10226                         my $ibeg_t = $$ri_first[$line_t];
10227                         my $iend_t = $$ri_last[$line_t];
10228                         last if ( $closing_index <= $ibeg_t );
10229
10230                         # remember all different indentation objects
10231                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
10232                         $saw_indentation{$indentation_t} = $indentation_t;
10233                         $line_count++;
10234
10235                         # remember longest line in the group
10236                         my $length_t = total_line_length( $ibeg_t, $iend_t );
10237                         if ( $length_t > $max_length ) {
10238                             $max_length = $length_t;
10239                         }
10240                     }
10241                     $right_margin = $rOpts_maximum_line_length - $max_length;
10242                     if ( $right_margin < 0 ) { $right_margin = 0 }
10243                 }
10244
10245                 my $first_line_comma_count =
10246                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
10247                 my $comma_count = $indentation->get_COMMA_COUNT();
10248                 my $arrow_count = $indentation->get_ARROW_COUNT();
10249
10250                 # This is a simple approximate test for vertical alignment:
10251                 # if we broke just after an opening paren, brace, bracket,
10252                 # and there are 2 or more commas in the first line,
10253                 # and there are no '=>'s,
10254                 # then we are probably vertically aligned.  We could set
10255                 # an exact flag in sub scan_list, but this is good
10256                 # enough.
10257                 my $indentation_count = keys %saw_indentation;
10258                 my $is_vertically_aligned =
10259                   (      $i == $ibeg
10260                       && $first_line_comma_count > 1
10261                       && $indentation_count == 1
10262                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
10263
10264                 # Make the move if possible ..
10265                 if (
10266
10267                     # we can always move left
10268                     $move_right < 0
10269
10270                     # but we should only move right if we are sure it will
10271                     # not spoil vertical alignment
10272                     || ( $comma_count == 0 )
10273                     || ( $comma_count > 0 && !$is_vertically_aligned )
10274                   )
10275                 {
10276                     my $move =
10277                       ( $move_right <= $right_margin )
10278                       ? $move_right
10279                       : $right_margin;
10280
10281                     foreach ( keys %saw_indentation ) {
10282                         $saw_indentation{$_}
10283                           ->permanently_decrease_AVAILABLE_SPACES( -$move );
10284                     }
10285                 }
10286
10287                 # Otherwise, record what we want and the vertical aligner
10288                 # will try to recover it.
10289                 else {
10290                     $indentation->set_RECOVERABLE_SPACES($move_right);
10291                 }
10292             }
10293         }
10294     }
10295     return $do_not_pad;
10296 }
10297
10298 # flush is called to output any tokens in the pipeline, so that
10299 # an alternate source of lines can be written in the correct order
10300
10301 sub flush {
10302     destroy_one_line_block();
10303     output_line_to_go();
10304     Perl::Tidy::VerticalAligner::flush();
10305 }
10306
10307 sub reset_block_text_accumulator {
10308
10309     # save text after 'if' and 'elsif' to append after 'else'
10310     if ($accumulating_text_for_block) {
10311
10312         if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10313             push @{$rleading_block_if_elsif_text}, $leading_block_text;
10314         }
10315     }
10316     $accumulating_text_for_block        = "";
10317     $leading_block_text                 = "";
10318     $leading_block_text_level           = 0;
10319     $leading_block_text_length_exceeded = 0;
10320     $leading_block_text_line_number     = 0;
10321     $leading_block_text_line_length     = 0;
10322 }
10323
10324 sub set_block_text_accumulator {
10325     my $i = shift;
10326     $accumulating_text_for_block = $tokens_to_go[$i];
10327     if ( $accumulating_text_for_block !~ /^els/ ) {
10328         $rleading_block_if_elsif_text = [];
10329     }
10330     $leading_block_text       = "";
10331     $leading_block_text_level = $levels_to_go[$i];
10332     $leading_block_text_line_number =
10333       $vertical_aligner_object->get_output_line_number();
10334     $leading_block_text_length_exceeded = 0;
10335
10336     # this will contain the column number of the last character
10337     # of the closing side comment
10338     $leading_block_text_line_length =
10339       length($accumulating_text_for_block) +
10340       length( $rOpts->{'closing-side-comment-prefix'} ) +
10341       $leading_block_text_level * $rOpts_indent_columns + 3;
10342 }
10343
10344 sub accumulate_block_text {
10345     my $i = shift;
10346
10347     # accumulate leading text for -csc, ignoring any side comments
10348     if (   $accumulating_text_for_block
10349         && !$leading_block_text_length_exceeded
10350         && $types_to_go[$i] ne '#' )
10351     {
10352
10353         my $added_length = length( $tokens_to_go[$i] );
10354         $added_length += 1 if $i == 0;
10355         my $new_line_length = $leading_block_text_line_length + $added_length;
10356
10357         # we can add this text if we don't exceed some limits..
10358         if (
10359
10360             # we must not have already exceeded the text length limit
10361             length($leading_block_text) <
10362             $rOpts_closing_side_comment_maximum_text
10363
10364             # and either:
10365             # the new total line length must be below the line length limit
10366             # or the new length must be below the text length limit
10367             # (ie, we may allow one token to exceed the text length limit)
10368             && ( $new_line_length < $rOpts_maximum_line_length
10369                 || length($leading_block_text) + $added_length <
10370                 $rOpts_closing_side_comment_maximum_text )
10371
10372             # UNLESS: we are adding a closing paren before the brace we seek.
10373             # This is an attempt to avoid situations where the ... to be
10374             # added are longer than the omitted right paren, as in:
10375
10376             #   foreach my $item (@a_rather_long_variable_name_here) {
10377             #      &whatever;
10378             #   } ## end foreach my $item (@a_rather_long_variable_name_here...
10379
10380             || (
10381                 $tokens_to_go[$i] eq ')'
10382                 && (
10383                     (
10384                            $i + 1 <= $max_index_to_go
10385                         && $block_type_to_go[ $i + 1 ] eq
10386                         $accumulating_text_for_block
10387                     )
10388                     || (   $i + 2 <= $max_index_to_go
10389                         && $block_type_to_go[ $i + 2 ] eq
10390                         $accumulating_text_for_block )
10391                 )
10392             )
10393           )
10394         {
10395
10396             # add an extra space at each newline
10397             if ( $i == 0 ) { $leading_block_text .= ' ' }
10398
10399             # add the token text
10400             $leading_block_text .= $tokens_to_go[$i];
10401             $leading_block_text_line_length = $new_line_length;
10402         }
10403
10404         # show that text was truncated if necessary
10405         elsif ( $types_to_go[$i] ne 'b' ) {
10406             $leading_block_text_length_exceeded = 1;
10407             $leading_block_text .= '...';
10408         }
10409     }
10410 }
10411
10412 {
10413     my %is_if_elsif_else_unless_while_until_for_foreach;
10414
10415     BEGIN {
10416
10417         # These block types may have text between the keyword and opening
10418         # curly.  Note: 'else' does not, but must be included to allow trailing
10419         # if/elsif text to be appended.
10420         # patch for SWITCH/CASE: added 'case' and 'when'
10421         @_ = qw(if elsif else unless while until for foreach case when);
10422         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10423     }
10424
10425     sub accumulate_csc_text {
10426
10427         # called once per output buffer when -csc is used. Accumulates
10428         # the text placed after certain closing block braces.
10429         # Defines and returns the following for this buffer:
10430
10431         my $block_leading_text = "";    # the leading text of the last '}'
10432         my $rblock_leading_if_elsif_text;
10433         my $i_block_leading_text =
10434           -1;    # index of token owning block_leading_text
10435         my $block_line_count    = 100;    # how many lines the block spans
10436         my $terminal_type       = 'b';    # type of last nonblank token
10437         my $i_terminal          = 0;      # index of last nonblank token
10438         my $terminal_block_type = "";
10439
10440         for my $i ( 0 .. $max_index_to_go ) {
10441             my $type       = $types_to_go[$i];
10442             my $block_type = $block_type_to_go[$i];
10443             my $token      = $tokens_to_go[$i];
10444
10445             # remember last nonblank token type
10446             if ( $type ne '#' && $type ne 'b' ) {
10447                 $terminal_type       = $type;
10448                 $terminal_block_type = $block_type;
10449                 $i_terminal          = $i;
10450             }
10451
10452             my $type_sequence = $type_sequence_to_go[$i];
10453             if ( $block_type && $type_sequence ) {
10454
10455                 if ( $token eq '}' ) {
10456
10457                     # restore any leading text saved when we entered this block
10458                     if ( defined( $block_leading_text{$type_sequence} ) ) {
10459                         ( $block_leading_text, $rblock_leading_if_elsif_text ) =
10460                           @{ $block_leading_text{$type_sequence} };
10461                         $i_block_leading_text = $i;
10462                         delete $block_leading_text{$type_sequence};
10463                         $rleading_block_if_elsif_text =
10464                           $rblock_leading_if_elsif_text;
10465                     }
10466
10467                     # if we run into a '}' then we probably started accumulating
10468                     # at something like a trailing 'if' clause..no harm done.
10469                     if (   $accumulating_text_for_block
10470                         && $levels_to_go[$i] <= $leading_block_text_level )
10471                     {
10472                         my $lev = $levels_to_go[$i];
10473                         reset_block_text_accumulator();
10474                     }
10475
10476                     if ( defined( $block_opening_line_number{$type_sequence} ) )
10477                     {
10478                         my $output_line_number =
10479                           $vertical_aligner_object->get_output_line_number();
10480                         $block_line_count =
10481                           $output_line_number -
10482                           $block_opening_line_number{$type_sequence} + 1;
10483                         delete $block_opening_line_number{$type_sequence};
10484                     }
10485                     else {
10486
10487                         # Error: block opening line undefined for this line..
10488                         # This shouldn't be possible, but it is not a
10489                         # significant problem.
10490                     }
10491                 }
10492
10493                 elsif ( $token eq '{' ) {
10494
10495                     my $line_number =
10496                       $vertical_aligner_object->get_output_line_number();
10497                     $block_opening_line_number{$type_sequence} = $line_number;
10498
10499                     if (   $accumulating_text_for_block
10500                         && $levels_to_go[$i] == $leading_block_text_level )
10501                     {
10502
10503                         if ( $accumulating_text_for_block eq $block_type ) {
10504
10505                             # save any leading text before we enter this block
10506                             $block_leading_text{$type_sequence} = [
10507                                 $leading_block_text,
10508                                 $rleading_block_if_elsif_text
10509                             ];
10510                             $block_opening_line_number{$type_sequence} =
10511                               $leading_block_text_line_number;
10512                             reset_block_text_accumulator();
10513                         }
10514                         else {
10515
10516                             # shouldn't happen, but not a serious error.
10517                             # We were accumulating -csc text for block type
10518                             # $accumulating_text_for_block and unexpectedly
10519                             # encountered a '{' for block type $block_type.
10520                         }
10521                     }
10522                 }
10523             }
10524
10525             if (   $type eq 'k'
10526                 && $csc_new_statement_ok
10527                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
10528                 && $token =~ /$closing_side_comment_list_pattern/o )
10529             {
10530                 set_block_text_accumulator($i);
10531             }
10532             else {
10533
10534                 # note: ignoring type 'q' because of tricks being played
10535                 # with 'q' for hanging side comments
10536                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10537                     $csc_new_statement_ok =
10538                       ( $block_type || $type eq 'J' || $type eq ';' );
10539                 }
10540                 if (   $type eq ';'
10541                     && $accumulating_text_for_block
10542                     && $levels_to_go[$i] == $leading_block_text_level )
10543                 {
10544                     reset_block_text_accumulator();
10545                 }
10546                 else {
10547                     accumulate_block_text($i);
10548                 }
10549             }
10550         }
10551
10552         # Treat an 'else' block specially by adding preceding 'if' and
10553         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
10554         # especially for cuddled-else formatting.
10555         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10556             $block_leading_text =
10557               make_else_csc_text( $i_terminal, $terminal_block_type,
10558                 $block_leading_text, $rblock_leading_if_elsif_text );
10559         }
10560
10561         return ( $terminal_type, $i_terminal, $i_block_leading_text,
10562             $block_leading_text, $block_line_count );
10563     }
10564 }
10565
10566 sub make_else_csc_text {
10567
10568     # create additional -csc text for an 'else' and optionally 'elsif',
10569     # depending on the value of switch
10570     # $rOpts_closing_side_comment_else_flag:
10571     #
10572     #  = 0 add 'if' text to trailing else
10573     #  = 1 same as 0 plus:
10574     #      add 'if' to 'elsif's if can fit in line length
10575     #      add last 'elsif' to trailing else if can fit in one line
10576     #  = 2 same as 1 but do not check if exceed line length
10577     #
10578     # $rif_elsif_text = a reference to a list of all previous closing
10579     # side comments created for this if block
10580     #
10581     my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10582     my $csc_text = $block_leading_text;
10583
10584     if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10585     {
10586         return $csc_text;
10587     }
10588
10589     my $count = @{$rif_elsif_text};
10590     return $csc_text unless ($count);
10591
10592     my $if_text = '[ if' . $rif_elsif_text->[0];
10593
10594     # always show the leading 'if' text on 'else'
10595     if ( $block_type eq 'else' ) {
10596         $csc_text .= $if_text;
10597     }
10598
10599     # see if that's all
10600     if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10601         return $csc_text;
10602     }
10603
10604     my $last_elsif_text = "";
10605     if ( $count > 1 ) {
10606         $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10607         if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10608     }
10609
10610     # tentatively append one more item
10611     my $saved_text = $csc_text;
10612     if ( $block_type eq 'else' ) {
10613         $csc_text .= $last_elsif_text;
10614     }
10615     else {
10616         $csc_text .= ' ' . $if_text;
10617     }
10618
10619     # all done if no length checks requested
10620     if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10621         return $csc_text;
10622     }
10623
10624     # undo it if line length exceeded
10625     my $length =
10626       length($csc_text) +
10627       length($block_type) +
10628       length( $rOpts->{'closing-side-comment-prefix'} ) +
10629       $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10630     if ( $length > $rOpts_maximum_line_length ) {
10631         $csc_text = $saved_text;
10632     }
10633     return $csc_text;
10634 }
10635
10636 sub add_closing_side_comment {
10637
10638     # add closing side comments after closing block braces if -csc used
10639     my $cscw_block_comment;
10640
10641     #---------------------------------------------------------------
10642     # Step 1: loop through all tokens of this line to accumulate
10643     # the text needed to create the closing side comments. Also see
10644     # how the line ends.
10645     #---------------------------------------------------------------
10646
10647     my ( $terminal_type, $i_terminal, $i_block_leading_text,
10648         $block_leading_text, $block_line_count )
10649       = accumulate_csc_text();
10650
10651     #---------------------------------------------------------------
10652     # Step 2: make the closing side comment if this ends a block
10653     #---------------------------------------------------------------
10654     my $have_side_comment = $i_terminal != $max_index_to_go;
10655
10656     # if this line might end in a block closure..
10657     if (
10658         $terminal_type eq '}'
10659
10660         # ..and either
10661         && (
10662
10663             # the block is long enough
10664             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10665
10666             # or there is an existing comment to check
10667             || (   $have_side_comment
10668                 && $rOpts->{'closing-side-comment-warnings'} )
10669         )
10670
10671         # .. and if this is one of the types of interest
10672         && $block_type_to_go[$i_terminal] =~
10673         /$closing_side_comment_list_pattern/o
10674
10675         # .. but not an anonymous sub
10676         # These are not normally of interest, and their closing braces are
10677         # often followed by commas or semicolons anyway.  This also avoids
10678         # possible erratic output due to line numbering inconsistencies
10679         # in the cases where their closing braces terminate a line.
10680         && $block_type_to_go[$i_terminal] ne 'sub'
10681
10682         # ..and the corresponding opening brace must is not in this batch
10683         # (because we do not need to tag one-line blocks, although this
10684         # should also be caught with a positive -csci value)
10685         && $mate_index_to_go[$i_terminal] < 0
10686
10687         # ..and either
10688         && (
10689
10690             # this is the last token (line doesnt have a side comment)
10691             !$have_side_comment
10692
10693             # or the old side comment is a closing side comment
10694             || $tokens_to_go[$max_index_to_go] =~
10695             /$closing_side_comment_prefix_pattern/o
10696         )
10697       )
10698     {
10699
10700         # then make the closing side comment text
10701         my $token =
10702 "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10703
10704         # append any extra descriptive text collected above
10705         if ( $i_block_leading_text == $i_terminal ) {
10706             $token .= $block_leading_text;
10707         }
10708         $token =~ s/\s*$//;    # trim any trailing whitespace
10709
10710         # handle case of existing closing side comment
10711         if ($have_side_comment) {
10712
10713             # warn if requested and tokens differ significantly
10714             if ( $rOpts->{'closing-side-comment-warnings'} ) {
10715                 my $old_csc = $tokens_to_go[$max_index_to_go];
10716                 my $new_csc = $token;
10717                 $new_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
10718                 my $new_trailing_dots = $1;
10719                 $old_csc =~ s/\.\.\.\s*$//;
10720                 $new_csc =~ s/\s+//g;            # trim all whitespace
10721                 $old_csc =~ s/\s+//g;
10722
10723                 # Patch to handle multiple closing side comments at
10724                 # else and elsif's.  These have become too complicated
10725                 # to check, so if we see an indication of
10726                 # '[ if' or '[ # elsif', then assume they were made
10727                 # by perltidy.
10728                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10729                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10730                 }
10731                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10732                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10733                 }
10734
10735                 # if old comment is contained in new comment,
10736                 # only compare the common part.
10737                 if ( length($new_csc) > length($old_csc) ) {
10738                     $new_csc = substr( $new_csc, 0, length($old_csc) );
10739                 }
10740
10741                 # if the new comment is shorter and has been limited,
10742                 # only compare the common part.
10743                 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10744                 {
10745                     $old_csc = substr( $old_csc, 0, length($new_csc) );
10746                 }
10747
10748                 # any remaining difference?
10749                 if ( $new_csc ne $old_csc ) {
10750
10751                     # just leave the old comment if we are below the threshold
10752                     # for creating side comments
10753                     if ( $block_line_count <
10754                         $rOpts->{'closing-side-comment-interval'} )
10755                     {
10756                         $token = undef;
10757                     }
10758
10759                     # otherwise we'll make a note of it
10760                     else {
10761
10762                         warning(
10763 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
10764                         );
10765
10766                      # save the old side comment in a new trailing block comment
10767                         my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
10768                         $year  += 1900;
10769                         $month += 1;
10770                         $cscw_block_comment =
10771 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
10772                     }
10773                 }
10774                 else {
10775
10776                     # No differences.. we can safely delete old comment if we
10777                     # are below the threshold
10778                     if ( $block_line_count <
10779                         $rOpts->{'closing-side-comment-interval'} )
10780                     {
10781                         $token = undef;
10782                         unstore_token_to_go()
10783                           if ( $types_to_go[$max_index_to_go] eq '#' );
10784                         unstore_token_to_go()
10785                           if ( $types_to_go[$max_index_to_go] eq 'b' );
10786                     }
10787                 }
10788             }
10789
10790             # switch to the new csc (unless we deleted it!)
10791             $tokens_to_go[$max_index_to_go] = $token if $token;
10792         }
10793
10794         # handle case of NO existing closing side comment
10795         else {
10796
10797             # insert the new side comment into the output token stream
10798             my $type          = '#';
10799             my $block_type    = '';
10800             my $type_sequence = '';
10801             my $container_environment =
10802               $container_environment_to_go[$max_index_to_go];
10803             my $level                = $levels_to_go[$max_index_to_go];
10804             my $slevel               = $nesting_depth_to_go[$max_index_to_go];
10805             my $no_internal_newlines = 0;
10806
10807             my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
10808             my $ci_level           = $ci_levels_to_go[$max_index_to_go];
10809             my $in_continued_quote = 0;
10810
10811             # first insert a blank token
10812             insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
10813
10814             # then the side comment
10815             insert_new_token_to_go( $token, $type, $slevel,
10816                 $no_internal_newlines );
10817         }
10818     }
10819     return $cscw_block_comment;
10820 }
10821
10822 sub previous_nonblank_token {
10823     my ($i) = @_;
10824     if ( $i <= 0 ) {
10825         return "";
10826     }
10827     elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
10828         return $tokens_to_go[ $i - 1 ];
10829     }
10830     elsif ( $i > 1 ) {
10831         return $tokens_to_go[ $i - 2 ];
10832     }
10833     else {
10834         return "";
10835     }
10836 }
10837
10838 sub send_lines_to_vertical_aligner {
10839
10840     my ( $ri_first, $ri_last, $do_not_pad ) = @_;
10841
10842     my $rindentation_list = [0];    # ref to indentations for each line
10843
10844     # define the array @matching_token_to_go for the output tokens
10845     # which will be non-blank for each special token (such as =>)
10846     # for which alignment is required.
10847     set_vertical_alignment_markers( $ri_first, $ri_last );
10848
10849     # flush if necessary to avoid unwanted alignment
10850     my $must_flush = 0;
10851     if ( @$ri_first > 1 ) {
10852
10853         # flush before a long if statement
10854         if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
10855             $must_flush = 1;
10856         }
10857     }
10858     if ($must_flush) {
10859         Perl::Tidy::VerticalAligner::flush();
10860     }
10861
10862     set_logical_padding( $ri_first, $ri_last );
10863
10864     # loop to prepare each line for shipment
10865     my $n_last_line = @$ri_first - 1;
10866     my $in_comma_list;
10867     for my $n ( 0 .. $n_last_line ) {
10868         my $ibeg = $$ri_first[$n];
10869         my $iend = $$ri_last[$n];
10870
10871         my @patterns = ();
10872         my @tokens   = ();
10873         my @fields   = ();
10874         my $i_start  = $ibeg;
10875         my $i;
10876
10877         my $depth                 = 0;
10878         my @container_name        = ("");
10879         my @multiple_comma_arrows = (undef);
10880
10881         my $j = 0;    # field index
10882
10883         $patterns[0] = "";
10884         for $i ( $ibeg .. $iend ) {
10885
10886             # Keep track of containers balanced on this line only.
10887             # These are used below to prevent unwanted cross-line alignments.
10888             # Unbalanced containers already avoid aligning across
10889             # container boundaries.
10890             if ( $tokens_to_go[$i] eq '(' ) {
10891                 my $i_mate = $mate_index_to_go[$i];
10892                 if ( $i_mate > $i && $i_mate <= $iend ) {
10893                     $depth++;
10894                     my $seqno = $type_sequence_to_go[$i];
10895                     my $count = comma_arrow_count($seqno);
10896                     $multiple_comma_arrows[$depth] = $count && $count > 1;
10897                     my $name = previous_nonblank_token($i);
10898                     $name =~ s/^->//;
10899                     $container_name[$depth] = "+" . $name;
10900                 }
10901             }
10902             elsif ( $tokens_to_go[$i] eq ')' ) {
10903                 $depth-- if $depth > 0;
10904             }
10905
10906             # if we find a new synchronization token, we are done with
10907             # a field
10908             if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
10909
10910                 my $tok = my $raw_tok = $matching_token_to_go[$i];
10911
10912                 # make separators in different nesting depths unique
10913                 # by appending the nesting depth digit.
10914                 if ( $raw_tok ne '#' ) {
10915                     $tok .= "$nesting_depth_to_go[$i]";
10916                 }
10917
10918                 # do any special decorations for commas to avoid unwanted
10919                 # cross-line alignments.
10920                 if ( $raw_tok eq ',' ) {
10921                     if ( $container_name[$depth] ) {
10922                         $tok .= $container_name[$depth];
10923                     }
10924                 }
10925
10926                 # decorate '=>' with:
10927                 # - Nothing if this container is unbalanced on this line.
10928                 # - The previous token if it is balanced and multiple '=>'s
10929                 # - The container name if it is bananced and no other '=>'s
10930                 elsif ( $raw_tok eq '=>' ) {
10931                     if ( $container_name[$depth] ) {
10932                         if ( $multiple_comma_arrows[$depth] ) {
10933                             $tok .= "+" . previous_nonblank_token($i);
10934                         }
10935                         else {
10936                             $tok .= $container_name[$depth];
10937                         }
10938                     }
10939                 }
10940
10941                 # concatenate the text of the consecutive tokens to form
10942                 # the field
10943                 push( @fields,
10944                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10945
10946                 # store the alignment token for this field
10947                 push( @tokens, $tok );
10948
10949                 # get ready for the next batch
10950                 $i_start = $i;
10951                 $j++;
10952                 $patterns[$j] = "";
10953             }
10954
10955             # continue accumulating tokens
10956             # handle non-keywords..
10957             if ( $types_to_go[$i] ne 'k' ) {
10958                 my $type = $types_to_go[$i];
10959
10960                 # Mark most things before arrows as a quote to
10961                 # get them to line up. Testfile: mixed.pl.
10962                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10963                     my $next_type = $types_to_go[ $i + 1 ];
10964                     my $i_next_nonblank =
10965                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10966
10967                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10968                         $type = 'Q';
10969                     }
10970                 }
10971
10972                 # minor patch to make numbers and quotes align
10973                 if ( $type eq 'n' ) { $type = 'Q' }
10974
10975                 $patterns[$j] .= $type;
10976             }
10977
10978             # for keywords we have to use the actual text
10979             else {
10980
10981                 # map certain keywords to the same 'if' class to align
10982                 # long if/elsif sequences. my testfile: elsif.pl
10983                 my $tok = $tokens_to_go[$i];
10984                 if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
10985                     $tok = 'if';
10986                 }
10987                 $patterns[$j] .= $tok;
10988             }
10989         }
10990
10991         # done with this line .. join text of tokens to make the last field
10992         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10993
10994         my ( $indentation, $lev, $level_end, $terminal_type,
10995             $is_semicolon_terminated, $is_outdented_line )
10996           = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
10997             $ri_first, $ri_last, $rindentation_list );
10998
10999         # we will allow outdenting of long lines..
11000         my $outdent_long_lines = (
11001
11002             # which are long quotes, if allowed
11003             ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
11004
11005             # which are long block comments, if allowed
11006               || (
11007                    $types_to_go[$ibeg] eq '#'
11008                 && $rOpts->{'outdent-long-comments'}
11009
11010                 # but not if this is a static block comment
11011                 && !$is_static_block_comment
11012               )
11013         );
11014
11015         my $level_jump =
11016           $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
11017
11018         my $rvertical_tightness_flags =
11019           set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
11020             $ri_first, $ri_last );
11021
11022         # flush an outdented line to avoid any unwanted vertical alignment
11023         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11024
11025         my $is_terminal_ternary = 0;
11026         if (   $tokens_to_go[$ibeg] eq ':'
11027             || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
11028         {
11029             if (   ( $terminal_type eq ';' && $level_end <= $lev )
11030                 || ( $level_end < $lev ) )
11031             {
11032                 $is_terminal_ternary = 1;
11033             }
11034         }
11035
11036         # send this new line down the pipe
11037         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
11038         Perl::Tidy::VerticalAligner::append_line(
11039             $lev,
11040             $level_end,
11041             $indentation,
11042             \@fields,
11043             \@tokens,
11044             \@patterns,
11045             $forced_breakpoint_to_go[$iend] || $in_comma_list,
11046             $outdent_long_lines,
11047             $is_terminal_ternary,
11048             $is_semicolon_terminated,
11049             $do_not_pad,
11050             $rvertical_tightness_flags,
11051             $level_jump,
11052         );
11053         $in_comma_list =
11054           $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
11055
11056         # flush an outdented line to avoid any unwanted vertical alignment
11057         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11058
11059         $do_not_pad = 0;
11060
11061     }    # end of loop to output each line
11062
11063     # remember indentation of lines containing opening containers for
11064     # later use by sub set_adjusted_indentation
11065     save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
11066 }
11067
11068 {        # begin unmatched_indexes
11069
11070     # closure to keep track of unbalanced containers.
11071     # arrays shared by the routines in this block:
11072     my @unmatched_opening_indexes_in_this_batch;
11073     my @unmatched_closing_indexes_in_this_batch;
11074     my %comma_arrow_count;
11075
11076     sub is_unbalanced_batch {
11077         @unmatched_opening_indexes_in_this_batch +
11078           @unmatched_closing_indexes_in_this_batch;
11079     }
11080
11081     sub comma_arrow_count {
11082         my $seqno = $_[0];
11083         return $comma_arrow_count{$seqno};
11084     }
11085
11086     sub match_opening_and_closing_tokens {
11087
11088         # Match up indexes of opening and closing braces, etc, in this batch.
11089         # This has to be done after all tokens are stored because unstoring
11090         # of tokens would otherwise cause trouble.
11091
11092         @unmatched_opening_indexes_in_this_batch = ();
11093         @unmatched_closing_indexes_in_this_batch = ();
11094         %comma_arrow_count                       = ();
11095
11096         my ( $i, $i_mate, $token );
11097         foreach $i ( 0 .. $max_index_to_go ) {
11098             if ( $type_sequence_to_go[$i] ) {
11099                 $token = $tokens_to_go[$i];
11100                 if ( $token =~ /^[\(\[\{\?]$/ ) {
11101                     push @unmatched_opening_indexes_in_this_batch, $i;
11102                 }
11103                 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
11104
11105                     $i_mate = pop @unmatched_opening_indexes_in_this_batch;
11106                     if ( defined($i_mate) && $i_mate >= 0 ) {
11107                         if ( $type_sequence_to_go[$i_mate] ==
11108                             $type_sequence_to_go[$i] )
11109                         {
11110                             $mate_index_to_go[$i]      = $i_mate;
11111                             $mate_index_to_go[$i_mate] = $i;
11112                         }
11113                         else {
11114                             push @unmatched_opening_indexes_in_this_batch,
11115                               $i_mate;
11116                             push @unmatched_closing_indexes_in_this_batch, $i;
11117                         }
11118                     }
11119                     else {
11120                         push @unmatched_closing_indexes_in_this_batch, $i;
11121                     }
11122                 }
11123             }
11124             elsif ( $tokens_to_go[$i] eq '=>' ) {
11125                 if (@unmatched_opening_indexes_in_this_batch) {
11126                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
11127                     my $seqno = $type_sequence_to_go[$j];
11128                     $comma_arrow_count{$seqno}++;
11129                 }
11130             }
11131         }
11132     }
11133
11134     sub save_opening_indentation {
11135
11136         # This should be called after each batch of tokens is output. It
11137         # saves indentations of lines of all unmatched opening tokens.
11138         # These will be used by sub get_opening_indentation.
11139
11140         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11141
11142         # we no longer need indentations of any saved indentations which
11143         # are unmatched closing tokens in this batch, because we will
11144         # never encounter them again.  So we can delete them to keep
11145         # the hash size down.
11146         foreach (@unmatched_closing_indexes_in_this_batch) {
11147             my $seqno = $type_sequence_to_go[$_];
11148             delete $saved_opening_indentation{$seqno};
11149         }
11150
11151         # we need to save indentations of any unmatched opening tokens
11152         # in this batch because we may need them in a subsequent batch.
11153         foreach (@unmatched_opening_indexes_in_this_batch) {
11154             my $seqno = $type_sequence_to_go[$_];
11155             $saved_opening_indentation{$seqno} = [
11156                 lookup_opening_indentation(
11157                     $_, $ri_first, $ri_last, $rindentation_list
11158                 )
11159             ];
11160         }
11161     }
11162 }    # end unmatched_indexes
11163
11164 sub get_opening_indentation {
11165
11166     # get the indentation of the line which output the opening token
11167     # corresponding to a given closing token in the current output batch.
11168     #
11169     # given:
11170     # $i_closing - index in this line of a closing token ')' '}' or ']'
11171     #
11172     # $ri_first - reference to list of the first index $i for each output
11173     #               line in this batch
11174     # $ri_last - reference to list of the last index $i for each output line
11175     #              in this batch
11176     # $rindentation_list - reference to a list containing the indentation
11177     #            used for each line.
11178     #
11179     # return:
11180     #   -the indentation of the line which contained the opening token
11181     #    which matches the token at index $i_opening
11182     #   -and its offset (number of columns) from the start of the line
11183     #
11184     my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
11185
11186     # first, see if the opening token is in the current batch
11187     my $i_opening = $mate_index_to_go[$i_closing];
11188     my ( $indent, $offset, $is_leading, $exists );
11189     $exists = 1;
11190     if ( $i_opening >= 0 ) {
11191
11192         # it is..look up the indentation
11193         ( $indent, $offset, $is_leading ) =
11194           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
11195             $rindentation_list );
11196     }
11197
11198     # if not, it should have been stored in the hash by a previous batch
11199     else {
11200         my $seqno = $type_sequence_to_go[$i_closing];
11201         if ($seqno) {
11202             if ( $saved_opening_indentation{$seqno} ) {
11203                 ( $indent, $offset, $is_leading ) =
11204                   @{ $saved_opening_indentation{$seqno} };
11205             }
11206
11207             # some kind of serious error
11208             # (example is badfile.t)
11209             else {
11210                 $indent     = 0;
11211                 $offset     = 0;
11212                 $is_leading = 0;
11213                 $exists     = 0;
11214             }
11215         }
11216
11217         # if no sequence number it must be an unbalanced container
11218         else {
11219             $indent     = 0;
11220             $offset     = 0;
11221             $is_leading = 0;
11222             $exists     = 0;
11223         }
11224     }
11225     return ( $indent, $offset, $is_leading, $exists );
11226 }
11227
11228 sub lookup_opening_indentation {
11229
11230     # get the indentation of the line in the current output batch
11231     # which output a selected opening token
11232     #
11233     # given:
11234     #   $i_opening - index of an opening token in the current output batch
11235     #                whose line indentation we need
11236     #   $ri_first - reference to list of the first index $i for each output
11237     #               line in this batch
11238     #   $ri_last - reference to list of the last index $i for each output line
11239     #              in this batch
11240     #   $rindentation_list - reference to a list containing the indentation
11241     #            used for each line.  (NOTE: the first slot in
11242     #            this list is the last returned line number, and this is
11243     #            followed by the list of indentations).
11244     #
11245     # return
11246     #   -the indentation of the line which contained token $i_opening
11247     #   -and its offset (number of columns) from the start of the line
11248
11249     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11250
11251     my $nline = $rindentation_list->[0];    # line number of previous lookup
11252
11253     # reset line location if necessary
11254     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
11255
11256     # find the correct line
11257     unless ( $i_opening > $ri_last->[-1] ) {
11258         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11259     }
11260
11261     # error - token index is out of bounds - shouldn't happen
11262     else {
11263         warning(
11264 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
11265         );
11266         report_definite_bug();
11267         $nline = $#{$ri_last};
11268     }
11269
11270     $rindentation_list->[0] =
11271       $nline;    # save line number to start looking next call
11272     my $ibeg       = $ri_start->[$nline];
11273     my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
11274     my $is_leading = ( $ibeg == $i_opening );
11275     return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
11276 }
11277
11278 {
11279     my %is_if_elsif_else_unless_while_until_for_foreach;
11280
11281     BEGIN {
11282
11283         # These block types may have text between the keyword and opening
11284         # curly.  Note: 'else' does not, but must be included to allow trailing
11285         # if/elsif text to be appended.
11286         # patch for SWITCH/CASE: added 'case' and 'when'
11287         @_ = qw(if elsif else unless while until for foreach case when);
11288         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11289     }
11290
11291     sub set_adjusted_indentation {
11292
11293         # This routine has the final say regarding the actual indentation of
11294         # a line.  It starts with the basic indentation which has been
11295         # defined for the leading token, and then takes into account any
11296         # options that the user has set regarding special indenting and
11297         # outdenting.
11298
11299         my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11300             $rindentation_list )
11301           = @_;
11302
11303         # we need to know the last token of this line
11304         my ( $terminal_type, $i_terminal ) =
11305           terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11306
11307         my $is_outdented_line = 0;
11308
11309         my $is_semicolon_terminated = $terminal_type eq ';'
11310           && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11311
11312         ##########################################################
11313         # Section 1: set a flag and a default indentation
11314         #
11315         # Most lines are indented according to the initial token.
11316         # But it is common to outdent to the level just after the
11317         # terminal token in certain cases...
11318         # adjust_indentation flag:
11319         #       0 - do not adjust
11320         #       1 - outdent
11321         #       2 - vertically align with opening token
11322         #       3 - indent
11323         ##########################################################
11324         my $adjust_indentation         = 0;
11325         my $default_adjust_indentation = $adjust_indentation;
11326
11327         my (
11328             $opening_indentation, $opening_offset,
11329             $is_leading,          $opening_exists
11330         );
11331
11332         # if we are at a closing token of some type..
11333         if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11334
11335             # get the indentation of the line containing the corresponding
11336             # opening token
11337             (
11338                 $opening_indentation, $opening_offset,
11339                 $is_leading,          $opening_exists
11340               )
11341               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11342                 $rindentation_list );
11343
11344             # First set the default behavior:
11345             # default behavior is to outdent closing lines
11346             # of the form:   ");  };  ];  )->xxx;"
11347             if (
11348                 $is_semicolon_terminated
11349
11350                 # and 'cuddled parens' of the form:   ")->pack("
11351                 || (
11352                        $terminal_type      eq '('
11353                     && $types_to_go[$ibeg] eq ')'
11354                     && ( $nesting_depth_to_go[$iend] + 1 ==
11355                         $nesting_depth_to_go[$ibeg] )
11356                 )
11357               )
11358             {
11359                 $adjust_indentation = 1;
11360             }
11361
11362             # TESTING: outdent something like '),'
11363             if (
11364                 $terminal_type eq ','
11365
11366                 # allow just one character before the comma
11367                 && $i_terminal == $ibeg + 1
11368
11369                 # requre LIST environment; otherwise, we may outdent too much --
11370                 # this can happen in calls without parentheses (overload.t);
11371                 && $container_environment_to_go[$i_terminal] eq 'LIST'
11372               )
11373             {
11374                 $adjust_indentation = 1;
11375             }
11376
11377             # undo continuation indentation of a terminal closing token if
11378             # it is the last token before a level decrease.  This will allow
11379             # a closing token to line up with its opening counterpart, and
11380             # avoids a indentation jump larger than 1 level.
11381             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11382                 && $i_terminal == $ibeg )
11383             {
11384                 my $ci        = $ci_levels_to_go[$ibeg];
11385                 my $lev       = $levels_to_go[$ibeg];
11386                 my $next_type = $types_to_go[ $ibeg + 1 ];
11387                 my $i_next_nonblank =
11388                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11389                 if (   $i_next_nonblank <= $max_index_to_go
11390                     && $levels_to_go[$i_next_nonblank] < $lev )
11391                 {
11392                     $adjust_indentation = 1;
11393                 }
11394             }
11395
11396             $default_adjust_indentation = $adjust_indentation;
11397
11398             # Now modify default behavior according to user request:
11399             # handle option to indent non-blocks of the form );  };  ];
11400             # But don't do special indentation to something like ')->pack('
11401             if ( !$block_type_to_go[$ibeg] ) {
11402                 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11403                 if ( $cti == 1 ) {
11404                     if (   $i_terminal <= $ibeg + 1
11405                         || $is_semicolon_terminated )
11406                     {
11407                         $adjust_indentation = 2;
11408                     }
11409                     else {
11410                         $adjust_indentation = 0;
11411                     }
11412                 }
11413                 elsif ( $cti == 2 ) {
11414                     if ($is_semicolon_terminated) {
11415                         $adjust_indentation = 3;
11416                     }
11417                     else {
11418                         $adjust_indentation = 0;
11419                     }
11420                 }
11421                 elsif ( $cti == 3 ) {
11422                     $adjust_indentation = 3;
11423                 }
11424             }
11425
11426             # handle option to indent blocks
11427             else {
11428                 if (
11429                     $rOpts->{'indent-closing-brace'}
11430                     && (
11431                         $i_terminal == $ibeg    #  isolated terminal '}'
11432                         || $is_semicolon_terminated
11433                     )
11434                   )                             #  } xxxx ;
11435                 {
11436                     $adjust_indentation = 3;
11437                 }
11438             }
11439         }
11440
11441         # if at ');', '};', '>;', and '];' of a terminal qw quote
11442         elsif ($$rpatterns[0] =~ /^qb*;$/
11443             && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
11444         {
11445             if ( $closing_token_indentation{$1} == 0 ) {
11446                 $adjust_indentation = 1;
11447             }
11448             else {
11449                 $adjust_indentation = 3;
11450             }
11451         }
11452
11453         # if line begins with a ':', align it with any
11454         # previous line leading with corresponding ?
11455         elsif ( $types_to_go[$ibeg] eq ':' ) {
11456             (
11457                 $opening_indentation, $opening_offset,
11458                 $is_leading,          $opening_exists
11459               )
11460               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11461                 $rindentation_list );
11462             if ($is_leading) { $adjust_indentation = 2; }
11463         }
11464
11465         ##########################################################
11466         # Section 2: set indentation according to flag set above
11467         #
11468         # Select the indentation object to define leading
11469         # whitespace.  If we are outdenting something like '} } );'
11470         # then we want to use one level below the last token
11471         # ($i_terminal) in order to get it to fully outdent through
11472         # all levels.
11473         ##########################################################
11474         my $indentation;
11475         my $lev;
11476         my $level_end = $levels_to_go[$iend];
11477
11478         if ( $adjust_indentation == 0 ) {
11479             $indentation = $leading_spaces_to_go[$ibeg];
11480             $lev         = $levels_to_go[$ibeg];
11481         }
11482         elsif ( $adjust_indentation == 1 ) {
11483             $indentation = $reduced_spaces_to_go[$i_terminal];
11484             $lev         = $levels_to_go[$i_terminal];
11485         }
11486
11487         # handle indented closing token which aligns with opening token
11488         elsif ( $adjust_indentation == 2 ) {
11489
11490             # handle option to align closing token with opening token
11491             $lev = $levels_to_go[$ibeg];
11492
11493             # calculate spaces needed to align with opening token
11494             my $space_count =
11495               get_SPACES($opening_indentation) + $opening_offset;
11496
11497             # Indent less than the previous line.
11498             #
11499             # Problem: For -lp we don't exactly know what it was if there
11500             # were recoverable spaces sent to the aligner.  A good solution
11501             # would be to force a flush of the vertical alignment buffer, so
11502             # that we would know.  For now, this rule is used for -lp:
11503             #
11504             # When the last line did not start with a closing token we will
11505             # be optimistic that the aligner will recover everything wanted.
11506             #
11507             # This rule will prevent us from breaking a hierarchy of closing
11508             # tokens, and in a worst case will leave a closing paren too far
11509             # indented, but this is better than frequently leaving it not
11510             # indented enough.
11511             my $last_spaces = get_SPACES($last_indentation_written);
11512             if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
11513                 $last_spaces +=
11514                   get_RECOVERABLE_SPACES($last_indentation_written);
11515             }
11516
11517             # reset the indentation to the new space count if it works
11518             # only options are all or none: nothing in-between looks good
11519             $lev = $levels_to_go[$ibeg];
11520             if ( $space_count < $last_spaces ) {
11521                 if ($rOpts_line_up_parentheses) {
11522                     my $lev = $levels_to_go[$ibeg];
11523                     $indentation =
11524                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11525                 }
11526                 else {
11527                     $indentation = $space_count;
11528                 }
11529             }
11530
11531             # revert to default if it doesnt work
11532             else {
11533                 $space_count = leading_spaces_to_go($ibeg);
11534                 if ( $default_adjust_indentation == 0 ) {
11535                     $indentation = $leading_spaces_to_go[$ibeg];
11536                 }
11537                 elsif ( $default_adjust_indentation == 1 ) {
11538                     $indentation = $reduced_spaces_to_go[$i_terminal];
11539                     $lev         = $levels_to_go[$i_terminal];
11540                 }
11541             }
11542         }
11543
11544         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
11545         else {
11546
11547             # handle -icb (indented closing code block braces)
11548             # Updated method for indented block braces: indent one full level if
11549             # there is no continuation indentation.  This will occur for major
11550             # structures such as sub, if, else, but not for things like map
11551             # blocks.
11552             #
11553             # Note: only code blocks without continuation indentation are
11554             # handled here (if, else, unless, ..). In the following snippet,
11555             # the terminal brace of the sort block will have continuation
11556             # indentation as shown so it will not be handled by the coding
11557             # here.  We would have to undo the continuation indentation to do
11558             # this, but it probably looks ok as is.  This is a possible future
11559             # update for semicolon terminated lines.
11560             #
11561             #     if ($sortby eq 'date' or $sortby eq 'size') {
11562             #         @files = sort {
11563             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
11564             #                 or $a cmp $b
11565             #                 } @files;
11566             #         }
11567             #
11568             if (   $block_type_to_go[$ibeg]
11569                 && $ci_levels_to_go[$i_terminal] == 0 )
11570             {
11571                 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
11572                 $indentation = $spaces + $rOpts_indent_columns;
11573
11574                 # NOTE: for -lp we could create a new indentation object, but
11575                 # there is probably no need to do it
11576             }
11577
11578             # handle -icp and any -icb block braces which fall through above
11579             # test such as the 'sort' block mentioned above.
11580             else {
11581
11582                 # There are currently two ways to handle -icp...
11583                 # One way is to use the indentation of the previous line:
11584                 # $indentation = $last_indentation_written;
11585
11586                 # The other way is to use the indentation that the previous line
11587                 # would have had if it hadn't been adjusted:
11588                 $indentation = $last_unadjusted_indentation;
11589
11590                 # Current method: use the minimum of the two. This avoids
11591                 # inconsistent indentation.
11592                 if ( get_SPACES($last_indentation_written) <
11593                     get_SPACES($indentation) )
11594                 {
11595                     $indentation = $last_indentation_written;
11596                 }
11597             }
11598
11599             # use previous indentation but use own level
11600             # to cause list to be flushed properly
11601             $lev = $levels_to_go[$ibeg];
11602         }
11603
11604         # remember indentation except for multi-line quotes, which get
11605         # no indentation
11606         unless ( $ibeg == 0 && $starting_in_quote ) {
11607             $last_indentation_written    = $indentation;
11608             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
11609             $last_leading_token          = $tokens_to_go[$ibeg];
11610         }
11611
11612         # be sure lines with leading closing tokens are not outdented more
11613         # than the line which contained the corresponding opening token.
11614
11615         #############################################################
11616         # updated per bug report in alex_bug.pl: we must not
11617         # mess with the indentation of closing logical braces so
11618         # we must treat something like '} else {' as if it were
11619         # an isolated brace my $is_isolated_block_brace = (
11620         # $iend == $ibeg ) && $block_type_to_go[$ibeg];
11621         #############################################################
11622         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
11623           && ( $iend == $ibeg
11624             || $is_if_elsif_else_unless_while_until_for_foreach{
11625                 $block_type_to_go[$ibeg] } );
11626
11627         # only do this for a ':; which is aligned with its leading '?'
11628         my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
11629         if (   defined($opening_indentation)
11630             && !$is_isolated_block_brace
11631             && !$is_unaligned_colon )
11632         {
11633             if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
11634                 $indentation = $opening_indentation;
11635             }
11636         }
11637
11638         # remember the indentation of each line of this batch
11639         push @{$rindentation_list}, $indentation;
11640
11641         # outdent lines with certain leading tokens...
11642         if (
11643
11644             # must be first word of this batch
11645             $ibeg == 0
11646
11647             # and ...
11648             && (
11649
11650                 # certain leading keywords if requested
11651                 (
11652                        $rOpts->{'outdent-keywords'}
11653                     && $types_to_go[$ibeg] eq 'k'
11654                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
11655                 )
11656
11657                 # or labels if requested
11658                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
11659
11660                 # or static block comments if requested
11661                 || (   $types_to_go[$ibeg] eq '#'
11662                     && $rOpts->{'outdent-static-block-comments'}
11663                     && $is_static_block_comment )
11664             )
11665           )
11666
11667         {
11668             my $space_count = leading_spaces_to_go($ibeg);
11669             if ( $space_count > 0 ) {
11670                 $space_count -= $rOpts_continuation_indentation;
11671                 $is_outdented_line = 1;
11672                 if ( $space_count < 0 ) { $space_count = 0 }
11673
11674                 # do not promote a spaced static block comment to non-spaced;
11675                 # this is not normally necessary but could be for some
11676                 # unusual user inputs (such as -ci = -i)
11677                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
11678                     $space_count = 1;
11679                 }
11680
11681                 if ($rOpts_line_up_parentheses) {
11682                     $indentation =
11683                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11684                 }
11685                 else {
11686                     $indentation = $space_count;
11687                 }
11688             }
11689         }
11690
11691         return ( $indentation, $lev, $level_end, $terminal_type,
11692             $is_semicolon_terminated, $is_outdented_line );
11693     }
11694 }
11695
11696 sub set_vertical_tightness_flags {
11697
11698     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
11699
11700     # Define vertical tightness controls for the nth line of a batch.
11701     # We create an array of parameters which tell the vertical aligner
11702     # if we should combine this line with the next line to achieve the
11703     # desired vertical tightness.  The array of parameters contains:
11704     #
11705     #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
11706     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
11707     #             if closing: spaces of padding to use
11708     #   [2] sequence number of container
11709     #   [3] valid flag: do not append if this flag is false. Will be
11710     #       true if appropriate -vt flag is set.  Otherwise, Will be
11711     #       made true only for 2 line container in parens with -lp
11712     #
11713     # These flags are used by sub set_leading_whitespace in
11714     # the vertical aligner
11715
11716     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
11717
11718     # For non-BLOCK tokens, we will need to examine the next line
11719     # too, so we won't consider the last line.
11720     if ( $n < $n_last_line ) {
11721
11722         # see if last token is an opening token...not a BLOCK...
11723         my $ibeg_next = $$ri_first[ $n + 1 ];
11724         my $token_end = $tokens_to_go[$iend];
11725         my $iend_next = $$ri_last[ $n + 1 ];
11726         if (
11727                $type_sequence_to_go[$iend]
11728             && !$block_type_to_go[$iend]
11729             && $is_opening_token{$token_end}
11730             && (
11731                 $opening_vertical_tightness{$token_end} > 0
11732
11733                 # allow 2-line method call to be closed up
11734                 || (   $rOpts_line_up_parentheses
11735                     && $token_end eq '('
11736                     && $iend > $ibeg
11737                     && $types_to_go[ $iend - 1 ] ne 'b' )
11738             )
11739           )
11740         {
11741
11742             # avoid multiple jumps in nesting depth in one line if
11743             # requested
11744             my $ovt       = $opening_vertical_tightness{$token_end};
11745             my $iend_next = $$ri_last[ $n + 1 ];
11746             unless (
11747                 $ovt < 2
11748                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
11749                     $nesting_depth_to_go[$ibeg_next] )
11750               )
11751             {
11752
11753                 # If -vt flag has not been set, mark this as invalid
11754                 # and aligner will validate it if it sees the closing paren
11755                 # within 2 lines.
11756                 my $valid_flag = $ovt;
11757                 @{$rvertical_tightness_flags} =
11758                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
11759             }
11760         }
11761
11762         # see if first token of next line is a closing token...
11763         # ..and be sure this line does not have a side comment
11764         my $token_next = $tokens_to_go[$ibeg_next];
11765         if (   $type_sequence_to_go[$ibeg_next]
11766             && !$block_type_to_go[$ibeg_next]
11767             && $is_closing_token{$token_next}
11768             && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
11769         {
11770             my $ovt = $opening_vertical_tightness{$token_next};
11771             my $cvt = $closing_vertical_tightness{$token_next};
11772             if (
11773
11774                 # never append a trailing line like   )->pack(
11775                 # because it will throw off later alignment
11776                 (
11777                     $nesting_depth_to_go[$ibeg_next] ==
11778                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
11779                 )
11780                 && (
11781                     $cvt == 2
11782                     || (
11783                         $container_environment_to_go[$ibeg_next] ne 'LIST'
11784                         && (
11785                             $cvt == 1
11786
11787                             # allow closing up 2-line method calls
11788                             || (   $rOpts_line_up_parentheses
11789                                 && $token_next eq ')' )
11790                         )
11791                     )
11792                 )
11793               )
11794             {
11795
11796                 # decide which trailing closing tokens to append..
11797                 my $ok = 0;
11798                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11799                 else {
11800                     my $str = join( '',
11801                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11802
11803                     # append closing token if followed by comment or ';'
11804                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11805                 }
11806
11807                 if ($ok) {
11808                     my $valid_flag = $cvt;
11809                     @{$rvertical_tightness_flags} = (
11810                         2,
11811                         $tightness{$token_next} == 2 ? 0 : 1,
11812                         $type_sequence_to_go[$ibeg_next], $valid_flag,
11813                     );
11814                 }
11815             }
11816         }
11817
11818         # Opening Token Right
11819         # If requested, move an isolated trailing opening token to the end of
11820         # the previous line which ended in a comma.  We could do this
11821         # in sub recombine_breakpoints but that would cause problems
11822         # with -lp formatting.  The problem is that indentation will
11823         # quickly move far to the right in nested expressions.  By
11824         # doing it after indentation has been set, we avoid changes
11825         # to the indentation.  Actual movement of the token takes place
11826         # in sub write_leader_and_string.
11827         if (
11828             $opening_token_right{ $tokens_to_go[$ibeg_next] }
11829
11830             # previous line is not opening
11831             # (use -sot to combine with it)
11832             && !$is_opening_token{$token_end}
11833
11834             # previous line ended in one of these
11835             # (add other cases if necessary; '=>' and '.' are not necessary
11836             ##&& ($is_opening_token{$token_end} || $token_end eq ',')
11837             && !$block_type_to_go[$ibeg_next]
11838
11839             # this is a line with just an opening token
11840             && (   $iend_next == $ibeg_next
11841                 || $iend_next == $ibeg_next + 2
11842                 && $types_to_go[$iend_next] eq '#' )
11843
11844             # looks bad if we align vertically with the wrong container
11845             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
11846           )
11847         {
11848             my $valid_flag = 1;
11849             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11850             @{$rvertical_tightness_flags} =
11851               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
11852         }
11853
11854         # Stacking of opening and closing tokens
11855         my $stackable;
11856         my $token_beg_next = $tokens_to_go[$ibeg_next];
11857
11858         # patch to make something like 'qw(' behave like an opening paren
11859         # (aran.t)
11860         if ( $types_to_go[$ibeg_next] eq 'q' ) {
11861             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
11862                 $token_beg_next = $1;
11863             }
11864         }
11865
11866         if (   $is_closing_token{$token_end}
11867             && $is_closing_token{$token_beg_next} )
11868         {
11869             $stackable = $stack_closing_token{$token_beg_next}
11870               unless ( $block_type_to_go[$ibeg_next] )
11871               ;    # shouldn't happen; just checking
11872         }
11873         elsif ($is_opening_token{$token_end}
11874             && $is_opening_token{$token_beg_next} )
11875         {
11876             $stackable = $stack_opening_token{$token_beg_next}
11877               unless ( $block_type_to_go[$ibeg_next] )
11878               ;    # shouldn't happen; just checking
11879         }
11880
11881         if ($stackable) {
11882
11883             my $is_semicolon_terminated;
11884             if ( $n + 1 == $n_last_line ) {
11885                 my ( $terminal_type, $i_terminal ) = terminal_type(
11886                     \@types_to_go, \@block_type_to_go,
11887                     $ibeg_next,    $iend_next
11888                 );
11889                 $is_semicolon_terminated = $terminal_type eq ';'
11890                   && $nesting_depth_to_go[$iend_next] <
11891                   $nesting_depth_to_go[$ibeg_next];
11892             }
11893
11894             # this must be a line with just an opening token
11895             # or end in a semicolon
11896             if (
11897                 $is_semicolon_terminated
11898                 || (   $iend_next == $ibeg_next
11899                     || $iend_next == $ibeg_next + 2
11900                     && $types_to_go[$iend_next] eq '#' )
11901               )
11902             {
11903                 my $valid_flag = 1;
11904                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11905                 @{$rvertical_tightness_flags} =
11906                   ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
11907                   );
11908             }
11909         }
11910     }
11911
11912     # Check for a last line with isolated opening BLOCK curly
11913     elsif ($rOpts_block_brace_vertical_tightness
11914         && $ibeg               eq $iend
11915         && $types_to_go[$iend] eq '{'
11916         && $block_type_to_go[$iend] =~
11917         /$block_brace_vertical_tightness_pattern/o )
11918     {
11919         @{$rvertical_tightness_flags} =
11920           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11921     }
11922
11923     # pack in the sequence numbers of the ends of this line
11924     $rvertical_tightness_flags->[4] = get_seqno($ibeg);
11925     $rvertical_tightness_flags->[5] = get_seqno($iend);
11926     return $rvertical_tightness_flags;
11927 }
11928
11929 sub get_seqno {
11930
11931     # get opening and closing sequence numbers of a token for the vertical
11932     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
11933     # to be treated somewhat like opening and closing tokens for stacking
11934     # tokens by the vertical aligner.
11935     my ($ii) = @_;
11936     my $seqno = $type_sequence_to_go[$ii];
11937     if ( $types_to_go[$ii] eq 'q' ) {
11938         my $SEQ_QW = -1;
11939         if ( $ii > 0 ) {
11940             $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
11941         }
11942         else {
11943             if ( !$ending_in_quote ) {
11944                 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
11945             }
11946         }
11947     }
11948     return ($seqno);
11949 }
11950
11951 {
11952     my %is_vertical_alignment_type;
11953     my %is_vertical_alignment_keyword;
11954
11955     BEGIN {
11956
11957         @_ = qw#
11958           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
11959           { ? : => =~ && || // ~~ !~~
11960           #;
11961         @is_vertical_alignment_type{@_} = (1) x scalar(@_);
11962
11963         @_ = qw(if unless and or err eq ne for foreach while until);
11964         @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
11965     }
11966
11967     sub set_vertical_alignment_markers {
11968
11969         # This routine takes the first step toward vertical alignment of the
11970         # lines of output text.  It looks for certain tokens which can serve as
11971         # vertical alignment markers (such as an '=').
11972         #
11973         # Method: We look at each token $i in this output batch and set
11974         # $matching_token_to_go[$i] equal to those tokens at which we would
11975         # accept vertical alignment.
11976
11977         # nothing to do if we aren't allowed to change whitespace
11978         if ( !$rOpts_add_whitespace ) {
11979             for my $i ( 0 .. $max_index_to_go ) {
11980                 $matching_token_to_go[$i] = '';
11981             }
11982             return;
11983         }
11984
11985         my ( $ri_first, $ri_last ) = @_;
11986
11987         # remember the index of last nonblank token before any sidecomment
11988         my $i_terminal = $max_index_to_go;
11989         if ( $types_to_go[$i_terminal] eq '#' ) {
11990             if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
11991                 if ( $i_terminal > 0 ) { --$i_terminal }
11992             }
11993         }
11994
11995         # look at each line of this batch..
11996         my $last_vertical_alignment_before_index;
11997         my $vert_last_nonblank_type;
11998         my $vert_last_nonblank_token;
11999         my $vert_last_nonblank_block_type;
12000         my $max_line = @$ri_first - 1;
12001         my ( $i, $type, $token, $block_type, $alignment_type );
12002         my ( $ibeg, $iend, $line );
12003
12004         foreach $line ( 0 .. $max_line ) {
12005             $ibeg                                 = $$ri_first[$line];
12006             $iend                                 = $$ri_last[$line];
12007             $last_vertical_alignment_before_index = -1;
12008             $vert_last_nonblank_type              = '';
12009             $vert_last_nonblank_token             = '';
12010             $vert_last_nonblank_block_type        = '';
12011
12012             # look at each token in this output line..
12013             foreach $i ( $ibeg .. $iend ) {
12014                 $alignment_type = '';
12015                 $type           = $types_to_go[$i];
12016                 $block_type     = $block_type_to_go[$i];
12017                 $token          = $tokens_to_go[$i];
12018
12019                 # check for flag indicating that we should not align
12020                 # this token
12021                 if ( $matching_token_to_go[$i] ) {
12022                     $matching_token_to_go[$i] = '';
12023                     next;
12024                 }
12025
12026                 #--------------------------------------------------------
12027                 # First see if we want to align BEFORE this token
12028                 #--------------------------------------------------------
12029
12030                 # The first possible token that we can align before
12031                 # is index 2 because: 1) it doesn't normally make sense to
12032                 # align before the first token and 2) the second
12033                 # token must be a blank if we are to align before
12034                 # the third
12035                 if ( $i < $ibeg + 2 ) { }
12036
12037                 # must follow a blank token
12038                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
12039
12040                 # align a side comment --
12041                 elsif ( $type eq '#' ) {
12042
12043                     unless (
12044
12045                         # it is a static side comment
12046                         (
12047                                $rOpts->{'static-side-comments'}
12048                             && $token =~ /$static_side_comment_pattern/o
12049                         )
12050
12051                         # or a closing side comment
12052                         || (   $vert_last_nonblank_block_type
12053                             && $token =~
12054                             /$closing_side_comment_prefix_pattern/o )
12055                       )
12056                     {
12057                         $alignment_type = $type;
12058                     }    ## Example of a static side comment
12059                 }
12060
12061                 # otherwise, do not align two in a row to create a
12062                 # blank field
12063                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
12064
12065                 # align before one of these keywords
12066                 # (within a line, since $i>1)
12067                 elsif ( $type eq 'k' ) {
12068
12069                     #  /^(if|unless|and|or|eq|ne)$/
12070                     if ( $is_vertical_alignment_keyword{$token} ) {
12071                         $alignment_type = $token;
12072                     }
12073                 }
12074
12075                 # align before one of these types..
12076                 # Note: add '.' after new vertical aligner is operational
12077                 elsif ( $is_vertical_alignment_type{$type} ) {
12078                     $alignment_type = $token;
12079
12080                     # Do not align a terminal token.  Although it might
12081                     # occasionally look ok to do this, it has been found to be
12082                     # a good general rule.  The main problems are:
12083                     # (1) that the terminal token (such as an = or :) might get
12084                     # moved far to the right where it is hard to see because
12085                     # nothing follows it, and
12086                     # (2) doing so may prevent other good alignments.
12087                     if ( $i == $iend || $i >= $i_terminal ) {
12088                         $alignment_type = "";
12089                     }
12090
12091                     # Do not align leading ': (' or '. ('.  This would prevent
12092                     # alignment in something like the following:
12093                     #   $extra_space .=
12094                     #       ( $input_line_number < 10 )  ? "  "
12095                     #     : ( $input_line_number < 100 ) ? " "
12096                     #     :                                "";
12097                     # or
12098                     #  $code =
12099                     #      ( $case_matters ? $accessor : " lc($accessor) " )
12100                     #    . ( $yesno        ? " eq "       : " ne " )
12101                     if (   $i == $ibeg + 2
12102                         && $types_to_go[$ibeg] =~ /^[\.\:]$/
12103                         && $types_to_go[ $i - 1 ] eq 'b' )
12104                     {
12105                         $alignment_type = "";
12106                     }
12107
12108                     # For a paren after keyword, only align something like this:
12109                     #    if    ( $a ) { &a }
12110                     #    elsif ( $b ) { &b }
12111                     if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
12112                         $alignment_type = ""
12113                           unless $vert_last_nonblank_token =~
12114                               /^(if|unless|elsif)$/;
12115                     }
12116
12117                     # be sure the alignment tokens are unique
12118                     # This didn't work well: reason not determined
12119                     # if ($token ne $type) {$alignment_type .= $type}
12120                 }
12121
12122                 # NOTE: This is deactivated because it causes the previous
12123                 # if/elsif alignment to fail
12124                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
12125                 #{ $alignment_type = $type; }
12126
12127                 if ($alignment_type) {
12128                     $last_vertical_alignment_before_index = $i;
12129                 }
12130
12131                 #--------------------------------------------------------
12132                 # Next see if we want to align AFTER the previous nonblank
12133                 #--------------------------------------------------------
12134
12135                 # We want to line up ',' and interior ';' tokens, with the added
12136                 # space AFTER these tokens.  (Note: interior ';' is included
12137                 # because it may occur in short blocks).
12138                 if (
12139
12140                     # we haven't already set it
12141                     !$alignment_type
12142
12143                     # and its not the first token of the line
12144                     && ( $i > $ibeg )
12145
12146                     # and it follows a blank
12147                     && $types_to_go[ $i - 1 ] eq 'b'
12148
12149                     # and previous token IS one of these:
12150                     && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12151
12152                     # and it's NOT one of these
12153                     && ( $type !~ /^[b\#\)\]\}]$/ )
12154
12155                     # then go ahead and align
12156                   )
12157
12158                 {
12159                     $alignment_type = $vert_last_nonblank_type;
12160                 }
12161
12162                 #--------------------------------------------------------
12163                 # then store the value
12164                 #--------------------------------------------------------
12165                 $matching_token_to_go[$i] = $alignment_type;
12166                 if ( $type ne 'b' ) {
12167                     $vert_last_nonblank_type       = $type;
12168                     $vert_last_nonblank_token      = $token;
12169                     $vert_last_nonblank_block_type = $block_type;
12170                 }
12171             }
12172         }
12173     }
12174 }
12175
12176 sub terminal_type {
12177
12178     #    returns type of last token on this line (terminal token), as follows:
12179     #    returns # for a full-line comment
12180     #    returns ' ' for a blank line
12181     #    otherwise returns final token type
12182
12183     my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
12184
12185     # check for full-line comment..
12186     if ( $$rtype[$ibeg] eq '#' ) {
12187         return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
12188     }
12189     else {
12190
12191         # start at end and walk bakwards..
12192         for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
12193
12194             # skip past any side comment and blanks
12195             next if ( $$rtype[$i] eq 'b' );
12196             next if ( $$rtype[$i] eq '#' );
12197
12198             # found it..make sure it is a BLOCK termination,
12199             # but hide a terminal } after sort/grep/map because it is not
12200             # necessarily the end of the line.  (terminal.t)
12201             my $terminal_type = $$rtype[$i];
12202             if (
12203                 $terminal_type eq '}'
12204                 && ( !$$rblock_type[$i]
12205                     || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
12206               )
12207             {
12208                 $terminal_type = 'b';
12209             }
12210             return wantarray ? ( $terminal_type, $i ) : $terminal_type;
12211         }
12212
12213         # empty line
12214         return wantarray ? ( ' ', $ibeg ) : ' ';
12215     }
12216 }
12217
12218 {
12219     my %is_good_keyword_breakpoint;
12220     my %is_lt_gt_le_ge;
12221
12222     sub set_bond_strengths {
12223
12224         BEGIN {
12225
12226             @_ = qw(if unless while until for foreach);
12227             @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12228
12229             @_ = qw(lt gt le ge);
12230             @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12231
12232             ###############################################################
12233             # NOTE: NO_BREAK's set here are HINTS which may not be honored;
12234             # essential NO_BREAKS's must be enforced in section 2, below.
12235             ###############################################################
12236
12237             # adding NEW_TOKENS: add a left and right bond strength by
12238             # mimmicking what is done for an existing token type.  You
12239             # can skip this step at first and take the default, then
12240             # tweak later to get desired results.
12241
12242             # The bond strengths should roughly follow precenence order where
12243             # possible.  If you make changes, please check the results very
12244             # carefully on a variety of scripts.
12245
12246             # no break around possible filehandle
12247             $left_bond_strength{'Z'}  = NO_BREAK;
12248             $right_bond_strength{'Z'} = NO_BREAK;
12249
12250             # never put a bare word on a new line:
12251             # example print (STDERR, "bla"); will fail with break after (
12252             $left_bond_strength{'w'} = NO_BREAK;
12253
12254         # blanks always have infinite strength to force breaks after real tokens
12255             $right_bond_strength{'b'} = NO_BREAK;
12256
12257             # try not to break on exponentation
12258             @_                       = qw" ** .. ... <=> ";
12259             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12260             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12261
12262             # The comma-arrow has very low precedence but not a good break point
12263             $left_bond_strength{'=>'}  = NO_BREAK;
12264             $right_bond_strength{'=>'} = NOMINAL;
12265
12266             # ok to break after label
12267             $left_bond_strength{'J'}  = NO_BREAK;
12268             $right_bond_strength{'J'} = NOMINAL;
12269             $left_bond_strength{'j'}  = STRONG;
12270             $right_bond_strength{'j'} = STRONG;
12271             $left_bond_strength{'A'}  = STRONG;
12272             $right_bond_strength{'A'} = STRONG;
12273
12274             $left_bond_strength{'->'}  = STRONG;
12275             $right_bond_strength{'->'} = VERY_STRONG;
12276
12277             # breaking AFTER modulus operator is ok:
12278             @_ = qw" % ";
12279             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12280             @right_bond_strength{@_} =
12281               ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
12282
12283             # Break AFTER math operators * and /
12284             @_                       = qw" * / x  ";
12285             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12286             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12287
12288             # Break AFTER weakest math operators + and -
12289             # Make them weaker than * but a bit stronger than '.'
12290             @_ = qw" + - ";
12291             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12292             @right_bond_strength{@_} =
12293               ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
12294
12295             # breaking BEFORE these is just ok:
12296             @_                       = qw" >> << ";
12297             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12298             @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
12299
12300             # breaking before the string concatenation operator seems best
12301             # because it can be hard to see at the end of a line
12302             $right_bond_strength{'.'} = STRONG;
12303             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
12304
12305             @_                       = qw"} ] ) ";
12306             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12307             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12308
12309             # make these a little weaker than nominal so that they get
12310             # favored for end-of-line characters
12311             @_ = qw"!= == =~ !~ ~~ !~~";
12312             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12313             @right_bond_strength{@_} =
12314               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12315
12316             # break AFTER these
12317             @_ = qw" < >  | & >= <=";
12318             @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12319             @right_bond_strength{@_} =
12320               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12321
12322             # breaking either before or after a quote is ok
12323             # but bias for breaking before a quote
12324             $left_bond_strength{'Q'}  = NOMINAL;
12325             $right_bond_strength{'Q'} = NOMINAL + 0.02;
12326             $left_bond_strength{'q'}  = NOMINAL;
12327             $right_bond_strength{'q'} = NOMINAL;
12328
12329             # starting a line with a keyword is usually ok
12330             $left_bond_strength{'k'} = NOMINAL;
12331
12332             # we usually want to bond a keyword strongly to what immediately
12333             # follows, rather than leaving it stranded at the end of a line
12334             $right_bond_strength{'k'} = STRONG;
12335
12336             $left_bond_strength{'G'}  = NOMINAL;
12337             $right_bond_strength{'G'} = STRONG;
12338
12339             # it is good to break AFTER various assignment operators
12340             @_ = qw(
12341               = **= += *= &= <<= &&=
12342               -= /= |= >>= ||= //=
12343               .= %= ^=
12344               x=
12345             );
12346             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12347             @right_bond_strength{@_} =
12348               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12349
12350             # break BEFORE '&&' and '||' and '//'
12351             # set strength of '||' to same as '=' so that chains like
12352             # $a = $b || $c || $d   will break before the first '||'
12353             $right_bond_strength{'||'} = NOMINAL;
12354             $left_bond_strength{'||'}  = $right_bond_strength{'='};
12355
12356             # same thing for '//'
12357             $right_bond_strength{'//'} = NOMINAL;
12358             $left_bond_strength{'//'}  = $right_bond_strength{'='};
12359
12360             # set strength of && a little higher than ||
12361             $right_bond_strength{'&&'} = NOMINAL;
12362             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
12363
12364             $left_bond_strength{';'}  = VERY_STRONG;
12365             $right_bond_strength{';'} = VERY_WEAK;
12366             $left_bond_strength{'f'}  = VERY_STRONG;
12367
12368             # make right strength of for ';' a little less than '='
12369             # to make for contents break after the ';' to avoid this:
12370             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12371             #     $number_of_fields )
12372             # and make it weaker than ',' and 'and' too
12373             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12374
12375             # The strengths of ?/: should be somewhere between
12376             # an '=' and a quote (NOMINAL),
12377             # make strength of ':' slightly less than '?' to help
12378             # break long chains of ? : after the colons
12379             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
12380             $right_bond_strength{':'} = NO_BREAK;
12381             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
12382             $right_bond_strength{'?'} = NO_BREAK;
12383
12384             $left_bond_strength{','}  = VERY_STRONG;
12385             $right_bond_strength{','} = VERY_WEAK;
12386
12387             # Set bond strengths of certain keywords
12388             # make 'or', 'err', 'and' slightly weaker than a ','
12389             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
12390             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
12391             $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
12392             $left_bond_strength{'xor'}  = NOMINAL;
12393             $right_bond_strength{'and'} = NOMINAL;
12394             $right_bond_strength{'or'}  = NOMINAL;
12395             $right_bond_strength{'err'} = NOMINAL;
12396             $right_bond_strength{'xor'} = STRONG;
12397         }
12398
12399         # patch-its always ok to break at end of line
12400         $nobreak_to_go[$max_index_to_go] = 0;
12401
12402         # adding a small 'bias' to strengths is a simple way to make a line
12403         # break at the first of a sequence of identical terms.  For example,
12404         # to force long string of conditional operators to break with
12405         # each line ending in a ':', we can add a small number to the bond
12406         # strength of each ':'
12407         my $colon_bias = 0;
12408         my $amp_bias   = 0;
12409         my $bar_bias   = 0;
12410         my $and_bias   = 0;
12411         my $or_bias    = 0;
12412         my $dot_bias   = 0;
12413         my $f_bias     = 0;
12414         my $code_bias  = -.01;
12415         my $type       = 'b';
12416         my $token      = ' ';
12417         my $last_type;
12418         my $last_nonblank_type  = $type;
12419         my $last_nonblank_token = $token;
12420         my $delta_bias          = 0.0001;
12421         my $list_str            = $left_bond_strength{'?'};
12422
12423         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
12424             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12425         );
12426
12427         # preliminary loop to compute bond strengths
12428         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12429             $last_type = $type;
12430             if ( $type ne 'b' ) {
12431                 $last_nonblank_type  = $type;
12432                 $last_nonblank_token = $token;
12433             }
12434             $type = $types_to_go[$i];
12435
12436             # strength on both sides of a blank is the same
12437             if ( $type eq 'b' && $last_type ne 'b' ) {
12438                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12439                 next;
12440             }
12441
12442             $token               = $tokens_to_go[$i];
12443             $block_type          = $block_type_to_go[$i];
12444             $i_next              = $i + 1;
12445             $next_type           = $types_to_go[$i_next];
12446             $next_token          = $tokens_to_go[$i_next];
12447             $total_nesting_depth = $nesting_depth_to_go[$i_next];
12448             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12449             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
12450             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12451
12452             # Some token chemistry...  The decision about where to break a
12453             # line depends upon a "bond strength" between tokens.  The LOWER
12454             # the bond strength, the MORE likely a break.  The strength
12455             # values are based on trial-and-error, and need to be tweaked
12456             # occasionally to get desired results.  Things to keep in mind
12457             # are:
12458             #   1. relative strengths are important.  small differences
12459             #      in strengths can make big formatting differences.
12460             #   2. each indentation level adds one unit of bond strength
12461             #   3. a value of NO_BREAK makes an unbreakable bond
12462             #   4. a value of VERY_WEAK is the strength of a ','
12463             #   5. values below NOMINAL are considered ok break points
12464             #   6. values above NOMINAL are considered poor break points
12465             # We are computing the strength of the bond between the current
12466             # token and the NEXT token.
12467             my $bond_str = VERY_STRONG;    # a default, high strength
12468
12469             #---------------------------------------------------------------
12470             # section 1:
12471             # use minimum of left and right bond strengths if defined;
12472             # digraphs and trigraphs like to break on their left
12473             #---------------------------------------------------------------
12474             my $bsr = $right_bond_strength{$type};
12475
12476             if ( !defined($bsr) ) {
12477
12478                 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
12479                     $bsr = STRONG;
12480                 }
12481                 else {
12482                     $bsr = VERY_STRONG;
12483                 }
12484             }
12485
12486             # define right bond strengths of certain keywords
12487             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12488                 $bsr = $right_bond_strength{$token};
12489             }
12490             elsif ( $token eq 'ne' or $token eq 'eq' ) {
12491                 $bsr = NOMINAL;
12492             }
12493             my $bsl = $left_bond_strength{$next_nonblank_type};
12494
12495             # set terminal bond strength to the nominal value
12496             # this will cause good preceding breaks to be retained
12497             if ( $i_next_nonblank > $max_index_to_go ) {
12498                 $bsl = NOMINAL;
12499             }
12500
12501             if ( !defined($bsl) ) {
12502
12503                 if (   $is_digraph{$next_nonblank_type}
12504                     || $is_trigraph{$next_nonblank_type} )
12505                 {
12506                     $bsl = WEAK;
12507                 }
12508                 else {
12509                     $bsl = VERY_STRONG;
12510                 }
12511             }
12512
12513             # define right bond strengths of certain keywords
12514             if ( $next_nonblank_type eq 'k'
12515                 && defined( $left_bond_strength{$next_nonblank_token} ) )
12516             {
12517                 $bsl = $left_bond_strength{$next_nonblank_token};
12518             }
12519             elsif ($next_nonblank_token eq 'ne'
12520                 or $next_nonblank_token eq 'eq' )
12521             {
12522                 $bsl = NOMINAL;
12523             }
12524             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12525                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12526             }
12527
12528             # Note: it might seem that we would want to keep a NO_BREAK if
12529             # either token has this value.  This didn't work, because in an
12530             # arrow list, it prevents the comma from separating from the
12531             # following bare word (which is probably quoted by its arrow).
12532             # So necessary NO_BREAK's have to be handled as special cases
12533             # in the final section.
12534             $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12535             my $bond_str_1 = $bond_str;
12536
12537             #---------------------------------------------------------------
12538             # section 2:
12539             # special cases
12540             #---------------------------------------------------------------
12541
12542             # allow long lines before final { in an if statement, as in:
12543             #    if (..........
12544             #      ..........)
12545             #    {
12546             #
12547             # Otherwise, the line before the { tends to be too short.
12548             if ( $type eq ')' ) {
12549                 if ( $next_nonblank_type eq '{' ) {
12550                     $bond_str = VERY_WEAK + 0.03;
12551                 }
12552             }
12553
12554             elsif ( $type eq '(' ) {
12555                 if ( $next_nonblank_type eq '{' ) {
12556                     $bond_str = NOMINAL;
12557                 }
12558             }
12559
12560             # break on something like '} (', but keep this stronger than a ','
12561             # example is in 'howe.pl'
12562             elsif ( $type eq 'R' or $type eq '}' ) {
12563                 if ( $next_nonblank_type eq '(' ) {
12564                     $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
12565                 }
12566             }
12567
12568             #-----------------------------------------------------------------
12569             # adjust bond strength bias
12570             #-----------------------------------------------------------------
12571
12572             elsif ( $type eq 'f' ) {
12573                 $bond_str += $f_bias;
12574                 $f_bias   += $delta_bias;
12575             }
12576
12577           # in long ?: conditionals, bias toward just one set per line (colon.t)
12578             elsif ( $type eq ':' ) {
12579                 if ( !$want_break_before{$type} ) {
12580                     $bond_str   += $colon_bias;
12581                     $colon_bias += $delta_bias;
12582                 }
12583             }
12584
12585             if (   $next_nonblank_type eq ':'
12586                 && $want_break_before{$next_nonblank_type} )
12587             {
12588                 $bond_str   += $colon_bias;
12589                 $colon_bias += $delta_bias;
12590             }
12591
12592             # if leading '.' is used, align all but 'short' quotes;
12593             # the idea is to not place something like "\n" on a single line.
12594             elsif ( $next_nonblank_type eq '.' ) {
12595                 if ( $want_break_before{'.'} ) {
12596                     unless (
12597                         $last_nonblank_type eq '.'
12598                         && (
12599                             length($token) <=
12600                             $rOpts_short_concatenation_item_length )
12601                         && ( $token !~ /^[\)\]\}]$/ )
12602                       )
12603                     {
12604                         $dot_bias += $delta_bias;
12605                     }
12606                     $bond_str += $dot_bias;
12607                 }
12608             }
12609             elsif ($next_nonblank_type eq '&&'
12610                 && $want_break_before{$next_nonblank_type} )
12611             {
12612                 $bond_str += $amp_bias;
12613                 $amp_bias += $delta_bias;
12614             }
12615             elsif ($next_nonblank_type eq '||'
12616                 && $want_break_before{$next_nonblank_type} )
12617             {
12618                 $bond_str += $bar_bias;
12619                 $bar_bias += $delta_bias;
12620             }
12621             elsif ( $next_nonblank_type eq 'k' ) {
12622
12623                 if (   $next_nonblank_token eq 'and'
12624                     && $want_break_before{$next_nonblank_token} )
12625                 {
12626                     $bond_str += $and_bias;
12627                     $and_bias += $delta_bias;
12628                 }
12629                 elsif ($next_nonblank_token =~ /^(or|err)$/
12630                     && $want_break_before{$next_nonblank_token} )
12631                 {
12632                     $bond_str += $or_bias;
12633                     $or_bias  += $delta_bias;
12634                 }
12635
12636                 # FIXME: needs more testing
12637                 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
12638                     $bond_str = $list_str if ( $bond_str > $list_str );
12639                 }
12640                 elsif ( $token eq 'err'
12641                     && !$want_break_before{$token} )
12642                 {
12643                     $bond_str += $or_bias;
12644                     $or_bias  += $delta_bias;
12645                 }
12646             }
12647
12648             if ( $type eq ':'
12649                 && !$want_break_before{$type} )
12650             {
12651                 $bond_str   += $colon_bias;
12652                 $colon_bias += $delta_bias;
12653             }
12654             elsif ( $type eq '&&'
12655                 && !$want_break_before{$type} )
12656             {
12657                 $bond_str += $amp_bias;
12658                 $amp_bias += $delta_bias;
12659             }
12660             elsif ( $type eq '||'
12661                 && !$want_break_before{$type} )
12662             {
12663                 $bond_str += $bar_bias;
12664                 $bar_bias += $delta_bias;
12665             }
12666             elsif ( $type eq 'k' ) {
12667
12668                 if ( $token eq 'and'
12669                     && !$want_break_before{$token} )
12670                 {
12671                     $bond_str += $and_bias;
12672                     $and_bias += $delta_bias;
12673                 }
12674                 elsif ( $token eq 'or'
12675                     && !$want_break_before{$token} )
12676                 {
12677                     $bond_str += $or_bias;
12678                     $or_bias  += $delta_bias;
12679                 }
12680             }
12681
12682             # keep matrix and hash indices together
12683             # but make them a little below STRONG to allow breaking open
12684             # something like {'some-word'}{'some-very-long-word'} at the }{
12685             # (bracebrk.t)
12686             if (   ( $type eq ']' or $type eq 'R' )
12687                 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
12688               )
12689             {
12690                 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
12691             }
12692
12693             if ( $next_nonblank_token =~ /^->/ ) {
12694
12695                 # increase strength to the point where a break in the following
12696                 # will be after the opening paren rather than at the arrow:
12697                 #    $a->$b($c);
12698                 if ( $type eq 'i' ) {
12699                     $bond_str = 1.45 * STRONG;
12700                 }
12701
12702                 elsif ( $type =~ /^[\)\]\}R]$/ ) {
12703                     $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
12704                 }
12705
12706                 # otherwise make strength before an '->' a little over a '+'
12707                 else {
12708                     if ( $bond_str <= NOMINAL ) {
12709                         $bond_str = NOMINAL + 0.01;
12710                     }
12711                 }
12712             }
12713
12714             if ( $token eq ')' && $next_nonblank_token eq '[' ) {
12715                 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
12716             }
12717
12718             # map1.t -- correct for a quirk in perl
12719             if (   $token eq '('
12720                 && $next_nonblank_type eq 'i'
12721                 && $last_nonblank_type eq 'k'
12722                 && $is_sort_map_grep{$last_nonblank_token} )
12723
12724               #     /^(sort|map|grep)$/ )
12725             {
12726                 $bond_str = NO_BREAK;
12727             }
12728
12729             # extrude.t: do not break before paren at:
12730             #    -l pid_filename(
12731             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
12732                 $bond_str = NO_BREAK;
12733             }
12734
12735             # good to break after end of code blocks
12736             if ( $type eq '}' && $block_type ) {
12737
12738                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
12739                 $code_bias += $delta_bias;
12740             }
12741
12742             if ( $type eq 'k' ) {
12743
12744                 # allow certain control keywords to stand out
12745                 if (   $next_nonblank_type eq 'k'
12746                     && $is_last_next_redo_return{$token} )
12747                 {
12748                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
12749                 }
12750
12751 # Don't break after keyword my.  This is a quick fix for a
12752 # rare problem with perl. An example is this line from file
12753 # Container.pm:
12754 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
12755
12756                 if ( $token eq 'my' ) {
12757                     $bond_str = NO_BREAK;
12758                 }
12759
12760             }
12761
12762             # good to break before 'if', 'unless', etc
12763             if ( $is_if_brace_follower{$next_nonblank_token} ) {
12764                 $bond_str = VERY_WEAK;
12765             }
12766
12767             if ( $next_nonblank_type eq 'k' ) {
12768
12769                 # keywords like 'unless', 'if', etc, within statements
12770                 # make good breaks
12771                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
12772                     $bond_str = VERY_WEAK / 1.05;
12773                 }
12774             }
12775
12776             # try not to break before a comma-arrow
12777             elsif ( $next_nonblank_type eq '=>' ) {
12778                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
12779             }
12780
12781          #----------------------------------------------------------------------
12782          # only set NO_BREAK's from here on
12783          #----------------------------------------------------------------------
12784             if ( $type eq 'C' or $type eq 'U' ) {
12785
12786                 # use strict requires that bare word and => not be separated
12787                 if ( $next_nonblank_type eq '=>' ) {
12788                     $bond_str = NO_BREAK;
12789                 }
12790
12791                 # Never break between a bareword and a following paren because
12792                 # perl may give an error.  For example, if a break is placed
12793                 # between 'to_filehandle' and its '(' the following line will
12794                 # give a syntax error [Carp.pm]: my( $no) =fileno(
12795                 # to_filehandle( $in)) ;
12796                 if ( $next_nonblank_token eq '(' ) {
12797                     $bond_str = NO_BREAK;
12798                 }
12799             }
12800
12801            # use strict requires that bare word within braces not start new line
12802             elsif ( $type eq 'L' ) {
12803
12804                 if ( $next_nonblank_type eq 'w' ) {
12805                     $bond_str = NO_BREAK;
12806                 }
12807             }
12808
12809             # in older version of perl, use strict can cause problems with
12810             # breaks before bare words following opening parens.  For example,
12811             # this will fail under older versions if a break is made between
12812             # '(' and 'MAIL':
12813             #  use strict;
12814             #  open( MAIL, "a long filename or command");
12815             #  close MAIL;
12816             elsif ( $type eq '{' ) {
12817
12818                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
12819
12820                     # but it's fine to break if the word is followed by a '=>'
12821                     # or if it is obviously a sub call
12822                     my $i_next_next_nonblank = $i_next_nonblank + 1;
12823                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
12824                     if (   $next_next_type eq 'b'
12825                         && $i_next_nonblank < $max_index_to_go )
12826                     {
12827                         $i_next_next_nonblank++;
12828                         $next_next_type = $types_to_go[$i_next_next_nonblank];
12829                     }
12830
12831                     ##if ( $next_next_type ne '=>' ) {
12832                     # these are ok: '->xxx', '=>', '('
12833
12834                     # We'll check for an old breakpoint and keep a leading
12835                     # bareword if it was that way in the input file.
12836                     # Presumably it was ok that way.  For example, the
12837                     # following would remain unchanged:
12838                     #
12839                     # @months = (
12840                     #   January,   February, March,    April,
12841                     #   May,       June,     July,     August,
12842                     #   September, October,  November, December,
12843                     # );
12844                     #
12845                     # This should be sufficient:
12846                     if ( !$old_breakpoint_to_go[$i]
12847                         && ( $next_next_type eq ',' || $next_next_type eq '}' )
12848                       )
12849                     {
12850                         $bond_str = NO_BREAK;
12851                     }
12852                 }
12853             }
12854
12855             elsif ( $type eq 'w' ) {
12856
12857                 if ( $next_nonblank_type eq 'R' ) {
12858                     $bond_str = NO_BREAK;
12859                 }
12860
12861                 # use strict requires that bare word and => not be separated
12862                 if ( $next_nonblank_type eq '=>' ) {
12863                     $bond_str = NO_BREAK;
12864                 }
12865             }
12866
12867             # in fact, use strict hates bare words on any new line.  For
12868             # example, a break before the underscore here provokes the
12869             # wrath of use strict:
12870             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
12871             elsif ( $type eq 'F' ) {
12872                 $bond_str = NO_BREAK;
12873             }
12874
12875             # use strict does not allow separating type info from trailing { }
12876             # testfile is readmail.pl
12877             elsif ( $type eq 't' or $type eq 'i' ) {
12878
12879                 if ( $next_nonblank_type eq 'L' ) {
12880                     $bond_str = NO_BREAK;
12881                 }
12882             }
12883
12884             # Do not break between a possible filehandle and a ? or / and do
12885             # not introduce a break after it if there is no blank
12886             # (extrude.t)
12887             elsif ( $type eq 'Z' ) {
12888
12889                 # dont break..
12890                 if (
12891
12892                     # if there is no blank and we do not want one. Examples:
12893                     #    print $x++    # do not break after $x
12894                     #    print HTML"HELLO"   # break ok after HTML
12895                     (
12896                            $next_type ne 'b'
12897                         && defined( $want_left_space{$next_type} )
12898                         && $want_left_space{$next_type} == WS_NO
12899                     )
12900
12901                     # or we might be followed by the start of a quote
12902                     || $next_nonblank_type =~ /^[\/\?]$/
12903                   )
12904                 {
12905                     $bond_str = NO_BREAK;
12906                 }
12907             }
12908
12909             # Do not break before a possible file handle
12910             if ( $next_nonblank_type eq 'Z' ) {
12911                 $bond_str = NO_BREAK;
12912             }
12913
12914             # As a defensive measure, do not break between a '(' and a
12915             # filehandle.  In some cases, this can cause an error.  For
12916             # example, the following program works:
12917             #    my $msg="hi!\n";
12918             #    print
12919             #    ( STDOUT
12920             #    $msg
12921             #    );
12922             #
12923             # But this program fails:
12924             #    my $msg="hi!\n";
12925             #    print
12926             #    (
12927             #    STDOUT
12928             #    $msg
12929             #    );
12930             #
12931             # This is normally only a problem with the 'extrude' option
12932             if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
12933                 $bond_str = NO_BREAK;
12934             }
12935
12936             # Breaking before a ++ can cause perl to guess wrong. For
12937             # example the following line will cause a syntax error
12938             # with -extrude if we break between '$i' and '++' [fixstyle2]
12939             #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
12940             elsif ( $next_nonblank_type eq '++' ) {
12941                 $bond_str = NO_BREAK;
12942             }
12943
12944             # Breaking before a ? before a quote can cause trouble if
12945             # they are not separated by a blank.
12946             # Example: a syntax error occurs if you break before the ? here
12947             #  my$logic=join$all?' && ':' || ',@regexps;
12948             # From: Professional_Perl_Programming_Code/multifind.pl
12949             elsif ( $next_nonblank_type eq '?' ) {
12950                 $bond_str = NO_BREAK
12951                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
12952             }
12953
12954             # Breaking before a . followed by a number
12955             # can cause trouble if there is no intervening space
12956             # Example: a syntax error occurs if you break before the .2 here
12957             #  $str .= pack($endian.2, ensurrogate($ord));
12958             # From: perl58/Unicode.pm
12959             elsif ( $next_nonblank_type eq '.' ) {
12960                 $bond_str = NO_BREAK
12961                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
12962             }
12963
12964             # patch to put cuddled elses back together when on multiple
12965             # lines, as in: } \n else \n { \n
12966             if ($rOpts_cuddled_else) {
12967
12968                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
12969                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
12970                 {
12971                     $bond_str = NO_BREAK;
12972                 }
12973             }
12974
12975             # keep '}' together with ';'
12976             if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
12977                 $bond_str = NO_BREAK;
12978             }
12979
12980             # never break between sub name and opening paren
12981             if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
12982                 $bond_str = NO_BREAK;
12983             }
12984
12985             #---------------------------------------------------------------
12986             # section 3:
12987             # now take nesting depth into account
12988             #---------------------------------------------------------------
12989             # final strength incorporates the bond strength and nesting depth
12990             my $strength;
12991
12992             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
12993                 if ( $total_nesting_depth > 0 ) {
12994                     $strength = $bond_str + $total_nesting_depth;
12995                 }
12996                 else {
12997                     $strength = $bond_str;
12998                 }
12999             }
13000             else {
13001                 $strength = NO_BREAK;
13002             }
13003
13004             # always break after side comment
13005             if ( $type eq '#' ) { $strength = 0 }
13006
13007             $bond_strength_to_go[$i] = $strength;
13008
13009             FORMATTER_DEBUG_FLAG_BOND && do {
13010                 my $str = substr( $token, 0, 15 );
13011                 $str .= ' ' x ( 16 - length($str) );
13012                 print
13013 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
13014             };
13015         }
13016     }
13017
13018 }
13019
13020 sub pad_array_to_go {
13021
13022     # to simplify coding in scan_list and set_bond_strengths, it helps
13023     # to create some extra blank tokens at the end of the arrays
13024     $tokens_to_go[ $max_index_to_go + 1 ] = '';
13025     $tokens_to_go[ $max_index_to_go + 2 ] = '';
13026     $types_to_go[ $max_index_to_go + 1 ]  = 'b';
13027     $types_to_go[ $max_index_to_go + 2 ]  = 'b';
13028     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
13029       $nesting_depth_to_go[$max_index_to_go];
13030
13031     #    /^[R\}\)\]]$/
13032     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
13033         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
13034
13035             # shouldn't happen:
13036             unless ( get_saw_brace_error() ) {
13037                 warning(
13038 "Program bug in scan_list: hit nesting error which should have been caught\n"
13039                 );
13040                 report_definite_bug();
13041             }
13042         }
13043         else {
13044             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
13045         }
13046     }
13047
13048     #       /^[L\{\(\[]$/
13049     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
13050         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
13051     }
13052 }
13053
13054 {    # begin scan_list
13055
13056     my (
13057         $block_type,                $current_depth,
13058         $depth,                     $i,
13059         $i_last_nonblank_token,     $last_colon_sequence_number,
13060         $last_nonblank_token,       $last_nonblank_type,
13061         $last_old_breakpoint_count, $minimum_depth,
13062         $next_nonblank_block_type,  $next_nonblank_token,
13063         $next_nonblank_type,        $old_breakpoint_count,
13064         $starting_breakpoint_count, $starting_depth,
13065         $token,                     $type,
13066         $type_sequence,
13067     );
13068
13069     my (
13070         @breakpoint_stack,              @breakpoint_undo_stack,
13071         @comma_index,                   @container_type,
13072         @identifier_count_stack,        @index_before_arrow,
13073         @interrupted_list,              @item_count_stack,
13074         @last_comma_index,              @last_dot_index,
13075         @last_nonblank_type,            @old_breakpoint_count_stack,
13076         @opening_structure_index_stack, @rfor_semicolon_list,
13077         @has_old_logical_breakpoints,   @rand_or_list,
13078         @i_equals,
13079     );
13080
13081     # routine to define essential variables when we go 'up' to
13082     # a new depth
13083     sub check_for_new_minimum_depth {
13084         my $depth = shift;
13085         if ( $depth < $minimum_depth ) {
13086
13087             $minimum_depth = $depth;
13088
13089             # these arrays need not retain values between calls
13090             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
13091             $container_type[$depth]                = "";
13092             $identifier_count_stack[$depth]        = 0;
13093             $index_before_arrow[$depth]            = -1;
13094             $interrupted_list[$depth]              = 1;
13095             $item_count_stack[$depth]              = 0;
13096             $last_nonblank_type[$depth]            = "";
13097             $opening_structure_index_stack[$depth] = -1;
13098
13099             $breakpoint_undo_stack[$depth]       = undef;
13100             $comma_index[$depth]                 = undef;
13101             $last_comma_index[$depth]            = undef;
13102             $last_dot_index[$depth]              = undef;
13103             $old_breakpoint_count_stack[$depth]  = undef;
13104             $has_old_logical_breakpoints[$depth] = 0;
13105             $rand_or_list[$depth]                = [];
13106             $rfor_semicolon_list[$depth]         = [];
13107             $i_equals[$depth]                    = -1;
13108
13109             # these arrays must retain values between calls
13110             if ( !defined( $has_broken_sublist[$depth] ) ) {
13111                 $dont_align[$depth]         = 0;
13112                 $has_broken_sublist[$depth] = 0;
13113                 $want_comma_break[$depth]   = 0;
13114             }
13115         }
13116     }
13117
13118     # routine to decide which commas to break at within a container;
13119     # returns:
13120     #   $bp_count = number of comma breakpoints set
13121     #   $do_not_break_apart = a flag indicating if container need not
13122     #     be broken open
13123     sub set_comma_breakpoints {
13124
13125         my $dd                 = shift;
13126         my $bp_count           = 0;
13127         my $do_not_break_apart = 0;
13128         if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
13129
13130             my $fbc = $forced_breakpoint_count;
13131
13132             # always open comma lists not preceded by keywords,
13133             # barewords, identifiers (that is, anything that doesn't
13134             # look like a function call)
13135             my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
13136
13137             set_comma_breakpoints_do(
13138                 $dd,
13139                 $opening_structure_index_stack[$dd],
13140                 $i,
13141                 $item_count_stack[$dd],
13142                 $identifier_count_stack[$dd],
13143                 $comma_index[$dd],
13144                 $next_nonblank_type,
13145                 $container_type[$dd],
13146                 $interrupted_list[$dd],
13147                 \$do_not_break_apart,
13148                 $must_break_open,
13149             );
13150             $bp_count = $forced_breakpoint_count - $fbc;
13151             $do_not_break_apart = 0 if $must_break_open;
13152         }
13153         return ( $bp_count, $do_not_break_apart );
13154     }
13155
13156     my %is_logical_container;
13157
13158     BEGIN {
13159         @_ = qw# if elsif unless while and or err not && | || ? : ! #;
13160         @is_logical_container{@_} = (1) x scalar(@_);
13161     }
13162
13163     sub set_for_semicolon_breakpoints {
13164         my $dd = shift;
13165         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
13166             set_forced_breakpoint($_);
13167         }
13168     }
13169
13170     sub set_logical_breakpoints {
13171         my $dd = shift;
13172         if (
13173                $item_count_stack[$dd] == 0
13174             && $is_logical_container{ $container_type[$dd] }
13175
13176             # TESTING:
13177             || $has_old_logical_breakpoints[$dd]
13178           )
13179         {
13180
13181             # Look for breaks in this order:
13182             # 0   1    2   3
13183             # or  and  ||  &&
13184             foreach my $i ( 0 .. 3 ) {
13185                 if ( $rand_or_list[$dd][$i] ) {
13186                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
13187                         set_forced_breakpoint($_);
13188                     }
13189
13190                     # break at any 'if' and 'unless' too
13191                     foreach ( @{ $rand_or_list[$dd][4] } ) {
13192                         set_forced_breakpoint($_);
13193                     }
13194                     $rand_or_list[$dd] = [];
13195                     last;
13196                 }
13197             }
13198         }
13199     }
13200
13201     sub is_unbreakable_container {
13202
13203         # never break a container of one of these types
13204         # because bad things can happen (map1.t)
13205         my $dd = shift;
13206         $is_sort_map_grep{ $container_type[$dd] };
13207     }
13208
13209     sub scan_list {
13210
13211         # This routine is responsible for setting line breaks for all lists,
13212         # so that hierarchical structure can be displayed and so that list
13213         # items can be vertically aligned.  The output of this routine is
13214         # stored in the array @forced_breakpoint_to_go, which is used to set
13215         # final breakpoints.
13216
13217         $starting_depth = $nesting_depth_to_go[0];
13218
13219         $block_type                 = ' ';
13220         $current_depth              = $starting_depth;
13221         $i                          = -1;
13222         $last_colon_sequence_number = -1;
13223         $last_nonblank_token        = ';';
13224         $last_nonblank_type         = ';';
13225         $last_nonblank_block_type   = ' ';
13226         $last_old_breakpoint_count  = 0;
13227         $minimum_depth = $current_depth + 1;    # forces update in check below
13228         $old_breakpoint_count      = 0;
13229         $starting_breakpoint_count = $forced_breakpoint_count;
13230         $token                     = ';';
13231         $type                      = ';';
13232         $type_sequence             = '';
13233
13234         check_for_new_minimum_depth($current_depth);
13235
13236         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13237         my $want_previous_breakpoint = -1;
13238
13239         my $saw_good_breakpoint;
13240         my $i_line_end   = -1;
13241         my $i_line_start = -1;
13242
13243         # loop over all tokens in this batch
13244         while ( ++$i <= $max_index_to_go ) {
13245             if ( $type ne 'b' ) {
13246                 $i_last_nonblank_token    = $i - 1;
13247                 $last_nonblank_type       = $type;
13248                 $last_nonblank_token      = $token;
13249                 $last_nonblank_block_type = $block_type;
13250             }
13251             $type          = $types_to_go[$i];
13252             $block_type    = $block_type_to_go[$i];
13253             $token         = $tokens_to_go[$i];
13254             $type_sequence = $type_sequence_to_go[$i];
13255             my $next_type       = $types_to_go[ $i + 1 ];
13256             my $next_token      = $tokens_to_go[ $i + 1 ];
13257             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13258             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
13259             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
13260             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13261
13262             # set break if flag was set
13263             if ( $want_previous_breakpoint >= 0 ) {
13264                 set_forced_breakpoint($want_previous_breakpoint);
13265                 $want_previous_breakpoint = -1;
13266             }
13267
13268             $last_old_breakpoint_count = $old_breakpoint_count;
13269             if ( $old_breakpoint_to_go[$i] ) {
13270                 $i_line_end   = $i;
13271                 $i_line_start = $i_next_nonblank;
13272
13273                 $old_breakpoint_count++;
13274
13275                 # Break before certain keywords if user broke there and
13276                 # this is a 'safe' break point. The idea is to retain
13277                 # any preferred breaks for sequential list operations,
13278                 # like a schwartzian transform.
13279                 if ($rOpts_break_at_old_keyword_breakpoints) {
13280                     if (
13281                            $next_nonblank_type eq 'k'
13282                         && $is_keyword_returning_list{$next_nonblank_token}
13283                         && (   $type =~ /^[=\)\]\}Riw]$/
13284                             || $type eq 'k'
13285                             && $is_keyword_returning_list{$token} )
13286                       )
13287                     {
13288
13289                         # we actually have to set this break next time through
13290                         # the loop because if we are at a closing token (such
13291                         # as '}') which forms a one-line block, this break might
13292                         # get undone.
13293                         $want_previous_breakpoint = $i;
13294                     }
13295                 }
13296             }
13297             next if ( $type eq 'b' );
13298             $depth = $nesting_depth_to_go[ $i + 1 ];
13299
13300             # safety check - be sure we always break after a comment
13301             # Shouldn't happen .. an error here probably means that the
13302             # nobreak flag did not get turned off correctly during
13303             # formatting.
13304             if ( $type eq '#' ) {
13305                 if ( $i != $max_index_to_go ) {
13306                     warning(
13307 "Non-fatal program bug: backup logic needed to break after a comment\n"
13308                     );
13309                     report_definite_bug();
13310                     $nobreak_to_go[$i] = 0;
13311                     set_forced_breakpoint($i);
13312                 }
13313             }
13314
13315             # Force breakpoints at certain tokens in long lines.
13316             # Note that such breakpoints will be undone later if these tokens
13317             # are fully contained within parens on a line.
13318             if (
13319
13320                 # break before a keyword within a line
13321                 $type eq 'k'
13322                 && $i > 0
13323
13324                 # if one of these keywords:
13325                 && $token =~ /^(if|unless|while|until|for)$/
13326
13327                 # but do not break at something like '1 while'
13328                 && ( $last_nonblank_type ne 'n' || $i > 2 )
13329
13330                 # and let keywords follow a closing 'do' brace
13331                 && $last_nonblank_block_type ne 'do'
13332
13333                 && (
13334                     $is_long_line
13335
13336                     # or container is broken (by side-comment, etc)
13337                     || (   $next_nonblank_token eq '('
13338                         && $mate_index_to_go[$i_next_nonblank] < $i )
13339                 )
13340               )
13341             {
13342                 set_forced_breakpoint( $i - 1 );
13343             }
13344
13345             # remember locations of '||'  and '&&' for possible breaks if we
13346             # decide this is a long logical expression.
13347             if ( $type eq '||' ) {
13348                 push @{ $rand_or_list[$depth][2] }, $i;
13349                 ++$has_old_logical_breakpoints[$depth]
13350                   if ( ( $i == $i_line_start || $i == $i_line_end )
13351                     && $rOpts_break_at_old_logical_breakpoints );
13352             }
13353             elsif ( $type eq '&&' ) {
13354                 push @{ $rand_or_list[$depth][3] }, $i;
13355                 ++$has_old_logical_breakpoints[$depth]
13356                   if ( ( $i == $i_line_start || $i == $i_line_end )
13357                     && $rOpts_break_at_old_logical_breakpoints );
13358             }
13359             elsif ( $type eq 'f' ) {
13360                 push @{ $rfor_semicolon_list[$depth] }, $i;
13361             }
13362             elsif ( $type eq 'k' ) {
13363                 if ( $token eq 'and' ) {
13364                     push @{ $rand_or_list[$depth][1] }, $i;
13365                     ++$has_old_logical_breakpoints[$depth]
13366                       if ( ( $i == $i_line_start || $i == $i_line_end )
13367                         && $rOpts_break_at_old_logical_breakpoints );
13368                 }
13369
13370                 # break immediately at 'or's which are probably not in a logical
13371                 # block -- but we will break in logical breaks below so that
13372                 # they do not add to the forced_breakpoint_count
13373                 elsif ( $token eq 'or' ) {
13374                     push @{ $rand_or_list[$depth][0] }, $i;
13375                     ++$has_old_logical_breakpoints[$depth]
13376                       if ( ( $i == $i_line_start || $i == $i_line_end )
13377                         && $rOpts_break_at_old_logical_breakpoints );
13378                     if ( $is_logical_container{ $container_type[$depth] } ) {
13379                     }
13380                     else {
13381                         if ($is_long_line) { set_forced_breakpoint($i) }
13382                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
13383                             && $rOpts_break_at_old_logical_breakpoints )
13384                         {
13385                             $saw_good_breakpoint = 1;
13386                         }
13387                     }
13388                 }
13389                 elsif ( $token eq 'if' || $token eq 'unless' ) {
13390                     push @{ $rand_or_list[$depth][4] }, $i;
13391                     if ( ( $i == $i_line_start || $i == $i_line_end )
13392                         && $rOpts_break_at_old_logical_breakpoints )
13393                     {
13394                         set_forced_breakpoint($i);
13395                     }
13396                 }
13397             }
13398             elsif ( $is_assignment{$type} ) {
13399                 $i_equals[$depth] = $i;
13400             }
13401
13402             if ($type_sequence) {
13403
13404                 # handle any postponed closing breakpoints
13405                 if ( $token =~ /^[\)\]\}\:]$/ ) {
13406                     if ( $type eq ':' ) {
13407                         $last_colon_sequence_number = $type_sequence;
13408
13409                         # TESTING: retain break at a ':' line break
13410                         if ( ( $i == $i_line_start || $i == $i_line_end )
13411                             && $rOpts_break_at_old_ternary_breakpoints )
13412                         {
13413
13414                             # TESTING:
13415                             set_forced_breakpoint($i);
13416
13417                             # break at previous '='
13418                             if ( $i_equals[$depth] > 0 ) {
13419                                 set_forced_breakpoint( $i_equals[$depth] );
13420                                 $i_equals[$depth] = -1;
13421                             }
13422                         }
13423                     }
13424                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
13425                         my $inc = ( $type eq ':' ) ? 0 : 1;
13426                         set_forced_breakpoint( $i - $inc );
13427                         delete $postponed_breakpoint{$type_sequence};
13428                     }
13429                 }
13430
13431                 # set breaks at ?/: if they will get separated (and are
13432                 # not a ?/: chain), or if the '?' is at the end of the
13433                 # line
13434                 elsif ( $token eq '?' ) {
13435                     my $i_colon = $mate_index_to_go[$i];
13436                     if (
13437                         $i_colon <= 0  # the ':' is not in this batch
13438                         || $i == 0     # this '?' is the first token of the line
13439                         || $i ==
13440                         $max_index_to_go    # or this '?' is the last token
13441                       )
13442                     {
13443
13444                         # don't break at a '?' if preceded by ':' on
13445                         # this line of previous ?/: pair on this line.
13446                         # This is an attempt to preserve a chain of ?/:
13447                         # expressions (elsif2.t).  And don't break if
13448                         # this has a side comment.
13449                         set_forced_breakpoint($i)
13450                           unless (
13451                             $type_sequence == (
13452                                 $last_colon_sequence_number +
13453                                   TYPE_SEQUENCE_INCREMENT
13454                             )
13455                             || $tokens_to_go[$max_index_to_go] eq '#'
13456                           );
13457                         set_closing_breakpoint($i);
13458                     }
13459                 }
13460             }
13461
13462 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
13463
13464             #------------------------------------------------------------
13465             # Handle Increasing Depth..
13466             #
13467             # prepare for a new list when depth increases
13468             # token $i is a '(','{', or '['
13469             #------------------------------------------------------------
13470             if ( $depth > $current_depth ) {
13471
13472                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
13473                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
13474                 $has_broken_sublist[$depth]     = 0;
13475                 $identifier_count_stack[$depth] = 0;
13476                 $index_before_arrow[$depth]     = -1;
13477                 $interrupted_list[$depth]       = 0;
13478                 $item_count_stack[$depth]       = 0;
13479                 $last_comma_index[$depth]       = undef;
13480                 $last_dot_index[$depth]         = undef;
13481                 $last_nonblank_type[$depth]     = $last_nonblank_type;
13482                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
13483                 $opening_structure_index_stack[$depth] = $i;
13484                 $rand_or_list[$depth]                  = [];
13485                 $rfor_semicolon_list[$depth]           = [];
13486                 $i_equals[$depth]                      = -1;
13487                 $want_comma_break[$depth]              = 0;
13488                 $container_type[$depth] =
13489                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13490                   ? $last_nonblank_token
13491                   : "";
13492                 $has_old_logical_breakpoints[$depth] = 0;
13493
13494                 # if line ends here then signal closing token to break
13495                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13496                 {
13497                     set_closing_breakpoint($i);
13498                 }
13499
13500                 # Not all lists of values should be vertically aligned..
13501                 $dont_align[$depth] =
13502
13503                   # code BLOCKS are handled at a higher level
13504                   ( $block_type ne "" )
13505
13506                   # certain paren lists
13507                   || ( $type eq '(' ) && (
13508
13509                     # it does not usually look good to align a list of
13510                     # identifiers in a parameter list, as in:
13511                     #    my($var1, $var2, ...)
13512                     # (This test should probably be refined, for now I'm just
13513                     # testing for any keyword)
13514                     ( $last_nonblank_type eq 'k' )
13515
13516                     # a trailing '(' usually indicates a non-list
13517                     || ( $next_nonblank_type eq '(' )
13518                   );
13519
13520                 # patch to outdent opening brace of long if/for/..
13521                 # statements (like this one).  See similar coding in
13522                 # set_continuation breaks.  We have also catch it here for
13523                 # short line fragments which otherwise will not go through
13524                 # set_continuation_breaks.
13525                 if (
13526                     $block_type
13527
13528                     # if we have the ')' but not its '(' in this batch..
13529                     && ( $last_nonblank_token eq ')' )
13530                     && $mate_index_to_go[$i_last_nonblank_token] < 0
13531
13532                     # and user wants brace to left
13533                     && !$rOpts->{'opening-brace-always-on-right'}
13534
13535                     && ( $type  eq '{' )    # should be true
13536                     && ( $token eq '{' )    # should be true
13537                   )
13538                 {
13539                     set_forced_breakpoint( $i - 1 );
13540                 }
13541             }
13542
13543             #------------------------------------------------------------
13544             # Handle Decreasing Depth..
13545             #
13546             # finish off any old list when depth decreases
13547             # token $i is a ')','}', or ']'
13548             #------------------------------------------------------------
13549             elsif ( $depth < $current_depth ) {
13550
13551                 check_for_new_minimum_depth($depth);
13552
13553                 # force all outer logical containers to break after we see on
13554                 # old breakpoint
13555                 $has_old_logical_breakpoints[$depth] ||=
13556                   $has_old_logical_breakpoints[$current_depth];
13557
13558                 # Patch to break between ') {' if the paren list is broken.
13559                 # There is similar logic in set_continuation_breaks for
13560                 # non-broken lists.
13561                 if (   $token eq ')'
13562                     && $next_nonblank_block_type
13563                     && $interrupted_list[$current_depth]
13564                     && $next_nonblank_type eq '{'
13565                     && !$rOpts->{'opening-brace-always-on-right'} )
13566                 {
13567                     set_forced_breakpoint($i);
13568                 }
13569
13570 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
13571
13572                 # set breaks at commas if necessary
13573                 my ( $bp_count, $do_not_break_apart ) =
13574                   set_comma_breakpoints($current_depth);
13575
13576                 my $i_opening = $opening_structure_index_stack[$current_depth];
13577                 my $saw_opening_structure = ( $i_opening >= 0 );
13578
13579                 # this term is long if we had to break at interior commas..
13580                 my $is_long_term = $bp_count > 0;
13581
13582                 # ..or if the length between opening and closing parens exceeds
13583                 # allowed line length
13584                 if ( !$is_long_term && $saw_opening_structure ) {
13585                     my $i_opening_minus = find_token_starting_list($i_opening);
13586
13587                     # Note: we have to allow for one extra space after a
13588                     # closing token so that we do not strand a comma or
13589                     # semicolon, hence the '>=' here (oneline.t)
13590                     $is_long_term =
13591                       excess_line_length( $i_opening_minus, $i ) >= 0;
13592                 }
13593
13594                 # We've set breaks after all comma-arrows.  Now we have to
13595                 # undo them if this can be a one-line block
13596                 # (the only breakpoints set will be due to comma-arrows)
13597                 if (
13598
13599                     # user doesn't require breaking after all comma-arrows
13600                     ( $rOpts_comma_arrow_breakpoints != 0 )
13601
13602                     # and if the opening structure is in this batch
13603                     && $saw_opening_structure
13604
13605                     # and either on the same old line
13606                     && (
13607                         $old_breakpoint_count_stack[$current_depth] ==
13608                         $last_old_breakpoint_count
13609
13610                         # or user wants to form long blocks with arrows
13611                         || $rOpts_comma_arrow_breakpoints == 2
13612                     )
13613
13614                   # and we made some breakpoints between the opening and closing
13615                     && ( $breakpoint_undo_stack[$current_depth] <
13616                         $forced_breakpoint_undo_count )
13617
13618                     # and this block is short enough to fit on one line
13619                     # Note: use < because need 1 more space for possible comma
13620                     && !$is_long_term
13621
13622                   )
13623                 {
13624                     undo_forced_breakpoint_stack(
13625                         $breakpoint_undo_stack[$current_depth] );
13626                 }
13627
13628                 # now see if we have any comma breakpoints left
13629                 my $has_comma_breakpoints =
13630                   ( $breakpoint_stack[$current_depth] !=
13631                       $forced_breakpoint_count );
13632
13633                 # update broken-sublist flag of the outer container
13634                      $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
13635                   || $has_broken_sublist[$current_depth]
13636                   || $is_long_term
13637                   || $has_comma_breakpoints;
13638
13639 # Having come to the closing ')', '}', or ']', now we have to decide if we
13640 # should 'open up' the structure by placing breaks at the opening and
13641 # closing containers.  This is a tricky decision.  Here are some of the
13642 # basic considerations:
13643 #
13644 # -If this is a BLOCK container, then any breakpoints will have already
13645 # been set (and according to user preferences), so we need do nothing here.
13646 #
13647 # -If we have a comma-separated list for which we can align the list items,
13648 # then we need to do so because otherwise the vertical aligner cannot
13649 # currently do the alignment.
13650 #
13651 # -If this container does itself contain a container which has been broken
13652 # open, then it should be broken open to properly show the structure.
13653 #
13654 # -If there is nothing to align, and no other reason to break apart,
13655 # then do not do it.
13656 #
13657 # We will not break open the parens of a long but 'simple' logical expression.
13658 # For example:
13659 #
13660 # This is an example of a simple logical expression and its formatting:
13661 #
13662 #     if ( $bigwasteofspace1 && $bigwasteofspace2
13663 #         || $bigwasteofspace3 && $bigwasteofspace4 )
13664 #
13665 # Most people would prefer this than the 'spacey' version:
13666 #
13667 #     if (
13668 #         $bigwasteofspace1 && $bigwasteofspace2
13669 #         || $bigwasteofspace3 && $bigwasteofspace4
13670 #     )
13671 #
13672 # To illustrate the rules for breaking logical expressions, consider:
13673 #
13674 #             FULLY DENSE:
13675 #             if ( $opt_excl
13676 #                 and ( exists $ids_excl_uc{$id_uc}
13677 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
13678 #
13679 # This is on the verge of being difficult to read.  The current default is to
13680 # open it up like this:
13681 #
13682 #             DEFAULT:
13683 #             if (
13684 #                 $opt_excl
13685 #                 and ( exists $ids_excl_uc{$id_uc}
13686 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
13687 #               )
13688 #
13689 # This is a compromise which tries to avoid being too dense and to spacey.
13690 # A more spaced version would be:
13691 #
13692 #             SPACEY:
13693 #             if (
13694 #                 $opt_excl
13695 #                 and (
13696 #                     exists $ids_excl_uc{$id_uc}
13697 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
13698 #                 )
13699 #               )
13700 #
13701 # Some people might prefer the spacey version -- an option could be added.  The
13702 # innermost expression contains a long block '( exists $ids_...  ')'.
13703 #
13704 # Here is how the logic goes: We will force a break at the 'or' that the
13705 # innermost expression contains, but we will not break apart its opening and
13706 # closing containers because (1) it contains no multi-line sub-containers itself,
13707 # and (2) there is no alignment to be gained by breaking it open like this
13708 #
13709 #             and (
13710 #                 exists $ids_excl_uc{$id_uc}
13711 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
13712 #             )
13713 #
13714 # (although this looks perfectly ok and might be good for long expressions).  The
13715 # outer 'if' container, though, contains a broken sub-container, so it will be
13716 # broken open to avoid too much density.  Also, since it contains no 'or's, there
13717 # will be a forced break at its 'and'.
13718
13719                 # set some flags telling something about this container..
13720                 my $is_simple_logical_expression = 0;
13721                 if (   $item_count_stack[$current_depth] == 0
13722                     && $saw_opening_structure
13723                     && $tokens_to_go[$i_opening] eq '('
13724                     && $is_logical_container{ $container_type[$current_depth] }
13725                   )
13726                 {
13727
13728                     # This seems to be a simple logical expression with
13729                     # no existing breakpoints.  Set a flag to prevent
13730                     # opening it up.
13731                     if ( !$has_comma_breakpoints ) {
13732                         $is_simple_logical_expression = 1;
13733                     }
13734
13735                     # This seems to be a simple logical expression with
13736                     # breakpoints (broken sublists, for example).  Break
13737                     # at all 'or's and '||'s.
13738                     else {
13739                         set_logical_breakpoints($current_depth);
13740                     }
13741                 }
13742
13743                 if ( $is_long_term
13744                     && @{ $rfor_semicolon_list[$current_depth] } )
13745                 {
13746                     set_for_semicolon_breakpoints($current_depth);
13747
13748                     # open up a long 'for' or 'foreach' container to allow
13749                     # leading term alignment unless -lp is used.
13750                     $has_comma_breakpoints = 1
13751                       unless $rOpts_line_up_parentheses;
13752                 }
13753
13754                 if (
13755
13756                     # breaks for code BLOCKS are handled at a higher level
13757                     !$block_type
13758
13759                     # we do not need to break at the top level of an 'if'
13760                     # type expression
13761                     && !$is_simple_logical_expression
13762
13763                     ## modification to keep ': (' containers vertically tight;
13764                     ## but probably better to let user set -vt=1 to avoid
13765                     ## inconsistency with other paren types
13766                     ## && ($container_type[$current_depth] ne ':')
13767
13768                     # otherwise, we require one of these reasons for breaking:
13769                     && (
13770
13771                         # - this term has forced line breaks
13772                         $has_comma_breakpoints
13773
13774                        # - the opening container is separated from this batch
13775                        #   for some reason (comment, blank line, code block)
13776                        # - this is a non-paren container spanning multiple lines
13777                         || !$saw_opening_structure
13778
13779                         # - this is a long block contained in another breakable
13780                         #   container
13781                         || (   $is_long_term
13782                             && $container_environment_to_go[$i_opening] ne
13783                             'BLOCK' )
13784                     )
13785                   )
13786                 {
13787
13788                     # For -lp option, we must put a breakpoint before
13789                     # the token which has been identified as starting
13790                     # this indentation level.  This is necessary for
13791                     # proper alignment.
13792                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
13793                     {
13794                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
13795                         if (   $i_opening + 1 < $max_index_to_go
13796                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
13797                         {
13798                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
13799                         }
13800                         if ( defined($item) ) {
13801                             my $i_start_2 = $item->get_STARTING_INDEX();
13802                             if (
13803                                 defined($i_start_2)
13804
13805                                 # we are breaking after an opening brace, paren,
13806                                 # so don't break before it too
13807                                 && $i_start_2 ne $i_opening
13808                               )
13809                             {
13810
13811                                 # Only break for breakpoints at the same
13812                                 # indentation level as the opening paren
13813                                 my $test1 = $nesting_depth_to_go[$i_opening];
13814                                 my $test2 = $nesting_depth_to_go[$i_start_2];
13815                                 if ( $test2 == $test1 ) {
13816                                     set_forced_breakpoint( $i_start_2 - 1 );
13817                                 }
13818                             }
13819                         }
13820                     }
13821
13822                     # break after opening structure.
13823                     # note: break before closing structure will be automatic
13824                     if ( $minimum_depth <= $current_depth ) {
13825
13826                         set_forced_breakpoint($i_opening)
13827                           unless ( $do_not_break_apart
13828                             || is_unbreakable_container($current_depth) );
13829
13830                         # break at '.' of lower depth level before opening token
13831                         if ( $last_dot_index[$depth] ) {
13832                             set_forced_breakpoint( $last_dot_index[$depth] );
13833                         }
13834
13835                         # break before opening structure if preeced by another
13836                         # closing structure and a comma.  This is normally
13837                         # done by the previous closing brace, but not
13838                         # if it was a one-line block.
13839                         if ( $i_opening > 2 ) {
13840                             my $i_prev =
13841                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
13842                               ? $i_opening - 2
13843                               : $i_opening - 1;
13844
13845                             if (   $types_to_go[$i_prev] eq ','
13846                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
13847                             {
13848                                 set_forced_breakpoint($i_prev);
13849                             }
13850
13851                             # also break before something like ':('  or '?('
13852                             # if appropriate.
13853                             elsif (
13854                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
13855                             {
13856                                 my $token_prev = $tokens_to_go[$i_prev];
13857                                 if ( $want_break_before{$token_prev} ) {
13858                                     set_forced_breakpoint($i_prev);
13859                                 }
13860                             }
13861                         }
13862                     }
13863
13864                     # break after comma following closing structure
13865                     if ( $next_type eq ',' ) {
13866                         set_forced_breakpoint( $i + 1 );
13867                     }
13868
13869                     # break before an '=' following closing structure
13870                     if (
13871                         $is_assignment{$next_nonblank_type}
13872                         && ( $breakpoint_stack[$current_depth] !=
13873                             $forced_breakpoint_count )
13874                       )
13875                     {
13876                         set_forced_breakpoint($i);
13877                     }
13878
13879                     # break at any comma before the opening structure Added
13880                     # for -lp, but seems to be good in general.  It isn't
13881                     # obvious how far back to look; the '5' below seems to
13882                     # work well and will catch the comma in something like
13883                     #  push @list, myfunc( $param, $param, ..
13884
13885                     my $icomma = $last_comma_index[$depth];
13886                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
13887                         unless ( $forced_breakpoint_to_go[$icomma] ) {
13888                             set_forced_breakpoint($icomma);
13889                         }
13890                     }
13891                 }    # end logic to open up a container
13892
13893                 # Break open a logical container open if it was already open
13894                 elsif ($is_simple_logical_expression
13895                     && $has_old_logical_breakpoints[$current_depth] )
13896                 {
13897                     set_logical_breakpoints($current_depth);
13898                 }
13899
13900                 # Handle long container which does not get opened up
13901                 elsif ($is_long_term) {
13902
13903                     # must set fake breakpoint to alert outer containers that
13904                     # they are complex
13905                     set_fake_breakpoint();
13906                 }
13907             }
13908
13909             #------------------------------------------------------------
13910             # Handle this token
13911             #------------------------------------------------------------
13912
13913             $current_depth = $depth;
13914
13915             # handle comma-arrow
13916             if ( $type eq '=>' ) {
13917                 next if ( $last_nonblank_type eq '=>' );
13918                 next if $rOpts_break_at_old_comma_breakpoints;
13919                 next if $rOpts_comma_arrow_breakpoints == 3;
13920                 $want_comma_break[$depth]   = 1;
13921                 $index_before_arrow[$depth] = $i_last_nonblank_token;
13922                 next;
13923             }
13924
13925             elsif ( $type eq '.' ) {
13926                 $last_dot_index[$depth] = $i;
13927             }
13928
13929             # Turn off alignment if we are sure that this is not a list
13930             # environment.  To be safe, we will do this if we see certain
13931             # non-list tokens, such as ';', and also the environment is
13932             # not a list.  Note that '=' could be in any of the = operators
13933             # (lextest.t). We can't just use the reported environment
13934             # because it can be incorrect in some cases.
13935             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
13936                 && $container_environment_to_go[$i] ne 'LIST' )
13937             {
13938                 $dont_align[$depth]         = 1;
13939                 $want_comma_break[$depth]   = 0;
13940                 $index_before_arrow[$depth] = -1;
13941             }
13942
13943             # now just handle any commas
13944             next unless ( $type eq ',' );
13945
13946             $last_dot_index[$depth]   = undef;
13947             $last_comma_index[$depth] = $i;
13948
13949             # break here if this comma follows a '=>'
13950             # but not if there is a side comment after the comma
13951             if ( $want_comma_break[$depth] ) {
13952
13953                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
13954                     $want_comma_break[$depth]   = 0;
13955                     $index_before_arrow[$depth] = -1;
13956                     next;
13957                 }
13958
13959                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13960
13961                 # break before the previous token if it looks safe
13962                 # Example of something that we will not try to break before:
13963                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
13964                 # Also we don't want to break at a binary operator (like +):
13965                 # $c->createOval(
13966                 #    $x + $R, $y +
13967                 #    $R => $x - $R,
13968                 #    $y - $R, -fill   => 'black',
13969                 # );
13970                 my $ibreak = $index_before_arrow[$depth] - 1;
13971                 if (   $ibreak > 0
13972                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
13973                 {
13974                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
13975                     if ( $types_to_go[$ibreak]  eq 'b' ) { $ibreak-- }
13976                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
13977
13978                         # don't break pointer calls, such as the following:
13979                         #  File::Spec->curdir  => 1,
13980                         # (This is tokenized as adjacent 'w' tokens)
13981                         if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
13982                             set_forced_breakpoint($ibreak);
13983                         }
13984                     }
13985                 }
13986
13987                 $want_comma_break[$depth]   = 0;
13988                 $index_before_arrow[$depth] = -1;
13989
13990                 # handle list which mixes '=>'s and ','s:
13991                 # treat any list items so far as an interrupted list
13992                 $interrupted_list[$depth] = 1;
13993                 next;
13994             }
13995
13996             # skip past these commas if we are not supposed to format them
13997             next if ( $dont_align[$depth] );
13998
13999             # break after all commas above starting depth
14000             if ( $depth < $starting_depth ) {
14001                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14002                 next;
14003             }
14004
14005             # add this comma to the list..
14006             my $item_count = $item_count_stack[$depth];
14007             if ( $item_count == 0 ) {
14008
14009                 # but do not form a list with no opening structure
14010                 # for example:
14011
14012                 #            open INFILE_COPY, ">$input_file_copy"
14013                 #              or die ("very long message");
14014
14015                 if ( ( $opening_structure_index_stack[$depth] < 0 )
14016                     && $container_environment_to_go[$i] eq 'BLOCK' )
14017                 {
14018                     $dont_align[$depth] = 1;
14019                     next;
14020                 }
14021             }
14022
14023             $comma_index[$depth][$item_count] = $i;
14024             ++$item_count_stack[$depth];
14025             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
14026                 $identifier_count_stack[$depth]++;
14027             }
14028         }
14029
14030         #-------------------------------------------
14031         # end of loop over all tokens in this batch
14032         #-------------------------------------------
14033
14034         # set breaks for any unfinished lists ..
14035         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
14036
14037             $interrupted_list[$dd] = 1;
14038             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
14039             set_comma_breakpoints($dd);
14040             set_logical_breakpoints($dd)
14041               if ( $has_old_logical_breakpoints[$dd] );
14042             set_for_semicolon_breakpoints($dd);
14043
14044             # break open container...
14045             my $i_opening = $opening_structure_index_stack[$dd];
14046             set_forced_breakpoint($i_opening)
14047               unless (
14048                 is_unbreakable_container($dd)
14049
14050                 # Avoid a break which would place an isolated ' or "
14051                 # on a line
14052                 || (   $type eq 'Q'
14053                     && $i_opening >= $max_index_to_go - 2
14054                     && $token =~ /^['"]$/ )
14055               );
14056         }
14057
14058         # Return a flag indicating if the input file had some good breakpoints.
14059         # This flag will be used to force a break in a line shorter than the
14060         # allowed line length.
14061         if ( $has_old_logical_breakpoints[$current_depth] ) {
14062             $saw_good_breakpoint = 1;
14063         }
14064         return $saw_good_breakpoint;
14065     }
14066 }    # end scan_list
14067
14068 sub find_token_starting_list {
14069
14070     # When testing to see if a block will fit on one line, some
14071     # previous token(s) may also need to be on the line; particularly
14072     # if this is a sub call.  So we will look back at least one
14073     # token. NOTE: This isn't perfect, but not critical, because
14074     # if we mis-identify a block, it will be wrapped and therefore
14075     # fixed the next time it is formatted.
14076     my $i_opening_paren = shift;
14077     my $i_opening_minus = $i_opening_paren;
14078     my $im1             = $i_opening_paren - 1;
14079     my $im2             = $i_opening_paren - 2;
14080     my $im3             = $i_opening_paren - 3;
14081     my $typem1          = $types_to_go[$im1];
14082     my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
14083     if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
14084         $i_opening_minus = $i_opening_paren;
14085     }
14086     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
14087         $i_opening_minus = $im1 if $im1 >= 0;
14088
14089         # walk back to improve length estimate
14090         for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
14091             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
14092             $i_opening_minus = $j;
14093         }
14094         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
14095     }
14096     elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
14097     elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
14098         $i_opening_minus = $im2;
14099     }
14100     return $i_opening_minus;
14101 }
14102
14103 {    # begin set_comma_breakpoints_do
14104
14105     my %is_keyword_with_special_leading_term;
14106
14107     BEGIN {
14108
14109         # These keywords have prototypes which allow a special leading item
14110         # followed by a list
14111         @_ =
14112           qw(formline grep kill map printf sprintf push chmod join pack unshift);
14113         @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
14114     }
14115
14116     sub set_comma_breakpoints_do {
14117
14118         # Given a list with some commas, set breakpoints at some of the
14119         # commas, if necessary, to make it easy to read.  This list is
14120         # an example:
14121         my (
14122             $depth,               $i_opening_paren,  $i_closing_paren,
14123             $item_count,          $identifier_count, $rcomma_index,
14124             $next_nonblank_type,  $list_type,        $interrupted,
14125             $rdo_not_break_apart, $must_break_open,
14126         ) = @_;
14127
14128         # nothing to do if no commas seen
14129         return if ( $item_count < 1 );
14130         my $i_first_comma     = $$rcomma_index[0];
14131         my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
14132         my $i_last_comma      = $i_true_last_comma;
14133         if ( $i_last_comma >= $max_index_to_go ) {
14134             $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
14135             return if ( $item_count < 1 );
14136         }
14137
14138         #---------------------------------------------------------------
14139         # find lengths of all items in the list to calculate page layout
14140         #---------------------------------------------------------------
14141         my $comma_count = $item_count;
14142         my @item_lengths;
14143         my @i_term_begin;
14144         my @i_term_end;
14145         my @i_term_comma;
14146         my $i_prev_plus;
14147         my @max_length = ( 0, 0 );
14148         my $first_term_length;
14149         my $i      = $i_opening_paren;
14150         my $is_odd = 1;
14151
14152         for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
14153             $is_odd      = 1 - $is_odd;
14154             $i_prev_plus = $i + 1;
14155             $i           = $$rcomma_index[$j];
14156
14157             my $i_term_end =
14158               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
14159             my $i_term_begin =
14160               ( $types_to_go[$i_prev_plus] eq 'b' )
14161               ? $i_prev_plus + 1
14162               : $i_prev_plus;
14163             push @i_term_begin, $i_term_begin;
14164             push @i_term_end,   $i_term_end;
14165             push @i_term_comma, $i;
14166
14167             # note: currently adding 2 to all lengths (for comma and space)
14168             my $length =
14169               2 + token_sequence_length( $i_term_begin, $i_term_end );
14170             push @item_lengths, $length;
14171
14172             if ( $j == 0 ) {
14173                 $first_term_length = $length;
14174             }
14175             else {
14176
14177                 if ( $length > $max_length[$is_odd] ) {
14178                     $max_length[$is_odd] = $length;
14179                 }
14180             }
14181         }
14182
14183         # now we have to make a distinction between the comma count and item
14184         # count, because the item count will be one greater than the comma
14185         # count if the last item is not terminated with a comma
14186         my $i_b =
14187           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
14188           ? $i_last_comma + 1
14189           : $i_last_comma;
14190         my $i_e =
14191           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
14192           ? $i_closing_paren - 2
14193           : $i_closing_paren - 1;
14194         my $i_effective_last_comma = $i_last_comma;
14195
14196         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
14197
14198         if ( $last_item_length > 0 ) {
14199
14200             # add 2 to length because other lengths include a comma and a blank
14201             $last_item_length += 2;
14202             push @item_lengths, $last_item_length;
14203             push @i_term_begin, $i_b + 1;
14204             push @i_term_end,   $i_e;
14205             push @i_term_comma, undef;
14206
14207             my $i_odd = $item_count % 2;
14208
14209             if ( $last_item_length > $max_length[$i_odd] ) {
14210                 $max_length[$i_odd] = $last_item_length;
14211             }
14212
14213             $item_count++;
14214             $i_effective_last_comma = $i_e + 1;
14215
14216             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
14217                 $identifier_count++;
14218             }
14219         }
14220
14221         #---------------------------------------------------------------
14222         # End of length calculations
14223         #---------------------------------------------------------------
14224
14225         #---------------------------------------------------------------
14226         # Compound List Rule 1:
14227         # Break at (almost) every comma for a list containing a broken
14228         # sublist.  This has higher priority than the Interrupted List
14229         # Rule.
14230         #---------------------------------------------------------------
14231         if ( $has_broken_sublist[$depth] ) {
14232
14233             # Break at every comma except for a comma between two
14234             # simple, small terms.  This prevents long vertical
14235             # columns of, say, just 0's.
14236             my $small_length = 10;    # 2 + actual maximum length wanted
14237
14238             # We'll insert a break in long runs of small terms to
14239             # allow alignment in uniform tables.
14240             my $skipped_count = 0;
14241             my $columns       = table_columns_available($i_first_comma);
14242             my $fields        = int( $columns / $small_length );
14243             if (   $rOpts_maximum_fields_per_table
14244                 && $fields > $rOpts_maximum_fields_per_table )
14245             {
14246                 $fields = $rOpts_maximum_fields_per_table;
14247             }
14248             my $max_skipped_count = $fields - 1;
14249
14250             my $is_simple_last_term = 0;
14251             my $is_simple_next_term = 0;
14252             foreach my $j ( 0 .. $item_count ) {
14253                 $is_simple_last_term = $is_simple_next_term;
14254                 $is_simple_next_term = 0;
14255                 if (   $j < $item_count
14256                     && $i_term_end[$j] == $i_term_begin[$j]
14257                     && $item_lengths[$j] <= $small_length )
14258                 {
14259                     $is_simple_next_term = 1;
14260                 }
14261                 next if $j == 0;
14262                 if (   $is_simple_last_term
14263                     && $is_simple_next_term
14264                     && $skipped_count < $max_skipped_count )
14265                 {
14266                     $skipped_count++;
14267                 }
14268                 else {
14269                     $skipped_count = 0;
14270                     my $i = $i_term_comma[ $j - 1 ];
14271                     last unless defined $i;
14272                     set_forced_breakpoint($i);
14273                 }
14274             }
14275
14276             # always break at the last comma if this list is
14277             # interrupted; we wouldn't want to leave a terminal '{', for
14278             # example.
14279             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14280             return;
14281         }
14282
14283 #my ( $a, $b, $c ) = caller();
14284 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14285 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
14286 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14287
14288         #---------------------------------------------------------------
14289         # Interrupted List Rule:
14290         # A list is is forced to use old breakpoints if it was interrupted
14291         # by side comments or blank lines, or requested by user.
14292         #---------------------------------------------------------------
14293         if (   $rOpts_break_at_old_comma_breakpoints
14294             || $interrupted
14295             || $i_opening_paren < 0 )
14296         {
14297             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14298             return;
14299         }
14300
14301         #---------------------------------------------------------------
14302         # Looks like a list of items.  We have to look at it and size it up.
14303         #---------------------------------------------------------------
14304
14305         my $opening_token = $tokens_to_go[$i_opening_paren];
14306         my $opening_environment =
14307           $container_environment_to_go[$i_opening_paren];
14308
14309         #-------------------------------------------------------------------
14310         # Return if this will fit on one line
14311         #-------------------------------------------------------------------
14312
14313         my $i_opening_minus = find_token_starting_list($i_opening_paren);
14314         return
14315           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14316
14317         #-------------------------------------------------------------------
14318         # Now we know that this block spans multiple lines; we have to set
14319         # at least one breakpoint -- real or fake -- as a signal to break
14320         # open any outer containers.
14321         #-------------------------------------------------------------------
14322         set_fake_breakpoint();
14323
14324         # be sure we do not extend beyond the current list length
14325         if ( $i_effective_last_comma >= $max_index_to_go ) {
14326             $i_effective_last_comma = $max_index_to_go - 1;
14327         }
14328
14329         # Set a flag indicating if we need to break open to keep -lp
14330         # items aligned.  This is necessary if any of the list terms
14331         # exceeds the available space after the '('.
14332         my $need_lp_break_open = $must_break_open;
14333         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14334             my $columns_if_unbroken = $rOpts_maximum_line_length -
14335               total_line_length( $i_opening_minus, $i_opening_paren );
14336             $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
14337               || ( $max_length[1] > $columns_if_unbroken )
14338               || ( $first_term_length > $columns_if_unbroken );
14339         }
14340
14341         # Specify if the list must have an even number of fields or not.
14342         # It is generally safest to assume an even number, because the
14343         # list items might be a hash list.  But if we can be sure that
14344         # it is not a hash, then we can allow an odd number for more
14345         # flexibility.
14346         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
14347
14348         if (   $identifier_count >= $item_count - 1
14349             || $is_assignment{$next_nonblank_type}
14350             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
14351           )
14352         {
14353             $odd_or_even = 1;
14354         }
14355
14356         # do we have a long first term which should be
14357         # left on a line by itself?
14358         my $use_separate_first_term = (
14359             $odd_or_even == 1       # only if we can use 1 field/line
14360               && $item_count > 3    # need several items
14361               && $first_term_length >
14362               2 * $max_length[0] - 2    # need long first term
14363               && $first_term_length >
14364               2 * $max_length[1] - 2    # need long first term
14365         );
14366
14367         # or do we know from the type of list that the first term should
14368         # be placed alone?
14369         if ( !$use_separate_first_term ) {
14370             if ( $is_keyword_with_special_leading_term{$list_type} ) {
14371                 $use_separate_first_term = 1;
14372
14373                 # should the container be broken open?
14374                 if ( $item_count < 3 ) {
14375                     if ( $i_first_comma - $i_opening_paren < 4 ) {
14376                         $$rdo_not_break_apart = 1;
14377                     }
14378                 }
14379                 elsif ($first_term_length < 20
14380                     && $i_first_comma - $i_opening_paren < 4 )
14381                 {
14382                     my $columns = table_columns_available($i_first_comma);
14383                     if ( $first_term_length < $columns ) {
14384                         $$rdo_not_break_apart = 1;
14385                     }
14386                 }
14387             }
14388         }
14389
14390         # if so,
14391         if ($use_separate_first_term) {
14392
14393             # ..set a break and update starting values
14394             $use_separate_first_term = 1;
14395             set_forced_breakpoint($i_first_comma);
14396             $i_opening_paren = $i_first_comma;
14397             $i_first_comma   = $$rcomma_index[1];
14398             $item_count--;
14399             return if $comma_count == 1;
14400             shift @item_lengths;
14401             shift @i_term_begin;
14402             shift @i_term_end;
14403             shift @i_term_comma;
14404         }
14405
14406         # if not, update the metrics to include the first term
14407         else {
14408             if ( $first_term_length > $max_length[0] ) {
14409                 $max_length[0] = $first_term_length;
14410             }
14411         }
14412
14413         # Field width parameters
14414         my $pair_width = ( $max_length[0] + $max_length[1] );
14415         my $max_width =
14416           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14417
14418         # Number of free columns across the page width for laying out tables
14419         my $columns = table_columns_available($i_first_comma);
14420
14421         # Estimated maximum number of fields which fit this space
14422         # This will be our first guess
14423         my $number_of_fields_max =
14424           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14425             $pair_width );
14426         my $number_of_fields = $number_of_fields_max;
14427
14428         # Find the best-looking number of fields
14429         # and make this our second guess if possible
14430         my ( $number_of_fields_best, $ri_ragged_break_list,
14431             $new_identifier_count )
14432           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14433             $max_width );
14434
14435         if (   $number_of_fields_best != 0
14436             && $number_of_fields_best < $number_of_fields_max )
14437         {
14438             $number_of_fields = $number_of_fields_best;
14439         }
14440
14441         # ----------------------------------------------------------------------
14442         # If we are crowded and the -lp option is being used, try to
14443         # undo some indentation
14444         # ----------------------------------------------------------------------
14445         if (
14446             $rOpts_line_up_parentheses
14447             && (
14448                 $number_of_fields == 0
14449                 || (   $number_of_fields == 1
14450                     && $number_of_fields != $number_of_fields_best )
14451             )
14452           )
14453         {
14454             my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
14455             if ( $available_spaces > 0 ) {
14456
14457                 my $spaces_wanted = $max_width - $columns;    # for 1 field
14458
14459                 if ( $number_of_fields_best == 0 ) {
14460                     $number_of_fields_best =
14461                       get_maximum_fields_wanted( \@item_lengths );
14462                 }
14463
14464                 if ( $number_of_fields_best != 1 ) {
14465                     my $spaces_wanted_2 =
14466                       1 + $pair_width - $columns;             # for 2 fields
14467                     if ( $available_spaces > $spaces_wanted_2 ) {
14468                         $spaces_wanted = $spaces_wanted_2;
14469                     }
14470                 }
14471
14472                 if ( $spaces_wanted > 0 ) {
14473                     my $deleted_spaces =
14474                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14475
14476                     # redo the math
14477                     if ( $deleted_spaces > 0 ) {
14478                         $columns = table_columns_available($i_first_comma);
14479                         $number_of_fields_max =
14480                           maximum_number_of_fields( $columns, $odd_or_even,
14481                             $max_width, $pair_width );
14482                         $number_of_fields = $number_of_fields_max;
14483
14484                         if (   $number_of_fields_best == 1
14485                             && $number_of_fields >= 1 )
14486                         {
14487                             $number_of_fields = $number_of_fields_best;
14488                         }
14489                     }
14490                 }
14491             }
14492         }
14493
14494         # try for one column if two won't work
14495         if ( $number_of_fields <= 0 ) {
14496             $number_of_fields = int( $columns / $max_width );
14497         }
14498
14499         # The user can place an upper bound on the number of fields,
14500         # which can be useful for doing maintenance on tables
14501         if (   $rOpts_maximum_fields_per_table
14502             && $number_of_fields > $rOpts_maximum_fields_per_table )
14503         {
14504             $number_of_fields = $rOpts_maximum_fields_per_table;
14505         }
14506
14507         # How many columns (characters) and lines would this container take
14508         # if no additional whitespace were added?
14509         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
14510             $i_effective_last_comma + 1 );
14511         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
14512         my $packed_lines = 1 + int( $packed_columns / $columns );
14513
14514         # are we an item contained in an outer list?
14515         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
14516
14517         if ( $number_of_fields <= 0 ) {
14518
14519 #         #---------------------------------------------------------------
14520 #         # We're in trouble.  We can't find a single field width that works.
14521 #         # There is no simple answer here; we may have a single long list
14522 #         # item, or many.
14523 #         #---------------------------------------------------------------
14524 #
14525 #         In many cases, it may be best to not force a break if there is just one
14526 #         comma, because the standard continuation break logic will do a better
14527 #         job without it.
14528 #
14529 #         In the common case that all but one of the terms can fit
14530 #         on a single line, it may look better not to break open the
14531 #         containing parens.  Consider, for example
14532 #
14533 #             $color =
14534 #               join ( '/',
14535 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
14536 #                 keys %colors );
14537 #
14538 #         which will look like this with the container broken:
14539 #
14540 #             $color = join (
14541 #                 '/',
14542 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
14543 #             );
14544 #
14545 #         Here is an example of this rule for a long last term:
14546 #
14547 #             log_message( 0, 256, 128,
14548 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
14549 #
14550 #         And here is an example with a long first term:
14551 #
14552 #         $s = sprintf(
14553 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
14554 #             $r, $pu, $ps, $cu, $cs, $tt
14555 #           )
14556 #           if $style eq 'all';
14557
14558             my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
14559             my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
14560             my $long_first_term =
14561               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
14562
14563             # break at every comma ...
14564             if (
14565
14566                 # if requested by user or is best looking
14567                 $number_of_fields_best == 1
14568
14569                 # or if this is a sublist of a larger list
14570                 || $in_hierarchical_list
14571
14572                 # or if multiple commas and we dont have a long first or last
14573                 # term
14574                 || ( $comma_count > 1
14575                     && !( $long_last_term || $long_first_term ) )
14576               )
14577             {
14578                 foreach ( 0 .. $comma_count - 1 ) {
14579                     set_forced_breakpoint( $$rcomma_index[$_] );
14580                 }
14581             }
14582             elsif ($long_last_term) {
14583
14584                 set_forced_breakpoint($i_last_comma);
14585                 $$rdo_not_break_apart = 1 unless $must_break_open;
14586             }
14587             elsif ($long_first_term) {
14588
14589                 set_forced_breakpoint($i_first_comma);
14590             }
14591             else {
14592
14593                 # let breaks be defined by default bond strength logic
14594             }
14595             return;
14596         }
14597
14598         # --------------------------------------------------------
14599         # We have a tentative field count that seems to work.
14600         # How many lines will this require?
14601         # --------------------------------------------------------
14602         my $formatted_lines = $item_count / ($number_of_fields);
14603         if ( $formatted_lines != int $formatted_lines ) {
14604             $formatted_lines = 1 + int $formatted_lines;
14605         }
14606
14607         # So far we've been trying to fill out to the right margin.  But
14608         # compact tables are easier to read, so let's see if we can use fewer
14609         # fields without increasing the number of lines.
14610         $number_of_fields =
14611           compactify_table( $item_count, $number_of_fields, $formatted_lines,
14612             $odd_or_even );
14613
14614         # How many spaces across the page will we fill?
14615         my $columns_per_line =
14616           ( int $number_of_fields / 2 ) * $pair_width +
14617           ( $number_of_fields % 2 ) * $max_width;
14618
14619         my $formatted_columns;
14620
14621         if ( $number_of_fields > 1 ) {
14622             $formatted_columns =
14623               ( $pair_width * ( int( $item_count / 2 ) ) +
14624                   ( $item_count % 2 ) * $max_width );
14625         }
14626         else {
14627             $formatted_columns = $max_width * $item_count;
14628         }
14629         if ( $formatted_columns < $packed_columns ) {
14630             $formatted_columns = $packed_columns;
14631         }
14632
14633         my $unused_columns = $formatted_columns - $packed_columns;
14634
14635         # set some empirical parameters to help decide if we should try to
14636         # align; high sparsity does not look good, especially with few lines
14637         my $sparsity = ($unused_columns) / ($formatted_columns);
14638         my $max_allowed_sparsity =
14639             ( $item_count < 3 )    ? 0.1
14640           : ( $packed_lines == 1 ) ? 0.15
14641           : ( $packed_lines == 2 ) ? 0.4
14642           :                          0.7;
14643
14644         # Begin check for shortcut methods, which avoid treating a list
14645         # as a table for relatively small parenthesized lists.  These
14646         # are usually easier to read if not formatted as tables.
14647         if (
14648             $packed_lines <= 2    # probably can fit in 2 lines
14649             && $item_count < 9    # doesn't have too many items
14650             && $opening_environment eq 'BLOCK'    # not a sub-container
14651             && $opening_token       eq '('        # is paren list
14652           )
14653         {
14654
14655             # Shortcut method 1: for -lp and just one comma:
14656             # This is a no-brainer, just break at the comma.
14657             if (
14658                 $rOpts_line_up_parentheses        # -lp
14659                 && $item_count == 2               # two items, one comma
14660                 && !$must_break_open
14661               )
14662             {
14663                 my $i_break = $$rcomma_index[0];
14664                 set_forced_breakpoint($i_break);
14665                 $$rdo_not_break_apart = 1;
14666                 set_non_alignment_flags( $comma_count, $rcomma_index );
14667                 return;
14668
14669             }
14670
14671             # method 2 is for most small ragged lists which might look
14672             # best if not displayed as a table.
14673             if (
14674                 ( $number_of_fields == 2 && $item_count == 3 )
14675                 || (
14676                     $new_identifier_count > 0    # isn't all quotes
14677                     && $sparsity > 0.15
14678                 )    # would be fairly spaced gaps if aligned
14679               )
14680             {
14681
14682                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
14683                     $ri_ragged_break_list );
14684                 ++$break_count if ($use_separate_first_term);
14685
14686                 # NOTE: we should really use the true break count here,
14687                 # which can be greater if there are large terms and
14688                 # little space, but usually this will work well enough.
14689                 unless ($must_break_open) {
14690
14691                     if ( $break_count <= 1 ) {
14692                         $$rdo_not_break_apart = 1;
14693                     }
14694                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14695                     {
14696                         $$rdo_not_break_apart = 1;
14697                     }
14698                 }
14699                 set_non_alignment_flags( $comma_count, $rcomma_index );
14700                 return;
14701             }
14702
14703         }    # end shortcut methods
14704
14705         # debug stuff
14706
14707         FORMATTER_DEBUG_FLAG_SPARSE && do {
14708             print
14709 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
14710
14711         };
14712
14713         #---------------------------------------------------------------
14714         # Compound List Rule 2:
14715         # If this list is too long for one line, and it is an item of a
14716         # larger list, then we must format it, regardless of sparsity
14717         # (ian.t).  One reason that we have to do this is to trigger
14718         # Compound List Rule 1, above, which causes breaks at all commas of
14719         # all outer lists.  In this way, the structure will be properly
14720         # displayed.
14721         #---------------------------------------------------------------
14722
14723         # Decide if this list is too long for one line unless broken
14724         my $total_columns = table_columns_available($i_opening_paren);
14725         my $too_long      = $packed_columns > $total_columns;
14726
14727         # For a paren list, include the length of the token just before the
14728         # '(' because this is likely a sub call, and we would have to
14729         # include the sub name on the same line as the list.  This is still
14730         # imprecise, but not too bad.  (steve.t)
14731         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
14732
14733             $too_long = excess_line_length( $i_opening_minus,
14734                 $i_effective_last_comma + 1 ) > 0;
14735         }
14736
14737         # FIXME: For an item after a '=>', try to include the length of the
14738         # thing before the '=>'.  This is crude and should be improved by
14739         # actually looking back token by token.
14740         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
14741             my $i_opening_minus = $i_opening_paren - 4;
14742             if ( $i_opening_minus >= 0 ) {
14743                 $too_long = excess_line_length( $i_opening_minus,
14744                     $i_effective_last_comma + 1 ) > 0;
14745             }
14746         }
14747
14748         # Always break lists contained in '[' and '{' if too long for 1 line,
14749         # and always break lists which are too long and part of a more complex
14750         # structure.
14751         my $must_break_open_container = $must_break_open
14752           || ( $too_long
14753             && ( $in_hierarchical_list || $opening_token ne '(' ) );
14754
14755 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
14756
14757         #---------------------------------------------------------------
14758         # The main decision:
14759         # Now decide if we will align the data into aligned columns.  Do not
14760         # attempt to align columns if this is a tiny table or it would be
14761         # too spaced.  It seems that the more packed lines we have, the
14762         # sparser the list that can be allowed and still look ok.
14763         #---------------------------------------------------------------
14764
14765         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
14766             || ( $formatted_lines < 2 )
14767             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
14768           )
14769         {
14770
14771             #---------------------------------------------------------------
14772             # too sparse: would look ugly if aligned in a table;
14773             #---------------------------------------------------------------
14774
14775             # use old breakpoints if this is a 'big' list
14776             # FIXME: goal is to improve set_ragged_breakpoints so that
14777             # this is not necessary.
14778             if ( $packed_lines > 2 && $item_count > 10 ) {
14779                 write_logfile_entry("List sparse: using old breakpoints\n");
14780                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
14781             }
14782
14783             # let the continuation logic handle it if 2 lines
14784             else {
14785
14786                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
14787                     $ri_ragged_break_list );
14788                 ++$break_count if ($use_separate_first_term);
14789
14790                 unless ($must_break_open_container) {
14791                     if ( $break_count <= 1 ) {
14792                         $$rdo_not_break_apart = 1;
14793                     }
14794                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14795                     {
14796                         $$rdo_not_break_apart = 1;
14797                     }
14798                 }
14799                 set_non_alignment_flags( $comma_count, $rcomma_index );
14800             }
14801             return;
14802         }
14803
14804         #---------------------------------------------------------------
14805         # go ahead and format as a table
14806         #---------------------------------------------------------------
14807         write_logfile_entry(
14808             "List: auto formatting with $number_of_fields fields/row\n");
14809
14810         my $j_first_break =
14811           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
14812
14813         for (
14814             my $j = $j_first_break ;
14815             $j < $comma_count ;
14816             $j += $number_of_fields
14817           )
14818         {
14819             my $i = $$rcomma_index[$j];
14820             set_forced_breakpoint($i);
14821         }
14822         return;
14823     }
14824 }
14825
14826 sub set_non_alignment_flags {
14827
14828     # set flag which indicates that these commas should not be
14829     # aligned
14830     my ( $comma_count, $rcomma_index ) = @_;
14831     foreach ( 0 .. $comma_count - 1 ) {
14832         $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
14833     }
14834 }
14835
14836 sub study_list_complexity {
14837
14838     # Look for complex tables which should be formatted with one term per line.
14839     # Returns the following:
14840     #
14841     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
14842     #    which are hard to read
14843     #  $number_of_fields_best = suggested number of fields based on
14844     #    complexity; = 0 if any number may be used.
14845     #
14846     my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
14847     my $item_count            = @{$ri_term_begin};
14848     my $complex_item_count    = 0;
14849     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
14850     my $i_max                 = @{$ritem_lengths} - 1;
14851     ##my @item_complexity;
14852
14853     my $i_last_last_break = -3;
14854     my $i_last_break      = -2;
14855     my @i_ragged_break_list;
14856
14857     my $definitely_complex = 30;
14858     my $definitely_simple  = 12;
14859     my $quote_count        = 0;
14860
14861     for my $i ( 0 .. $i_max ) {
14862         my $ib = $ri_term_begin->[$i];
14863         my $ie = $ri_term_end->[$i];
14864
14865         # define complexity: start with the actual term length
14866         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
14867
14868         ##TBD: join types here and check for variations
14869         ##my $str=join "", @tokens_to_go[$ib..$ie];
14870
14871         my $is_quote = 0;
14872         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
14873             $is_quote = 1;
14874             $quote_count++;
14875         }
14876         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
14877             $quote_count++;
14878         }
14879
14880         if ( $ib eq $ie ) {
14881             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
14882                 $complex_item_count++;
14883                 $weighted_length *= 2;
14884             }
14885             else {
14886             }
14887         }
14888         else {
14889             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
14890                 $complex_item_count++;
14891                 $weighted_length *= 2;
14892             }
14893             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
14894                 $weighted_length += 4;
14895             }
14896         }
14897
14898         # add weight for extra tokens.
14899         $weighted_length += 2 * ( $ie - $ib );
14900
14901 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
14902 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
14903
14904 ##push @item_complexity, $weighted_length;
14905
14906         # now mark a ragged break after this item it if it is 'long and
14907         # complex':
14908         if ( $weighted_length >= $definitely_complex ) {
14909
14910             # if we broke after the previous term
14911             # then break before it too
14912             if (   $i_last_break == $i - 1
14913                 && $i > 1
14914                 && $i_last_last_break != $i - 2 )
14915             {
14916
14917                 ## FIXME: don't strand a small term
14918                 pop @i_ragged_break_list;
14919                 push @i_ragged_break_list, $i - 2;
14920                 push @i_ragged_break_list, $i - 1;
14921             }
14922
14923             push @i_ragged_break_list, $i;
14924             $i_last_last_break = $i_last_break;
14925             $i_last_break      = $i;
14926         }
14927
14928         # don't break before a small last term -- it will
14929         # not look good on a line by itself.
14930         elsif ($i == $i_max
14931             && $i_last_break == $i - 1
14932             && $weighted_length <= $definitely_simple )
14933         {
14934             pop @i_ragged_break_list;
14935         }
14936     }
14937
14938     my $identifier_count = $i_max + 1 - $quote_count;
14939
14940     # Need more tuning here..
14941     if (   $max_width > 12
14942         && $complex_item_count > $item_count / 2
14943         && $number_of_fields_best != 2 )
14944     {
14945         $number_of_fields_best = 1;
14946     }
14947
14948     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
14949 }
14950
14951 sub get_maximum_fields_wanted {
14952
14953     # Not all tables look good with more than one field of items.
14954     # This routine looks at a table and decides if it should be
14955     # formatted with just one field or not.
14956     # This coding is still under development.
14957     my ($ritem_lengths) = @_;
14958
14959     my $number_of_fields_best = 0;
14960
14961     # For just a few items, we tentatively assume just 1 field.
14962     my $item_count = @{$ritem_lengths};
14963     if ( $item_count <= 5 ) {
14964         $number_of_fields_best = 1;
14965     }
14966
14967     # For larger tables, look at it both ways and see what looks best
14968     else {
14969
14970         my $is_odd            = 1;
14971         my @max_length        = ( 0, 0 );
14972         my @last_length_2     = ( undef, undef );
14973         my @first_length_2    = ( undef, undef );
14974         my $last_length       = undef;
14975         my $total_variation_1 = 0;
14976         my $total_variation_2 = 0;
14977         my @total_variation_2 = ( 0, 0 );
14978         for ( my $j = 0 ; $j < $item_count ; $j++ ) {
14979
14980             $is_odd = 1 - $is_odd;
14981             my $length = $ritem_lengths->[$j];
14982             if ( $length > $max_length[$is_odd] ) {
14983                 $max_length[$is_odd] = $length;
14984             }
14985
14986             if ( defined($last_length) ) {
14987                 my $dl = abs( $length - $last_length );
14988                 $total_variation_1 += $dl;
14989             }
14990             $last_length = $length;
14991
14992             my $ll = $last_length_2[$is_odd];
14993             if ( defined($ll) ) {
14994                 my $dl = abs( $length - $ll );
14995                 $total_variation_2[$is_odd] += $dl;
14996             }
14997             else {
14998                 $first_length_2[$is_odd] = $length;
14999             }
15000             $last_length_2[$is_odd] = $length;
15001         }
15002         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
15003
15004         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
15005         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
15006             $number_of_fields_best = 1;
15007         }
15008     }
15009     return ($number_of_fields_best);
15010 }
15011
15012 sub table_columns_available {
15013     my $i_first_comma = shift;
15014     my $columns =
15015       $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
15016
15017     # Patch: the vertical formatter does not line up lines whose lengths
15018     # exactly equal the available line length because of allowances
15019     # that must be made for side comments.  Therefore, the number of
15020     # available columns is reduced by 1 character.
15021     $columns -= 1;
15022     return $columns;
15023 }
15024
15025 sub maximum_number_of_fields {
15026
15027     # how many fields will fit in the available space?
15028     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
15029     my $max_pairs        = int( $columns / $pair_width );
15030     my $number_of_fields = $max_pairs * 2;
15031     if (   $odd_or_even == 1
15032         && $max_pairs * $pair_width + $max_width <= $columns )
15033     {
15034         $number_of_fields++;
15035     }
15036     return $number_of_fields;
15037 }
15038
15039 sub compactify_table {
15040
15041     # given a table with a certain number of fields and a certain number
15042     # of lines, see if reducing the number of fields will make it look
15043     # better.
15044     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
15045     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
15046         my $min_fields;
15047
15048         for (
15049             $min_fields = $number_of_fields ;
15050             $min_fields >= $odd_or_even
15051             && $min_fields * $formatted_lines >= $item_count ;
15052             $min_fields -= $odd_or_even
15053           )
15054         {
15055             $number_of_fields = $min_fields;
15056         }
15057     }
15058     return $number_of_fields;
15059 }
15060
15061 sub set_ragged_breakpoints {
15062
15063     # Set breakpoints in a list that cannot be formatted nicely as a
15064     # table.
15065     my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
15066
15067     my $break_count = 0;
15068     foreach (@$ri_ragged_break_list) {
15069         my $j = $ri_term_comma->[$_];
15070         if ($j) {
15071             set_forced_breakpoint($j);
15072             $break_count++;
15073         }
15074     }
15075     return $break_count;
15076 }
15077
15078 sub copy_old_breakpoints {
15079     my ( $i_first_comma, $i_last_comma ) = @_;
15080     for my $i ( $i_first_comma .. $i_last_comma ) {
15081         if ( $old_breakpoint_to_go[$i] ) {
15082             set_forced_breakpoint($i);
15083         }
15084     }
15085 }
15086
15087 sub set_nobreaks {
15088     my ( $i, $j ) = @_;
15089     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
15090
15091         FORMATTER_DEBUG_FLAG_NOBREAK && do {
15092             my ( $a, $b, $c ) = caller();
15093             print(
15094 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
15095             );
15096         };
15097
15098         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
15099     }
15100
15101     # shouldn't happen; non-critical error
15102     else {
15103         FORMATTER_DEBUG_FLAG_NOBREAK && do {
15104             my ( $a, $b, $c ) = caller();
15105             print(
15106 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
15107             );
15108         };
15109     }
15110 }
15111
15112 sub set_fake_breakpoint {
15113
15114     # Just bump up the breakpoint count as a signal that there are breaks.
15115     # This is useful if we have breaks but may want to postpone deciding where
15116     # to make them.
15117     $forced_breakpoint_count++;
15118 }
15119
15120 sub set_forced_breakpoint {
15121     my $i = shift;
15122
15123     return unless defined $i && $i >= 0;
15124
15125     # when called with certain tokens, use bond strengths to decide
15126     # if we break before or after it
15127     my $token = $tokens_to_go[$i];
15128
15129     if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
15130         if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
15131     }
15132
15133     # breaks are forced before 'if' and 'unless'
15134     elsif ( $is_if_unless{$token} ) { $i-- }
15135
15136     if ( $i >= 0 && $i <= $max_index_to_go ) {
15137         my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
15138
15139         FORMATTER_DEBUG_FLAG_FORCE && do {
15140             my ( $a, $b, $c ) = caller();
15141             print
15142 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
15143         };
15144
15145         if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
15146             $forced_breakpoint_to_go[$i_nonblank] = 1;
15147
15148             if ( $i_nonblank > $index_max_forced_break ) {
15149                 $index_max_forced_break = $i_nonblank;
15150             }
15151             $forced_breakpoint_count++;
15152             $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
15153               $i_nonblank;
15154
15155             # if we break at an opening container..break at the closing
15156             if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
15157                 set_closing_breakpoint($i_nonblank);
15158             }
15159         }
15160     }
15161 }
15162
15163 sub clear_breakpoint_undo_stack {
15164     $forced_breakpoint_undo_count = 0;
15165 }
15166
15167 sub undo_forced_breakpoint_stack {
15168
15169     my $i_start = shift;
15170     if ( $i_start < 0 ) {
15171         $i_start = 0;
15172         my ( $a, $b, $c ) = caller();
15173         warning(
15174 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
15175         );
15176     }
15177
15178     while ( $forced_breakpoint_undo_count > $i_start ) {
15179         my $i =
15180           $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
15181         if ( $i >= 0 && $i <= $max_index_to_go ) {
15182             $forced_breakpoint_to_go[$i] = 0;
15183             $forced_breakpoint_count--;
15184
15185             FORMATTER_DEBUG_FLAG_UNDOBP && do {
15186                 my ( $a, $b, $c ) = caller();
15187                 print(
15188 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
15189                 );
15190             };
15191         }
15192
15193         # shouldn't happen, but not a critical error
15194         else {
15195             FORMATTER_DEBUG_FLAG_UNDOBP && do {
15196                 my ( $a, $b, $c ) = caller();
15197                 print(
15198 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
15199                 );
15200             };
15201         }
15202     }
15203 }
15204
15205 sub recombine_breakpoints {
15206
15207     # sub set_continuation_breaks is very liberal in setting line breaks
15208     # for long lines, always setting breaks at good breakpoints, even
15209     # when that creates small lines.  Occasionally small line fragments
15210     # are produced which would look better if they were combined.
15211     # That's the task of this routine, recombine_breakpoints.
15212     my ( $ri_first, $ri_last ) = @_;
15213     my $more_to_do = 1;
15214
15215     # We keep looping over all of the lines of this batch
15216     # until there are no more possible recombinations
15217     my $nmax_last = @$ri_last;
15218     while ($more_to_do) {
15219         my $n_best = 0;
15220         my $bs_best;
15221         my $n;
15222         my $nmax = @$ri_last - 1;
15223
15224         # safety check for infinite loop
15225         unless ( $nmax < $nmax_last ) {
15226
15227             # shouldn't happen because splice below decreases nmax on each pass:
15228             # but i get paranoid sometimes
15229             die "Program bug-infinite loop in recombine breakpoints\n";
15230         }
15231         $nmax_last  = $nmax;
15232         $more_to_do = 0;
15233         my $previous_outdentable_closing_paren;
15234         my $leading_amp_count = 0;
15235         my $this_line_is_semicolon_terminated;
15236
15237         # loop over all remaining lines in this batch
15238         for $n ( 1 .. $nmax ) {
15239
15240             #----------------------------------------------------------
15241             # If we join the current pair of lines,
15242             # line $n-1 will become the left part of the joined line
15243             # line $n will become the right part of the joined line
15244             #
15245             # Here are Indexes of the endpoint tokens of the two lines:
15246             #
15247             #  ---left---- | ---right---
15248             #  $if   $imid | $imidr   $il
15249             #
15250             # We want to decide if we should join tokens $imid to $imidr
15251             #
15252             # We will apply a number of ad-hoc tests to see if joining
15253             # here will look ok.  The code will just issue a 'next'
15254             # command if the join doesn't look good.  If we get through
15255             # the gauntlet of tests, the lines will be recombined.
15256             #----------------------------------------------------------
15257             my $if       = $$ri_first[ $n - 1 ];
15258             my $il       = $$ri_last[$n];
15259             my $imid     = $$ri_last[ $n - 1 ];
15260             my $imidr    = $$ri_first[$n];
15261             my $bs_tweak = 0;
15262
15263             #my $depth_increase=( $nesting_depth_to_go[$imidr] -
15264             #        $nesting_depth_to_go[$if] );
15265
15266 ##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
15267
15268             # If line $n is the last line, we set some flags and
15269             # do any special checks for it
15270             if ( $n == $nmax ) {
15271
15272                 # a terminal '{' should stay where it is
15273                 next if $types_to_go[$imidr] eq '{';
15274
15275                 # set flag if statement $n ends in ';'
15276                 $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';'
15277
15278                   # with possible side comment
15279                   || ( $types_to_go[$il] eq '#'
15280                     && $il - $imidr >= 2
15281                     && $types_to_go[ $il - 2 ] eq ';'
15282                     && $types_to_go[ $il - 1 ] eq 'b' );
15283             }
15284
15285             #----------------------------------------------------------
15286             # Section 1: examine token at $imid (right end of first line
15287             # of pair)
15288             #----------------------------------------------------------
15289
15290             # an isolated '}' may join with a ';' terminated segment
15291             if ( $types_to_go[$imid] eq '}' ) {
15292
15293                 # Check for cases where combining a semicolon terminated
15294                 # statement with a previous isolated closing paren will
15295                 # allow the combined line to be outdented.  This is
15296                 # generally a good move.  For example, we can join up
15297                 # the last two lines here:
15298                 #  (
15299                 #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15300                 #      $size, $atime, $mtime, $ctime, $blksize, $blocks
15301                 #    )
15302                 #    = stat($file);
15303                 #
15304                 # to get:
15305                 #  (
15306                 #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15307                 #      $size, $atime, $mtime, $ctime, $blksize, $blocks
15308                 #  ) = stat($file);
15309                 #
15310                 # which makes the parens line up.
15311                 #
15312                 # Another example, from Joe Matarazzo, probably looks best
15313                 # with the 'or' clause appended to the trailing paren:
15314                 #  $self->some_method(
15315                 #      PARAM1 => 'foo',
15316                 #      PARAM2 => 'bar'
15317                 #  ) or die "Some_method didn't work";
15318                 #
15319                 $previous_outdentable_closing_paren =
15320                   $this_line_is_semicolon_terminated    # ends in ';'
15321                   && $if == $imid    # only one token on last line
15322                   && $tokens_to_go[$imid] eq ')'    # must be structural paren
15323
15324                   # only &&, ||, and : if no others seen
15325                   # (but note: our count made below could be wrong
15326                   # due to intervening comments)
15327                   && ( $leading_amp_count == 0
15328                     || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ )
15329
15330                   # but leading colons probably line up with with a
15331                   # previous colon or question (count could be wrong).
15332                   && $types_to_go[$imidr] ne ':'
15333
15334                   # only one step in depth allowed.  this line must not
15335                   # begin with a ')' itself.
15336                   && ( $nesting_depth_to_go[$imid] ==
15337                     $nesting_depth_to_go[$il] + 1 );
15338
15339                 next
15340                   unless (
15341                     $previous_outdentable_closing_paren
15342
15343                     # handle '.' and '?' specially below
15344                     || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
15345                   );
15346             }
15347
15348             # do not recombine lines with ending &&, ||,
15349             elsif ( $types_to_go[$imid] =~ /^(\&\&|\|\|)$/ ) {
15350                 next unless $want_break_before{ $types_to_go[$imid] };
15351             }
15352
15353             # keep a terminal colon
15354             elsif ( $types_to_go[$imid] eq ':' ) {
15355                 next unless $want_break_before{ $types_to_go[$imid] };
15356             }
15357
15358             # Identify and recombine a broken ?/: chain
15359             elsif ( $types_to_go[$imid] eq '?' ) {
15360
15361                 # Do not recombine different levels
15362                 next if ( $levels_to_go[$if] ne $levels_to_go[$imidr] );
15363
15364                 # do not recombine unless next line ends in :
15365                 next unless $types_to_go[$il] eq ':';
15366             }
15367
15368             # for lines ending in a comma...
15369             elsif ( $types_to_go[$imid] eq ',' ) {
15370
15371                 # an isolated '},' may join with an identifier + ';'
15372                 # this is useful for the class of a 'bless' statement (bless.t)
15373                 if (   $types_to_go[$if] eq '}'
15374                     && $types_to_go[$imidr] eq 'i' )
15375                 {
15376                     next
15377                       unless ( ( $if == ( $imid - 1 ) )
15378                         && ( $il == ( $imidr + 1 ) )
15379                         && $this_line_is_semicolon_terminated );
15380
15381                     # override breakpoint
15382                     $forced_breakpoint_to_go[$imid] = 0;
15383                 }
15384
15385                 # but otherwise ..
15386                 else {
15387
15388                     # do not recombine after a comma unless this will leave
15389                     # just 1 more line
15390                     next unless ( $n + 1 >= $nmax );
15391
15392                     # do not recombine if there is a change in indentation depth
15393                     next if ( $levels_to_go[$imid] != $levels_to_go[$il] );
15394
15395                     # do not recombine a "complex expression" after a
15396                     # comma.  "complex" means no parens.
15397                     my $saw_paren;
15398                     foreach my $ii ( $imidr .. $il ) {
15399                         if ( $tokens_to_go[$ii] eq '(' ) {
15400                             $saw_paren = 1;
15401                             last;
15402                         }
15403                     }
15404                     next if $saw_paren;
15405                 }
15406             }
15407
15408             # opening paren..
15409             elsif ( $types_to_go[$imid] eq '(' ) {
15410
15411                 # No longer doing this
15412             }
15413
15414             elsif ( $types_to_go[$imid] eq ')' ) {
15415
15416                 # No longer doing this
15417             }
15418
15419             # keep a terminal for-semicolon
15420             elsif ( $types_to_go[$imid] eq 'f' ) {
15421                 next;
15422             }
15423
15424             # if '=' at end of line ...
15425             elsif ( $is_assignment{ $types_to_go[$imid] } ) {
15426
15427                 my $is_short_quote =
15428                   (      $types_to_go[$imidr] eq 'Q'
15429                       && $imidr == $il
15430                       && length( $tokens_to_go[$imidr] ) <
15431                       $rOpts_short_concatenation_item_length );
15432                 my $ifnmax = $$ri_first[$nmax];
15433                 my $ifnp = ( $nmax > $n ) ? $$ri_first[ $n + 1 ] : $ifnmax;
15434                 my $is_qk =
15435                   ( $types_to_go[$if] eq '?' && $types_to_go[$ifnp] eq ':' );
15436
15437                 # always join an isolated '=', a short quote, or if this
15438                 # will put ?/: at start of adjacent lines
15439                 if (   $if != $imid
15440                     && !$is_short_quote
15441                     && !$is_qk )
15442                 {
15443                     next
15444                       unless (
15445                         (
15446
15447                             # unless we can reduce this to two lines
15448                             $nmax < $n + 2
15449
15450                             # or three lines, the last with a leading semicolon
15451                             || (   $nmax == $n + 2
15452                                 && $types_to_go[$ifnmax] eq ';' )
15453
15454                             # or the next line ends with a here doc
15455                             || $types_to_go[$il] eq 'h'
15456                         )
15457
15458                         # do not recombine if the two lines might align well
15459                         # this is a very approximate test for this
15460                         && $types_to_go[$imidr] ne $types_to_go[$ifnp]
15461                       );
15462
15463                     # -lp users often prefer this:
15464                     #  my $title = function($env, $env, $sysarea,
15465                     #                       "bubba Borrower Entry");
15466                     #  so we will recombine if -lp is used we have ending comma
15467                     if ( !$rOpts_line_up_parentheses
15468                         || $types_to_go[$il] ne ',' )
15469                     {
15470
15471                         # otherwise, scan the rhs line up to last token for
15472                         # complexity.  Note that we are not counting the last
15473                         # token in case it is an opening paren.
15474                         my $tv    = 0;
15475                         my $depth = $nesting_depth_to_go[$imidr];
15476                         for ( my $i = $imidr + 1 ; $i < $il ; $i++ ) {
15477                             if ( $nesting_depth_to_go[$i] != $depth ) {
15478                                 $tv++;
15479                                 last if ( $tv > 1 );
15480                             }
15481                             $depth = $nesting_depth_to_go[$i];
15482                         }
15483
15484                         # ok to recombine if no level changes before last token
15485                         if ( $tv > 0 ) {
15486
15487                             # otherwise, do not recombine if more than two
15488                             # level changes.
15489                             next if ( $tv > 1 );
15490
15491                             # check total complexity of the two adjacent lines
15492                             # that will occur if we do this join
15493                             my $istop =
15494                               ( $n < $nmax ) ? $$ri_last[ $n + 1 ] : $il;
15495                             for ( my $i = $il ; $i <= $istop ; $i++ ) {
15496                                 if ( $nesting_depth_to_go[$i] != $depth ) {
15497                                     $tv++;
15498                                     last if ( $tv > 2 );
15499                                 }
15500                                 $depth = $nesting_depth_to_go[$i];
15501                             }
15502
15503                         # do not recombine if total is more than 2 level changes
15504                             next if ( $tv > 2 );
15505                         }
15506                     }
15507                 }
15508
15509                 unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
15510                     $forced_breakpoint_to_go[$imid] = 0;
15511                 }
15512             }
15513
15514             # for keywords..
15515             elsif ( $types_to_go[$imid] eq 'k' ) {
15516
15517                 # make major control keywords stand out
15518                 # (recombine.t)
15519                 next
15520                   if (
15521
15522                     #/^(last|next|redo|return)$/
15523                     $is_last_next_redo_return{ $tokens_to_go[$imid] }
15524
15525                     # but only if followed by multiple lines
15526                     && $n < $nmax
15527                   );
15528
15529                 if ( $is_and_or{ $tokens_to_go[$imid] } ) {
15530                     next unless $want_break_before{ $tokens_to_go[$imid] };
15531                 }
15532             }
15533
15534             # handle trailing + - * /
15535             elsif ( $types_to_go[$imid] =~ /^[\+\-\*\/]$/ ) {
15536                 my $i_next_nonblank = $imidr;
15537                 my $i_next_next     = $i_next_nonblank + 1;
15538                 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
15539
15540                 # do not strand numbers
15541                 next
15542                   unless (
15543                     $types_to_go[$i_next_nonblank] eq 'n'
15544                     && (
15545                         $i_next_nonblank == $il
15546                         || (   $i_next_next == $il
15547                             && $types_to_go[$i_next_next] =~ /^[\+\-\*\/]$/ )
15548                         || $types_to_go[$i_next_next] eq ';'
15549                     )
15550                   );
15551             }
15552
15553             #----------------------------------------------------------
15554             # Section 2: Now examine token at $imidr (left end of second
15555             # line of pair)
15556             #----------------------------------------------------------
15557
15558             # join lines identified above as capable of
15559             # causing an outdented line with leading closing paren
15560             if ($previous_outdentable_closing_paren) {
15561                 $forced_breakpoint_to_go[$imid] = 0;
15562             }
15563
15564             # do not recombine lines with leading :
15565             elsif ( $types_to_go[$imidr] eq ':' ) {
15566                 $leading_amp_count++;
15567                 next if $want_break_before{ $types_to_go[$imidr] };
15568             }
15569
15570             # do not recombine lines with leading &&, ||
15571             elsif ( $types_to_go[$imidr] =~ /^(\&\&|\|\|)$/ ) {
15572
15573                 # unless it follows a ? or :
15574                 $leading_amp_count++;
15575                 my $ok = 0;
15576                 if ( $types_to_go[$if] =~ /^(\:|\?)$/ ) {
15577
15578                     # and is followed by an open paren..
15579                     if ( $tokens_to_go[$il] eq '(' ) {
15580                         $ok = 1;
15581                     }
15582
15583                     # or is followed by a ? or :
15584                     else {
15585                         my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
15586                         if ( $iff >= 0 && $types_to_go[$iff] =~ /^(\:|\?)$/ ) {
15587                             $ok = 1;
15588                         }
15589                     }
15590                 }
15591                 next if !$ok && $want_break_before{ $types_to_go[$imidr] };
15592                 $forced_breakpoint_to_go[$imid] = 0;
15593
15594                 # tweak the bond strength to give this joint priority
15595                 # over ? and :
15596                 $bs_tweak = 0.25;
15597             }
15598
15599             # Identify and recombine a broken ?/: chain
15600             elsif ( $types_to_go[$imidr] eq '?' ) {
15601
15602                 # Do not recombine different levels
15603                 my $lev = $levels_to_go[$imidr];
15604                 next if ( $lev ne $levels_to_go[$if] );
15605
15606                 # some indexes of line first tokens --
15607                 #  mm  - line before previous line
15608                 #  f   - previous line
15609                 #     <-- this line
15610                 #  ff  - next line
15611                 #  fff - line after next
15612                 my $iff  = $n < $nmax      ? $$ri_first[ $n + 1 ] : -1;
15613                 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
15614                 my $imm  = $n > 1          ? $$ri_first[ $n - 2 ] : -1;
15615
15616                 # Do not recombine a '?' if either next line or previous line
15617                 # does not start with a ':'.  The reasons are that (1) no
15618                 # alignment of the ? will be possible and (2) the expression is
15619                 # somewhat complex, so the '?' is harder to see in the interior
15620                 # of the line.
15621                 my $follows_colon  = $if >= 0  && $types_to_go[$if]  eq ':';
15622                 my $precedes_colon = $iff >= 0 && $types_to_go[$iff] eq ':';
15623                 next unless ( $follows_colon || $precedes_colon );
15624
15625                 # we will always combining a ? line following a : line
15626                 if ( !$follows_colon ) {
15627
15628                     # ...otherwise recombine only if it looks like a chain.  we
15629                     # will just look at a few nearby lines to see if this looks
15630                     # like a chain.
15631                     my $local_count = 0;
15632                     foreach my $ii ( $imm, $if, $iff, $ifff ) {
15633                         $local_count++
15634                           if $ii >= 0
15635                               && $types_to_go[$ii] eq ':'
15636                               && $levels_to_go[$ii] == $lev;
15637                     }
15638                     next unless ( $local_count > 1 );
15639                 }
15640                 $forced_breakpoint_to_go[$imid] = 0;
15641             }
15642
15643             # do not recombine lines with leading '.'
15644             elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
15645                 my $i_next_nonblank = $imidr + 1;
15646                 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
15647                     $i_next_nonblank++;
15648                 }
15649
15650                 next
15651                   unless (
15652
15653                    # ... unless there is just one and we can reduce
15654                    # this to two lines if we do.  For example, this
15655                    #
15656                    #
15657                    #  $bodyA .=
15658                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
15659                    #
15660                    #  looks better than this:
15661                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
15662                    #    . '$args .= $pat;'
15663
15664                     (
15665                            $n == 2
15666                         && $n == $nmax
15667                         && $types_to_go[$if] ne $types_to_go[$imidr]
15668                     )
15669
15670                     #      ... or this would strand a short quote , like this
15671                     #                . "some long qoute"
15672                     #                . "\n";
15673
15674                     || (   $types_to_go[$i_next_nonblank] eq 'Q'
15675                         && $i_next_nonblank >= $il - 1
15676                         && length( $tokens_to_go[$i_next_nonblank] ) <
15677                         $rOpts_short_concatenation_item_length )
15678                   );
15679             }
15680
15681             # handle leading keyword..
15682             elsif ( $types_to_go[$imidr] eq 'k' ) {
15683
15684                 # handle leading "or"
15685                 if ( $tokens_to_go[$imidr] eq 'or' ) {
15686                     next
15687                       unless (
15688                         $this_line_is_semicolon_terminated
15689                         && (
15690
15691                             # following 'if' or 'unless' or 'or'
15692                             $types_to_go[$if] eq 'k'
15693                             && $is_if_unless{ $tokens_to_go[$if] }
15694
15695                             # important: only combine a very simple or
15696                             # statement because the step below may have
15697                             # combined a trailing 'and' with this or, and we do
15698                             # not want to then combine everything together
15699                             && ( $il - $imidr <= 7 )
15700                         )
15701                       );
15702                 }
15703
15704                 # handle leading 'and'
15705                 elsif ( $tokens_to_go[$imidr] eq 'and' ) {
15706
15707                     # Decide if we will combine a single terminal 'and'
15708                     # after an 'if' or 'unless'.
15709
15710                     #     This looks best with the 'and' on the same
15711                     #     line as the 'if':
15712                     #
15713                     #         $a = 1
15714                     #           if $seconds and $nu < 2;
15715                     #
15716                     #     But this looks better as shown:
15717                     #
15718                     #         $a = 1
15719                     #           if !$this->{Parents}{$_}
15720                     #           or $this->{Parents}{$_} eq $_;
15721                     #
15722                     next
15723                       unless (
15724                         $this_line_is_semicolon_terminated
15725                         && (
15726
15727                             # following 'if' or 'unless' or 'or'
15728                             $types_to_go[$if] eq 'k'
15729                             && (   $is_if_unless{ $tokens_to_go[$if] }
15730                                 || $tokens_to_go[$if] eq 'or' )
15731                         )
15732                       );
15733                 }
15734
15735                 # handle leading "if" and "unless"
15736                 elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
15737
15738                     # FIXME: This is still experimental..may not be too useful
15739                     next
15740                       unless (
15741                         $this_line_is_semicolon_terminated
15742
15743                         #  previous line begins with 'and' or 'or'
15744                         && $types_to_go[$if] eq 'k'
15745                         && $is_and_or{ $tokens_to_go[$if] }
15746
15747                       );
15748                 }
15749
15750                 # handle all other leading keywords
15751                 else {
15752
15753                     # keywords look best at start of lines,
15754                     # but combine things like "1 while"
15755                     unless ( $is_assignment{ $types_to_go[$imid] } ) {
15756                         next
15757                           if ( ( $types_to_go[$imid] ne 'k' )
15758                             && ( $tokens_to_go[$imidr] ne 'while' ) );
15759                     }
15760                 }
15761             }
15762
15763             # similar treatment of && and || as above for 'and' and 'or':
15764             # NOTE: This block of code is currently bypassed because
15765             # of a previous block but is retained for possible future use.
15766             elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
15767
15768                 # maybe looking at something like:
15769                 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
15770
15771                 next
15772                   unless (
15773                     $this_line_is_semicolon_terminated
15774
15775                     # previous line begins with an 'if' or 'unless' keyword
15776                     && $types_to_go[$if] eq 'k'
15777                     && $is_if_unless{ $tokens_to_go[$if] }
15778
15779                   );
15780             }
15781
15782             # handle leading + - * /
15783             elsif ( $types_to_go[$imidr] =~ /^[\+\-\*\/]$/ ) {
15784                 my $i_next_nonblank = $imidr + 1;
15785                 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
15786                     $i_next_nonblank++;
15787                 }
15788
15789                 my $i_next_next = $i_next_nonblank + 1;
15790                 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
15791
15792                 next
15793                   unless (
15794
15795                     # unless there is just one and we can reduce
15796                     # this to two lines if we do.  For example, this
15797                     (
15798                            $n == 2
15799                         && $n == $nmax
15800                         && $types_to_go[$if] ne $types_to_go[$imidr]
15801                     )
15802
15803                     #  do not strand numbers
15804                     || (
15805                         $types_to_go[$i_next_nonblank] eq 'n'
15806                         && (   $i_next_nonblank >= $il - 1
15807                             || $types_to_go[$i_next_next] eq ';' )
15808                     )
15809                   );
15810             }
15811
15812             # handle line with leading = or similar
15813             elsif ( $is_assignment{ $types_to_go[$imidr] } ) {
15814                 next unless $n == 1;
15815                 my $ifnmax = $$ri_first[$nmax];
15816                 next
15817                   unless (
15818
15819                     # unless we can reduce this to two lines
15820                     $nmax == 2
15821
15822                     # or three lines, the last with a leading semicolon
15823                     || ( $nmax == 3 && $types_to_go[$ifnmax] eq ';' )
15824
15825                     # or the next line ends with a here doc
15826                     || $types_to_go[$il] eq 'h'
15827                   );
15828             }
15829
15830             #----------------------------------------------------------
15831             # Section 3:
15832             # Combine the lines if we arrive here and it is possible
15833             #----------------------------------------------------------
15834
15835             # honor hard breakpoints
15836             next if ( $forced_breakpoint_to_go[$imid] > 0 );
15837
15838             my $bs = $bond_strength_to_go[$imid] + $bs_tweak;
15839
15840             # combined line cannot be too long
15841             next
15842               if excess_line_length( $if, $il ) > 0;
15843
15844             # do not recombine if we would skip in indentation levels
15845             if ( $n < $nmax ) {
15846                 my $if_next = $$ri_first[ $n + 1 ];
15847                 next
15848                   if (
15849                        $levels_to_go[$if] < $levels_to_go[$imidr]
15850                     && $levels_to_go[$imidr] < $levels_to_go[$if_next]
15851
15852                     # but an isolated 'if (' is undesirable
15853                     && !(
15854                            $n == 1
15855                         && $imid - $if <= 2
15856                         && $types_to_go[$if]  eq 'k'
15857                         && $tokens_to_go[$if] eq 'if'
15858                         && $tokens_to_go[$imid] ne '('
15859                     )
15860                   );
15861             }
15862
15863             # honor no-break's
15864             next if ( $bs == NO_BREAK );
15865
15866             # remember the pair with the greatest bond strength
15867             if ( !$n_best ) {
15868                 $n_best  = $n;
15869                 $bs_best = $bs;
15870             }
15871             else {
15872
15873                 if ( $bs > $bs_best ) {
15874                     $n_best  = $n;
15875                     $bs_best = $bs;
15876                 }
15877             }
15878         }
15879
15880         # recombine the pair with the greatest bond strength
15881         if ($n_best) {
15882             splice @$ri_first, $n_best, 1;
15883             splice @$ri_last, $n_best - 1, 1;
15884
15885             # keep going if we are still making progress
15886             $more_to_do++;
15887         }
15888     }
15889     return ( $ri_first, $ri_last );
15890 }
15891
15892 sub break_all_chain_tokens {
15893
15894     # scan the current breakpoints looking for breaks at certain "chain
15895     # operators" (. : && || + etc) which often occur repeatedly in a long
15896     # statement.  If we see a break at any one, break at all similar tokens
15897     # within the same container.
15898     #
15899     my ( $ri_left, $ri_right ) = @_;
15900
15901     my %saw_chain_type;
15902     my %left_chain_type;
15903     my %right_chain_type;
15904     my %interior_chain_type;
15905     my $nmax = @$ri_right - 1;
15906
15907     # scan the left and right end tokens of all lines
15908     my $count = 0;
15909     for my $n ( 0 .. $nmax ) {
15910         my $il    = $$ri_left[$n];
15911         my $ir    = $$ri_right[$n];
15912         my $typel = $types_to_go[$il];
15913         my $typer = $types_to_go[$ir];
15914         $typel = '+' if ( $typel eq '-' );    # treat + and - the same
15915         $typer = '+' if ( $typer eq '-' );
15916         $typel = '*' if ( $typel eq '/' );    # treat * and / the same
15917         $typer = '*' if ( $typer eq '/' );
15918         my $tokenl = $tokens_to_go[$il];
15919         my $tokenr = $tokens_to_go[$ir];
15920
15921         if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
15922             next if ( $typel eq '?' );
15923             push @{ $left_chain_type{$typel} }, $il;
15924             $saw_chain_type{$typel} = 1;
15925             $count++;
15926         }
15927         if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
15928             next if ( $typer eq '?' );
15929             push @{ $right_chain_type{$typer} }, $ir;
15930             $saw_chain_type{$typer} = 1;
15931             $count++;
15932         }
15933     }
15934     return unless $count;
15935
15936     # now look for any interior tokens of the same types
15937     $count = 0;
15938     for my $n ( 0 .. $nmax ) {
15939         my $il = $$ri_left[$n];
15940         my $ir = $$ri_right[$n];
15941         for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
15942             my $type = $types_to_go[$i];
15943             $type = '+' if ( $type eq '-' );
15944             $type = '*' if ( $type eq '/' );
15945             if ( $saw_chain_type{$type} ) {
15946                 push @{ $interior_chain_type{$type} }, $i;
15947                 $count++;
15948             }
15949         }
15950     }
15951     return unless $count;
15952
15953     # now make a list of all new break points
15954     my @insert_list;
15955
15956     # loop over all chain types
15957     foreach my $type ( keys %saw_chain_type ) {
15958
15959         # quit if just ONE continuation line with leading .  For example--
15960         # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
15961         #  . $contents;
15962         last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
15963
15964         # loop over all interior chain tokens
15965         foreach my $itest ( @{ $interior_chain_type{$type} } ) {
15966
15967             # loop over all left end tokens of same type
15968             if ( $left_chain_type{$type} ) {
15969                 next if $nobreak_to_go[ $itest - 1 ];
15970                 foreach my $i ( @{ $left_chain_type{$type} } ) {
15971                     next unless in_same_container( $i, $itest );
15972                     push @insert_list, $itest - 1;
15973
15974                     # Break at matching ? if this : is at a different level.
15975                     # For example, the ? before $THRf_DEAD in the following
15976                     # should get a break if its : gets a break.
15977                     #
15978                     # my $flags =
15979                     #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
15980                     #   : ( $_ & 4 ) ? $THRf_R_DETACHED
15981                     #   :              $THRf_R_JOINABLE;
15982                     if (   $type eq ':'
15983                         && $levels_to_go[$i] != $levels_to_go[$itest] )
15984                     {
15985                         my $i_question = $mate_index_to_go[$itest];
15986                         if ( $i_question > 0 ) {
15987                             push @insert_list, $i_question - 1;
15988                         }
15989                     }
15990                     last;
15991                 }
15992             }
15993
15994             # loop over all right end tokens of same type
15995             if ( $right_chain_type{$type} ) {
15996                 next if $nobreak_to_go[$itest];
15997                 foreach my $i ( @{ $right_chain_type{$type} } ) {
15998                     next unless in_same_container( $i, $itest );
15999                     push @insert_list, $itest;
16000
16001                     # break at matching ? if this : is at a different level
16002                     if (   $type eq ':'
16003                         && $levels_to_go[$i] != $levels_to_go[$itest] )
16004                     {
16005                         my $i_question = $mate_index_to_go[$itest];
16006                         if ( $i_question >= 0 ) {
16007                             push @insert_list, $i_question;
16008                         }
16009                     }
16010                     last;
16011                 }
16012             }
16013         }
16014     }
16015
16016     # insert any new break points
16017     if (@insert_list) {
16018         insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16019     }
16020 }
16021
16022 sub insert_final_breaks {
16023
16024     my ( $ri_left, $ri_right ) = @_;
16025
16026     my $nmax = @$ri_right - 1;
16027
16028     # scan the left and right end tokens of all lines
16029     my $count         = 0;
16030     my $i_first_colon = -1;
16031     for my $n ( 0 .. $nmax ) {
16032         my $il    = $$ri_left[$n];
16033         my $ir    = $$ri_right[$n];
16034         my $typel = $types_to_go[$il];
16035         my $typer = $types_to_go[$ir];
16036         return if ( $typel eq '?' );
16037         return if ( $typer eq '?' );
16038         if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
16039         elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16040     }
16041
16042     # For long ternary chains,
16043     # if the first : we see has its # ? is in the interior
16044     # of a preceding line, then see if there are any good
16045     # breakpoints before the ?.
16046     if ( $i_first_colon > 0 ) {
16047         my $i_question = $mate_index_to_go[$i_first_colon];
16048         if ( $i_question > 0 ) {
16049             my @insert_list;
16050             for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16051                 my $token = $tokens_to_go[$ii];
16052                 my $type  = $types_to_go[$ii];
16053
16054                 # For now, a good break is either a comma or a 'return'.
16055                 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
16056                     && in_same_container( $ii, $i_question ) )
16057                 {
16058                     push @insert_list, $ii;
16059                     last;
16060                 }
16061             }
16062
16063             # insert any new break points
16064             if (@insert_list) {
16065                 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16066             }
16067         }
16068     }
16069 }
16070
16071 sub in_same_container {
16072
16073     # check to see if tokens at i1 and i2 are in the
16074     # same container, and not separated by a comma, ? or :
16075     my ( $i1, $i2 ) = @_;
16076     my $type  = $types_to_go[$i1];
16077     my $depth = $nesting_depth_to_go[$i1];
16078     return unless ( $nesting_depth_to_go[$i2] == $depth );
16079     if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
16080     for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
16081         next   if ( $nesting_depth_to_go[$i] > $depth );
16082         return if ( $nesting_depth_to_go[$i] < $depth );
16083
16084         my $tok = $tokens_to_go[$i];
16085         $tok = ',' if $tok eq '=>';    # treat => same as ,
16086
16087         # Example: we would not want to break at any of these .'s
16088         #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
16089         if ( $type ne ':' ) {
16090             return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
16091         }
16092         else {
16093             return if ( $tok =~ /^[\,]$/ );
16094         }
16095     }
16096     return 1;
16097 }
16098
16099 sub set_continuation_breaks {
16100
16101     # Define an array of indexes for inserting newline characters to
16102     # keep the line lengths below the maximum desired length.  There is
16103     # an implied break after the last token, so it need not be included.
16104
16105     # Method:
16106     # This routine is part of series of routines which adjust line
16107     # lengths.  It is only called if a statement is longer than the
16108     # maximum line length, or if a preliminary scanning located
16109     # desirable break points.   Sub scan_list has already looked at
16110     # these tokens and set breakpoints (in array
16111     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
16112     # after commas, after opening parens, and before closing parens).
16113     # This routine will honor these breakpoints and also add additional
16114     # breakpoints as necessary to keep the line length below the maximum
16115     # requested.  It bases its decision on where the 'bond strength' is
16116     # lowest.
16117
16118     # Output: returns references to the arrays:
16119     #  @i_first
16120     #  @i_last
16121     # which contain the indexes $i of the first and last tokens on each
16122     # line.
16123
16124     # In addition, the array:
16125     #   $forced_breakpoint_to_go[$i]
16126     # may be updated to be =1 for any index $i after which there must be
16127     # a break.  This signals later routines not to undo the breakpoint.
16128
16129     my $saw_good_break = shift;
16130     my @i_first        = ();      # the first index to output
16131     my @i_last         = ();      # the last index to output
16132     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
16133     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
16134
16135     set_bond_strengths();
16136
16137     my $imin = 0;
16138     my $imax = $max_index_to_go;
16139     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
16140     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
16141     my $i_begin = $imin;          # index for starting next iteration
16142
16143     my $leading_spaces          = leading_spaces_to_go($imin);
16144     my $line_count              = 0;
16145     my $last_break_strength     = NO_BREAK;
16146     my $i_last_break            = -1;
16147     my $max_bias                = 0.001;
16148     my $tiny_bias               = 0.0001;
16149     my $leading_alignment_token = "";
16150     my $leading_alignment_type  = "";
16151
16152     # see if any ?/:'s are in order
16153     my $colons_in_order = 1;
16154     my $last_tok        = "";
16155     my @colon_list  = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
16156     my $colon_count = @colon_list;
16157     foreach (@colon_list) {
16158         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
16159         $last_tok = $_;
16160     }
16161
16162     # This is a sufficient but not necessary condition for colon chain
16163     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
16164
16165     #-------------------------------------------------------
16166     # BEGINNING of main loop to set continuation breakpoints
16167     # Keep iterating until we reach the end
16168     #-------------------------------------------------------
16169     while ( $i_begin <= $imax ) {
16170         my $lowest_strength        = NO_BREAK;
16171         my $starting_sum           = $lengths_to_go[$i_begin];
16172         my $i_lowest               = -1;
16173         my $i_test                 = -1;
16174         my $lowest_next_token      = '';
16175         my $lowest_next_type       = 'b';
16176         my $i_lowest_next_nonblank = -1;
16177
16178         #-------------------------------------------------------
16179         # BEGINNING of inner loop to find the best next breakpoint
16180         #-------------------------------------------------------
16181         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
16182             my $type       = $types_to_go[$i_test];
16183             my $token      = $tokens_to_go[$i_test];
16184             my $next_type  = $types_to_go[ $i_test + 1 ];
16185             my $next_token = $tokens_to_go[ $i_test + 1 ];
16186             my $i_next_nonblank =
16187               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
16188             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
16189             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
16190             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
16191             my $strength                 = $bond_strength_to_go[$i_test];
16192             my $must_break               = 0;
16193
16194             # FIXME: TESTING: Might want to be able to break after these
16195             # force an immediate break at certain operators
16196             # with lower level than the start of the line
16197             if (
16198                 (
16199                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
16200                     || (   $next_nonblank_type eq 'k'
16201                         && $next_nonblank_token =~ /^(and|or)$/ )
16202                 )
16203                 && ( $nesting_depth_to_go[$i_begin] >
16204                     $nesting_depth_to_go[$i_next_nonblank] )
16205               )
16206             {
16207                 set_forced_breakpoint($i_next_nonblank);
16208             }
16209
16210             if (
16211
16212                 # Try to put a break where requested by scan_list
16213                 $forced_breakpoint_to_go[$i_test]
16214
16215                 # break between ) { in a continued line so that the '{' can
16216                 # be outdented
16217                 # See similar logic in scan_list which catches instances
16218                 # where a line is just something like ') {'
16219                 || (   $line_count
16220                     && ( $token eq ')' )
16221                     && ( $next_nonblank_type eq '{' )
16222                     && ($next_nonblank_block_type)
16223                     && !$rOpts->{'opening-brace-always-on-right'} )
16224
16225                 # There is an implied forced break at a terminal opening brace
16226                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
16227               )
16228             {
16229
16230                 # Forced breakpoints must sometimes be overridden, for example
16231                 # because of a side comment causing a NO_BREAK.  It is easier
16232                 # to catch this here than when they are set.
16233                 if ( $strength < NO_BREAK ) {
16234                     $strength   = $lowest_strength - $tiny_bias;
16235                     $must_break = 1;
16236                 }
16237             }
16238
16239             # quit if a break here would put a good terminal token on
16240             # the next line and we already have a possible break
16241             if (
16242                    !$must_break
16243                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
16244                 && (
16245                     (
16246                         $leading_spaces +
16247                         $lengths_to_go[ $i_next_nonblank + 1 ] -
16248                         $starting_sum
16249                     ) > $rOpts_maximum_line_length
16250                 )
16251               )
16252             {
16253                 last if ( $i_lowest >= 0 );
16254             }
16255
16256             # Avoid a break which would strand a single punctuation
16257             # token.  For example, we do not want to strand a leading
16258             # '.' which is followed by a long quoted string.
16259             if (
16260                    !$must_break
16261                 && ( $i_test == $i_begin )
16262                 && ( $i_test < $imax )
16263                 && ( $token eq $type )
16264                 && (
16265                     (
16266                         $leading_spaces +
16267                         $lengths_to_go[ $i_test + 1 ] -
16268                         $starting_sum
16269                     ) <= $rOpts_maximum_line_length
16270                 )
16271               )
16272             {
16273                 $i_test++;
16274
16275                 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
16276                     $i_test++;
16277                 }
16278                 redo;
16279             }
16280
16281             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
16282             {
16283
16284                 # break at previous best break if it would have produced
16285                 # a leading alignment of certain common tokens, and it
16286                 # is different from the latest candidate break
16287                 last
16288                   if ($leading_alignment_type);
16289
16290                 # Force at least one breakpoint if old code had good
16291                 # break It is only called if a breakpoint is required or
16292                 # desired.  This will probably need some adjustments
16293                 # over time.  A goal is to try to be sure that, if a new
16294                 # side comment is introduced into formated text, then
16295                 # the same breakpoints will occur.  scbreak.t
16296                 last
16297                   if (
16298                     $i_test == $imax                # we are at the end
16299                     && !$forced_breakpoint_count    #
16300                     && $saw_good_break              # old line had good break
16301                     && $type =~ /^[#;\{]$/          # and this line ends in
16302                                                     # ';' or side comment
16303                     && $i_last_break < 0        # and we haven't made a break
16304                     && $i_lowest > 0            # and we saw a possible break
16305                     && $i_lowest < $imax - 1    # (but not just before this ;)
16306                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
16307                   );
16308
16309                 $lowest_strength        = $strength;
16310                 $i_lowest               = $i_test;
16311                 $lowest_next_token      = $next_nonblank_token;
16312                 $lowest_next_type       = $next_nonblank_type;
16313                 $i_lowest_next_nonblank = $i_next_nonblank;
16314                 last if $must_break;
16315
16316                 # set flags to remember if a break here will produce a
16317                 # leading alignment of certain common tokens
16318                 if (   $line_count > 0
16319                     && $i_test < $imax
16320                     && ( $lowest_strength - $last_break_strength <= $max_bias )
16321                   )
16322                 {
16323                     my $i_last_end = $i_begin - 1;
16324                     if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
16325                     my $tok_beg  = $tokens_to_go[$i_begin];
16326                     my $type_beg = $types_to_go[$i_begin];
16327                     if (
16328
16329                         # check for leading alignment of certain tokens
16330                         (
16331                                $tok_beg eq $next_nonblank_token
16332                             && $is_chain_operator{$tok_beg}
16333                             && (   $type_beg eq 'k'
16334                                 || $type_beg eq $tok_beg )
16335                             && $nesting_depth_to_go[$i_begin] >=
16336                             $nesting_depth_to_go[$i_next_nonblank]
16337                         )
16338
16339                         || (   $tokens_to_go[$i_last_end] eq $token
16340                             && $is_chain_operator{$token}
16341                             && ( $type eq 'k' || $type eq $token )
16342                             && $nesting_depth_to_go[$i_last_end] >=
16343                             $nesting_depth_to_go[$i_test] )
16344                       )
16345                     {
16346                         $leading_alignment_token = $next_nonblank_token;
16347                         $leading_alignment_type  = $next_nonblank_type;
16348                     }
16349                 }
16350             }
16351
16352             my $too_long =
16353               ( $i_test >= $imax )
16354               ? 1
16355               : (
16356                 (
16357                     $leading_spaces +
16358                       $lengths_to_go[ $i_test + 2 ] -
16359                       $starting_sum
16360                 ) > $rOpts_maximum_line_length
16361               );
16362
16363             FORMATTER_DEBUG_FLAG_BREAK
16364               && print
16365 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
16366
16367             # allow one extra terminal token after exceeding line length
16368             # if it would strand this token.
16369             if (   $rOpts_fuzzy_line_length
16370                 && $too_long
16371                 && ( $i_lowest == $i_test )
16372                 && ( length($token) > 1 )
16373                 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
16374             {
16375                 $too_long = 0;
16376             }
16377
16378             last
16379               if (
16380                 ( $i_test == $imax )    # we're done if no more tokens,
16381                 || (
16382                     ( $i_lowest >= 0 )    # or no more space and we have a break
16383                     && $too_long
16384                 )
16385               );
16386         }
16387
16388         #-------------------------------------------------------
16389         # END of inner loop to find the best next breakpoint
16390         # Now decide exactly where to put the breakpoint
16391         #-------------------------------------------------------
16392
16393         # it's always ok to break at imax if no other break was found
16394         if ( $i_lowest < 0 ) { $i_lowest = $imax }
16395
16396         # semi-final index calculation
16397         my $i_next_nonblank = (
16398             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
16399             ? $i_lowest + 2
16400             : $i_lowest + 1
16401         );
16402         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
16403         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16404
16405         #-------------------------------------------------------
16406         # ?/: rule 1 : if a break here will separate a '?' on this
16407         # line from its closing ':', then break at the '?' instead.
16408         #-------------------------------------------------------
16409         my $i;
16410         foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
16411             next unless ( $tokens_to_go[$i] eq '?' );
16412
16413             # do not break if probable sequence of ?/: statements
16414             next if ($is_colon_chain);
16415
16416             # do not break if statement is broken by side comment
16417             next
16418               if (
16419                 $tokens_to_go[$max_index_to_go] eq '#'
16420                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
16421                     $max_index_to_go ) !~ /^[\;\}]$/
16422               );
16423
16424             # no break needed if matching : is also on the line
16425             next
16426               if ( $mate_index_to_go[$i] >= 0
16427                 && $mate_index_to_go[$i] <= $i_next_nonblank );
16428
16429             $i_lowest = $i;
16430             if ( $want_break_before{'?'} ) { $i_lowest-- }
16431             last;
16432         }
16433
16434         #-------------------------------------------------------
16435         # END of inner loop to find the best next breakpoint:
16436         # Break the line after the token with index i=$i_lowest
16437         #-------------------------------------------------------
16438
16439         # final index calculation
16440         $i_next_nonblank = (
16441             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
16442             ? $i_lowest + 2
16443             : $i_lowest + 1
16444         );
16445         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
16446         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16447
16448         FORMATTER_DEBUG_FLAG_BREAK
16449           && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
16450
16451         #-------------------------------------------------------
16452         # ?/: rule 2 : if we break at a '?', then break at its ':'
16453         #
16454         # Note: this rule is also in sub scan_list to handle a break
16455         # at the start and end of a line (in case breaks are dictated
16456         # by side comments).
16457         #-------------------------------------------------------
16458         if ( $next_nonblank_type eq '?' ) {
16459             set_closing_breakpoint($i_next_nonblank);
16460         }
16461         elsif ( $types_to_go[$i_lowest] eq '?' ) {
16462             set_closing_breakpoint($i_lowest);
16463         }
16464
16465         #-------------------------------------------------------
16466         # ?/: rule 3 : if we break at a ':' then we save
16467         # its location for further work below.  We may need to go
16468         # back and break at its '?'.
16469         #-------------------------------------------------------
16470         if ( $next_nonblank_type eq ':' ) {
16471             push @i_colon_breaks, $i_next_nonblank;
16472         }
16473         elsif ( $types_to_go[$i_lowest] eq ':' ) {
16474             push @i_colon_breaks, $i_lowest;
16475         }
16476
16477         # here we should set breaks for all '?'/':' pairs which are
16478         # separated by this line
16479
16480         $line_count++;
16481
16482         # save this line segment, after trimming blanks at the ends
16483         push( @i_first,
16484             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
16485         push( @i_last,
16486             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
16487
16488         # set a forced breakpoint at a container opening, if necessary, to
16489         # signal a break at a closing container.  Excepting '(' for now.
16490         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
16491             && !$forced_breakpoint_to_go[$i_lowest] )
16492         {
16493             set_closing_breakpoint($i_lowest);
16494         }
16495
16496         # get ready to go again
16497         $i_begin                 = $i_lowest + 1;
16498         $last_break_strength     = $lowest_strength;
16499         $i_last_break            = $i_lowest;
16500         $leading_alignment_token = "";
16501         $leading_alignment_type  = "";
16502         $lowest_next_token       = '';
16503         $lowest_next_type        = 'b';
16504
16505         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
16506             $i_begin++;
16507         }
16508
16509         # update indentation size
16510         if ( $i_begin <= $imax ) {
16511             $leading_spaces = leading_spaces_to_go($i_begin);
16512         }
16513     }
16514
16515     #-------------------------------------------------------
16516     # END of main loop to set continuation breakpoints
16517     # Now go back and make any necessary corrections
16518     #-------------------------------------------------------
16519
16520     #-------------------------------------------------------
16521     # ?/: rule 4 -- if we broke at a ':', then break at
16522     # corresponding '?' unless this is a chain of ?: expressions
16523     #-------------------------------------------------------
16524     if (@i_colon_breaks) {
16525
16526         # using a simple method for deciding if we are in a ?/: chain --
16527         # this is a chain if it has multiple ?/: pairs all in order;
16528         # otherwise not.
16529         # Note that if line starts in a ':' we count that above as a break
16530         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
16531
16532         unless ($is_chain) {
16533             my @insert_list = ();
16534             foreach (@i_colon_breaks) {
16535                 my $i_question = $mate_index_to_go[$_];
16536                 if ( $i_question >= 0 ) {
16537                     if ( $want_break_before{'?'} ) {
16538                         $i_question--;
16539                         if (   $i_question > 0
16540                             && $types_to_go[$i_question] eq 'b' )
16541                         {
16542                             $i_question--;
16543                         }
16544                     }
16545
16546                     if ( $i_question >= 0 ) {
16547                         push @insert_list, $i_question;
16548                     }
16549                 }
16550                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
16551             }
16552         }
16553     }
16554     return ( \@i_first, \@i_last, $colon_count );
16555 }
16556
16557 sub insert_additional_breaks {
16558
16559     # this routine will add line breaks at requested locations after
16560     # sub set_continuation_breaks has made preliminary breaks.
16561
16562     my ( $ri_break_list, $ri_first, $ri_last ) = @_;
16563     my $i_f;
16564     my $i_l;
16565     my $line_number = 0;
16566     my $i_break_left;
16567     foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
16568
16569         $i_f = $$ri_first[$line_number];
16570         $i_l = $$ri_last[$line_number];
16571         while ( $i_break_left >= $i_l ) {
16572             $line_number++;
16573
16574             # shouldn't happen unless caller passes bad indexes
16575             if ( $line_number >= @$ri_last ) {
16576                 warning(
16577 "Non-fatal program bug: couldn't set break at $i_break_left\n"
16578                 );
16579                 report_definite_bug();
16580                 return;
16581             }
16582             $i_f = $$ri_first[$line_number];
16583             $i_l = $$ri_last[$line_number];
16584         }
16585
16586         my $i_break_right = $i_break_left + 1;
16587         if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
16588
16589         if (   $i_break_left >= $i_f
16590             && $i_break_left < $i_l
16591             && $i_break_right > $i_f
16592             && $i_break_right <= $i_l )
16593         {
16594             splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
16595             splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
16596         }
16597     }
16598 }
16599
16600 sub set_closing_breakpoint {
16601
16602     # set a breakpoint at a matching closing token
16603     # at present, this is only used to break at a ':' which matches a '?'
16604     my $i_break = shift;
16605
16606     if ( $mate_index_to_go[$i_break] >= 0 ) {
16607
16608         # CAUTION: infinite recursion possible here:
16609         #   set_closing_breakpoint calls set_forced_breakpoint, and
16610         #   set_forced_breakpoint call set_closing_breakpoint
16611         #   ( test files attrib.t, BasicLyx.pm.html).
16612         # Don't reduce the '2' in the statement below
16613         if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
16614
16615             # break before } ] and ), but sub set_forced_breakpoint will decide
16616             # to break before or after a ? and :
16617             my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
16618             set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
16619         }
16620     }
16621     else {
16622         my $type_sequence = $type_sequence_to_go[$i_break];
16623         if ($type_sequence) {
16624             my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
16625             $postponed_breakpoint{$type_sequence} = 1;
16626         }
16627     }
16628 }
16629
16630 # check to see if output line tabbing agrees with input line
16631 # this can be very useful for debugging a script which has an extra
16632 # or missing brace
16633 sub compare_indentation_levels {
16634
16635     my ( $python_indentation_level, $structural_indentation_level ) = @_;
16636     if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
16637         $last_tabbing_disagreement = $input_line_number;
16638
16639         if ($in_tabbing_disagreement) {
16640         }
16641         else {
16642             $tabbing_disagreement_count++;
16643
16644             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
16645                 write_logfile_entry(
16646 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
16647                 );
16648             }
16649             $in_tabbing_disagreement    = $input_line_number;
16650             $first_tabbing_disagreement = $in_tabbing_disagreement
16651               unless ($first_tabbing_disagreement);
16652         }
16653     }
16654     else {
16655
16656         if ($in_tabbing_disagreement) {
16657
16658             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
16659                 write_logfile_entry(
16660 "End indentation disagreement from input line $in_tabbing_disagreement\n"
16661                 );
16662
16663                 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
16664                     write_logfile_entry(
16665                         "No further tabbing disagreements will be noted\n");
16666                 }
16667             }
16668             $in_tabbing_disagreement = 0;
16669         }
16670     }
16671 }
16672
16673 #####################################################################
16674 #
16675 # the Perl::Tidy::IndentationItem class supplies items which contain
16676 # how much whitespace should be used at the start of a line
16677 #
16678 #####################################################################
16679
16680 package Perl::Tidy::IndentationItem;
16681
16682 # Indexes for indentation items
16683 use constant SPACES             => 0;     # total leading white spaces
16684 use constant LEVEL              => 1;     # the indentation 'level'
16685 use constant CI_LEVEL           => 2;     # the 'continuation level'
16686 use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
16687                                           # for this level
16688 use constant CLOSED             => 4;     # index where we saw closing '}'
16689 use constant COMMA_COUNT        => 5;     # how many commas at this level?
16690 use constant SEQUENCE_NUMBER    => 6;     # output batch number
16691 use constant INDEX              => 7;     # index in output batch list
16692 use constant HAVE_CHILD         => 8;     # any dependents?
16693 use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
16694                                           # we would like to move to get
16695                                           # alignment (negative if left)
16696 use constant ALIGN_PAREN        => 10;    # do we want to try to align
16697                                           # with an opening structure?
16698 use constant MARKED             => 11;    # if visited by corrector logic
16699 use constant STACK_DEPTH        => 12;    # indentation nesting depth
16700 use constant STARTING_INDEX     => 13;    # first token index of this level
16701 use constant ARROW_COUNT        => 14;    # how many =>'s
16702
16703 sub new {
16704
16705     # Create an 'indentation_item' which describes one level of leading
16706     # whitespace when the '-lp' indentation is used.  We return
16707     # a reference to an anonymous array of associated variables.
16708     # See above constants for storage scheme.
16709     my (
16710         $class,               $spaces,           $level,
16711         $ci_level,            $available_spaces, $index,
16712         $gnu_sequence_number, $align_paren,      $stack_depth,
16713         $starting_index,
16714     ) = @_;
16715     my $closed            = -1;
16716     my $arrow_count       = 0;
16717     my $comma_count       = 0;
16718     my $have_child        = 0;
16719     my $want_right_spaces = 0;
16720     my $marked            = 0;
16721     bless [
16722         $spaces,              $level,          $ci_level,
16723         $available_spaces,    $closed,         $comma_count,
16724         $gnu_sequence_number, $index,          $have_child,
16725         $want_right_spaces,   $align_paren,    $marked,
16726         $stack_depth,         $starting_index, $arrow_count,
16727     ], $class;
16728 }
16729
16730 sub permanently_decrease_AVAILABLE_SPACES {
16731
16732     # make a permanent reduction in the available indentation spaces
16733     # at one indentation item.  NOTE: if there are child nodes, their
16734     # total SPACES must be reduced by the caller.
16735
16736     my ( $item, $spaces_needed ) = @_;
16737     my $available_spaces = $item->get_AVAILABLE_SPACES();
16738     my $deleted_spaces =
16739       ( $available_spaces > $spaces_needed )
16740       ? $spaces_needed
16741       : $available_spaces;
16742     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
16743     $item->decrease_SPACES($deleted_spaces);
16744     $item->set_RECOVERABLE_SPACES(0);
16745
16746     return $deleted_spaces;
16747 }
16748
16749 sub tentatively_decrease_AVAILABLE_SPACES {
16750
16751     # We are asked to tentatively delete $spaces_needed of indentation
16752     # for a indentation item.  We may want to undo this later.  NOTE: if
16753     # there are child nodes, their total SPACES must be reduced by the
16754     # caller.
16755     my ( $item, $spaces_needed ) = @_;
16756     my $available_spaces = $item->get_AVAILABLE_SPACES();
16757     my $deleted_spaces =
16758       ( $available_spaces > $spaces_needed )
16759       ? $spaces_needed
16760       : $available_spaces;
16761     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
16762     $item->decrease_SPACES($deleted_spaces);
16763     $item->increase_RECOVERABLE_SPACES($deleted_spaces);
16764     return $deleted_spaces;
16765 }
16766
16767 sub get_STACK_DEPTH {
16768     my $self = shift;
16769     return $self->[STACK_DEPTH];
16770 }
16771
16772 sub get_SPACES {
16773     my $self = shift;
16774     return $self->[SPACES];
16775 }
16776
16777 sub get_MARKED {
16778     my $self = shift;
16779     return $self->[MARKED];
16780 }
16781
16782 sub set_MARKED {
16783     my ( $self, $value ) = @_;
16784     if ( defined($value) ) {
16785         $self->[MARKED] = $value;
16786     }
16787     return $self->[MARKED];
16788 }
16789
16790 sub get_AVAILABLE_SPACES {
16791     my $self = shift;
16792     return $self->[AVAILABLE_SPACES];
16793 }
16794
16795 sub decrease_SPACES {
16796     my ( $self, $value ) = @_;
16797     if ( defined($value) ) {
16798         $self->[SPACES] -= $value;
16799     }
16800     return $self->[SPACES];
16801 }
16802
16803 sub decrease_AVAILABLE_SPACES {
16804     my ( $self, $value ) = @_;
16805     if ( defined($value) ) {
16806         $self->[AVAILABLE_SPACES] -= $value;
16807     }
16808     return $self->[AVAILABLE_SPACES];
16809 }
16810
16811 sub get_ALIGN_PAREN {
16812     my $self = shift;
16813     return $self->[ALIGN_PAREN];
16814 }
16815
16816 sub get_RECOVERABLE_SPACES {
16817     my $self = shift;
16818     return $self->[RECOVERABLE_SPACES];
16819 }
16820
16821 sub set_RECOVERABLE_SPACES {
16822     my ( $self, $value ) = @_;
16823     if ( defined($value) ) {
16824         $self->[RECOVERABLE_SPACES] = $value;
16825     }
16826     return $self->[RECOVERABLE_SPACES];
16827 }
16828
16829 sub increase_RECOVERABLE_SPACES {
16830     my ( $self, $value ) = @_;
16831     if ( defined($value) ) {
16832         $self->[RECOVERABLE_SPACES] += $value;
16833     }
16834     return $self->[RECOVERABLE_SPACES];
16835 }
16836
16837 sub get_CI_LEVEL {
16838     my $self = shift;
16839     return $self->[CI_LEVEL];
16840 }
16841
16842 sub get_LEVEL {
16843     my $self = shift;
16844     return $self->[LEVEL];
16845 }
16846
16847 sub get_SEQUENCE_NUMBER {
16848     my $self = shift;
16849     return $self->[SEQUENCE_NUMBER];
16850 }
16851
16852 sub get_INDEX {
16853     my $self = shift;
16854     return $self->[INDEX];
16855 }
16856
16857 sub get_STARTING_INDEX {
16858     my $self = shift;
16859     return $self->[STARTING_INDEX];
16860 }
16861
16862 sub set_HAVE_CHILD {
16863     my ( $self, $value ) = @_;
16864     if ( defined($value) ) {
16865         $self->[HAVE_CHILD] = $value;
16866     }
16867     return $self->[HAVE_CHILD];
16868 }
16869
16870 sub get_HAVE_CHILD {
16871     my $self = shift;
16872     return $self->[HAVE_CHILD];
16873 }
16874
16875 sub set_ARROW_COUNT {
16876     my ( $self, $value ) = @_;
16877     if ( defined($value) ) {
16878         $self->[ARROW_COUNT] = $value;
16879     }
16880     return $self->[ARROW_COUNT];
16881 }
16882
16883 sub get_ARROW_COUNT {
16884     my $self = shift;
16885     return $self->[ARROW_COUNT];
16886 }
16887
16888 sub set_COMMA_COUNT {
16889     my ( $self, $value ) = @_;
16890     if ( defined($value) ) {
16891         $self->[COMMA_COUNT] = $value;
16892     }
16893     return $self->[COMMA_COUNT];
16894 }
16895
16896 sub get_COMMA_COUNT {
16897     my $self = shift;
16898     return $self->[COMMA_COUNT];
16899 }
16900
16901 sub set_CLOSED {
16902     my ( $self, $value ) = @_;
16903     if ( defined($value) ) {
16904         $self->[CLOSED] = $value;
16905     }
16906     return $self->[CLOSED];
16907 }
16908
16909 sub get_CLOSED {
16910     my $self = shift;
16911     return $self->[CLOSED];
16912 }
16913
16914 #####################################################################
16915 #
16916 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
16917 # contain a single output line
16918 #
16919 #####################################################################
16920
16921 package Perl::Tidy::VerticalAligner::Line;
16922
16923 {
16924
16925     use strict;
16926     use Carp;
16927
16928     use constant JMAX                      => 0;
16929     use constant JMAX_ORIGINAL_LINE        => 1;
16930     use constant RTOKENS                   => 2;
16931     use constant RFIELDS                   => 3;
16932     use constant RPATTERNS                 => 4;
16933     use constant INDENTATION               => 5;
16934     use constant LEADING_SPACE_COUNT       => 6;
16935     use constant OUTDENT_LONG_LINES        => 7;
16936     use constant LIST_TYPE                 => 8;
16937     use constant IS_HANGING_SIDE_COMMENT   => 9;
16938     use constant RALIGNMENTS               => 10;
16939     use constant MAXIMUM_LINE_LENGTH       => 11;
16940     use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
16941
16942     my %_index_map;
16943     $_index_map{jmax}                      = JMAX;
16944     $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
16945     $_index_map{rtokens}                   = RTOKENS;
16946     $_index_map{rfields}                   = RFIELDS;
16947     $_index_map{rpatterns}                 = RPATTERNS;
16948     $_index_map{indentation}               = INDENTATION;
16949     $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
16950     $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
16951     $_index_map{list_type}                 = LIST_TYPE;
16952     $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
16953     $_index_map{ralignments}               = RALIGNMENTS;
16954     $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
16955     $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
16956
16957     my @_default_data = ();
16958     $_default_data[JMAX]                      = undef;
16959     $_default_data[JMAX_ORIGINAL_LINE]        = undef;
16960     $_default_data[RTOKENS]                   = undef;
16961     $_default_data[RFIELDS]                   = undef;
16962     $_default_data[RPATTERNS]                 = undef;
16963     $_default_data[INDENTATION]               = undef;
16964     $_default_data[LEADING_SPACE_COUNT]       = undef;
16965     $_default_data[OUTDENT_LONG_LINES]        = undef;
16966     $_default_data[LIST_TYPE]                 = undef;
16967     $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
16968     $_default_data[RALIGNMENTS]               = [];
16969     $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
16970     $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
16971
16972     {
16973
16974         # methods to count object population
16975         my $_count = 0;
16976         sub get_count        { $_count; }
16977         sub _increment_count { ++$_count }
16978         sub _decrement_count { --$_count }
16979     }
16980
16981     # Constructor may be called as a class method
16982     sub new {
16983         my ( $caller, %arg ) = @_;
16984         my $caller_is_obj = ref($caller);
16985         my $class = $caller_is_obj || $caller;
16986         no strict "refs";
16987         my $self = bless [], $class;
16988
16989         $self->[RALIGNMENTS] = [];
16990
16991         my $index;
16992         foreach ( keys %_index_map ) {
16993             $index = $_index_map{$_};
16994             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
16995             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
16996             else { $self->[$index] = $_default_data[$index] }
16997         }
16998
16999         $self->_increment_count();
17000         return $self;
17001     }
17002
17003     sub DESTROY {
17004         $_[0]->_decrement_count();
17005     }
17006
17007     sub get_jmax                      { $_[0]->[JMAX] }
17008     sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
17009     sub get_rtokens                   { $_[0]->[RTOKENS] }
17010     sub get_rfields                   { $_[0]->[RFIELDS] }
17011     sub get_rpatterns                 { $_[0]->[RPATTERNS] }
17012     sub get_indentation               { $_[0]->[INDENTATION] }
17013     sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
17014     sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
17015     sub get_list_type                 { $_[0]->[LIST_TYPE] }
17016     sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
17017     sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
17018
17019     sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
17020     sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
17021     sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
17022     sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
17023
17024     sub get_starting_column {
17025         $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
17026     }
17027
17028     sub increment_column {
17029         $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
17030     }
17031     sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
17032
17033     sub current_field_width {
17034         my $self = shift;
17035         my ($j) = @_;
17036         if ( $j == 0 ) {
17037             return $self->get_column($j);
17038         }
17039         else {
17040             return $self->get_column($j) - $self->get_column( $j - 1 );
17041         }
17042     }
17043
17044     sub field_width_growth {
17045         my $self = shift;
17046         my $j    = shift;
17047         return $self->get_column($j) - $self->get_starting_column($j);
17048     }
17049
17050     sub starting_field_width {
17051         my $self = shift;
17052         my $j    = shift;
17053         if ( $j == 0 ) {
17054             return $self->get_starting_column($j);
17055         }
17056         else {
17057             return $self->get_starting_column($j) -
17058               $self->get_starting_column( $j - 1 );
17059         }
17060     }
17061
17062     sub increase_field_width {
17063
17064         my $self = shift;
17065         my ( $j, $pad ) = @_;
17066         my $jmax = $self->get_jmax();
17067         for my $k ( $j .. $jmax ) {
17068             $self->increment_column( $k, $pad );
17069         }
17070     }
17071
17072     sub get_available_space_on_right {
17073         my $self = shift;
17074         my $jmax = $self->get_jmax();
17075         return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
17076     }
17077
17078     sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
17079     sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
17080     sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
17081     sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
17082     sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
17083     sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
17084     sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
17085     sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
17086     sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
17087     sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
17088     sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }
17089
17090 }
17091
17092 #####################################################################
17093 #
17094 # the Perl::Tidy::VerticalAligner::Alignment class holds information
17095 # on a single column being aligned
17096 #
17097 #####################################################################
17098 package Perl::Tidy::VerticalAligner::Alignment;
17099
17100 {
17101
17102     use strict;
17103
17104     #use Carp;
17105
17106     # Symbolic array indexes
17107     use constant COLUMN          => 0;    # the current column number
17108     use constant STARTING_COLUMN => 1;    # column number when created
17109     use constant MATCHING_TOKEN  => 2;    # what token we are matching
17110     use constant STARTING_LINE   => 3;    # the line index of creation
17111     use constant ENDING_LINE     => 4;    # the most recent line to use it
17112     use constant SAVED_COLUMN    => 5;    # the most recent line to use it
17113     use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
17114                                           # (just its index in an array)
17115
17116     # Correspondence between variables and array indexes
17117     my %_index_map;
17118     $_index_map{column}          = COLUMN;
17119     $_index_map{starting_column} = STARTING_COLUMN;
17120     $_index_map{matching_token}  = MATCHING_TOKEN;
17121     $_index_map{starting_line}   = STARTING_LINE;
17122     $_index_map{ending_line}     = ENDING_LINE;
17123     $_index_map{saved_column}    = SAVED_COLUMN;
17124     $_index_map{serial_number}   = SERIAL_NUMBER;
17125
17126     my @_default_data = ();
17127     $_default_data[COLUMN]          = undef;
17128     $_default_data[STARTING_COLUMN] = undef;
17129     $_default_data[MATCHING_TOKEN]  = undef;
17130     $_default_data[STARTING_LINE]   = undef;
17131     $_default_data[ENDING_LINE]     = undef;
17132     $_default_data[SAVED_COLUMN]    = undef;
17133     $_default_data[SERIAL_NUMBER]   = undef;
17134
17135     # class population count
17136     {
17137         my $_count = 0;
17138         sub get_count        { $_count; }
17139         sub _increment_count { ++$_count }
17140         sub _decrement_count { --$_count }
17141     }
17142
17143     # constructor
17144     sub new {
17145         my ( $caller, %arg ) = @_;
17146         my $caller_is_obj = ref($caller);
17147         my $class = $caller_is_obj || $caller;
17148         no strict "refs";
17149         my $self = bless [], $class;
17150
17151         foreach ( keys %_index_map ) {
17152             my $index = $_index_map{$_};
17153             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
17154             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
17155             else { $self->[$index] = $_default_data[$index] }
17156         }
17157         $self->_increment_count();
17158         return $self;
17159     }
17160
17161     sub DESTROY {
17162         $_[0]->_decrement_count();
17163     }
17164
17165     sub get_column          { return $_[0]->[COLUMN] }
17166     sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
17167     sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
17168     sub get_starting_line   { return $_[0]->[STARTING_LINE] }
17169     sub get_ending_line     { return $_[0]->[ENDING_LINE] }
17170     sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
17171
17172     sub set_column          { $_[0]->[COLUMN]          = $_[1] }
17173     sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
17174     sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
17175     sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
17176     sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
17177     sub increment_column { $_[0]->[COLUMN] += $_[1] }
17178
17179     sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
17180     sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
17181
17182 }
17183
17184 package Perl::Tidy::VerticalAligner;
17185
17186 # The Perl::Tidy::VerticalAligner package collects output lines and
17187 # attempts to line up certain common tokens, such as => and #, which are
17188 # identified by the calling routine.
17189 #
17190 # There are two main routines: append_line and flush.  Append acts as a
17191 # storage buffer, collecting lines into a group which can be vertically
17192 # aligned.  When alignment is no longer possible or desirable, it dumps
17193 # the group to flush.
17194 #
17195 #     append_line -----> flush
17196 #
17197 #     collects          writes
17198 #     vertical          one
17199 #     groups            group
17200
17201 BEGIN {
17202
17203     # Caution: these debug flags produce a lot of output
17204     # They should all be 0 except when debugging small scripts
17205
17206     use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
17207     use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
17208     use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
17209
17210     my $debug_warning = sub {
17211         print "VALIGN_DEBUGGING with key $_[0]\n";
17212     };
17213
17214     VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
17215     VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
17216
17217 }
17218
17219 use vars qw(
17220   $vertical_aligner_self
17221   $current_line
17222   $maximum_alignment_index
17223   $ralignment_list
17224   $maximum_jmax_seen
17225   $minimum_jmax_seen
17226   $previous_minimum_jmax_seen
17227   $previous_maximum_jmax_seen
17228   $maximum_line_index
17229   $group_level
17230   $group_type
17231   $group_maximum_gap
17232   $marginal_match
17233   $last_group_level_written
17234   $last_leading_space_count
17235   $extra_indent_ok
17236   $zero_count
17237   @group_lines
17238   $last_comment_column
17239   $last_side_comment_line_number
17240   $last_side_comment_length
17241   $last_side_comment_level
17242   $outdented_line_count
17243   $first_outdented_line_at
17244   $last_outdented_line_at
17245   $diagnostics_object
17246   $logger_object
17247   $file_writer_object
17248   @side_comment_history
17249   $comment_leading_space_count
17250   $is_matching_terminal_line
17251
17252   $cached_line_text
17253   $cached_line_type
17254   $cached_line_flag
17255   $cached_seqno
17256   $cached_line_valid
17257   $cached_line_leading_space_count
17258   $cached_seqno_string
17259
17260   $seqno_string
17261   $last_nonblank_seqno_string
17262
17263   $rOpts
17264
17265   $rOpts_maximum_line_length
17266   $rOpts_continuation_indentation
17267   $rOpts_indent_columns
17268   $rOpts_tabs
17269   $rOpts_entab_leading_whitespace
17270   $rOpts_valign
17271
17272   $rOpts_fixed_position_side_comment
17273   $rOpts_minimum_space_to_comment
17274
17275 );
17276
17277 sub initialize {
17278
17279     my $class;
17280
17281     ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
17282       = @_;
17283
17284     # variables describing the entire space group:
17285     $ralignment_list            = [];
17286     $group_level                = 0;
17287     $last_group_level_written   = -1;
17288     $extra_indent_ok            = 0;    # can we move all lines to the right?
17289     $last_side_comment_length   = 0;
17290     $maximum_jmax_seen          = 0;
17291     $minimum_jmax_seen          = 0;
17292     $previous_minimum_jmax_seen = 0;
17293     $previous_maximum_jmax_seen = 0;
17294
17295     # variables describing each line of the group
17296     @group_lines = ();                  # list of all lines in group
17297
17298     $outdented_line_count          = 0;
17299     $first_outdented_line_at       = 0;
17300     $last_outdented_line_at        = 0;
17301     $last_side_comment_line_number = 0;
17302     $last_side_comment_level       = -1;
17303     $is_matching_terminal_line     = 0;
17304
17305     # most recent 3 side comments; [ line number, column ]
17306     $side_comment_history[0] = [ -300, 0 ];
17307     $side_comment_history[1] = [ -200, 0 ];
17308     $side_comment_history[2] = [ -100, 0 ];
17309
17310     # write_leader_and_string cache:
17311     $cached_line_text                = "";
17312     $cached_line_type                = 0;
17313     $cached_line_flag                = 0;
17314     $cached_seqno                    = 0;
17315     $cached_line_valid               = 0;
17316     $cached_line_leading_space_count = 0;
17317     $cached_seqno_string             = "";
17318
17319     # string of sequence numbers joined together
17320     $seqno_string               = "";
17321     $last_nonblank_seqno_string = "";
17322
17323     # frequently used parameters
17324     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
17325     $rOpts_tabs                     = $rOpts->{'tabs'};
17326     $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
17327     $rOpts_fixed_position_side_comment =
17328       $rOpts->{'fixed-position-side-comment'};
17329     $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
17330     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
17331     $rOpts_valign                   = $rOpts->{'valign'};
17332
17333     forget_side_comment();
17334
17335     initialize_for_new_group();
17336
17337     $vertical_aligner_self = {};
17338     bless $vertical_aligner_self, $class;
17339     return $vertical_aligner_self;
17340 }
17341
17342 sub initialize_for_new_group {
17343     $maximum_line_index      = -1;      # lines in the current group
17344     $maximum_alignment_index = -1;      # alignments in current group
17345     $zero_count              = 0;       # count consecutive lines without tokens
17346     $current_line            = undef;   # line being matched for alignment
17347     $group_maximum_gap       = 0;       # largest gap introduced
17348     $group_type              = "";
17349     $marginal_match          = 0;
17350     $comment_leading_space_count = 0;
17351     $last_leading_space_count    = 0;
17352 }
17353
17354 # interface to Perl::Tidy::Diagnostics routines
17355 sub write_diagnostics {
17356     if ($diagnostics_object) {
17357         $diagnostics_object->write_diagnostics(@_);
17358     }
17359 }
17360
17361 # interface to Perl::Tidy::Logger routines
17362 sub warning {
17363     if ($logger_object) {
17364         $logger_object->warning(@_);
17365     }
17366 }
17367
17368 sub write_logfile_entry {
17369     if ($logger_object) {
17370         $logger_object->write_logfile_entry(@_);
17371     }
17372 }
17373
17374 sub report_definite_bug {
17375     if ($logger_object) {
17376         $logger_object->report_definite_bug();
17377     }
17378 }
17379
17380 sub get_SPACES {
17381
17382     # return the number of leading spaces associated with an indentation
17383     # variable $indentation is either a constant number of spaces or an
17384     # object with a get_SPACES method.
17385     my $indentation = shift;
17386     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
17387 }
17388
17389 sub get_RECOVERABLE_SPACES {
17390
17391     # return the number of spaces (+ means shift right, - means shift left)
17392     # that we would like to shift a group of lines with the same indentation
17393     # to get them to line up with their opening parens
17394     my $indentation = shift;
17395     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
17396 }
17397
17398 sub get_STACK_DEPTH {
17399
17400     my $indentation = shift;
17401     return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
17402 }
17403
17404 sub make_alignment {
17405     my ( $col, $token ) = @_;
17406
17407     # make one new alignment at column $col which aligns token $token
17408     ++$maximum_alignment_index;
17409     my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
17410         column          => $col,
17411         starting_column => $col,
17412         matching_token  => $token,
17413         starting_line   => $maximum_line_index,
17414         ending_line     => $maximum_line_index,
17415         serial_number   => $maximum_alignment_index,
17416     );
17417     $ralignment_list->[$maximum_alignment_index] = $alignment;
17418     return $alignment;
17419 }
17420
17421 sub dump_alignments {
17422     print
17423 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
17424     for my $i ( 0 .. $maximum_alignment_index ) {
17425         my $column          = $ralignment_list->[$i]->get_column();
17426         my $starting_column = $ralignment_list->[$i]->get_starting_column();
17427         my $matching_token  = $ralignment_list->[$i]->get_matching_token();
17428         my $starting_line   = $ralignment_list->[$i]->get_starting_line();
17429         my $ending_line     = $ralignment_list->[$i]->get_ending_line();
17430         print
17431 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
17432     }
17433 }
17434
17435 sub save_alignment_columns {
17436     for my $i ( 0 .. $maximum_alignment_index ) {
17437         $ralignment_list->[$i]->save_column();
17438     }
17439 }
17440
17441 sub restore_alignment_columns {
17442     for my $i ( 0 .. $maximum_alignment_index ) {
17443         $ralignment_list->[$i]->restore_column();
17444     }
17445 }
17446
17447 sub forget_side_comment {
17448     $last_comment_column = 0;
17449 }
17450
17451 sub append_line {
17452
17453     # sub append is called to place one line in the current vertical group.
17454     #
17455     # The input parameters are:
17456     #     $level = indentation level of this line
17457     #     $rfields = reference to array of fields
17458     #     $rpatterns = reference to array of patterns, one per field
17459     #     $rtokens   = reference to array of tokens starting fields 1,2,..
17460     #
17461     # Here is an example of what this package does.  In this example,
17462     # we are trying to line up both the '=>' and the '#'.
17463     #
17464     #         '18' => 'grave',    #   \`
17465     #         '19' => 'acute',    #   `'
17466     #         '20' => 'caron',    #   \v
17467     # <-tabs-><f1-><--field 2 ---><-f3->
17468     # |            |              |    |
17469     # |            |              |    |
17470     # col1        col2         col3 col4
17471     #
17472     # The calling routine has already broken the entire line into 3 fields as
17473     # indicated.  (So the work of identifying promising common tokens has
17474     # already been done).
17475     #
17476     # In this example, there will be 2 tokens being matched: '=>' and '#'.
17477     # They are the leading parts of fields 2 and 3, but we do need to know
17478     # what they are so that we can dump a group of lines when these tokens
17479     # change.
17480     #
17481     # The fields contain the actual characters of each field.  The patterns
17482     # are like the fields, but they contain mainly token types instead
17483     # of tokens, so they have fewer characters.  They are used to be
17484     # sure we are matching fields of similar type.
17485     #
17486     # In this example, there will be 4 column indexes being adjusted.  The
17487     # first one is always at zero.  The interior columns are at the start of
17488     # the matching tokens, and the last one tracks the maximum line length.
17489     #
17490     # Basically, each time a new line comes in, it joins the current vertical
17491     # group if possible.  Otherwise it causes the current group to be dumped
17492     # and a new group is started.
17493     #
17494     # For each new group member, the column locations are increased, as
17495     # necessary, to make room for the new fields.  When the group is finally
17496     # output, these column numbers are used to compute the amount of spaces of
17497     # padding needed for each field.
17498     #
17499     # Programming note: the fields are assumed not to have any tab characters.
17500     # Tabs have been previously removed except for tabs in quoted strings and
17501     # side comments.  Tabs in these fields can mess up the column counting.
17502     # The log file warns the user if there are any such tabs.
17503
17504     my (
17505         $level,               $level_end,
17506         $indentation,         $rfields,
17507         $rtokens,             $rpatterns,
17508         $is_forced_break,     $outdent_long_lines,
17509         $is_terminal_ternary, $is_terminal_statement,
17510         $do_not_pad,          $rvertical_tightness_flags,
17511         $level_jump,
17512     ) = @_;
17513
17514     # number of fields is $jmax
17515     # number of tokens between fields is $jmax-1
17516     my $jmax = $#{$rfields};
17517
17518     my $leading_space_count = get_SPACES($indentation);
17519
17520     # set outdented flag to be sure we either align within statements or
17521     # across statement boundaries, but not both.
17522     my $is_outdented = $last_leading_space_count > $leading_space_count;
17523     $last_leading_space_count = $leading_space_count;
17524
17525     # Patch: undo for hanging side comment
17526     my $is_hanging_side_comment =
17527       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
17528     $is_outdented = 0 if $is_hanging_side_comment;
17529
17530     VALIGN_DEBUG_FLAG_APPEND0 && do {
17531         print
17532 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
17533     };
17534
17535     # Validate cached line if necessary: If we can produce a container
17536     # with just 2 lines total by combining an existing cached opening
17537     # token with the closing token to follow, then we will mark both
17538     # cached flags as valid.
17539     if ($rvertical_tightness_flags) {
17540         if (   $maximum_line_index <= 0
17541             && $cached_line_type
17542             && $cached_seqno
17543             && $rvertical_tightness_flags->[2]
17544             && $rvertical_tightness_flags->[2] == $cached_seqno )
17545         {
17546             $rvertical_tightness_flags->[3] ||= 1;
17547             $cached_line_valid              ||= 1;
17548         }
17549     }
17550
17551     # do not join an opening block brace with an unbalanced line
17552     # unless requested with a flag value of 2
17553     if (   $cached_line_type == 3
17554         && $maximum_line_index < 0
17555         && $cached_line_flag < 2
17556         && $level_jump != 0 )
17557     {
17558         $cached_line_valid = 0;
17559     }
17560
17561     # patch until new aligner is finished
17562     if ($do_not_pad) { my_flush() }
17563
17564     # shouldn't happen:
17565     if ( $level < 0 ) { $level = 0 }
17566
17567     # do not align code across indentation level changes
17568     # or if vertical alignment is turned off for debugging
17569     if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
17570
17571         # we are allowed to shift a group of lines to the right if its
17572         # level is greater than the previous and next group
17573         $extra_indent_ok =
17574           ( $level < $group_level && $last_group_level_written < $group_level );
17575
17576         my_flush();
17577
17578         # If we know that this line will get flushed out by itself because
17579         # of level changes, we can leave the extra_indent_ok flag set.
17580         # That way, if we get an external flush call, we will still be
17581         # able to do some -lp alignment if necessary.
17582         $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
17583
17584         $group_level = $level;
17585
17586         # wait until after the above flush to get the leading space
17587         # count because it may have been changed if the -icp flag is in
17588         # effect
17589         $leading_space_count = get_SPACES($indentation);
17590
17591     }
17592
17593     # --------------------------------------------------------------------
17594     # Patch to collect outdentable block COMMENTS
17595     # --------------------------------------------------------------------
17596     my $is_blank_line = "";
17597     my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
17598     if ( $group_type eq 'COMMENT' ) {
17599         if (
17600             (
17601                    $is_block_comment
17602                 && $outdent_long_lines
17603                 && $leading_space_count == $comment_leading_space_count
17604             )
17605             || $is_blank_line
17606           )
17607         {
17608             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
17609             return;
17610         }
17611         else {
17612             my_flush();
17613         }
17614     }
17615
17616     # --------------------------------------------------------------------
17617     # add dummy fields for terminal ternary
17618     # --------------------------------------------------------------------
17619     my $j_terminal_match;
17620     if ( $is_terminal_ternary && $current_line ) {
17621         $j_terminal_match =
17622           fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
17623         $jmax = @{$rfields} - 1;
17624     }
17625
17626     # --------------------------------------------------------------------
17627     # add dummy fields for else statement
17628     # --------------------------------------------------------------------
17629     if (   $rfields->[0] =~ /^else\s*$/
17630         && $current_line
17631         && $level_jump == 0 )
17632     {
17633         $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
17634         $jmax = @{$rfields} - 1;
17635     }
17636
17637     # --------------------------------------------------------------------
17638     # Step 1. Handle simple line of code with no fields to match.
17639     # --------------------------------------------------------------------
17640     if ( $jmax <= 0 ) {
17641         $zero_count++;
17642
17643         if ( $maximum_line_index >= 0
17644             && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
17645         {
17646
17647             # flush the current group if it has some aligned columns..
17648             if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
17649
17650             # flush current group if we are just collecting side comments..
17651             elsif (
17652
17653                 # ...and we haven't seen a comment lately
17654                 ( $zero_count > 3 )
17655
17656                 # ..or if this new line doesn't fit to the left of the comments
17657                 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
17658                     $group_lines[0]->get_column(0) )
17659               )
17660             {
17661                 my_flush();
17662             }
17663         }
17664
17665         # patch to start new COMMENT group if this comment may be outdented
17666         if (   $is_block_comment
17667             && $outdent_long_lines
17668             && $maximum_line_index < 0 )
17669         {
17670             $group_type                           = 'COMMENT';
17671             $comment_leading_space_count          = $leading_space_count;
17672             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
17673             return;
17674         }
17675
17676         # just write this line directly if no current group, no side comment,
17677         # and no space recovery is needed.
17678         if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
17679         {
17680             write_leader_and_string( $leading_space_count, $$rfields[0], 0,
17681                 $outdent_long_lines, $rvertical_tightness_flags );
17682             return;
17683         }
17684     }
17685     else {
17686         $zero_count = 0;
17687     }
17688
17689     # programming check: (shouldn't happen)
17690     # an error here implies an incorrect call was made
17691     if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
17692         warning(
17693 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
17694         );
17695         report_definite_bug();
17696     }
17697
17698     # --------------------------------------------------------------------
17699     # create an object to hold this line
17700     # --------------------------------------------------------------------
17701     my $new_line = new Perl::Tidy::VerticalAligner::Line(
17702         jmax                      => $jmax,
17703         jmax_original_line        => $jmax,
17704         rtokens                   => $rtokens,
17705         rfields                   => $rfields,
17706         rpatterns                 => $rpatterns,
17707         indentation               => $indentation,
17708         leading_space_count       => $leading_space_count,
17709         outdent_long_lines        => $outdent_long_lines,
17710         list_type                 => "",
17711         is_hanging_side_comment   => $is_hanging_side_comment,
17712         maximum_line_length       => $rOpts->{'maximum-line-length'},
17713         rvertical_tightness_flags => $rvertical_tightness_flags,
17714     );
17715
17716     # Initialize a global flag saying if the last line of the group should
17717     # match end of group and also terminate the group.  There should be no
17718     # returns between here and where the flag is handled at the bottom.
17719     my $col_matching_terminal = 0;
17720     if ( defined($j_terminal_match) ) {
17721
17722         # remember the column of the terminal ? or { to match with
17723         $col_matching_terminal = $current_line->get_column($j_terminal_match);
17724
17725         # set global flag for sub decide_if_aligned
17726         $is_matching_terminal_line = 1;
17727     }
17728
17729     # --------------------------------------------------------------------
17730     # It simplifies things to create a zero length side comment
17731     # if none exists.
17732     # --------------------------------------------------------------------
17733     make_side_comment( $new_line, $level_end );
17734
17735     # --------------------------------------------------------------------
17736     # Decide if this is a simple list of items.
17737     # There are 3 list types: none, comma, comma-arrow.
17738     # We use this below to be less restrictive in deciding what to align.
17739     # --------------------------------------------------------------------
17740     if ($is_forced_break) {
17741         decide_if_list($new_line);
17742     }
17743
17744     if ($current_line) {
17745
17746         # --------------------------------------------------------------------
17747         # Allow hanging side comment to join current group, if any
17748         # This will help keep side comments aligned, because otherwise we
17749         # will have to start a new group, making alignment less likely.
17750         # --------------------------------------------------------------------
17751         join_hanging_comment( $new_line, $current_line )
17752           if $is_hanging_side_comment;
17753
17754         # --------------------------------------------------------------------
17755         # If there is just one previous line, and it has more fields
17756         # than the new line, try to join fields together to get a match with
17757         # the new line.  At the present time, only a single leading '=' is
17758         # allowed to be compressed out.  This is useful in rare cases where
17759         # a table is forced to use old breakpoints because of side comments,
17760         # and the table starts out something like this:
17761         #   my %MonthChars = ('0', 'Jan',   # side comment
17762         #                     '1', 'Feb',
17763         #                     '2', 'Mar',
17764         # Eliminating the '=' field will allow the remaining fields to line up.
17765         # This situation does not occur if there are no side comments
17766         # because scan_list would put a break after the opening '('.
17767         # --------------------------------------------------------------------
17768         eliminate_old_fields( $new_line, $current_line );
17769
17770         # --------------------------------------------------------------------
17771         # If the new line has more fields than the current group,
17772         # see if we can match the first fields and combine the remaining
17773         # fields of the new line.
17774         # --------------------------------------------------------------------
17775         eliminate_new_fields( $new_line, $current_line );
17776
17777         # --------------------------------------------------------------------
17778         # Flush previous group unless all common tokens and patterns match..
17779         # --------------------------------------------------------------------
17780         check_match( $new_line, $current_line );
17781
17782         # --------------------------------------------------------------------
17783         # See if there is space for this line in the current group (if any)
17784         # --------------------------------------------------------------------
17785         if ($current_line) {
17786             check_fit( $new_line, $current_line );
17787         }
17788     }
17789
17790     # --------------------------------------------------------------------
17791     # Append this line to the current group (or start new group)
17792     # --------------------------------------------------------------------
17793     accept_line($new_line);
17794
17795     # Future update to allow this to vary:
17796     $current_line = $new_line if ( $maximum_line_index == 0 );
17797
17798     # output this group if it ends in a terminal else or ternary line
17799     if ( defined($j_terminal_match) ) {
17800
17801         # if there is only one line in the group (maybe due to failure to match
17802         # perfectly with previous lines), then align the ? or { of this
17803         # terminal line with the previous one unless that would make the line
17804         # too long
17805         if ( $maximum_line_index == 0 ) {
17806             my $col_now = $current_line->get_column($j_terminal_match);
17807             my $pad     = $col_matching_terminal - $col_now;
17808             my $padding_available =
17809               $current_line->get_available_space_on_right();
17810             if ( $pad > 0 && $pad <= $padding_available ) {
17811                 $current_line->increase_field_width( $j_terminal_match, $pad );
17812             }
17813         }
17814         my_flush();
17815         $is_matching_terminal_line = 0;
17816     }
17817
17818     # --------------------------------------------------------------------
17819     # Step 8. Some old debugging stuff
17820     # --------------------------------------------------------------------
17821     VALIGN_DEBUG_FLAG_APPEND && do {
17822         print "APPEND fields:";
17823         dump_array(@$rfields);
17824         print "APPEND tokens:";
17825         dump_array(@$rtokens);
17826         print "APPEND patterns:";
17827         dump_array(@$rpatterns);
17828         dump_alignments();
17829     };
17830
17831     return;
17832 }
17833
17834 sub join_hanging_comment {
17835
17836     my $line = shift;
17837     my $jmax = $line->get_jmax();
17838     return 0 unless $jmax == 1;    # must be 2 fields
17839     my $rtokens = $line->get_rtokens();
17840     return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
17841     my $rfields = $line->get_rfields();
17842     return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
17843     my $old_line            = shift;
17844     my $maximum_field_index = $old_line->get_jmax();
17845     return 0
17846       unless $maximum_field_index > $jmax;    # the current line has more fields
17847     my $rpatterns = $line->get_rpatterns();
17848
17849     $line->set_is_hanging_side_comment(1);
17850     $jmax = $maximum_field_index;
17851     $line->set_jmax($jmax);
17852     $$rfields[$jmax]         = $$rfields[1];
17853     $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
17854     $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
17855     for ( my $j = 1 ; $j < $jmax ; $j++ ) {
17856         $$rfields[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
17857         $$rtokens[ $j - 1 ]   = "";
17858         $$rpatterns[ $j - 1 ] = "";
17859     }
17860     return 1;
17861 }
17862
17863 sub eliminate_old_fields {
17864
17865     my $new_line = shift;
17866     my $jmax     = $new_line->get_jmax();
17867     if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
17868     if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
17869
17870     # there must be one previous line
17871     return unless ( $maximum_line_index == 0 );
17872
17873     my $old_line            = shift;
17874     my $maximum_field_index = $old_line->get_jmax();
17875
17876     # this line must have fewer fields
17877     return unless $maximum_field_index > $jmax;
17878
17879     # Identify specific cases where field elimination is allowed:
17880     # case=1: both lines have comma-separated lists, and the first
17881     #         line has an equals
17882     # case=2: both lines have leading equals
17883
17884     # case 1 is the default
17885     my $case = 1;
17886
17887     # See if case 2: both lines have leading '='
17888     # We'll require smiliar leading patterns in this case
17889     my $old_rtokens   = $old_line->get_rtokens();
17890     my $rtokens       = $new_line->get_rtokens();
17891     my $rpatterns     = $new_line->get_rpatterns();
17892     my $old_rpatterns = $old_line->get_rpatterns();
17893     if (   $rtokens->[0] =~ /^=\d*$/
17894         && $old_rtokens->[0]   eq $rtokens->[0]
17895         && $old_rpatterns->[0] eq $rpatterns->[0] )
17896     {
17897         $case = 2;
17898     }
17899
17900     # not too many fewer fields in new line for case 1
17901     return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
17902
17903     # case 1 must have side comment
17904     my $old_rfields = $old_line->get_rfields();
17905     return
17906       if ( $case == 1
17907         && length( $$old_rfields[$maximum_field_index] ) == 0 );
17908
17909     my $rfields = $new_line->get_rfields();
17910
17911     my $hid_equals = 0;
17912
17913     my @new_alignments        = ();
17914     my @new_fields            = ();
17915     my @new_matching_patterns = ();
17916     my @new_matching_tokens   = ();
17917
17918     my $j = 0;
17919     my $k;
17920     my $current_field   = '';
17921     my $current_pattern = '';
17922
17923     # loop over all old tokens
17924     my $in_match = 0;
17925     for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
17926         $current_field   .= $$old_rfields[$k];
17927         $current_pattern .= $$old_rpatterns[$k];
17928         last if ( $j > $jmax - 1 );
17929
17930         if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
17931             $in_match                  = 1;
17932             $new_fields[$j]            = $current_field;
17933             $new_matching_patterns[$j] = $current_pattern;
17934             $current_field             = '';
17935             $current_pattern           = '';
17936             $new_matching_tokens[$j]   = $$old_rtokens[$k];
17937             $new_alignments[$j]        = $old_line->get_alignment($k);
17938             $j++;
17939         }
17940         else {
17941
17942             if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
17943                 last if ( $case == 2 );    # avoid problems with stuff
17944                                            # like:   $a=$b=$c=$d;
17945                 $hid_equals = 1;
17946             }
17947             last
17948               if ( $in_match && $case == 1 )
17949               ;    # disallow gaps in matching field types in case 1
17950         }
17951     }
17952
17953     # Modify the current state if we are successful.
17954     # We must exactly reach the ends of both lists for success.
17955     if (   ( $j == $jmax )
17956         && ( $current_field eq '' )
17957         && ( $case != 1 || $hid_equals ) )
17958     {
17959         $k = $maximum_field_index;
17960         $current_field   .= $$old_rfields[$k];
17961         $current_pattern .= $$old_rpatterns[$k];
17962         $new_fields[$j]            = $current_field;
17963         $new_matching_patterns[$j] = $current_pattern;
17964
17965         $new_alignments[$j] = $old_line->get_alignment($k);
17966         $maximum_field_index = $j;
17967
17968         $old_line->set_alignments(@new_alignments);
17969         $old_line->set_jmax($jmax);
17970         $old_line->set_rtokens( \@new_matching_tokens );
17971         $old_line->set_rfields( \@new_fields );
17972         $old_line->set_rpatterns( \@$rpatterns );
17973     }
17974 }
17975
17976 # create an empty side comment if none exists
17977 sub make_side_comment {
17978     my $new_line  = shift;
17979     my $level_end = shift;
17980     my $jmax      = $new_line->get_jmax();
17981     my $rtokens   = $new_line->get_rtokens();
17982
17983     # if line does not have a side comment...
17984     if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
17985         my $rfields   = $new_line->get_rfields();
17986         my $rpatterns = $new_line->get_rpatterns();
17987         $$rtokens[$jmax]     = '#';
17988         $$rfields[ ++$jmax ] = '';
17989         $$rpatterns[$jmax]   = '#';
17990         $new_line->set_jmax($jmax);
17991         $new_line->set_jmax_original_line($jmax);
17992     }
17993
17994     # line has a side comment..
17995     else {
17996
17997         # don't remember old side comment location for very long
17998         my $line_number = $vertical_aligner_self->get_output_line_number();
17999         my $rfields     = $new_line->get_rfields();
18000         if (
18001             $line_number - $last_side_comment_line_number > 12
18002
18003             # and don't remember comment location across block level changes
18004             || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
18005           )
18006         {
18007             forget_side_comment();
18008         }
18009         $last_side_comment_line_number = $line_number;
18010         $last_side_comment_level       = $level_end;
18011     }
18012 }
18013
18014 sub decide_if_list {
18015
18016     my $line = shift;
18017
18018     # A list will be taken to be a line with a forced break in which all
18019     # of the field separators are commas or comma-arrows (except for the
18020     # trailing #)
18021
18022     # List separator tokens are things like ',3'   or '=>2',
18023     # where the trailing digit is the nesting depth.  Allow braces
18024     # to allow nested list items.
18025     my $rtokens    = $line->get_rtokens();
18026     my $test_token = $$rtokens[0];
18027     if ( $test_token =~ /^(\,|=>)/ ) {
18028         my $list_type = $test_token;
18029         my $jmax      = $line->get_jmax();
18030
18031         foreach ( 1 .. $jmax - 2 ) {
18032             if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
18033                 $list_type = "";
18034                 last;
18035             }
18036         }
18037         $line->set_list_type($list_type);
18038     }
18039 }
18040
18041 sub eliminate_new_fields {
18042
18043     return unless ( $maximum_line_index >= 0 );
18044     my ( $new_line, $old_line ) = @_;
18045     my $jmax = $new_line->get_jmax();
18046
18047     my $old_rtokens = $old_line->get_rtokens();
18048     my $rtokens     = $new_line->get_rtokens();
18049     my $is_assignment =
18050       ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
18051
18052     # must be monotonic variation
18053     return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
18054
18055     # must be more fields in the new line
18056     my $maximum_field_index = $old_line->get_jmax();
18057     return unless ( $maximum_field_index < $jmax );
18058
18059     unless ($is_assignment) {
18060         return
18061           unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
18062           ;    # only if monotonic
18063
18064         # never combine fields of a comma list
18065         return
18066           unless ( $maximum_field_index > 1 )
18067           && ( $new_line->get_list_type() !~ /^,/ );
18068     }
18069
18070     my $rfields       = $new_line->get_rfields();
18071     my $rpatterns     = $new_line->get_rpatterns();
18072     my $old_rpatterns = $old_line->get_rpatterns();
18073
18074     # loop over all OLD tokens except comment and check match
18075     my $match = 1;
18076     my $k;
18077     for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
18078         if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
18079             || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
18080         {
18081             $match = 0;
18082             last;
18083         }
18084     }
18085
18086     # first tokens agree, so combine extra new tokens
18087     if ($match) {
18088         for $k ( $maximum_field_index .. $jmax - 1 ) {
18089
18090             $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
18091             $$rfields[$k] = "";
18092             $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
18093             $$rpatterns[$k] = "";
18094         }
18095
18096         $$rtokens[ $maximum_field_index - 1 ] = '#';
18097         $$rfields[$maximum_field_index]       = $$rfields[$jmax];
18098         $$rpatterns[$maximum_field_index]     = $$rpatterns[$jmax];
18099         $jmax                                 = $maximum_field_index;
18100     }
18101     $new_line->set_jmax($jmax);
18102 }
18103
18104 sub fix_terminal_ternary {
18105
18106     # Add empty fields as necessary to align a ternary term
18107     # like this:
18108     #
18109     #  my $leapyear =
18110     #      $year % 4   ? 0
18111     #    : $year % 100 ? 1
18112     #    : $year % 400 ? 0
18113     #    :               1;
18114     #
18115     # returns 1 if the terminal item should be indented
18116
18117     my ( $rfields, $rtokens, $rpatterns ) = @_;
18118
18119     my $jmax        = @{$rfields} - 1;
18120     my $old_line    = $group_lines[$maximum_line_index];
18121     my $rfields_old = $old_line->get_rfields();
18122
18123     my $rpatterns_old       = $old_line->get_rpatterns();
18124     my $rtokens_old         = $old_line->get_rtokens();
18125     my $maximum_field_index = $old_line->get_jmax();
18126
18127     # look for the question mark after the :
18128     my ($jquestion);
18129     my $depth_question;
18130     my $pad = "";
18131     for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
18132         my $tok = $rtokens_old->[$j];
18133         if ( $tok =~ /^\?(\d+)$/ ) {
18134             $depth_question = $1;
18135
18136             # depth must be correct
18137             next unless ( $depth_question eq $group_level );
18138
18139             $jquestion = $j;
18140             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
18141                 $pad = " " x length($1);
18142             }
18143             else {
18144                 return;    # shouldn't happen
18145             }
18146             last;
18147         }
18148     }
18149     return unless ( defined($jquestion) );    # shouldn't happen
18150
18151     # Now splice the tokens and patterns of the previous line
18152     # into the else line to insure a match.  Add empty fields
18153     # as necessary.
18154     my $jadd = $jquestion;
18155
18156     # Work on copies of the actual arrays in case we have
18157     # to return due to an error
18158     my @fields   = @{$rfields};
18159     my @patterns = @{$rpatterns};
18160     my @tokens   = @{$rtokens};
18161
18162     VALIGN_DEBUG_FLAG_TERNARY && do {
18163         local $" = '><';
18164         print "CURRENT FIELDS=<@{$rfields_old}>\n";
18165         print "CURRENT TOKENS=<@{$rtokens_old}>\n";
18166         print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
18167         print "UNMODIFIED FIELDS=<@{$rfields}>\n";
18168         print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
18169         print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
18170     };
18171
18172     # handle cases of leading colon on this line
18173     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
18174
18175         my ( $colon, $therest ) = ( $1, $2 );
18176
18177         # Handle sub-case of first field with leading colon plus additional code
18178         # This is the usual situation as at the '1' below:
18179         #  ...
18180         #  : $year % 400 ? 0
18181         #  :               1;
18182         if ($therest) {
18183
18184             # Split the first field after the leading colon and insert padding.
18185             # Note that this padding will remain even if the terminal value goes
18186             # out on a separate line.  This does not seem to look to bad, so no
18187             # mechanism has been included to undo it.
18188             my $field1 = shift @fields;
18189             unshift @fields, ( $colon, $pad . $therest );
18190
18191             # change the leading pattern from : to ?
18192             return unless ( $patterns[0] =~ s/^\:/?/ );
18193
18194             # install leading tokens and patterns of existing line
18195             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
18196             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
18197
18198             # insert appropriate number of empty fields
18199             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
18200         }
18201
18202         # handle sub-case of first field just equal to leading colon.
18203         # This can happen for example in the example below where
18204         # the leading '(' would create a new alignment token
18205         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
18206         # :                        ( $mname = $name . '->' );
18207         else {
18208
18209             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
18210
18211             # prepend a leading ? onto the second pattern
18212             $patterns[1] = "?b" . $patterns[1];
18213
18214             # pad the second field
18215             $fields[1] = $pad . $fields[1];
18216
18217             # install leading tokens and patterns of existing line, replacing
18218             # leading token and inserting appropriate number of empty fields
18219             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
18220             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
18221             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
18222         }
18223     }
18224
18225     # Handle case of no leading colon on this line.  This will
18226     # be the case when -wba=':' is used.  For example,
18227     #  $year % 400 ? 0 :
18228     #                1;
18229     else {
18230
18231         # install leading tokens and patterns of existing line
18232         $patterns[0] = '?' . 'b' . $patterns[0];
18233         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
18234         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
18235
18236         # insert appropriate number of empty fields
18237         $jadd = $jquestion + 1;
18238         $fields[0] = $pad . $fields[0];
18239         splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
18240     }
18241
18242     VALIGN_DEBUG_FLAG_TERNARY && do {
18243         local $" = '><';
18244         print "MODIFIED TOKENS=<@tokens>\n";
18245         print "MODIFIED PATTERNS=<@patterns>\n";
18246         print "MODIFIED FIELDS=<@fields>\n";
18247     };
18248
18249     # all ok .. update the arrays
18250     @{$rfields}   = @fields;
18251     @{$rtokens}   = @tokens;
18252     @{$rpatterns} = @patterns;
18253
18254     # force a flush after this line
18255     return $jquestion;
18256 }
18257
18258 sub fix_terminal_else {
18259
18260     # Add empty fields as necessary to align a balanced terminal
18261     # else block to a previous if/elsif/unless block,
18262     # like this:
18263     #
18264     #  if   ( 1 || $x ) { print "ok 13\n"; }
18265     #  else             { print "not ok 13\n"; }
18266     #
18267     # returns 1 if the else block should be indented
18268     #
18269     my ( $rfields, $rtokens, $rpatterns ) = @_;
18270     my $jmax = @{$rfields} - 1;
18271     return unless ( $jmax > 0 );
18272
18273     # check for balanced else block following if/elsif/unless
18274     my $rfields_old = $current_line->get_rfields();
18275
18276     # TBD: add handling for 'case'
18277     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
18278
18279     # look for the opening brace after the else, and extrace the depth
18280     my $tok_brace = $rtokens->[0];
18281     my $depth_brace;
18282     if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
18283
18284     # probably:  "else # side_comment"
18285     else { return }
18286
18287     my $rpatterns_old       = $current_line->get_rpatterns();
18288     my $rtokens_old         = $current_line->get_rtokens();
18289     my $maximum_field_index = $current_line->get_jmax();
18290
18291     # be sure the previous if/elsif is followed by an opening paren
18292     my $jparen    = 0;
18293     my $tok_paren = '(' . $depth_brace;
18294     my $tok_test  = $rtokens_old->[$jparen];
18295     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
18296
18297     # Now find the opening block brace
18298     my ($jbrace);
18299     for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
18300         my $tok = $rtokens_old->[$j];
18301         if ( $tok eq $tok_brace ) {
18302             $jbrace = $j;
18303             last;
18304         }
18305     }
18306     return unless ( defined($jbrace) );           # shouldn't happen
18307
18308     # Now splice the tokens and patterns of the previous line
18309     # into the else line to insure a match.  Add empty fields
18310     # as necessary.
18311     my $jadd = $jbrace - $jparen;
18312     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
18313     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
18314     splice( @{$rfields}, 1, 0, ('') x $jadd );
18315
18316     # force a flush after this line if it does not follow a case
18317     return $jbrace
18318       unless ( $rfields_old->[0] =~ /^case\s*$/ );
18319 }
18320
18321 sub check_match {
18322
18323     my $new_line = shift;
18324     my $old_line = shift;
18325
18326     # uses global variables:
18327     #  $previous_minimum_jmax_seen
18328     #  $maximum_jmax_seen
18329     #  $maximum_line_index
18330     #  $marginal_match
18331     my $jmax                = $new_line->get_jmax();
18332     my $maximum_field_index = $old_line->get_jmax();
18333
18334     # flush if this line has too many fields
18335     if ( $jmax > $maximum_field_index ) { my_flush(); return }
18336
18337     # flush if adding this line would make a non-monotonic field count
18338     if (
18339         ( $maximum_field_index > $jmax )    # this has too few fields
18340         && (
18341             ( $previous_minimum_jmax_seen < $jmax )  # and wouldn't be monotonic
18342             || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
18343         )
18344       )
18345     {
18346         my_flush();
18347         return;
18348     }
18349
18350     # otherwise append this line if everything matches
18351     my $jmax_original_line      = $new_line->get_jmax_original_line();
18352     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
18353     my $rtokens                 = $new_line->get_rtokens();
18354     my $rfields                 = $new_line->get_rfields();
18355     my $rpatterns               = $new_line->get_rpatterns();
18356     my $list_type               = $new_line->get_list_type();
18357
18358     my $group_list_type = $old_line->get_list_type();
18359     my $old_rpatterns   = $old_line->get_rpatterns();
18360     my $old_rtokens     = $old_line->get_rtokens();
18361
18362     my $jlimit = $jmax - 1;
18363     if ( $maximum_field_index > $jmax ) {
18364         $jlimit = $jmax_original_line;
18365         --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
18366     }
18367
18368     my $everything_matches = 1;
18369
18370     # common list types always match
18371     unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
18372         || $is_hanging_side_comment )
18373     {
18374
18375         my $leading_space_count = $new_line->get_leading_space_count();
18376         my $saw_equals          = 0;
18377         for my $j ( 0 .. $jlimit ) {
18378             my $match = 1;
18379
18380             my $old_tok = $$old_rtokens[$j];
18381             my $new_tok = $$rtokens[$j];
18382
18383             # Dumb down the match AFTER an equals and
18384             # also dumb down after seeing a ? ternary operator ...
18385             # Everything after a + is the token which preceded the previous
18386             # opening paren (container name).  We won't require them to match.
18387             if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
18388                 $new_tok = $1;
18389                 $old_tok =~ s/\+.*$//;
18390             }
18391
18392             if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
18393
18394             # we never match if the matching tokens differ
18395             if (   $j < $jlimit
18396                 && $old_tok ne $new_tok )
18397             {
18398                 $match = 0;
18399             }
18400
18401             # otherwise, if patterns match, we always have a match.
18402             # However, if patterns don't match, we have to be careful...
18403             elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
18404
18405                 # We have to be very careful about aligning commas when the
18406                 # pattern's don't match, because it can be worse to create an
18407                 # alignment where none is needed than to omit one.  The current
18408                 # rule: if we are within a matching sub call (indicated by '+'
18409                 # in the matching token), we'll allow a marginal match, but
18410                 # otherwise not.
18411                 #
18412                 # Here's an example where we'd like to align the '='
18413                 #  my $cfile = File::Spec->catfile( 't',    'callext.c' );
18414                 #  my $inc   = File::Spec->catdir( 'Basic', 'Core' );
18415                 # because the function names differ.
18416                 # Future alignment logic should make this unnecessary.
18417                 #
18418                 # Here's an example where the ','s are not contained in a call.
18419                 # The first line below should probably not match the next two:
18420                 #   ( $a, $b ) = ( $b, $r );
18421                 #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
18422                 #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
18423                 if ( $new_tok =~ /^,/ ) {
18424                     if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
18425                         $marginal_match = 1;
18426                     }
18427                     else {
18428                         $match = 0;
18429                     }
18430                 }
18431
18432                 # parens don't align well unless patterns match
18433                 elsif ( $new_tok =~ /^\(/ ) {
18434                     $match = 0;
18435                 }
18436
18437                 # Handle an '=' alignment with different patterns to
18438                 # the left.
18439                 elsif ( $new_tok =~ /^=\d*$/ ) {
18440
18441                     $saw_equals = 1;
18442
18443                     # It is best to be a little restrictive when
18444                     # aligning '=' tokens.  Here is an example of
18445                     # two lines that we will not align:
18446                     #       my $variable=6;
18447                     #       $bb=4;
18448                     # The problem is that one is a 'my' declaration,
18449                     # and the other isn't, so they're not very similar.
18450                     # We will filter these out by comparing the first
18451                     # letter of the pattern.  This is crude, but works
18452                     # well enough.
18453                     if (
18454                         substr( $$old_rpatterns[$j], 0, 1 ) ne
18455                         substr( $$rpatterns[$j], 0, 1 ) )
18456                     {
18457                         $match = 0;
18458                     }
18459
18460                     # If we pass that test, we'll call it a marginal match.
18461                     # Here is an example of a marginal match:
18462                     #       $done{$$op} = 1;
18463                     #       $op         = compile_bblock($op);
18464                     # The left tokens are both identifiers, but
18465                     # one accesses a hash and the other doesn't.
18466                     # We'll let this be a tentative match and undo
18467                     # it later if we don't find more than 2 lines
18468                     # in the group.
18469                     elsif ( $maximum_line_index == 0 ) {
18470                         $marginal_match = 1;
18471                     }
18472                 }
18473             }
18474
18475             # Don't let line with fewer fields increase column widths
18476             # ( align3.t )
18477             if ( $maximum_field_index > $jmax ) {
18478                 my $pad =
18479                   length( $$rfields[$j] ) - $old_line->current_field_width($j);
18480
18481                 if ( $j == 0 ) {
18482                     $pad += $leading_space_count;
18483                 }
18484
18485                 # TESTING: suspend this rule to allow last lines to join
18486                 if ( $pad > 0 ) { $match = 0; }
18487             }
18488
18489             unless ($match) {
18490                 $everything_matches = 0;
18491                 last;
18492             }
18493         }
18494     }
18495
18496     if ( $maximum_field_index > $jmax ) {
18497
18498         if ($everything_matches) {
18499
18500             my $comment = $$rfields[$jmax];
18501             for $jmax ( $jlimit .. $maximum_field_index ) {
18502                 $$rtokens[$jmax]     = $$old_rtokens[$jmax];
18503                 $$rfields[ ++$jmax ] = '';
18504                 $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
18505             }
18506             $$rfields[$jmax] = $comment;
18507             $new_line->set_jmax($jmax);
18508         }
18509     }
18510
18511     my_flush() unless ($everything_matches);
18512 }
18513
18514 sub check_fit {
18515
18516     return unless ( $maximum_line_index >= 0 );
18517     my $new_line = shift;
18518     my $old_line = shift;
18519
18520     my $jmax                    = $new_line->get_jmax();
18521     my $leading_space_count     = $new_line->get_leading_space_count();
18522     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
18523     my $rtokens                 = $new_line->get_rtokens();
18524     my $rfields                 = $new_line->get_rfields();
18525     my $rpatterns               = $new_line->get_rpatterns();
18526
18527     my $group_list_type = $group_lines[0]->get_list_type();
18528
18529     my $padding_so_far    = 0;
18530     my $padding_available = $old_line->get_available_space_on_right();
18531
18532     # save current columns in case this doesn't work
18533     save_alignment_columns();
18534
18535     my ( $j, $pad, $eight );
18536     my $maximum_field_index = $old_line->get_jmax();
18537     for $j ( 0 .. $jmax ) {
18538
18539         $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
18540
18541         if ( $j == 0 ) {
18542             $pad += $leading_space_count;
18543         }
18544
18545         # remember largest gap of the group, excluding gap to side comment
18546         if (   $pad < 0
18547             && $group_maximum_gap < -$pad
18548             && $j > 0
18549             && $j < $jmax - 1 )
18550         {
18551             $group_maximum_gap = -$pad;
18552         }
18553
18554         next if $pad < 0;
18555
18556         ## This patch helps sometimes, but it doesn't check to see if
18557         ## the line is too long even without the side comment.  It needs
18558         ## to be reworked.
18559         ##don't let a long token with no trailing side comment push
18560         ##side comments out, or end a group.  (sidecmt1.t)
18561         ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
18562
18563         # This line will need space; lets see if we want to accept it..
18564         if (
18565
18566             # not if this won't fit
18567             ( $pad > $padding_available )
18568
18569             # previously, there were upper bounds placed on padding here
18570             # (maximum_whitespace_columns), but they were not really helpful
18571
18572           )
18573         {
18574
18575             # revert to starting state then flush; things didn't work out
18576             restore_alignment_columns();
18577             my_flush();
18578             last;
18579         }
18580
18581         # patch to avoid excessive gaps in previous lines,
18582         # due to a line of fewer fields.
18583         #   return join( ".",
18584         #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
18585         #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
18586         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
18587
18588         # looks ok, squeeze this field in
18589         $old_line->increase_field_width( $j, $pad );
18590         $padding_available -= $pad;
18591
18592         # remember largest gap of the group, excluding gap to side comment
18593         if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
18594             $group_maximum_gap = $pad;
18595         }
18596     }
18597 }
18598
18599 sub accept_line {
18600
18601     # The current line either starts a new alignment group or is
18602     # accepted into the current alignment group.
18603     my $new_line = shift;
18604     $group_lines[ ++$maximum_line_index ] = $new_line;
18605
18606     # initialize field lengths if starting new group
18607     if ( $maximum_line_index == 0 ) {
18608
18609         my $jmax    = $new_line->get_jmax();
18610         my $rfields = $new_line->get_rfields();
18611         my $rtokens = $new_line->get_rtokens();
18612         my $j;
18613         my $col = $new_line->get_leading_space_count();
18614
18615         for $j ( 0 .. $jmax ) {
18616             $col += length( $$rfields[$j] );
18617
18618             # create initial alignments for the new group
18619             my $token = "";
18620             if ( $j < $jmax ) { $token = $$rtokens[$j] }
18621             my $alignment = make_alignment( $col, $token );
18622             $new_line->set_alignment( $j, $alignment );
18623         }
18624
18625         $maximum_jmax_seen = $jmax;
18626         $minimum_jmax_seen = $jmax;
18627     }
18628
18629     # use previous alignments otherwise
18630     else {
18631         my @new_alignments =
18632           $group_lines[ $maximum_line_index - 1 ]->get_alignments();
18633         $new_line->set_alignments(@new_alignments);
18634     }
18635
18636     # remember group jmax extremes for next call to append_line
18637     $previous_minimum_jmax_seen = $minimum_jmax_seen;
18638     $previous_maximum_jmax_seen = $maximum_jmax_seen;
18639 }
18640
18641 sub dump_array {
18642
18643     # debug routine to dump array contents
18644     local $" = ')(';
18645     print "(@_)\n";
18646 }
18647
18648 # flush() sends the current Perl::Tidy::VerticalAligner group down the
18649 # pipeline to Perl::Tidy::FileWriter.
18650
18651 # This is the external flush, which also empties the cache
18652 sub flush {
18653
18654     if ( $maximum_line_index < 0 ) {
18655         if ($cached_line_type) {
18656             $seqno_string = $cached_seqno_string;
18657             entab_and_output( $cached_line_text,
18658                 $cached_line_leading_space_count,
18659                 $last_group_level_written );
18660             $cached_line_type    = 0;
18661             $cached_line_text    = "";
18662             $cached_seqno_string = "";
18663         }
18664     }
18665     else {
18666         my_flush();
18667     }
18668 }
18669
18670 # This is the internal flush, which leaves the cache intact
18671 sub my_flush {
18672
18673     return if ( $maximum_line_index < 0 );
18674
18675     # handle a group of comment lines
18676     if ( $group_type eq 'COMMENT' ) {
18677
18678         VALIGN_DEBUG_FLAG_APPEND0 && do {
18679             my ( $a, $b, $c ) = caller();
18680             print
18681 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
18682
18683         };
18684         my $leading_space_count = $comment_leading_space_count;
18685         my $leading_string      = get_leading_string($leading_space_count);
18686
18687         # zero leading space count if any lines are too long
18688         my $max_excess = 0;
18689         for my $i ( 0 .. $maximum_line_index ) {
18690             my $str = $group_lines[$i];
18691             my $excess =
18692               length($str) + $leading_space_count - $rOpts_maximum_line_length;
18693             if ( $excess > $max_excess ) {
18694                 $max_excess = $excess;
18695             }
18696         }
18697
18698         if ( $max_excess > 0 ) {
18699             $leading_space_count -= $max_excess;
18700             if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
18701             $last_outdented_line_at =
18702               $file_writer_object->get_output_line_number();
18703             unless ($outdented_line_count) {
18704                 $first_outdented_line_at = $last_outdented_line_at;
18705             }
18706             $outdented_line_count += ( $maximum_line_index + 1 );
18707         }
18708
18709         # write the group of lines
18710         my $outdent_long_lines = 0;
18711         for my $i ( 0 .. $maximum_line_index ) {
18712             write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
18713                 $outdent_long_lines, "" );
18714         }
18715     }
18716
18717     # handle a group of code lines
18718     else {
18719
18720         VALIGN_DEBUG_FLAG_APPEND0 && do {
18721             my $group_list_type = $group_lines[0]->get_list_type();
18722             my ( $a, $b, $c ) = caller();
18723             my $maximum_field_index = $group_lines[0]->get_jmax();
18724             print
18725 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
18726
18727         };
18728
18729         # some small groups are best left unaligned
18730         my $do_not_align = decide_if_aligned();
18731
18732         # optimize side comment location
18733         $do_not_align = adjust_side_comment($do_not_align);
18734
18735         # recover spaces for -lp option if possible
18736         my $extra_leading_spaces = get_extra_leading_spaces();
18737
18738         # all lines of this group have the same basic leading spacing
18739         my $group_leader_length = $group_lines[0]->get_leading_space_count();
18740
18741         # add extra leading spaces if helpful
18742         my $min_ci_gap = improve_continuation_indentation( $do_not_align,
18743             $group_leader_length );
18744
18745         # loop to output all lines
18746         for my $i ( 0 .. $maximum_line_index ) {
18747             my $line = $group_lines[$i];
18748             write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
18749                 $group_leader_length, $extra_leading_spaces );
18750         }
18751     }
18752     initialize_for_new_group();
18753 }
18754
18755 sub decide_if_aligned {
18756
18757     # Do not try to align two lines which are not really similar
18758     return unless $maximum_line_index == 1;
18759     return if ($is_matching_terminal_line);
18760
18761     my $group_list_type = $group_lines[0]->get_list_type();
18762
18763     my $do_not_align = (
18764
18765         # always align lists
18766         !$group_list_type
18767
18768           && (
18769
18770             # don't align if it was just a marginal match
18771             $marginal_match
18772
18773             # don't align two lines with big gap
18774             || $group_maximum_gap > 12
18775
18776             # or lines with differing number of alignment tokens
18777             # TODO: this could be improved.  It occasionally rejects
18778             # good matches.
18779             || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
18780           )
18781     );
18782
18783     # But try to convert them into a simple comment group if the first line
18784     # a has side comment
18785     my $rfields             = $group_lines[0]->get_rfields();
18786     my $maximum_field_index = $group_lines[0]->get_jmax();
18787     if (   $do_not_align
18788         && ( $maximum_line_index > 0 )
18789         && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
18790     {
18791         combine_fields();
18792         $do_not_align = 0;
18793     }
18794     return $do_not_align;
18795 }
18796
18797 sub adjust_side_comment {
18798
18799     my $do_not_align = shift;
18800
18801     # let's see if we can move the side comment field out a little
18802     # to improve readability (the last field is always a side comment field)
18803     my $have_side_comment       = 0;
18804     my $first_side_comment_line = -1;
18805     my $maximum_field_index     = $group_lines[0]->get_jmax();
18806     for my $i ( 0 .. $maximum_line_index ) {
18807         my $line = $group_lines[$i];
18808
18809         if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
18810             $have_side_comment       = 1;
18811             $first_side_comment_line = $i;
18812             last;
18813         }
18814     }
18815
18816     my $kmax = $maximum_field_index + 1;
18817
18818     if ($have_side_comment) {
18819
18820         my $line = $group_lines[0];
18821
18822         # the maximum space without exceeding the line length:
18823         my $avail = $line->get_available_space_on_right();
18824
18825         # try to use the previous comment column
18826         my $side_comment_column = $line->get_column( $kmax - 2 );
18827         my $move                = $last_comment_column - $side_comment_column;
18828
18829 ##        my $sc_line0 = $side_comment_history[0]->[0];
18830 ##        my $sc_col0  = $side_comment_history[0]->[1];
18831 ##        my $sc_line1 = $side_comment_history[1]->[0];
18832 ##        my $sc_col1  = $side_comment_history[1]->[1];
18833 ##        my $sc_line2 = $side_comment_history[2]->[0];
18834 ##        my $sc_col2  = $side_comment_history[2]->[1];
18835 ##
18836 ##        # FUTURE UPDATES:
18837 ##        # Be sure to ignore 'do not align' and  '} # end comments'
18838 ##        # Find first $move > 0 and $move <= $avail as follows:
18839 ##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
18840 ##        # 2. try sc_col2 if (line-sc_line2) < 12
18841 ##        # 3. try min possible space, plus up to 8,
18842 ##        # 4. try min possible space
18843
18844         if ( $kmax > 0 && !$do_not_align ) {
18845
18846             # but if this doesn't work, give up and use the minimum space
18847             if ( $move > $avail ) {
18848                 $move = $rOpts_minimum_space_to_comment - 1;
18849             }
18850
18851             # but we want some minimum space to the comment
18852             my $min_move = $rOpts_minimum_space_to_comment - 1;
18853             if (   $move >= 0
18854                 && $last_side_comment_length > 0
18855                 && ( $first_side_comment_line == 0 )
18856                 && $group_level == $last_group_level_written )
18857             {
18858                 $min_move = 0;
18859             }
18860
18861             if ( $move < $min_move ) {
18862                 $move = $min_move;
18863             }
18864
18865             # prevously, an upper bound was placed on $move here,
18866             # (maximum_space_to_comment), but it was not helpful
18867
18868             # don't exceed the available space
18869             if ( $move > $avail ) { $move = $avail }
18870
18871             # we can only increase space, never decrease
18872             if ( $move > 0 ) {
18873                 $line->increase_field_width( $maximum_field_index - 1, $move );
18874             }
18875
18876             # remember this column for the next group
18877             $last_comment_column = $line->get_column( $kmax - 2 );
18878         }
18879         else {
18880
18881             # try to at least line up the existing side comment location
18882             if ( $kmax > 0 && $move > 0 && $move < $avail ) {
18883                 $line->increase_field_width( $maximum_field_index - 1, $move );
18884                 $do_not_align = 0;
18885             }
18886
18887             # reset side comment column if we can't align
18888             else {
18889                 forget_side_comment();
18890             }
18891         }
18892     }
18893     return $do_not_align;
18894 }
18895
18896 sub improve_continuation_indentation {
18897     my ( $do_not_align, $group_leader_length ) = @_;
18898
18899     # See if we can increase the continuation indentation
18900     # to move all continuation lines closer to the next field
18901     # (unless it is a comment).
18902     #
18903     # '$min_ci_gap'is the extra indentation that we may need to introduce.
18904     # We will only introduce this to fields which already have some ci.
18905     # Without this variable, we would occasionally get something like this
18906     # (Complex.pm):
18907     #
18908     # use overload '+' => \&plus,
18909     #   '-'            => \&minus,
18910     #   '*'            => \&multiply,
18911     #   ...
18912     #   'tan'          => \&tan,
18913     #   'atan2'        => \&atan2,
18914     #
18915     # Whereas with this variable, we can shift variables over to get this:
18916     #
18917     # use overload '+' => \&plus,
18918     #          '-'     => \&minus,
18919     #          '*'     => \&multiply,
18920     #          ...
18921     #          'tan'   => \&tan,
18922     #          'atan2' => \&atan2,
18923
18924     ## BUB: Deactivated####################
18925     # The trouble with this patch is that it may, for example,
18926     # move in some 'or's  or ':'s, and leave some out, so that the
18927     # left edge alignment suffers.
18928     return 0;
18929     ###########################################
18930
18931     my $maximum_field_index = $group_lines[0]->get_jmax();
18932
18933     my $min_ci_gap = $rOpts_maximum_line_length;
18934     if ( $maximum_field_index > 1 && !$do_not_align ) {
18935
18936         for my $i ( 0 .. $maximum_line_index ) {
18937             my $line                = $group_lines[$i];
18938             my $leading_space_count = $line->get_leading_space_count();
18939             my $rfields             = $line->get_rfields();
18940
18941             my $gap =
18942               $line->get_column(0) -
18943               $leading_space_count -
18944               length( $$rfields[0] );
18945
18946             if ( $leading_space_count > $group_leader_length ) {
18947                 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
18948             }
18949         }
18950
18951         if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
18952             $min_ci_gap = 0;
18953         }
18954     }
18955     else {
18956         $min_ci_gap = 0;
18957     }
18958     return $min_ci_gap;
18959 }
18960
18961 sub write_vertically_aligned_line {
18962
18963     my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
18964         $extra_leading_spaces )
18965       = @_;
18966     my $rfields                   = $line->get_rfields();
18967     my $leading_space_count       = $line->get_leading_space_count();
18968     my $outdent_long_lines        = $line->get_outdent_long_lines();
18969     my $maximum_field_index       = $line->get_jmax();
18970     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
18971
18972     # add any extra spaces
18973     if ( $leading_space_count > $group_leader_length ) {
18974         $leading_space_count += $min_ci_gap;
18975     }
18976
18977     my $str = $$rfields[0];
18978
18979     # loop to concatenate all fields of this line and needed padding
18980     my $total_pad_count = 0;
18981     my ( $j, $pad );
18982     for $j ( 1 .. $maximum_field_index ) {
18983
18984         # skip zero-length side comments
18985         last
18986           if ( ( $j == $maximum_field_index )
18987             && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
18988           );
18989
18990         # compute spaces of padding before this field
18991         my $col = $line->get_column( $j - 1 );
18992         $pad = $col - ( length($str) + $leading_space_count );
18993
18994         if ($do_not_align) {
18995             $pad =
18996               ( $j < $maximum_field_index )
18997               ? 0
18998               : $rOpts_minimum_space_to_comment - 1;
18999         }
19000
19001         # if the -fpsc flag is set, move the side comment to the selected
19002         # column if and only if it is possible, ignoring constraints on
19003         # line length and minimum space to comment
19004         if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
19005         {
19006             my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
19007             if ( $newpad >= 0 ) { $pad = $newpad; }
19008         }
19009
19010         # accumulate the padding
19011         if ( $pad > 0 ) { $total_pad_count += $pad; }
19012
19013         # add this field
19014         if ( !defined $$rfields[$j] ) {
19015             write_diagnostics("UNDEFined field at j=$j\n");
19016         }
19017
19018         # only add padding when we have a finite field;
19019         # this avoids extra terminal spaces if we have empty fields
19020         if ( length( $$rfields[$j] ) > 0 ) {
19021             $str .= ' ' x $total_pad_count;
19022             $total_pad_count = 0;
19023             $str .= $$rfields[$j];
19024         }
19025         else {
19026             $total_pad_count = 0;
19027         }
19028
19029         # update side comment history buffer
19030         if ( $j == $maximum_field_index ) {
19031             my $lineno = $file_writer_object->get_output_line_number();
19032             shift @side_comment_history;
19033             push @side_comment_history, [ $lineno, $col ];
19034         }
19035     }
19036
19037     my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
19038
19039     # ship this line off
19040     write_leader_and_string( $leading_space_count + $extra_leading_spaces,
19041         $str, $side_comment_length, $outdent_long_lines,
19042         $rvertical_tightness_flags );
19043 }
19044
19045 sub get_extra_leading_spaces {
19046
19047     #----------------------------------------------------------
19048     # Define any extra indentation space (for the -lp option).
19049     # Here is why:
19050     # If a list has side comments, sub scan_list must dump the
19051     # list before it sees everything.  When this happens, it sets
19052     # the indentation to the standard scheme, but notes how
19053     # many spaces it would have liked to use.  We may be able
19054     # to recover that space here in the event that that all of the
19055     # lines of a list are back together again.
19056     #----------------------------------------------------------
19057
19058     my $extra_leading_spaces = 0;
19059     if ($extra_indent_ok) {
19060         my $object = $group_lines[0]->get_indentation();
19061         if ( ref($object) ) {
19062             my $extra_indentation_spaces_wanted =
19063               get_RECOVERABLE_SPACES($object);
19064
19065             # all indentation objects must be the same
19066             my $i;
19067             for $i ( 1 .. $maximum_line_index ) {
19068                 if ( $object != $group_lines[$i]->get_indentation() ) {
19069                     $extra_indentation_spaces_wanted = 0;
19070                     last;
19071                 }
19072             }
19073
19074             if ($extra_indentation_spaces_wanted) {
19075
19076                 # the maximum space without exceeding the line length:
19077                 my $avail = $group_lines[0]->get_available_space_on_right();
19078                 $extra_leading_spaces =
19079                   ( $avail > $extra_indentation_spaces_wanted )
19080                   ? $extra_indentation_spaces_wanted
19081                   : $avail;
19082
19083                 # update the indentation object because with -icp the terminal
19084                 # ');' will use the same adjustment.
19085                 $object->permanently_decrease_AVAILABLE_SPACES(
19086                     -$extra_leading_spaces );
19087             }
19088         }
19089     }
19090     return $extra_leading_spaces;
19091 }
19092
19093 sub combine_fields {
19094
19095     # combine all fields except for the comment field  ( sidecmt.t )
19096     # Uses global variables:
19097     #  @group_lines
19098     #  $maximum_line_index
19099     my ( $j, $k );
19100     my $maximum_field_index = $group_lines[0]->get_jmax();
19101     for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
19102         my $line    = $group_lines[$j];
19103         my $rfields = $line->get_rfields();
19104         foreach ( 1 .. $maximum_field_index - 1 ) {
19105             $$rfields[0] .= $$rfields[$_];
19106         }
19107         $$rfields[1] = $$rfields[$maximum_field_index];
19108
19109         $line->set_jmax(1);
19110         $line->set_column( 0, 0 );
19111         $line->set_column( 1, 0 );
19112
19113     }
19114     $maximum_field_index = 1;
19115
19116     for $j ( 0 .. $maximum_line_index ) {
19117         my $line    = $group_lines[$j];
19118         my $rfields = $line->get_rfields();
19119         for $k ( 0 .. $maximum_field_index ) {
19120             my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
19121             if ( $k == 0 ) {
19122                 $pad += $group_lines[$j]->get_leading_space_count();
19123             }
19124
19125             if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
19126
19127         }
19128     }
19129 }
19130
19131 sub get_output_line_number {
19132
19133     # the output line number reported to a caller is the number of items
19134     # written plus the number of items in the buffer
19135     my $self = shift;
19136     1 + $maximum_line_index + $file_writer_object->get_output_line_number();
19137 }
19138
19139 sub write_leader_and_string {
19140
19141     my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
19142         $rvertical_tightness_flags )
19143       = @_;
19144
19145     # handle outdenting of long lines:
19146     if ($outdent_long_lines) {
19147         my $excess =
19148           length($str) -
19149           $side_comment_length +
19150           $leading_space_count -
19151           $rOpts_maximum_line_length;
19152         if ( $excess > 0 ) {
19153             $leading_space_count = 0;
19154             $last_outdented_line_at =
19155               $file_writer_object->get_output_line_number();
19156
19157             unless ($outdented_line_count) {
19158                 $first_outdented_line_at = $last_outdented_line_at;
19159             }
19160             $outdented_line_count++;
19161         }
19162     }
19163
19164     # Make preliminary leading whitespace.  It could get changed
19165     # later by entabbing, so we have to keep track of any changes
19166     # to the leading_space_count from here on.
19167     my $leading_string =
19168       $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
19169
19170     # Unpack any recombination data; it was packed by
19171     # sub send_lines_to_vertical_aligner. Contents:
19172     #
19173     #   [0] type: 1=opening  2=closing  3=opening block brace
19174     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
19175     #             if closing: spaces of padding to use
19176     #   [2] sequence number of container
19177     #   [3] valid flag: do not append if this flag is false
19178     #
19179     my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
19180         $seqno_end );
19181     if ($rvertical_tightness_flags) {
19182         (
19183             $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
19184             $seqno_end
19185         ) = @{$rvertical_tightness_flags};
19186     }
19187
19188     $seqno_string = $seqno_end;
19189
19190     # handle any cached line ..
19191     # either append this line to it or write it out
19192     if ( length($cached_line_text) ) {
19193
19194         if ( !$cached_line_valid ) {
19195             entab_and_output( $cached_line_text,
19196                 $cached_line_leading_space_count,
19197                 $last_group_level_written );
19198         }
19199
19200         # handle cached line with opening container token
19201         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
19202
19203             my $gap = $leading_space_count - length($cached_line_text);
19204
19205             # handle option of just one tight opening per line:
19206             if ( $cached_line_flag == 1 ) {
19207                 if ( defined($open_or_close) && $open_or_close == 1 ) {
19208                     $gap = -1;
19209                 }
19210             }
19211
19212             if ( $gap >= 0 ) {
19213                 $leading_string      = $cached_line_text . ' ' x $gap;
19214                 $leading_space_count = $cached_line_leading_space_count;
19215                 $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
19216             }
19217             else {
19218                 entab_and_output( $cached_line_text,
19219                     $cached_line_leading_space_count,
19220                     $last_group_level_written );
19221             }
19222         }
19223
19224         # handle cached line to place before this closing container token
19225         else {
19226             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
19227
19228             if ( length($test_line) <= $rOpts_maximum_line_length ) {
19229
19230                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
19231
19232                 # Patch to outdent closing tokens ending # in ');'
19233                 # If we are joining a line like ');' to a previous stacked
19234                 # set of closing tokens, then decide if we may outdent the
19235                 # combined stack to the indentation of the ');'.  Since we
19236                 # should not normally outdent any of the other tokens more than
19237                 # the indentation of the lines that contained them, we will
19238                 # only do this if all of the corresponding opening
19239                 # tokens were on the same line.  This can happen with
19240                 # -sot and -sct.  For example, it is ok here:
19241                 #   __PACKAGE__->load_components( qw(
19242                 #         PK::Auto
19243                 #         Core
19244                 #   ));
19245                 #
19246                 #   But, for example, we do not outdent in this example because
19247                 #   that would put the closing sub brace out farther than the
19248                 #   opening sub brace:
19249                 #
19250                 #   perltidy -sot -sct
19251                 #   $c->Tk::bind(
19252                 #       '<Control-f>' => sub {
19253                 #           my ($c) = @_;
19254                 #           my $e = $c->XEvent;
19255                 #           itemsUnderArea $c;
19256                 #       } );
19257                 #
19258                 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
19259
19260                     # The way to tell this is if the stacked sequence numbers
19261                     # of this output line are the reverse of the stacked
19262                     # sequence numbers of the previous non-blank line of
19263                     # sequence numbers.  So we can join if the previous
19264                     # nonblank string of tokens is the mirror image.  For
19265                     # example if stack )}] is 13:8:6 then we are looking for a
19266                     # leading stack like [{( which is 6:8:13 We only need to
19267                     # check the two ends, because the intermediate tokens must
19268                     # fall in order.  Note on speed: having to split on colons
19269                     # and eliminate multiple colons might appear to be slow,
19270                     # but it's not an issue because we almost never come
19271                     # through here.  In a typical file we don't.
19272                     $seqno_string               =~ s/^:+//;
19273                     $last_nonblank_seqno_string =~ s/^:+//;
19274                     $seqno_string               =~ s/:+/:/g;
19275                     $last_nonblank_seqno_string =~ s/:+/:/g;
19276
19277                     # how many spaces can we outdent?
19278                     my $diff =
19279                       $cached_line_leading_space_count - $leading_space_count;
19280                     if (   $diff > 0
19281                         && length($seqno_string)
19282                         && length($last_nonblank_seqno_string) ==
19283                         length($seqno_string) )
19284                     {
19285                         my @seqno_last =
19286                           ( split ':', $last_nonblank_seqno_string );
19287                         my @seqno_now = ( split ':', $seqno_string );
19288                         if (   $seqno_now[-1] == $seqno_last[0]
19289                             && $seqno_now[0] == $seqno_last[-1] )
19290                         {
19291
19292                             # OK to outdent ..
19293                             # for absolute safety, be sure we only remove
19294                             # whitespace
19295                             my $ws = substr( $test_line, 0, $diff );
19296                             if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
19297
19298                                 $test_line = substr( $test_line, $diff );
19299                                 $cached_line_leading_space_count -= $diff;
19300                             }
19301
19302                             # shouldn't happen, but not critical:
19303                             ##else {
19304                             ## ERROR transferring indentation here
19305                             ##}
19306                         }
19307                     }
19308                 }
19309
19310                 $str                 = $test_line;
19311                 $leading_string      = "";
19312                 $leading_space_count = $cached_line_leading_space_count;
19313             }
19314             else {
19315                 entab_and_output( $cached_line_text,
19316                     $cached_line_leading_space_count,
19317                     $last_group_level_written );
19318             }
19319         }
19320     }
19321     $cached_line_type = 0;
19322     $cached_line_text = "";
19323
19324     # make the line to be written
19325     my $line = $leading_string . $str;
19326
19327     # write or cache this line
19328     if ( !$open_or_close || $side_comment_length > 0 ) {
19329         entab_and_output( $line, $leading_space_count, $group_level );
19330     }
19331     else {
19332         $cached_line_text                = $line;
19333         $cached_line_type                = $open_or_close;
19334         $cached_line_flag                = $tightness_flag;
19335         $cached_seqno                    = $seqno;
19336         $cached_line_valid               = $valid;
19337         $cached_line_leading_space_count = $leading_space_count;
19338         $cached_seqno_string             = $seqno_string;
19339     }
19340
19341     $last_group_level_written = $group_level;
19342     $last_side_comment_length = $side_comment_length;
19343     $extra_indent_ok          = 0;
19344 }
19345
19346 sub entab_and_output {
19347     my ( $line, $leading_space_count, $level ) = @_;
19348
19349     # The line is currently correct if there is no tabbing (recommended!)
19350     # We may have to lop off some leading spaces and replace with tabs.
19351     if ( $leading_space_count > 0 ) {
19352
19353         # Nothing to do if no tabs
19354         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
19355             || $rOpts_indent_columns <= 0 )
19356         {
19357
19358             # nothing to do
19359         }
19360
19361         # Handle entab option
19362         elsif ($rOpts_entab_leading_whitespace) {
19363             my $space_count =
19364               $leading_space_count % $rOpts_entab_leading_whitespace;
19365             my $tab_count =
19366               int( $leading_space_count / $rOpts_entab_leading_whitespace );
19367             my $leading_string = "\t" x $tab_count . ' ' x $space_count;
19368             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
19369                 substr( $line, 0, $leading_space_count ) = $leading_string;
19370             }
19371             else {
19372
19373                 # REMOVE AFTER TESTING
19374                 # shouldn't happen - program error counting whitespace
19375                 # we'll skip entabbing
19376                 warning(
19377 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
19378                 );
19379             }
19380         }
19381
19382         # Handle option of one tab per level
19383         else {
19384             my $leading_string = ( "\t" x $level );
19385             my $space_count =
19386               $leading_space_count - $level * $rOpts_indent_columns;
19387
19388             # shouldn't happen:
19389             if ( $space_count < 0 ) {
19390                 warning(
19391 "Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
19392                 );
19393                 $leading_string = ( ' ' x $leading_space_count );
19394             }
19395             else {
19396                 $leading_string .= ( ' ' x $space_count );
19397             }
19398             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
19399                 substr( $line, 0, $leading_space_count ) = $leading_string;
19400             }
19401             else {
19402
19403                 # REMOVE AFTER TESTING
19404                 # shouldn't happen - program error counting whitespace
19405                 # we'll skip entabbing
19406                 warning(
19407 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
19408                 );
19409             }
19410         }
19411     }
19412     $file_writer_object->write_code_line( $line . "\n" );
19413     if ($seqno_string) {
19414         $last_nonblank_seqno_string = $seqno_string;
19415     }
19416 }
19417
19418 {    # begin get_leading_string
19419
19420     my @leading_string_cache;
19421
19422     sub get_leading_string {
19423
19424         # define the leading whitespace string for this line..
19425         my $leading_whitespace_count = shift;
19426
19427         # Handle case of zero whitespace, which includes multi-line quotes
19428         # (which may have a finite level; this prevents tab problems)
19429         if ( $leading_whitespace_count <= 0 ) {
19430             return "";
19431         }
19432
19433         # look for previous result
19434         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
19435             return $leading_string_cache[$leading_whitespace_count];
19436         }
19437
19438         # must compute a string for this number of spaces
19439         my $leading_string;
19440
19441         # Handle simple case of no tabs
19442         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
19443             || $rOpts_indent_columns <= 0 )
19444         {
19445             $leading_string = ( ' ' x $leading_whitespace_count );
19446         }
19447
19448         # Handle entab option
19449         elsif ($rOpts_entab_leading_whitespace) {
19450             my $space_count =
19451               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
19452             my $tab_count = int(
19453                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
19454             $leading_string = "\t" x $tab_count . ' ' x $space_count;
19455         }
19456
19457         # Handle option of one tab per level
19458         else {
19459             $leading_string = ( "\t" x $group_level );
19460             my $space_count =
19461               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
19462
19463             # shouldn't happen:
19464             if ( $space_count < 0 ) {
19465                 warning(
19466 "Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
19467                 );
19468                 $leading_string = ( ' ' x $leading_whitespace_count );
19469             }
19470             else {
19471                 $leading_string .= ( ' ' x $space_count );
19472             }
19473         }
19474         $leading_string_cache[$leading_whitespace_count] = $leading_string;
19475         return $leading_string;
19476     }
19477 }    # end get_leading_string
19478
19479 sub report_anything_unusual {
19480     my $self = shift;
19481     if ( $outdented_line_count > 0 ) {
19482         write_logfile_entry(
19483             "$outdented_line_count long lines were outdented:\n");
19484         write_logfile_entry(
19485             "  First at output line $first_outdented_line_at\n");
19486
19487         if ( $outdented_line_count > 1 ) {
19488             write_logfile_entry(
19489                 "   Last at output line $last_outdented_line_at\n");
19490         }
19491         write_logfile_entry(
19492             "  use -noll to prevent outdenting, -l=n to increase line length\n"
19493         );
19494         write_logfile_entry("\n");
19495     }
19496 }
19497
19498 #####################################################################
19499 #
19500 # the Perl::Tidy::FileWriter class writes the output file
19501 #
19502 #####################################################################
19503
19504 package Perl::Tidy::FileWriter;
19505
19506 # Maximum number of little messages; probably need not be changed.
19507 use constant MAX_NAG_MESSAGES => 6;
19508
19509 sub write_logfile_entry {
19510     my $self          = shift;
19511     my $logger_object = $self->{_logger_object};
19512     if ($logger_object) {
19513         $logger_object->write_logfile_entry(@_);
19514     }
19515 }
19516
19517 sub new {
19518     my $class = shift;
19519     my ( $line_sink_object, $rOpts, $logger_object ) = @_;
19520
19521     bless {
19522         _line_sink_object           => $line_sink_object,
19523         _logger_object              => $logger_object,
19524         _rOpts                      => $rOpts,
19525         _output_line_number         => 1,
19526         _consecutive_blank_lines    => 0,
19527         _consecutive_nonblank_lines => 0,
19528         _first_line_length_error    => 0,
19529         _max_line_length_error      => 0,
19530         _last_line_length_error     => 0,
19531         _first_line_length_error_at => 0,
19532         _max_line_length_error_at   => 0,
19533         _last_line_length_error_at  => 0,
19534         _line_length_error_count    => 0,
19535         _max_output_line_length     => 0,
19536         _max_output_line_length_at  => 0,
19537     }, $class;
19538 }
19539
19540 sub tee_on {
19541     my $self = shift;
19542     $self->{_line_sink_object}->tee_on();
19543 }
19544
19545 sub tee_off {
19546     my $self = shift;
19547     $self->{_line_sink_object}->tee_off();
19548 }
19549
19550 sub get_output_line_number {
19551     my $self = shift;
19552     return $self->{_output_line_number};
19553 }
19554
19555 sub decrement_output_line_number {
19556     my $self = shift;
19557     $self->{_output_line_number}--;
19558 }
19559
19560 sub get_consecutive_nonblank_lines {
19561     my $self = shift;
19562     return $self->{_consecutive_nonblank_lines};
19563 }
19564
19565 sub reset_consecutive_blank_lines {
19566     my $self = shift;
19567     $self->{_consecutive_blank_lines} = 0;
19568 }
19569
19570 sub want_blank_line {
19571     my $self = shift;
19572     unless ( $self->{_consecutive_blank_lines} ) {
19573         $self->write_blank_code_line();
19574     }
19575 }
19576
19577 sub write_blank_code_line {
19578     my $self  = shift;
19579     my $rOpts = $self->{_rOpts};
19580     return
19581       if ( $self->{_consecutive_blank_lines} >=
19582         $rOpts->{'maximum-consecutive-blank-lines'} );
19583     $self->{_consecutive_blank_lines}++;
19584     $self->{_consecutive_nonblank_lines} = 0;
19585     $self->write_line("\n");
19586 }
19587
19588 sub write_code_line {
19589     my $self = shift;
19590     my $a    = shift;
19591
19592     if ( $a =~ /^\s*$/ ) {
19593         my $rOpts = $self->{_rOpts};
19594         return
19595           if ( $self->{_consecutive_blank_lines} >=
19596             $rOpts->{'maximum-consecutive-blank-lines'} );
19597         $self->{_consecutive_blank_lines}++;
19598         $self->{_consecutive_nonblank_lines} = 0;
19599     }
19600     else {
19601         $self->{_consecutive_blank_lines} = 0;
19602         $self->{_consecutive_nonblank_lines}++;
19603     }
19604     $self->write_line($a);
19605 }
19606
19607 sub write_line {
19608     my $self = shift;
19609     my $a    = shift;
19610
19611     # TODO: go through and see if the test is necessary here
19612     if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
19613
19614     $self->{_line_sink_object}->write_line($a);
19615
19616     # This calculation of excess line length ignores any internal tabs
19617     my $rOpts  = $self->{_rOpts};
19618     my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
19619     if ( $a =~ /^\t+/g ) {
19620         $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
19621     }
19622
19623     # Note that we just incremented output line number to future value
19624     # so we must subtract 1 for current line number
19625     if ( length($a) > 1 + $self->{_max_output_line_length} ) {
19626         $self->{_max_output_line_length}    = length($a) - 1;
19627         $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
19628     }
19629
19630     if ( $exceed > 0 ) {
19631         my $output_line_number = $self->{_output_line_number};
19632         $self->{_last_line_length_error}    = $exceed;
19633         $self->{_last_line_length_error_at} = $output_line_number - 1;
19634         if ( $self->{_line_length_error_count} == 0 ) {
19635             $self->{_first_line_length_error}    = $exceed;
19636             $self->{_first_line_length_error_at} = $output_line_number - 1;
19637         }
19638
19639         if (
19640             $self->{_last_line_length_error} > $self->{_max_line_length_error} )
19641         {
19642             $self->{_max_line_length_error}    = $exceed;
19643             $self->{_max_line_length_error_at} = $output_line_number - 1;
19644         }
19645
19646         if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
19647             $self->write_logfile_entry(
19648                 "Line length exceeded by $exceed characters\n");
19649         }
19650         $self->{_line_length_error_count}++;
19651     }
19652
19653 }
19654
19655 sub report_line_length_errors {
19656     my $self                    = shift;
19657     my $rOpts                   = $self->{_rOpts};
19658     my $line_length_error_count = $self->{_line_length_error_count};
19659     if ( $line_length_error_count == 0 ) {
19660         $self->write_logfile_entry(
19661             "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
19662         my $max_output_line_length    = $self->{_max_output_line_length};
19663         my $max_output_line_length_at = $self->{_max_output_line_length_at};
19664         $self->write_logfile_entry(
19665 "  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
19666         );
19667
19668     }
19669     else {
19670
19671         my $word = ( $line_length_error_count > 1 ) ? "s" : "";
19672         $self->write_logfile_entry(
19673 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
19674         );
19675
19676         $word = ( $line_length_error_count > 1 ) ? "First" : "";
19677         my $first_line_length_error    = $self->{_first_line_length_error};
19678         my $first_line_length_error_at = $self->{_first_line_length_error_at};
19679         $self->write_logfile_entry(
19680 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
19681         );
19682
19683         if ( $line_length_error_count > 1 ) {
19684             my $max_line_length_error     = $self->{_max_line_length_error};
19685             my $max_line_length_error_at  = $self->{_max_line_length_error_at};
19686             my $last_line_length_error    = $self->{_last_line_length_error};
19687             my $last_line_length_error_at = $self->{_last_line_length_error_at};
19688             $self->write_logfile_entry(
19689 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
19690             );
19691             $self->write_logfile_entry(
19692 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
19693             );
19694         }
19695     }
19696 }
19697
19698 #####################################################################
19699 #
19700 # The Perl::Tidy::Debugger class shows line tokenization
19701 #
19702 #####################################################################
19703
19704 package Perl::Tidy::Debugger;
19705
19706 sub new {
19707
19708     my ( $class, $filename ) = @_;
19709
19710     bless {
19711         _debug_file        => $filename,
19712         _debug_file_opened => 0,
19713         _fh                => undef,
19714     }, $class;
19715 }
19716
19717 sub really_open_debug_file {
19718
19719     my $self       = shift;
19720     my $debug_file = $self->{_debug_file};
19721     my $fh;
19722     unless ( $fh = IO::File->new("> $debug_file") ) {
19723         warn("can't open $debug_file: $!\n");
19724     }
19725     $self->{_debug_file_opened} = 1;
19726     $self->{_fh}                = $fh;
19727     print $fh
19728       "Use -dump-token-types (-dtt) to get a list of token type codes\n";
19729 }
19730
19731 sub close_debug_file {
19732
19733     my $self = shift;
19734     my $fh   = $self->{_fh};
19735     if ( $self->{_debug_file_opened} ) {
19736
19737         eval { $self->{_fh}->close() };
19738     }
19739 }
19740
19741 sub write_debug_entry {
19742
19743     # This is a debug dump routine which may be modified as necessary
19744     # to dump tokens on a line-by-line basis.  The output will be written
19745     # to the .DEBUG file when the -D flag is entered.
19746     my $self           = shift;
19747     my $line_of_tokens = shift;
19748
19749     my $input_line        = $line_of_tokens->{_line_text};
19750     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
19751     my $rtokens           = $line_of_tokens->{_rtokens};
19752     my $rlevels           = $line_of_tokens->{_rlevels};
19753     my $rslevels          = $line_of_tokens->{_rslevels};
19754     my $rblock_type       = $line_of_tokens->{_rblock_type};
19755     my $input_line_number = $line_of_tokens->{_line_number};
19756     my $line_type         = $line_of_tokens->{_line_type};
19757
19758     my ( $j, $num );
19759
19760     my $token_str              = "$input_line_number: ";
19761     my $reconstructed_original = "$input_line_number: ";
19762     my $block_str              = "$input_line_number: ";
19763
19764     #$token_str .= "$line_type: ";
19765     #$reconstructed_original .= "$line_type: ";
19766
19767     my $pattern   = "";
19768     my @next_char = ( '"', '"' );
19769     my $i_next    = 0;
19770     unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
19771     my $fh = $self->{_fh};
19772
19773     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
19774
19775         # testing patterns
19776         if ( $$rtoken_type[$j] eq 'k' ) {
19777             $pattern .= $$rtokens[$j];
19778         }
19779         else {
19780             $pattern .= $$rtoken_type[$j];
19781         }
19782         $reconstructed_original .= $$rtokens[$j];
19783         $block_str              .= "($$rblock_type[$j])";
19784         $num = length( $$rtokens[$j] );
19785         my $type_str = $$rtoken_type[$j];
19786
19787         # be sure there are no blank tokens (shouldn't happen)
19788         # This can only happen if a programming error has been made
19789         # because all valid tokens are non-blank
19790         if ( $type_str eq ' ' ) {
19791             print $fh "BLANK TOKEN on the next line\n";
19792             $type_str = $next_char[$i_next];
19793             $i_next   = 1 - $i_next;
19794         }
19795
19796         if ( length($type_str) == 1 ) {
19797             $type_str = $type_str x $num;
19798         }
19799         $token_str .= $type_str;
19800     }
19801
19802     # Write what you want here ...
19803     # print $fh "$input_line\n";
19804     # print $fh "$pattern\n";
19805     print $fh "$reconstructed_original\n";
19806     print $fh "$token_str\n";
19807
19808     #print $fh "$block_str\n";
19809 }
19810
19811 #####################################################################
19812 #
19813 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
19814 # method for returning the next line to be parsed, as well as a
19815 # 'peek_ahead()' method
19816 #
19817 # The input parameter is an object with a 'get_line()' method
19818 # which returns the next line to be parsed
19819 #
19820 #####################################################################
19821
19822 package Perl::Tidy::LineBuffer;
19823
19824 sub new {
19825
19826     my $class              = shift;
19827     my $line_source_object = shift;
19828
19829     return bless {
19830         _line_source_object => $line_source_object,
19831         _rlookahead_buffer  => [],
19832     }, $class;
19833 }
19834
19835 sub peek_ahead {
19836     my $self               = shift;
19837     my $buffer_index       = shift;
19838     my $line               = undef;
19839     my $line_source_object = $self->{_line_source_object};
19840     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
19841     if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
19842         $line = $$rlookahead_buffer[$buffer_index];
19843     }
19844     else {
19845         $line = $line_source_object->get_line();
19846         push( @$rlookahead_buffer, $line );
19847     }
19848     return $line;
19849 }
19850
19851 sub get_line {
19852     my $self               = shift;
19853     my $line               = undef;
19854     my $line_source_object = $self->{_line_source_object};
19855     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
19856
19857     if ( scalar(@$rlookahead_buffer) ) {
19858         $line = shift @$rlookahead_buffer;
19859     }
19860     else {
19861         $line = $line_source_object->get_line();
19862     }
19863     return $line;
19864 }
19865
19866 ########################################################################
19867 #
19868 # the Perl::Tidy::Tokenizer package is essentially a filter which
19869 # reads lines of perl source code from a source object and provides
19870 # corresponding tokenized lines through its get_line() method.  Lines
19871 # flow from the source_object to the caller like this:
19872 #
19873 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
19874 #   get_line()         get_line()           get_line()     line_of_tokens
19875 #
19876 # The source object can be any object with a get_line() method which
19877 # supplies one line (a character string) perl call.
19878 # The LineBuffer object is created by the Tokenizer.
19879 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
19880 # containing one tokenized line for each call to its get_line() method.
19881 #
19882 # WARNING: This is not a real class yet.  Only one tokenizer my be used.
19883 #
19884 ########################################################################
19885
19886 package Perl::Tidy::Tokenizer;
19887
19888 BEGIN {
19889
19890     # Caution: these debug flags produce a lot of output
19891     # They should all be 0 except when debugging small scripts
19892
19893     use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
19894     use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
19895     use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
19896     use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
19897     use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
19898
19899     my $debug_warning = sub {
19900         print "TOKENIZER_DEBUGGING with key $_[0]\n";
19901     };
19902
19903     TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
19904     TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
19905     TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
19906     TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
19907     TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
19908
19909 }
19910
19911 use Carp;
19912
19913 # PACKAGE VARIABLES for for processing an entire FILE.
19914 use vars qw{
19915   $tokenizer_self
19916
19917   $last_nonblank_token
19918   $last_nonblank_type
19919   $last_nonblank_block_type
19920   $statement_type
19921   $in_attribute_list
19922   $current_package
19923   $context
19924
19925   %is_constant
19926   %is_user_function
19927   %user_function_prototype
19928   %is_block_function
19929   %is_block_list_function
19930   %saw_function_definition
19931
19932   $brace_depth
19933   $paren_depth
19934   $square_bracket_depth
19935
19936   @current_depth
19937   @total_depth
19938   $total_depth
19939   @nesting_sequence_number
19940   @current_sequence_number
19941   @paren_type
19942   @paren_semicolon_count
19943   @paren_structural_type
19944   @brace_type
19945   @brace_structural_type
19946   @brace_statement_type
19947   @brace_context
19948   @brace_package
19949   @square_bracket_type
19950   @square_bracket_structural_type
19951   @depth_array
19952   @nested_ternary_flag
19953   @starting_line_of_current_depth
19954 };
19955
19956 # GLOBAL CONSTANTS for routines in this package
19957 use vars qw{
19958   %is_indirect_object_taker
19959   %is_block_operator
19960   %expecting_operator_token
19961   %expecting_operator_types
19962   %expecting_term_types
19963   %expecting_term_token
19964   %is_digraph
19965   %is_file_test_operator
19966   %is_trigraph
19967   %is_valid_token_type
19968   %is_keyword
19969   %is_code_block_token
19970   %really_want_term
19971   @opening_brace_names
19972   @closing_brace_names
19973   %is_keyword_taking_list
19974   %is_q_qq_qw_qx_qr_s_y_tr_m
19975 };
19976
19977 # possible values of operator_expected()
19978 use constant TERM     => -1;
19979 use constant UNKNOWN  => 0;
19980 use constant OPERATOR => 1;
19981
19982 # possible values of context
19983 use constant SCALAR_CONTEXT  => -1;
19984 use constant UNKNOWN_CONTEXT => 0;
19985 use constant LIST_CONTEXT    => 1;
19986
19987 # Maximum number of little messages; probably need not be changed.
19988 use constant MAX_NAG_MESSAGES => 6;
19989
19990 {
19991
19992     # methods to count instances
19993     my $_count = 0;
19994     sub get_count        { $_count; }
19995     sub _increment_count { ++$_count }
19996     sub _decrement_count { --$_count }
19997 }
19998
19999 sub DESTROY {
20000     $_[0]->_decrement_count();
20001 }
20002
20003 sub new {
20004
20005     my $class = shift;
20006
20007     # Note: 'tabs' and 'indent_columns' are temporary and should be
20008     # removed asap
20009     my %defaults = (
20010         source_object        => undef,
20011         debugger_object      => undef,
20012         diagnostics_object   => undef,
20013         logger_object        => undef,
20014         starting_level       => undef,
20015         indent_columns       => 4,
20016         tabs                 => 0,
20017         look_for_hash_bang   => 0,
20018         trim_qw              => 1,
20019         look_for_autoloader  => 1,
20020         look_for_selfloader  => 1,
20021         starting_line_number => 1,
20022     );
20023     my %args = ( %defaults, @_ );
20024
20025     # we are given an object with a get_line() method to supply source lines
20026     my $source_object = $args{source_object};
20027
20028     # we create another object with a get_line() and peek_ahead() method
20029     my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
20030
20031     # Tokenizer state data is as follows:
20032     # _rhere_target_list    reference to list of here-doc targets
20033     # _here_doc_target      the target string for a here document
20034     # _here_quote_character the type of here-doc quoting (" ' ` or none)
20035     #                       to determine if interpolation is done
20036     # _quote_target         character we seek if chasing a quote
20037     # _line_start_quote     line where we started looking for a long quote
20038     # _in_here_doc          flag indicating if we are in a here-doc
20039     # _in_pod               flag set if we are in pod documentation
20040     # _in_error             flag set if we saw severe error (binary in script)
20041     # _in_data              flag set if we are in __DATA__ section
20042     # _in_end               flag set if we are in __END__ section
20043     # _in_format            flag set if we are in a format description
20044     # _in_attribute_list    flag telling if we are looking for attributes
20045     # _in_quote             flag telling if we are chasing a quote
20046     # _starting_level       indentation level of first line
20047     # _input_tabstr         string denoting one indentation level of input file
20048     # _know_input_tabstr    flag indicating if we know _input_tabstr
20049     # _line_buffer_object   object with get_line() method to supply source code
20050     # _diagnostics_object   place to write debugging information
20051     # _unexpected_error_count  error count used to limit output
20052     # _lower_case_labels_at  line numbers where lower case labels seen
20053     $tokenizer_self = {
20054         _rhere_target_list                  => [],
20055         _in_here_doc                        => 0,
20056         _here_doc_target                    => "",
20057         _here_quote_character               => "",
20058         _in_data                            => 0,
20059         _in_end                             => 0,
20060         _in_format                          => 0,
20061         _in_error                           => 0,
20062         _in_pod                             => 0,
20063         _in_attribute_list                  => 0,
20064         _in_quote                           => 0,
20065         _quote_target                       => "",
20066         _line_start_quote                   => -1,
20067         _starting_level                     => $args{starting_level},
20068         _know_starting_level                => defined( $args{starting_level} ),
20069         _tabs                               => $args{tabs},
20070         _indent_columns                     => $args{indent_columns},
20071         _look_for_hash_bang                 => $args{look_for_hash_bang},
20072         _trim_qw                            => $args{trim_qw},
20073         _input_tabstr                       => "",
20074         _know_input_tabstr                  => -1,
20075         _last_line_number                   => $args{starting_line_number} - 1,
20076         _saw_perl_dash_P                    => 0,
20077         _saw_perl_dash_w                    => 0,
20078         _saw_use_strict                     => 0,
20079         _saw_v_string                       => 0,
20080         _look_for_autoloader                => $args{look_for_autoloader},
20081         _look_for_selfloader                => $args{look_for_selfloader},
20082         _saw_autoloader                     => 0,
20083         _saw_selfloader                     => 0,
20084         _saw_hash_bang                      => 0,
20085         _saw_end                            => 0,
20086         _saw_data                           => 0,
20087         _saw_negative_indentation           => 0,
20088         _started_tokenizing                 => 0,
20089         _line_buffer_object                 => $line_buffer_object,
20090         _debugger_object                    => $args{debugger_object},
20091         _diagnostics_object                 => $args{diagnostics_object},
20092         _logger_object                      => $args{logger_object},
20093         _unexpected_error_count             => 0,
20094         _started_looking_for_here_target_at => 0,
20095         _nearly_matched_here_target_at      => undef,
20096         _line_text                          => "",
20097         _rlower_case_labels_at              => undef,
20098     };
20099
20100     prepare_for_a_new_file();
20101     find_starting_indentation_level();
20102
20103     bless $tokenizer_self, $class;
20104
20105     # This is not a full class yet, so die if an attempt is made to
20106     # create more than one object.
20107
20108     if ( _increment_count() > 1 ) {
20109         confess
20110 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
20111     }
20112
20113     return $tokenizer_self;
20114
20115 }
20116
20117 # interface to Perl::Tidy::Logger routines
20118 sub warning {
20119     my $logger_object = $tokenizer_self->{_logger_object};
20120     if ($logger_object) {
20121         $logger_object->warning(@_);
20122     }
20123 }
20124
20125 sub complain {
20126     my $logger_object = $tokenizer_self->{_logger_object};
20127     if ($logger_object) {
20128         $logger_object->complain(@_);
20129     }
20130 }
20131
20132 sub write_logfile_entry {
20133     my $logger_object = $tokenizer_self->{_logger_object};
20134     if ($logger_object) {
20135         $logger_object->write_logfile_entry(@_);
20136     }
20137 }
20138
20139 sub interrupt_logfile {
20140     my $logger_object = $tokenizer_self->{_logger_object};
20141     if ($logger_object) {
20142         $logger_object->interrupt_logfile();
20143     }
20144 }
20145
20146 sub resume_logfile {
20147     my $logger_object = $tokenizer_self->{_logger_object};
20148     if ($logger_object) {
20149         $logger_object->resume_logfile();
20150     }
20151 }
20152
20153 sub increment_brace_error {
20154     my $logger_object = $tokenizer_self->{_logger_object};
20155     if ($logger_object) {
20156         $logger_object->increment_brace_error();
20157     }
20158 }
20159
20160 sub report_definite_bug {
20161     my $logger_object = $tokenizer_self->{_logger_object};
20162     if ($logger_object) {
20163         $logger_object->report_definite_bug();
20164     }
20165 }
20166
20167 sub brace_warning {
20168     my $logger_object = $tokenizer_self->{_logger_object};
20169     if ($logger_object) {
20170         $logger_object->brace_warning(@_);
20171     }
20172 }
20173
20174 sub get_saw_brace_error {
20175     my $logger_object = $tokenizer_self->{_logger_object};
20176     if ($logger_object) {
20177         $logger_object->get_saw_brace_error();
20178     }
20179     else {
20180         0;
20181     }
20182 }
20183
20184 # interface to Perl::Tidy::Diagnostics routines
20185 sub write_diagnostics {
20186     if ( $tokenizer_self->{_diagnostics_object} ) {
20187         $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
20188     }
20189 }
20190
20191 sub report_tokenization_errors {
20192
20193     my $self = shift;
20194
20195     my $level = get_indentation_level();
20196     if ( $level != $tokenizer_self->{_starting_level} ) {
20197         warning("final indentation level: $level\n");
20198     }
20199
20200     check_final_nesting_depths();
20201
20202     if ( $tokenizer_self->{_look_for_hash_bang}
20203         && !$tokenizer_self->{_saw_hash_bang} )
20204     {
20205         warning(
20206             "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
20207     }
20208
20209     if ( $tokenizer_self->{_in_format} ) {
20210         warning("hit EOF while in format description\n");
20211     }
20212
20213     if ( $tokenizer_self->{_in_pod} ) {
20214
20215         # Just write log entry if this is after __END__ or __DATA__
20216         # because this happens to often, and it is not likely to be
20217         # a parsing error.
20218         if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
20219             write_logfile_entry(
20220 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
20221             );
20222         }
20223
20224         else {
20225             complain(
20226 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
20227             );
20228         }
20229
20230     }
20231
20232     if ( $tokenizer_self->{_in_here_doc} ) {
20233         my $here_doc_target = $tokenizer_self->{_here_doc_target};
20234         my $started_looking_for_here_target_at =
20235           $tokenizer_self->{_started_looking_for_here_target_at};
20236         if ($here_doc_target) {
20237             warning(
20238 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
20239             );
20240         }
20241         else {
20242             warning(
20243 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
20244             );
20245         }
20246         my $nearly_matched_here_target_at =
20247           $tokenizer_self->{_nearly_matched_here_target_at};
20248         if ($nearly_matched_here_target_at) {
20249             warning(
20250 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
20251             );
20252         }
20253     }
20254
20255     if ( $tokenizer_self->{_in_quote} ) {
20256         my $line_start_quote = $tokenizer_self->{_line_start_quote};
20257         my $quote_target     = $tokenizer_self->{_quote_target};
20258         my $what =
20259           ( $tokenizer_self->{_in_attribute_list} )
20260           ? "attribute list"
20261           : "quote/pattern";
20262         warning(
20263 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
20264         );
20265     }
20266
20267     unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
20268         if ( $] < 5.006 ) {
20269             write_logfile_entry("Suggest including '-w parameter'\n");
20270         }
20271         else {
20272             write_logfile_entry("Suggest including 'use warnings;'\n");
20273         }
20274     }
20275
20276     if ( $tokenizer_self->{_saw_perl_dash_P} ) {
20277         write_logfile_entry("Use of -P parameter for defines is discouraged\n");
20278     }
20279
20280     unless ( $tokenizer_self->{_saw_use_strict} ) {
20281         write_logfile_entry("Suggest including 'use strict;'\n");
20282     }
20283
20284     # it is suggested that lables have at least one upper case character
20285     # for legibility and to avoid code breakage as new keywords are introduced
20286     if ( $tokenizer_self->{_rlower_case_labels_at} ) {
20287         my @lower_case_labels_at =
20288           @{ $tokenizer_self->{_rlower_case_labels_at} };
20289         write_logfile_entry(
20290             "Suggest using upper case characters in label(s)\n");
20291         local $" = ')(';
20292         write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
20293     }
20294 }
20295
20296 sub report_v_string {
20297
20298     # warn if this version can't handle v-strings
20299     my $tok = shift;
20300     unless ( $tokenizer_self->{_saw_v_string} ) {
20301         $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
20302     }
20303     if ( $] < 5.006 ) {
20304         warning(
20305 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
20306         );
20307     }
20308 }
20309
20310 sub get_input_line_number {
20311     return $tokenizer_self->{_last_line_number};
20312 }
20313
20314 # returns the next tokenized line
20315 sub get_line {
20316
20317     my $self = shift;
20318
20319     # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
20320     # $square_bracket_depth, $paren_depth
20321
20322     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
20323     $tokenizer_self->{_line_text} = $input_line;
20324
20325     return undef unless ($input_line);
20326
20327     my $input_line_number = ++$tokenizer_self->{_last_line_number};
20328
20329     # Find and remove what characters terminate this line, including any
20330     # control r
20331     my $input_line_separator = "";
20332     if ( chomp($input_line) ) { $input_line_separator = $/ }
20333
20334     # TODO: what other characters should be included here?
20335     if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
20336         $input_line_separator = $2 . $input_line_separator;
20337     }
20338
20339     # for backwards compatability we keep the line text terminated with
20340     # a newline character
20341     $input_line .= "\n";
20342     $tokenizer_self->{_line_text} = $input_line;    # update
20343
20344     # create a data structure describing this line which will be
20345     # returned to the caller.
20346
20347     # _line_type codes are:
20348     #   SYSTEM         - system-specific code before hash-bang line
20349     #   CODE           - line of perl code (including comments)
20350     #   POD_START      - line starting pod, such as '=head'
20351     #   POD            - pod documentation text
20352     #   POD_END        - last line of pod section, '=cut'
20353     #   HERE           - text of here-document
20354     #   HERE_END       - last line of here-doc (target word)
20355     #   FORMAT         - format section
20356     #   FORMAT_END     - last line of format section, '.'
20357     #   DATA_START     - __DATA__ line
20358     #   DATA           - unidentified text following __DATA__
20359     #   END_START      - __END__ line
20360     #   END            - unidentified text following __END__
20361     #   ERROR          - we are in big trouble, probably not a perl script
20362
20363     # Other variables:
20364     #   _curly_brace_depth     - depth of curly braces at start of line
20365     #   _square_bracket_depth  - depth of square brackets at start of line
20366     #   _paren_depth           - depth of parens at start of line
20367     #   _starting_in_quote     - this line continues a multi-line quote
20368     #                            (so don't trim leading blanks!)
20369     #   _ending_in_quote       - this line ends in a multi-line quote
20370     #                            (so don't trim trailing blanks!)
20371     my $line_of_tokens = {
20372         _line_type                => 'EOF',
20373         _line_text                => $input_line,
20374         _line_number              => $input_line_number,
20375         _rtoken_type              => undef,
20376         _rtokens                  => undef,
20377         _rlevels                  => undef,
20378         _rslevels                 => undef,
20379         _rblock_type              => undef,
20380         _rcontainer_type          => undef,
20381         _rcontainer_environment   => undef,
20382         _rtype_sequence           => undef,
20383         _rnesting_tokens          => undef,
20384         _rci_levels               => undef,
20385         _rnesting_blocks          => undef,
20386         _python_indentation_level => -1,                   ## 0,
20387         _starting_in_quote    => 0,                    # to be set by subroutine
20388         _ending_in_quote      => 0,
20389         _curly_brace_depth    => $brace_depth,
20390         _square_bracket_depth => $square_bracket_depth,
20391         _paren_depth          => $paren_depth,
20392         _quote_character      => '',
20393     };
20394
20395     # must print line unchanged if we are in a here document
20396     if ( $tokenizer_self->{_in_here_doc} ) {
20397
20398         $line_of_tokens->{_line_type} = 'HERE';
20399         my $here_doc_target      = $tokenizer_self->{_here_doc_target};
20400         my $here_quote_character = $tokenizer_self->{_here_quote_character};
20401         my $candidate_target     = $input_line;
20402         chomp $candidate_target;
20403         if ( $candidate_target eq $here_doc_target ) {
20404             $tokenizer_self->{_nearly_matched_here_target_at} = undef;
20405             $line_of_tokens->{_line_type}                     = 'HERE_END';
20406             write_logfile_entry("Exiting HERE document $here_doc_target\n");
20407
20408             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
20409             if (@$rhere_target_list) {    # there can be multiple here targets
20410                 ( $here_doc_target, $here_quote_character ) =
20411                   @{ shift @$rhere_target_list };
20412                 $tokenizer_self->{_here_doc_target} = $here_doc_target;
20413                 $tokenizer_self->{_here_quote_character} =
20414                   $here_quote_character;
20415                 write_logfile_entry(
20416                     "Entering HERE document $here_doc_target\n");
20417                 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
20418                 $tokenizer_self->{_started_looking_for_here_target_at} =
20419                   $input_line_number;
20420             }
20421             else {
20422                 $tokenizer_self->{_in_here_doc}          = 0;
20423                 $tokenizer_self->{_here_doc_target}      = "";
20424                 $tokenizer_self->{_here_quote_character} = "";
20425             }
20426         }
20427
20428         # check for error of extra whitespace
20429         # note for PERL6: leading whitespace is allowed
20430         else {
20431             $candidate_target =~ s/\s*$//;
20432             $candidate_target =~ s/^\s*//;
20433             if ( $candidate_target eq $here_doc_target ) {
20434                 $tokenizer_self->{_nearly_matched_here_target_at} =
20435                   $input_line_number;
20436             }
20437         }
20438         return $line_of_tokens;
20439     }
20440
20441     # must print line unchanged if we are in a format section
20442     elsif ( $tokenizer_self->{_in_format} ) {
20443
20444         if ( $input_line =~ /^\.[\s#]*$/ ) {
20445             write_logfile_entry("Exiting format section\n");
20446             $tokenizer_self->{_in_format} = 0;
20447             $line_of_tokens->{_line_type} = 'FORMAT_END';
20448         }
20449         else {
20450             $line_of_tokens->{_line_type} = 'FORMAT';
20451         }
20452         return $line_of_tokens;
20453     }
20454
20455     # must print line unchanged if we are in pod documentation
20456     elsif ( $tokenizer_self->{_in_pod} ) {
20457
20458         $line_of_tokens->{_line_type} = 'POD';
20459         if ( $input_line =~ /^=cut/ ) {
20460             $line_of_tokens->{_line_type} = 'POD_END';
20461             write_logfile_entry("Exiting POD section\n");
20462             $tokenizer_self->{_in_pod} = 0;
20463         }
20464         if ( $input_line =~ /^\#\!.*perl\b/ ) {
20465             warning(
20466                 "Hash-bang in pod can cause older versions of perl to fail! \n"
20467             );
20468         }
20469
20470         return $line_of_tokens;
20471     }
20472
20473     # must print line unchanged if we have seen a severe error (i.e., we
20474     # are seeing illegal tokens and connot continue.  Syntax errors do
20475     # not pass this route).  Calling routine can decide what to do, but
20476     # the default can be to just pass all lines as if they were after __END__
20477     elsif ( $tokenizer_self->{_in_error} ) {
20478         $line_of_tokens->{_line_type} = 'ERROR';
20479         return $line_of_tokens;
20480     }
20481
20482     # print line unchanged if we are __DATA__ section
20483     elsif ( $tokenizer_self->{_in_data} ) {
20484
20485         # ...but look for POD
20486         # Note that the _in_data and _in_end flags remain set
20487         # so that we return to that state after seeing the
20488         # end of a pod section
20489         if ( $input_line =~ /^=(?!cut)/ ) {
20490             $line_of_tokens->{_line_type} = 'POD_START';
20491             write_logfile_entry("Entering POD section\n");
20492             $tokenizer_self->{_in_pod} = 1;
20493             return $line_of_tokens;
20494         }
20495         else {
20496             $line_of_tokens->{_line_type} = 'DATA';
20497             return $line_of_tokens;
20498         }
20499     }
20500
20501     # print line unchanged if we are in __END__ section
20502     elsif ( $tokenizer_self->{_in_end} ) {
20503
20504         # ...but look for POD
20505         # Note that the _in_data and _in_end flags remain set
20506         # so that we return to that state after seeing the
20507         # end of a pod section
20508         if ( $input_line =~ /^=(?!cut)/ ) {
20509             $line_of_tokens->{_line_type} = 'POD_START';
20510             write_logfile_entry("Entering POD section\n");
20511             $tokenizer_self->{_in_pod} = 1;
20512             return $line_of_tokens;
20513         }
20514         else {
20515             $line_of_tokens->{_line_type} = 'END';
20516             return $line_of_tokens;
20517         }
20518     }
20519
20520     # check for a hash-bang line if we haven't seen one
20521     if ( !$tokenizer_self->{_saw_hash_bang} ) {
20522         if ( $input_line =~ /^\#\!.*perl\b/ ) {
20523             $tokenizer_self->{_saw_hash_bang} = $input_line_number;
20524
20525             # check for -w and -P flags
20526             if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
20527                 $tokenizer_self->{_saw_perl_dash_P} = 1;
20528             }
20529
20530             if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
20531                 $tokenizer_self->{_saw_perl_dash_w} = 1;
20532             }
20533
20534             if (   ( $input_line_number > 1 )
20535                 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
20536             {
20537
20538                 # this is helpful for VMS systems; we may have accidentally
20539                 # tokenized some DCL commands
20540                 if ( $tokenizer_self->{_started_tokenizing} ) {
20541                     warning(
20542 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
20543                     );
20544                 }
20545                 else {
20546                     complain("Useless hash-bang after line 1\n");
20547                 }
20548             }
20549
20550             # Report the leading hash-bang as a system line
20551             # This will prevent -dac from deleting it
20552             else {
20553                 $line_of_tokens->{_line_type} = 'SYSTEM';
20554                 return $line_of_tokens;
20555             }
20556         }
20557     }
20558
20559     # wait for a hash-bang before parsing if the user invoked us with -x
20560     if ( $tokenizer_self->{_look_for_hash_bang}
20561         && !$tokenizer_self->{_saw_hash_bang} )
20562     {
20563         $line_of_tokens->{_line_type} = 'SYSTEM';
20564         return $line_of_tokens;
20565     }
20566
20567     # a first line of the form ': #' will be marked as SYSTEM
20568     # since lines of this form may be used by tcsh
20569     if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
20570         $line_of_tokens->{_line_type} = 'SYSTEM';
20571         return $line_of_tokens;
20572     }
20573
20574     # now we know that it is ok to tokenize the line...
20575     # the line tokenizer will modify any of these private variables:
20576     #        _rhere_target_list
20577     #        _in_data
20578     #        _in_end
20579     #        _in_format
20580     #        _in_error
20581     #        _in_pod
20582     #        _in_quote
20583     my $ending_in_quote_last = $tokenizer_self->{_in_quote};
20584     tokenize_this_line($line_of_tokens);
20585
20586     # Now finish defining the return structure and return it
20587     $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
20588
20589     # handle severe error (binary data in script)
20590     if ( $tokenizer_self->{_in_error} ) {
20591         $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
20592         warning("Giving up after error\n");
20593         $line_of_tokens->{_line_type} = 'ERROR';
20594         reset_indentation_level(0);          # avoid error messages
20595         return $line_of_tokens;
20596     }
20597
20598     # handle start of pod documentation
20599     if ( $tokenizer_self->{_in_pod} ) {
20600
20601         # This gets tricky..above a __DATA__ or __END__ section, perl
20602         # accepts '=cut' as the start of pod section. But afterwards,
20603         # only pod utilities see it and they may ignore an =cut without
20604         # leading =head.  In any case, this isn't good.
20605         if ( $input_line =~ /^=cut\b/ ) {
20606             if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
20607                 complain("=cut while not in pod ignored\n");
20608                 $tokenizer_self->{_in_pod}    = 0;
20609                 $line_of_tokens->{_line_type} = 'POD_END';
20610             }
20611             else {
20612                 $line_of_tokens->{_line_type} = 'POD_START';
20613                 complain(
20614 "=cut starts a pod section .. this can fool pod utilities.\n"
20615                 );
20616                 write_logfile_entry("Entering POD section\n");
20617             }
20618         }
20619
20620         else {
20621             $line_of_tokens->{_line_type} = 'POD_START';
20622             write_logfile_entry("Entering POD section\n");
20623         }
20624
20625         return $line_of_tokens;
20626     }
20627
20628     # update indentation levels for log messages
20629     if ( $input_line !~ /^\s*$/ ) {
20630         my $rlevels                      = $line_of_tokens->{_rlevels};
20631         my $structural_indentation_level = $$rlevels[0];
20632         my ( $python_indentation_level, $msg ) =
20633           find_indentation_level( $input_line, $structural_indentation_level );
20634         if ($msg) { write_logfile_entry("$msg") }
20635         if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
20636             $line_of_tokens->{_python_indentation_level} =
20637               $python_indentation_level;
20638         }
20639     }
20640
20641     # see if this line contains here doc targets
20642     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
20643     if (@$rhere_target_list) {
20644
20645         my ( $here_doc_target, $here_quote_character ) =
20646           @{ shift @$rhere_target_list };
20647         $tokenizer_self->{_in_here_doc}          = 1;
20648         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
20649         $tokenizer_self->{_here_quote_character} = $here_quote_character;
20650         write_logfile_entry("Entering HERE document $here_doc_target\n");
20651         $tokenizer_self->{_started_looking_for_here_target_at} =
20652           $input_line_number;
20653     }
20654
20655     # NOTE: __END__ and __DATA__ statements are written unformatted
20656     # because they can theoretically contain additional characters
20657     # which are not tokenized (and cannot be read with <DATA> either!).
20658     if ( $tokenizer_self->{_in_data} ) {
20659         $line_of_tokens->{_line_type} = 'DATA_START';
20660         write_logfile_entry("Starting __DATA__ section\n");
20661         $tokenizer_self->{_saw_data} = 1;
20662
20663         # keep parsing after __DATA__ if use SelfLoader was seen
20664         if ( $tokenizer_self->{_saw_selfloader} ) {
20665             $tokenizer_self->{_in_data} = 0;
20666             write_logfile_entry(
20667                 "SelfLoader seen, continuing; -nlsl deactivates\n");
20668         }
20669
20670         return $line_of_tokens;
20671     }
20672
20673     elsif ( $tokenizer_self->{_in_end} ) {
20674         $line_of_tokens->{_line_type} = 'END_START';
20675         write_logfile_entry("Starting __END__ section\n");
20676         $tokenizer_self->{_saw_end} = 1;
20677
20678         # keep parsing after __END__ if use AutoLoader was seen
20679         if ( $tokenizer_self->{_saw_autoloader} ) {
20680             $tokenizer_self->{_in_end} = 0;
20681             write_logfile_entry(
20682                 "AutoLoader seen, continuing; -nlal deactivates\n");
20683         }
20684         return $line_of_tokens;
20685     }
20686
20687     # now, finally, we know that this line is type 'CODE'
20688     $line_of_tokens->{_line_type} = 'CODE';
20689
20690     # remember if we have seen any real code
20691     if (   !$tokenizer_self->{_started_tokenizing}
20692         && $input_line !~ /^\s*$/
20693         && $input_line !~ /^\s*#/ )
20694     {
20695         $tokenizer_self->{_started_tokenizing} = 1;
20696     }
20697
20698     if ( $tokenizer_self->{_debugger_object} ) {
20699         $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
20700     }
20701
20702     # Note: if keyword 'format' occurs in this line code, it is still CODE
20703     # (keyword 'format' need not start a line)
20704     if ( $tokenizer_self->{_in_format} ) {
20705         write_logfile_entry("Entering format section\n");
20706     }
20707
20708     if ( $tokenizer_self->{_in_quote}
20709         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
20710     {
20711
20712         #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
20713         if (
20714             ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
20715         {
20716             $tokenizer_self->{_line_start_quote} = $input_line_number;
20717             write_logfile_entry(
20718                 "Start multi-line quote or pattern ending in $quote_target\n");
20719         }
20720     }
20721     elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
20722         and !$tokenizer_self->{_in_quote} )
20723     {
20724         $tokenizer_self->{_line_start_quote} = -1;
20725         write_logfile_entry("End of multi-line quote or pattern\n");
20726     }
20727
20728     # we are returning a line of CODE
20729     return $line_of_tokens;
20730 }
20731
20732 sub find_starting_indentation_level {
20733
20734     # USES GLOBAL VARIABLES: $tokenizer_self
20735     my $starting_level    = 0;
20736     my $know_input_tabstr = -1;    # flag for find_indentation_level
20737
20738     # use value if given as parameter
20739     if ( $tokenizer_self->{_know_starting_level} ) {
20740         $starting_level = $tokenizer_self->{_starting_level};
20741     }
20742
20743     # if we know there is a hash_bang line, the level must be zero
20744     elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
20745         $tokenizer_self->{_know_starting_level} = 1;
20746     }
20747
20748     # otherwise figure it out from the input file
20749     else {
20750         my $line;
20751         my $i                            = 0;
20752         my $structural_indentation_level = -1; # flag for find_indentation_level
20753
20754         my $msg = "";
20755         while ( $line =
20756             $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
20757         {
20758
20759             # if first line is #! then assume starting level is zero
20760             if ( $i == 1 && $line =~ /^\#\!/ ) {
20761                 $starting_level = 0;
20762                 last;
20763             }
20764             next if ( $line =~ /^\s*#/ );      # must not be comment
20765             next if ( $line =~ /^\s*$/ );      # must not be blank
20766             ( $starting_level, $msg ) =
20767               find_indentation_level( $line, $structural_indentation_level );
20768             if ($msg) { write_logfile_entry("$msg") }
20769             last;
20770         }
20771         $msg = "Line $i implies starting-indentation-level = $starting_level\n";
20772
20773         if ( $starting_level > 0 ) {
20774
20775             my $input_tabstr = $tokenizer_self->{_input_tabstr};
20776             if ( $input_tabstr eq "\t" ) {
20777                 $msg .= "by guessing input tabbing uses 1 tab per level\n";
20778             }
20779             else {
20780                 my $cols = length($input_tabstr);
20781                 $msg .=
20782                   "by guessing input tabbing uses $cols blanks per level\n";
20783             }
20784         }
20785         write_logfile_entry("$msg");
20786     }
20787     $tokenizer_self->{_starting_level} = $starting_level;
20788     reset_indentation_level($starting_level);
20789 }
20790
20791 # Find indentation level given a input line.  At the same time, try to
20792 # figure out the input tabbing scheme.
20793 #
20794 # There are two types of calls:
20795 #
20796 # Type 1: $structural_indentation_level < 0
20797 #  In this case we have to guess $input_tabstr to figure out the level.
20798 #
20799 # Type 2: $structural_indentation_level >= 0
20800 #  In this case the level of this line is known, and this routine can
20801 #  update the tabbing string, if still unknown, to make the level correct.
20802
20803 sub find_indentation_level {
20804     my ( $line, $structural_indentation_level ) = @_;
20805
20806     # USES GLOBAL VARIABLES: $tokenizer_self
20807     my $level = 0;
20808     my $msg   = "";
20809
20810     my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
20811     my $input_tabstr      = $tokenizer_self->{_input_tabstr};
20812
20813     # find leading whitespace
20814     my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
20815
20816     # make first guess at input tabbing scheme if necessary
20817     if ( $know_input_tabstr < 0 ) {
20818
20819         $know_input_tabstr = 0;
20820
20821         if ( $tokenizer_self->{_tabs} ) {
20822             $input_tabstr = "\t";
20823             if ( length($leading_whitespace) > 0 ) {
20824                 if ( $leading_whitespace !~ /\t/ ) {
20825
20826                     my $cols = $tokenizer_self->{_indent_columns};
20827
20828                     if ( length($leading_whitespace) < $cols ) {
20829                         $cols = length($leading_whitespace);
20830                     }
20831                     $input_tabstr = " " x $cols;
20832                 }
20833             }
20834         }
20835         else {
20836             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
20837
20838             if ( length($leading_whitespace) > 0 ) {
20839                 if ( $leading_whitespace =~ /^\t/ ) {
20840                     $input_tabstr = "\t";
20841                 }
20842             }
20843         }
20844         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
20845         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
20846     }
20847
20848     # determine the input tabbing scheme if possible
20849     if (   ( $know_input_tabstr == 0 )
20850         && ( length($leading_whitespace) > 0 )
20851         && ( $structural_indentation_level > 0 ) )
20852     {
20853         my $saved_input_tabstr = $input_tabstr;
20854
20855         # check for common case of one tab per indentation level
20856         if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
20857             if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
20858                 $input_tabstr = "\t";
20859                 $msg          = "Guessing old indentation was tab character\n";
20860             }
20861         }
20862
20863         else {
20864
20865             # detab any tabs based on 8 blanks per tab
20866             my $entabbed = "";
20867             if ( $leading_whitespace =~ s/^\t+/        /g ) {
20868                 $entabbed = "entabbed";
20869             }
20870
20871             # now compute tabbing from number of spaces
20872             my $columns =
20873               length($leading_whitespace) / $structural_indentation_level;
20874             if ( $columns == int $columns ) {
20875                 $msg =
20876                   "Guessing old indentation was $columns $entabbed spaces\n";
20877             }
20878             else {
20879                 $columns = int $columns;
20880                 $msg =
20881 "old indentation is unclear, using $columns $entabbed spaces\n";
20882             }
20883             $input_tabstr = " " x $columns;
20884         }
20885         $know_input_tabstr                    = 1;
20886         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
20887         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
20888
20889         # see if mistakes were made
20890         if ( ( $tokenizer_self->{_starting_level} > 0 )
20891             && !$tokenizer_self->{_know_starting_level} )
20892         {
20893
20894             if ( $input_tabstr ne $saved_input_tabstr ) {
20895                 complain(
20896 "I made a bad starting level guess; rerun with a value for -sil \n"
20897                 );
20898             }
20899         }
20900     }
20901
20902     # use current guess at input tabbing to get input indentation level
20903     #
20904     # Patch to handle a common case of entabbed leading whitespace
20905     # If the leading whitespace equals 4 spaces and we also have
20906     # tabs, detab the input whitespace assuming 8 spaces per tab.
20907     if ( length($input_tabstr) == 4 ) {
20908         $leading_whitespace =~ s/^\t+/        /g;
20909     }
20910
20911     if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
20912         my $pos = 0;
20913
20914         while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
20915         {
20916             $pos += $len_tab;
20917             $level++;
20918         }
20919     }
20920     return ( $level, $msg );
20921 }
20922
20923 # This is a currently unused debug routine
20924 sub dump_functions {
20925
20926     my $fh = *STDOUT;
20927     my ( $pkg, $sub );
20928     foreach $pkg ( keys %is_user_function ) {
20929         print $fh "\nnon-constant subs in package $pkg\n";
20930
20931         foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
20932             my $msg = "";
20933             if ( $is_block_list_function{$pkg}{$sub} ) {
20934                 $msg = 'block_list';
20935             }
20936
20937             if ( $is_block_function{$pkg}{$sub} ) {
20938                 $msg = 'block';
20939             }
20940             print $fh "$sub $msg\n";
20941         }
20942     }
20943
20944     foreach $pkg ( keys %is_constant ) {
20945         print $fh "\nconstants and constant subs in package $pkg\n";
20946
20947         foreach $sub ( keys %{ $is_constant{$pkg} } ) {
20948             print $fh "$sub\n";
20949         }
20950     }
20951 }
20952
20953 sub ones_count {
20954
20955     # count number of 1's in a string of 1's and 0's
20956     # example: ones_count("010101010101") gives 6
20957     return ( my $cis = $_[0] ) =~ tr/1/0/;
20958 }
20959
20960 sub prepare_for_a_new_file {
20961
20962     # previous tokens needed to determine what to expect next
20963     $last_nonblank_token      = ';';    # the only possible starting state which
20964     $last_nonblank_type       = ';';    # will make a leading brace a code block
20965     $last_nonblank_block_type = '';
20966
20967     # scalars for remembering statement types across multiple lines
20968     $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
20969     $in_attribute_list = 0;
20970
20971     # scalars for remembering where we are in the file
20972     $current_package = "main";
20973     $context         = UNKNOWN_CONTEXT;
20974
20975     # hashes used to remember function information
20976     %is_constant             = ();      # user-defined constants
20977     %is_user_function        = ();      # user-defined functions
20978     %user_function_prototype = ();      # their prototypes
20979     %is_block_function       = ();
20980     %is_block_list_function  = ();
20981     %saw_function_definition = ();
20982
20983     # variables used to track depths of various containers
20984     # and report nesting errors
20985     $paren_depth          = 0;
20986     $brace_depth          = 0;
20987     $square_bracket_depth = 0;
20988     @current_depth[ 0 .. $#closing_brace_names ] =
20989       (0) x scalar @closing_brace_names;
20990     $total_depth = 0;
20991     @total_depth = ();
20992     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
20993       ( 0 .. $#closing_brace_names );
20994     @current_sequence_number             = ();
20995     $paren_type[$paren_depth]            = '';
20996     $paren_semicolon_count[$paren_depth] = 0;
20997     $paren_structural_type[$brace_depth] = '';
20998     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
20999     $brace_structural_type[$brace_depth]                   = '';
21000     $brace_statement_type[$brace_depth]                    = "";
21001     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
21002     $brace_package[$paren_depth]                           = $current_package;
21003     $square_bracket_type[$square_bracket_depth]            = '';
21004     $square_bracket_structural_type[$square_bracket_depth] = '';
21005
21006     initialize_tokenizer_state();
21007 }
21008
21009 {                                       # begin tokenize_this_line
21010
21011     use constant BRACE          => 0;
21012     use constant SQUARE_BRACKET => 1;
21013     use constant PAREN          => 2;
21014     use constant QUESTION_COLON => 3;
21015
21016     # TV1: scalars for processing one LINE.
21017     # Re-initialized on each entry to sub tokenize_this_line.
21018     my (
21019         $block_type,        $container_type,    $expecting,
21020         $i,                 $i_tok,             $input_line,
21021         $input_line_number, $last_nonblank_i,   $max_token_index,
21022         $next_tok,          $next_type,         $peeked_ahead,
21023         $prototype,         $rhere_target_list, $rtoken_map,
21024         $rtoken_type,       $rtokens,           $tok,
21025         $type,              $type_sequence,     $indent_flag,
21026     );
21027
21028     # TV2: refs to ARRAYS for processing one LINE
21029     # Re-initialized on each call.
21030     my $routput_token_list     = [];    # stack of output token indexes
21031     my $routput_token_type     = [];    # token types
21032     my $routput_block_type     = [];    # types of code block
21033     my $routput_container_type = [];    # paren types, such as if, elsif, ..
21034     my $routput_type_sequence  = [];    # nesting sequential number
21035     my $routput_indent_flag    = [];    #
21036
21037     # TV3: SCALARS for quote variables.  These are initialized with a
21038     # subroutine call and continually updated as lines are processed.
21039     my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
21040         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
21041
21042     # TV4: SCALARS for multi-line identifiers and
21043     # statements. These are initialized with a subroutine call
21044     # and continually updated as lines are processed.
21045     my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
21046
21047     # TV5: SCALARS for tracking indentation level.
21048     # Initialized once and continually updated as lines are
21049     # processed.
21050     my (
21051         $nesting_token_string,      $nesting_type_string,
21052         $nesting_block_string,      $nesting_block_flag,
21053         $nesting_list_string,       $nesting_list_flag,
21054         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
21055         $in_statement_continuation, $level_in_tokenizer,
21056         $slevel_in_tokenizer,       $rslevel_stack,
21057     );
21058
21059     # TV6: SCALARS for remembering several previous
21060     # tokens. Initialized once and continually updated as
21061     # lines are processed.
21062     my (
21063         $last_nonblank_container_type,     $last_nonblank_type_sequence,
21064         $last_last_nonblank_token,         $last_last_nonblank_type,
21065         $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
21066         $last_last_nonblank_type_sequence, $last_nonblank_prototype,
21067     );
21068
21069     # ----------------------------------------------------------------
21070     # beginning of tokenizer variable access and manipulation routines
21071     # ----------------------------------------------------------------
21072
21073     sub initialize_tokenizer_state {
21074
21075         # TV1: initialized on each call
21076         # TV2: initialized on each call
21077         # TV3:
21078         $in_quote                = 0;
21079         $quote_type              = 'Q';
21080         $quote_character         = "";
21081         $quote_pos               = 0;
21082         $quote_depth             = 0;
21083         $quoted_string_1         = "";
21084         $quoted_string_2         = "";
21085         $allowed_quote_modifiers = "";
21086
21087         # TV4:
21088         $id_scan_state     = '';
21089         $identifier        = '';
21090         $want_paren        = "";
21091         $indented_if_level = 0;
21092
21093         # TV5:
21094         $nesting_token_string             = "";
21095         $nesting_type_string              = "";
21096         $nesting_block_string             = '1';    # initially in a block
21097         $nesting_block_flag               = 1;
21098         $nesting_list_string              = '0';    # initially not in a list
21099         $nesting_list_flag                = 0;      # initially not in a list
21100         $ci_string_in_tokenizer           = "";
21101         $continuation_string_in_tokenizer = "0";
21102         $in_statement_continuation        = 0;
21103         $level_in_tokenizer               = 0;
21104         $slevel_in_tokenizer              = 0;
21105         $rslevel_stack                    = [];
21106
21107         # TV6:
21108         $last_nonblank_container_type      = '';
21109         $last_nonblank_type_sequence       = '';
21110         $last_last_nonblank_token          = ';';
21111         $last_last_nonblank_type           = ';';
21112         $last_last_nonblank_block_type     = '';
21113         $last_last_nonblank_container_type = '';
21114         $last_last_nonblank_type_sequence  = '';
21115         $last_nonblank_prototype           = "";
21116     }
21117
21118     sub save_tokenizer_state {
21119
21120         my $rTV1 = [
21121             $block_type,        $container_type,    $expecting,
21122             $i,                 $i_tok,             $input_line,
21123             $input_line_number, $last_nonblank_i,   $max_token_index,
21124             $next_tok,          $next_type,         $peeked_ahead,
21125             $prototype,         $rhere_target_list, $rtoken_map,
21126             $rtoken_type,       $rtokens,           $tok,
21127             $type,              $type_sequence,     $indent_flag,
21128         ];
21129
21130         my $rTV2 = [
21131             $routput_token_list,    $routput_token_type,
21132             $routput_block_type,    $routput_container_type,
21133             $routput_type_sequence, $routput_indent_flag,
21134         ];
21135
21136         my $rTV3 = [
21137             $in_quote,        $quote_type,
21138             $quote_character, $quote_pos,
21139             $quote_depth,     $quoted_string_1,
21140             $quoted_string_2, $allowed_quote_modifiers,
21141         ];
21142
21143         my $rTV4 =
21144           [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
21145
21146         my $rTV5 = [
21147             $nesting_token_string,      $nesting_type_string,
21148             $nesting_block_string,      $nesting_block_flag,
21149             $nesting_list_string,       $nesting_list_flag,
21150             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
21151             $in_statement_continuation, $level_in_tokenizer,
21152             $slevel_in_tokenizer,       $rslevel_stack,
21153         ];
21154
21155         my $rTV6 = [
21156             $last_nonblank_container_type,
21157             $last_nonblank_type_sequence,
21158             $last_last_nonblank_token,
21159             $last_last_nonblank_type,
21160             $last_last_nonblank_block_type,
21161             $last_last_nonblank_container_type,
21162             $last_last_nonblank_type_sequence,
21163             $last_nonblank_prototype,
21164         ];
21165         return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
21166     }
21167
21168     sub restore_tokenizer_state {
21169         my ($rstate) = @_;
21170         my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
21171         (
21172             $block_type,        $container_type,    $expecting,
21173             $i,                 $i_tok,             $input_line,
21174             $input_line_number, $last_nonblank_i,   $max_token_index,
21175             $next_tok,          $next_type,         $peeked_ahead,
21176             $prototype,         $rhere_target_list, $rtoken_map,
21177             $rtoken_type,       $rtokens,           $tok,
21178             $type,              $type_sequence,     $indent_flag,
21179         ) = @{$rTV1};
21180
21181         (
21182             $routput_token_list,    $routput_token_type,
21183             $routput_block_type,    $routput_container_type,
21184             $routput_type_sequence, $routput_type_sequence,
21185         ) = @{$rTV2};
21186
21187         (
21188             $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
21189             $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
21190         ) = @{$rTV3};
21191
21192         ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
21193           @{$rTV4};
21194
21195         (
21196             $nesting_token_string,      $nesting_type_string,
21197             $nesting_block_string,      $nesting_block_flag,
21198             $nesting_list_string,       $nesting_list_flag,
21199             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
21200             $in_statement_continuation, $level_in_tokenizer,
21201             $slevel_in_tokenizer,       $rslevel_stack,
21202         ) = @{$rTV5};
21203
21204         (
21205             $last_nonblank_container_type,
21206             $last_nonblank_type_sequence,
21207             $last_last_nonblank_token,
21208             $last_last_nonblank_type,
21209             $last_last_nonblank_block_type,
21210             $last_last_nonblank_container_type,
21211             $last_last_nonblank_type_sequence,
21212             $last_nonblank_prototype,
21213         ) = @{$rTV6};
21214     }
21215
21216     sub get_indentation_level {
21217
21218         # patch to avoid reporting error if indented if is not terminated
21219         if ($indented_if_level) { return $level_in_tokenizer - 1 }
21220         return $level_in_tokenizer;
21221     }
21222
21223     sub reset_indentation_level {
21224         $level_in_tokenizer  = $_[0];
21225         $slevel_in_tokenizer = $_[0];
21226         push @{$rslevel_stack}, $slevel_in_tokenizer;
21227     }
21228
21229     sub peeked_ahead {
21230         $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
21231     }
21232
21233     # ------------------------------------------------------------
21234     # end of tokenizer variable access and manipulation routines
21235     # ------------------------------------------------------------
21236
21237     # ------------------------------------------------------------
21238     # beginning of various scanner interface routines
21239     # ------------------------------------------------------------
21240     sub scan_replacement_text {
21241
21242         # check for here-docs in replacement text invoked by
21243         # a substitution operator with executable modifier 'e'.
21244         #
21245         # given:
21246         #  $replacement_text
21247         # return:
21248         #  $rht = reference to any here-doc targets
21249         my ($replacement_text) = @_;
21250
21251         # quick check
21252         return undef unless ( $replacement_text =~ /<</ );
21253
21254         write_logfile_entry("scanning replacement text for here-doc targets\n");
21255
21256         # save the logger object for error messages
21257         my $logger_object = $tokenizer_self->{_logger_object};
21258
21259         # localize all package variables
21260         local (
21261             $tokenizer_self,          $last_nonblank_token,
21262             $last_nonblank_type,      $last_nonblank_block_type,
21263             $statement_type,          $in_attribute_list,
21264             $current_package,         $context,
21265             %is_constant,             %is_user_function,
21266             %user_function_prototype, %is_block_function,
21267             %is_block_list_function,  %saw_function_definition,
21268             $brace_depth,             $paren_depth,
21269             $square_bracket_depth,    @current_depth,
21270             @total_depth,             $total_depth,
21271             @nesting_sequence_number, @current_sequence_number,
21272             @paren_type,              @paren_semicolon_count,
21273             @paren_structural_type,   @brace_type,
21274             @brace_structural_type,   @brace_statement_type,
21275             @brace_context,           @brace_package,
21276             @square_bracket_type,     @square_bracket_structural_type,
21277             @depth_array,             @starting_line_of_current_depth,
21278             @nested_ternary_flag,
21279         );
21280
21281         # save all lexical variables
21282         my $rstate = save_tokenizer_state();
21283         _decrement_count();    # avoid error check for multiple tokenizers
21284
21285         # make a new tokenizer
21286         my $rOpts = {};
21287         my $rpending_logfile_message;
21288         my $source_object =
21289           Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
21290             $rpending_logfile_message );
21291         my $tokenizer = Perl::Tidy::Tokenizer->new(
21292             source_object        => $source_object,
21293             logger_object        => $logger_object,
21294             starting_line_number => $input_line_number,
21295         );
21296
21297         # scan the replacement text
21298         1 while ( $tokenizer->get_line() );
21299
21300         # remove any here doc targets
21301         my $rht = undef;
21302         if ( $tokenizer_self->{_in_here_doc} ) {
21303             $rht = [];
21304             push @{$rht},
21305               [
21306                 $tokenizer_self->{_here_doc_target},
21307                 $tokenizer_self->{_here_quote_character}
21308               ];
21309             if ( $tokenizer_self->{_rhere_target_list} ) {
21310                 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
21311                 $tokenizer_self->{_rhere_target_list} = undef;
21312             }
21313             $tokenizer_self->{_in_here_doc} = undef;
21314         }
21315
21316         # now its safe to report errors
21317         $tokenizer->report_tokenization_errors();
21318
21319         # restore all tokenizer lexical variables
21320         restore_tokenizer_state($rstate);
21321
21322         # return the here doc targets
21323         return $rht;
21324     }
21325
21326     sub scan_bare_identifier {
21327         ( $i, $tok, $type, $prototype ) =
21328           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
21329             $rtoken_map, $max_token_index );
21330     }
21331
21332     sub scan_identifier {
21333         ( $i, $tok, $type, $id_scan_state, $identifier ) =
21334           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
21335             $max_token_index );
21336     }
21337
21338     sub scan_id {
21339         ( $i, $tok, $type, $id_scan_state ) =
21340           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
21341             $id_scan_state, $max_token_index );
21342     }
21343
21344     sub scan_number {
21345         my $number;
21346         ( $i, $type, $number ) =
21347           scan_number_do( $input_line, $i, $rtoken_map, $type,
21348             $max_token_index );
21349         return $number;
21350     }
21351
21352     # a sub to warn if token found where term expected
21353     sub error_if_expecting_TERM {
21354         if ( $expecting == TERM ) {
21355             if ( $really_want_term{$last_nonblank_type} ) {
21356                 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
21357                     $rtoken_type, $input_line );
21358                 1;
21359             }
21360         }
21361     }
21362
21363     # a sub to warn if token found where operator expected
21364     sub error_if_expecting_OPERATOR {
21365         if ( $expecting == OPERATOR ) {
21366             my $thing = defined $_[0] ? $_[0] : $tok;
21367             unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
21368                 $rtoken_map, $rtoken_type, $input_line );
21369             if ( $i_tok == 0 ) {
21370                 interrupt_logfile();
21371                 warning("Missing ';' above?\n");
21372                 resume_logfile();
21373             }
21374             1;
21375         }
21376     }
21377
21378     # ------------------------------------------------------------
21379     # end scanner interfaces
21380     # ------------------------------------------------------------
21381
21382     my %is_for_foreach;
21383     @_ = qw(for foreach);
21384     @is_for_foreach{@_} = (1) x scalar(@_);
21385
21386     my %is_my_our;
21387     @_ = qw(my our);
21388     @is_my_our{@_} = (1) x scalar(@_);
21389
21390     # These keywords may introduce blocks after parenthesized expressions,
21391     # in the form:
21392     # keyword ( .... ) { BLOCK }
21393     # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
21394     my %is_blocktype_with_paren;
21395     @_ = qw(if elsif unless while until for foreach switch case given when);
21396     @is_blocktype_with_paren{@_} = (1) x scalar(@_);
21397
21398     # ------------------------------------------------------------
21399     # begin hash of code for handling most token types
21400     # ------------------------------------------------------------
21401     my $tokenization_code = {
21402
21403         # no special code for these types yet, but syntax checks
21404         # could be added
21405
21406 ##      '!'   => undef,
21407 ##      '!='  => undef,
21408 ##      '!~'  => undef,
21409 ##      '%='  => undef,
21410 ##      '&&=' => undef,
21411 ##      '&='  => undef,
21412 ##      '+='  => undef,
21413 ##      '-='  => undef,
21414 ##      '..'  => undef,
21415 ##      '..'  => undef,
21416 ##      '...' => undef,
21417 ##      '.='  => undef,
21418 ##      '<<=' => undef,
21419 ##      '<='  => undef,
21420 ##      '<=>' => undef,
21421 ##      '<>'  => undef,
21422 ##      '='   => undef,
21423 ##      '=='  => undef,
21424 ##      '=~'  => undef,
21425 ##      '>='  => undef,
21426 ##      '>>'  => undef,
21427 ##      '>>=' => undef,
21428 ##      '\\'  => undef,
21429 ##      '^='  => undef,
21430 ##      '|='  => undef,
21431 ##      '||=' => undef,
21432 ##      '//=' => undef,
21433 ##      '~'   => undef,
21434 ##      '~~'  => undef,
21435 ##      '!~~'  => undef,
21436
21437         '>' => sub {
21438             error_if_expecting_TERM()
21439               if ( $expecting == TERM );
21440         },
21441         '|' => sub {
21442             error_if_expecting_TERM()
21443               if ( $expecting == TERM );
21444         },
21445         '$' => sub {
21446
21447             # start looking for a scalar
21448             error_if_expecting_OPERATOR("Scalar")
21449               if ( $expecting == OPERATOR );
21450             scan_identifier();
21451
21452             if ( $identifier eq '$^W' ) {
21453                 $tokenizer_self->{_saw_perl_dash_w} = 1;
21454             }
21455
21456             # Check for indentifier in indirect object slot
21457             # (vorboard.pl, sort.t).  Something like:
21458             #   /^(print|printf|sort|exec|system)$/
21459             if (
21460                 $is_indirect_object_taker{$last_nonblank_token}
21461
21462                 || ( ( $last_nonblank_token eq '(' )
21463                     && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
21464                 || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
21465               )
21466             {
21467                 $type = 'Z';
21468             }
21469         },
21470         '(' => sub {
21471
21472             ++$paren_depth;
21473             $paren_semicolon_count[$paren_depth] = 0;
21474             if ($want_paren) {
21475                 $container_type = $want_paren;
21476                 $want_paren     = "";
21477             }
21478             else {
21479                 $container_type = $last_nonblank_token;
21480
21481                 # We can check for a syntax error here of unexpected '(',
21482                 # but this is going to get messy...
21483                 if (
21484                     $expecting == OPERATOR
21485
21486                     # be sure this is not a method call of the form
21487                     # &method(...), $method->(..), &{method}(...),
21488                     # $ref[2](list) is ok & short for $ref[2]->(list)
21489                     # NOTE: at present, braces in something like &{ xxx }
21490                     # are not marked as a block, we might have a method call
21491                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
21492
21493                   )
21494                 {
21495
21496                     # ref: camel 3 p 703.
21497                     if ( $last_last_nonblank_token eq 'do' ) {
21498                         complain(
21499 "do SUBROUTINE is deprecated; consider & or -> notation\n"
21500                         );
21501                     }
21502                     else {
21503
21504                         # if this is an empty list, (), then it is not an
21505                         # error; for example, we might have a constant pi and
21506                         # invoke it with pi() or just pi;
21507                         my ( $next_nonblank_token, $i_next ) =
21508                           find_next_nonblank_token( $i, $rtokens,
21509                             $max_token_index );
21510                         if ( $next_nonblank_token ne ')' ) {
21511                             my $hint;
21512                             error_if_expecting_OPERATOR('(');
21513
21514                             if ( $last_nonblank_type eq 'C' ) {
21515                                 $hint =
21516                                   "$last_nonblank_token has a void prototype\n";
21517                             }
21518                             elsif ( $last_nonblank_type eq 'i' ) {
21519                                 if (   $i_tok > 0
21520                                     && $last_nonblank_token =~ /^\$/ )
21521                                 {
21522                                     $hint =
21523 "Do you mean '$last_nonblank_token->(' ?\n";
21524                                 }
21525                             }
21526                             if ($hint) {
21527                                 interrupt_logfile();
21528                                 warning($hint);
21529                                 resume_logfile();
21530                             }
21531                         } ## end if ( $next_nonblank_token...
21532                     } ## end else [ if ( $last_last_nonblank_token...
21533                 } ## end if ( $expecting == OPERATOR...
21534             }
21535             $paren_type[$paren_depth] = $container_type;
21536             ( $type_sequence, $indent_flag ) =
21537               increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
21538
21539             # propagate types down through nested parens
21540             # for example: the second paren in 'if ((' would be structural
21541             # since the first is.
21542
21543             if ( $last_nonblank_token eq '(' ) {
21544                 $type = $last_nonblank_type;
21545             }
21546
21547             #     We exclude parens as structural after a ',' because it
21548             #     causes subtle problems with continuation indentation for
21549             #     something like this, where the first 'or' will not get
21550             #     indented.
21551             #
21552             #         assert(
21553             #             __LINE__,
21554             #             ( not defined $check )
21555             #               or ref $check
21556             #               or $check eq "new"
21557             #               or $check eq "old",
21558             #         );
21559             #
21560             #     Likewise, we exclude parens where a statement can start
21561             #     because of problems with continuation indentation, like
21562             #     these:
21563             #
21564             #         ($firstline =~ /^#\!.*perl/)
21565             #         and (print $File::Find::name, "\n")
21566             #           and (return 1);
21567             #
21568             #         (ref($usage_fref) =~ /CODE/)
21569             #         ? &$usage_fref
21570             #           : (&blast_usage, &blast_params, &blast_general_params);
21571
21572             else {
21573                 $type = '{';
21574             }
21575
21576             if ( $last_nonblank_type eq ')' ) {
21577                 warning(
21578                     "Syntax error? found token '$last_nonblank_type' then '('\n"
21579                 );
21580             }
21581             $paren_structural_type[$paren_depth] = $type;
21582
21583         },
21584         ')' => sub {
21585             ( $type_sequence, $indent_flag ) =
21586               decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
21587
21588             if ( $paren_structural_type[$paren_depth] eq '{' ) {
21589                 $type = '}';
21590             }
21591
21592             $container_type = $paren_type[$paren_depth];
21593
21594             #    /^(for|foreach)$/
21595             if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
21596                 my $num_sc = $paren_semicolon_count[$paren_depth];
21597                 if ( $num_sc > 0 && $num_sc != 2 ) {
21598                     warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
21599                 }
21600             }
21601
21602             if ( $paren_depth > 0 ) { $paren_depth-- }
21603         },
21604         ',' => sub {
21605             if ( $last_nonblank_type eq ',' ) {
21606                 complain("Repeated ','s \n");
21607             }
21608
21609             # patch for operator_expected: note if we are in the list (use.t)
21610             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
21611 ##                FIXME: need to move this elsewhere, perhaps check after a '('
21612 ##                elsif ($last_nonblank_token eq '(') {
21613 ##                    warning("Leading ','s illegal in some versions of perl\n");
21614 ##                }
21615         },
21616         ';' => sub {
21617             $context        = UNKNOWN_CONTEXT;
21618             $statement_type = '';
21619
21620             #    /^(for|foreach)$/
21621             if ( $is_for_foreach{ $paren_type[$paren_depth] } )
21622             {    # mark ; in for loop
21623
21624                 # Be careful: we do not want a semicolon such as the
21625                 # following to be included:
21626                 #
21627                 #    for (sort {strcoll($a,$b);} keys %investments) {
21628
21629                 if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
21630                     && $square_bracket_depth ==
21631                     $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
21632                 {
21633
21634                     $type = 'f';
21635                     $paren_semicolon_count[$paren_depth]++;
21636                 }
21637             }
21638
21639         },
21640         '"' => sub {
21641             error_if_expecting_OPERATOR("String")
21642               if ( $expecting == OPERATOR );
21643             $in_quote                = 1;
21644             $type                    = 'Q';
21645             $allowed_quote_modifiers = "";
21646         },
21647         "'" => sub {
21648             error_if_expecting_OPERATOR("String")
21649               if ( $expecting == OPERATOR );
21650             $in_quote                = 1;
21651             $type                    = 'Q';
21652             $allowed_quote_modifiers = "";
21653         },
21654         '`' => sub {
21655             error_if_expecting_OPERATOR("String")
21656               if ( $expecting == OPERATOR );
21657             $in_quote                = 1;
21658             $type                    = 'Q';
21659             $allowed_quote_modifiers = "";
21660         },
21661         '/' => sub {
21662             my $is_pattern;
21663
21664             if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
21665                 my $msg;
21666                 ( $is_pattern, $msg ) =
21667                   guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
21668                     $max_token_index );
21669
21670                 if ($msg) {
21671                     write_diagnostics("DIVIDE:$msg\n");
21672                     write_logfile_entry($msg);
21673                 }
21674             }
21675             else { $is_pattern = ( $expecting == TERM ) }
21676
21677             if ($is_pattern) {
21678                 $in_quote                = 1;
21679                 $type                    = 'Q';
21680                 $allowed_quote_modifiers = '[cgimosx]';
21681             }
21682             else {    # not a pattern; check for a /= token
21683
21684                 if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
21685                     $i++;
21686                     $tok  = '/=';
21687                     $type = $tok;
21688                 }
21689
21690               #DEBUG - collecting info on what tokens follow a divide
21691               # for development of guessing algorithm
21692               #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
21693               #    #write_diagnostics( "DIVIDE? $input_line\n" );
21694               #}
21695             }
21696         },
21697         '{' => sub {
21698
21699             # if we just saw a ')', we will label this block with
21700             # its type.  We need to do this to allow sub
21701             # code_block_type to determine if this brace starts a
21702             # code block or anonymous hash.  (The type of a paren
21703             # pair is the preceding token, such as 'if', 'else',
21704             # etc).
21705             $container_type = "";
21706
21707             # ATTRS: for a '{' following an attribute list, reset
21708             # things to look like we just saw the sub name
21709             if ( $statement_type =~ /^sub/ ) {
21710                 $last_nonblank_token = $statement_type;
21711                 $last_nonblank_type  = 'i';
21712                 $statement_type      = "";
21713             }
21714
21715             # patch for SWITCH/CASE: hide these keywords from an immediately
21716             # following opening brace
21717             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
21718                 && $statement_type eq $last_nonblank_token )
21719             {
21720                 $last_nonblank_token = ";";
21721             }
21722
21723             elsif ( $last_nonblank_token eq ')' ) {
21724                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
21725
21726                 # defensive move in case of a nesting error (pbug.t)
21727                 # in which this ')' had no previous '('
21728                 # this nesting error will have been caught
21729                 if ( !defined($last_nonblank_token) ) {
21730                     $last_nonblank_token = 'if';
21731                 }
21732
21733                 # check for syntax error here;
21734                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
21735                     my $list = join( ' ', sort keys %is_blocktype_with_paren );
21736                     warning(
21737                         "syntax error at ') {', didn't see one of: $list\n");
21738                 }
21739             }
21740
21741             # patch for paren-less for/foreach glitch, part 2.
21742             # see note below under 'qw'
21743             elsif ($last_nonblank_token eq 'qw'
21744                 && $is_for_foreach{$want_paren} )
21745             {
21746                 $last_nonblank_token = $want_paren;
21747                 if ( $last_last_nonblank_token eq $want_paren ) {
21748                     warning(
21749 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
21750                     );
21751
21752                 }
21753                 $want_paren = "";
21754             }
21755
21756             # now identify which of the three possible types of
21757             # curly braces we have: hash index container, anonymous
21758             # hash reference, or code block.
21759
21760             # non-structural (hash index) curly brace pair
21761             # get marked 'L' and 'R'
21762             if ( is_non_structural_brace() ) {
21763                 $type = 'L';
21764
21765                 # patch for SWITCH/CASE:
21766                 # allow paren-less identifier after 'when'
21767                 # if the brace is preceded by a space
21768                 if (   $statement_type eq 'when'
21769                     && $last_nonblank_type      eq 'i'
21770                     && $last_last_nonblank_type eq 'k'
21771                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
21772                 {
21773                     $type       = '{';
21774                     $block_type = $statement_type;
21775                 }
21776             }
21777
21778             # code and anonymous hash have the same type, '{', but are
21779             # distinguished by 'block_type',
21780             # which will be blank for an anonymous hash
21781             else {
21782
21783                 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
21784                     $max_token_index );
21785
21786                 # patch to promote bareword type to function taking block
21787                 if (   $block_type
21788                     && $last_nonblank_type eq 'w'
21789                     && $last_nonblank_i >= 0 )
21790                 {
21791                     if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
21792                         $routput_token_type->[$last_nonblank_i] = 'G';
21793                     }
21794                 }
21795
21796                 # patch for SWITCH/CASE: if we find a stray opening block brace
21797                 # where we might accept a 'case' or 'when' block, then take it
21798                 if (   $statement_type eq 'case'
21799                     || $statement_type eq 'when' )
21800                 {
21801                     if ( !$block_type || $block_type eq '}' ) {
21802                         $block_type = $statement_type;
21803                     }
21804                 }
21805             }
21806             $brace_type[ ++$brace_depth ] = $block_type;
21807             $brace_package[$brace_depth] = $current_package;
21808             ( $type_sequence, $indent_flag ) =
21809               increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
21810             $brace_structural_type[$brace_depth] = $type;
21811             $brace_context[$brace_depth]         = $context;
21812             $brace_statement_type[$brace_depth]  = $statement_type;
21813         },
21814         '}' => sub {
21815             $block_type = $brace_type[$brace_depth];
21816             if ($block_type) { $statement_type = '' }
21817             if ( defined( $brace_package[$brace_depth] ) ) {
21818                 $current_package = $brace_package[$brace_depth];
21819             }
21820
21821             # can happen on brace error (caught elsewhere)
21822             else {
21823             }
21824             ( $type_sequence, $indent_flag ) =
21825               decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
21826
21827             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
21828                 $type = 'R';
21829             }
21830
21831             # propagate type information for 'do' and 'eval' blocks.
21832             # This is necessary to enable us to know if an operator
21833             # or term is expected next
21834             if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
21835                 $tok = $brace_type[$brace_depth];
21836             }
21837
21838             $context        = $brace_context[$brace_depth];
21839             $statement_type = $brace_statement_type[$brace_depth];
21840             if ( $brace_depth > 0 ) { $brace_depth--; }
21841         },
21842         '&' => sub {    # maybe sub call? start looking
21843
21844             # We have to check for sub call unless we are sure we
21845             # are expecting an operator.  This example from s2p
21846             # got mistaken as a q operator in an early version:
21847             #   print BODY &q(<<'EOT');
21848             if ( $expecting != OPERATOR ) {
21849                 scan_identifier();
21850             }
21851             else {
21852             }
21853         },
21854         '<' => sub {    # angle operator or less than?
21855
21856             if ( $expecting != OPERATOR ) {
21857                 ( $i, $type ) =
21858                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
21859                     $expecting, $max_token_index );
21860
21861             }
21862             else {
21863             }
21864         },
21865         '?' => sub {    # ?: conditional or starting pattern?
21866
21867             my $is_pattern;
21868
21869             if ( $expecting == UNKNOWN ) {
21870
21871                 my $msg;
21872                 ( $is_pattern, $msg ) =
21873                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
21874                     $max_token_index );
21875
21876                 if ($msg) { write_logfile_entry($msg) }
21877             }
21878             else { $is_pattern = ( $expecting == TERM ) }
21879
21880             if ($is_pattern) {
21881                 $in_quote                = 1;
21882                 $type                    = 'Q';
21883                 $allowed_quote_modifiers = '[cgimosx]';
21884             }
21885             else {
21886                 ( $type_sequence, $indent_flag ) =
21887                   increase_nesting_depth( QUESTION_COLON,
21888                     $$rtoken_map[$i_tok] );
21889             }
21890         },
21891         '*' => sub {    # typeglob, or multiply?
21892
21893             if ( $expecting == TERM ) {
21894                 scan_identifier();
21895             }
21896             else {
21897
21898                 if ( $$rtokens[ $i + 1 ] eq '=' ) {
21899                     $tok  = '*=';
21900                     $type = $tok;
21901                     $i++;
21902                 }
21903                 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
21904                     $tok  = '**';
21905                     $type = $tok;
21906                     $i++;
21907                     if ( $$rtokens[ $i + 1 ] eq '=' ) {
21908                         $tok  = '**=';
21909                         $type = $tok;
21910                         $i++;
21911                     }
21912                 }
21913             }
21914         },
21915         '.' => sub {    # what kind of . ?
21916
21917             if ( $expecting != OPERATOR ) {
21918                 scan_number();
21919                 if ( $type eq '.' ) {
21920                     error_if_expecting_TERM()
21921                       if ( $expecting == TERM );
21922                 }
21923             }
21924             else {
21925             }
21926         },
21927         ':' => sub {
21928
21929             # if this is the first nonblank character, call it a label
21930             # since perl seems to just swallow it
21931             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
21932                 $type = 'J';
21933             }
21934
21935             # ATTRS: check for a ':' which introduces an attribute list
21936             # (this might eventually get its own token type)
21937             elsif ( $statement_type =~ /^sub/ ) {
21938                 $type              = 'A';
21939                 $in_attribute_list = 1;
21940             }
21941
21942             # check for scalar attribute, such as
21943             # my $foo : shared = 1;
21944             elsif ($is_my_our{$statement_type}
21945                 && $current_depth[QUESTION_COLON] == 0 )
21946             {
21947                 $type              = 'A';
21948                 $in_attribute_list = 1;
21949             }
21950
21951             # otherwise, it should be part of a ?/: operator
21952             else {
21953                 ( $type_sequence, $indent_flag ) =
21954                   decrease_nesting_depth( QUESTION_COLON,
21955                     $$rtoken_map[$i_tok] );
21956                 if ( $last_nonblank_token eq '?' ) {
21957                     warning("Syntax error near ? :\n");
21958                 }
21959             }
21960         },
21961         '+' => sub {    # what kind of plus?
21962
21963             if ( $expecting == TERM ) {
21964                 my $number = scan_number();
21965
21966                 # unary plus is safest assumption if not a number
21967                 if ( !defined($number) ) { $type = 'p'; }
21968             }
21969             elsif ( $expecting == OPERATOR ) {
21970             }
21971             else {
21972                 if ( $next_type eq 'w' ) { $type = 'p' }
21973             }
21974         },
21975         '@' => sub {
21976
21977             error_if_expecting_OPERATOR("Array")
21978               if ( $expecting == OPERATOR );
21979             scan_identifier();
21980         },
21981         '%' => sub {    # hash or modulo?
21982
21983             # first guess is hash if no following blank
21984             if ( $expecting == UNKNOWN ) {
21985                 if ( $next_type ne 'b' ) { $expecting = TERM }
21986             }
21987             if ( $expecting == TERM ) {
21988                 scan_identifier();
21989             }
21990         },
21991         '[' => sub {
21992             $square_bracket_type[ ++$square_bracket_depth ] =
21993               $last_nonblank_token;
21994             ( $type_sequence, $indent_flag ) =
21995               increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
21996
21997             # It may seem odd, but structural square brackets have
21998             # type '{' and '}'.  This simplifies the indentation logic.
21999             if ( !is_non_structural_brace() ) {
22000                 $type = '{';
22001             }
22002             $square_bracket_structural_type[$square_bracket_depth] = $type;
22003         },
22004         ']' => sub {
22005             ( $type_sequence, $indent_flag ) =
22006               decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
22007
22008             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
22009             {
22010                 $type = '}';
22011             }
22012             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
22013         },
22014         '-' => sub {    # what kind of minus?
22015
22016             if ( ( $expecting != OPERATOR )
22017                 && $is_file_test_operator{$next_tok} )
22018             {
22019                 $i++;
22020                 $tok .= $next_tok;
22021                 $type = 'F';
22022             }
22023             elsif ( $expecting == TERM ) {
22024                 my $number = scan_number();
22025
22026                 # maybe part of bareword token? unary is safest
22027                 if ( !defined($number) ) { $type = 'm'; }
22028
22029             }
22030             elsif ( $expecting == OPERATOR ) {
22031             }
22032             else {
22033
22034                 if ( $next_type eq 'w' ) {
22035                     $type = 'm';
22036                 }
22037             }
22038         },
22039
22040         '^' => sub {
22041
22042             # check for special variables like ${^WARNING_BITS}
22043             if ( $expecting == TERM ) {
22044
22045                 # FIXME: this should work but will not catch errors
22046                 # because we also have to be sure that previous token is
22047                 # a type character ($,@,%).
22048                 if ( $last_nonblank_token eq '{'
22049                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
22050                 {
22051
22052                     if ( $next_tok eq 'W' ) {
22053                         $tokenizer_self->{_saw_perl_dash_w} = 1;
22054                     }
22055                     $tok  = $tok . $next_tok;
22056                     $i    = $i + 1;
22057                     $type = 'w';
22058                 }
22059
22060                 else {
22061                     unless ( error_if_expecting_TERM() ) {
22062
22063                         # Something like this is valid but strange:
22064                         # undef ^I;
22065                         complain("The '^' seems unusual here\n");
22066                     }
22067                 }
22068             }
22069         },
22070
22071         '::' => sub {    # probably a sub call
22072             scan_bare_identifier();
22073         },
22074         '<<' => sub {    # maybe a here-doc?
22075             return
22076               unless ( $i < $max_token_index )
22077               ;          # here-doc not possible if end of line
22078
22079             if ( $expecting != OPERATOR ) {
22080                 my ( $found_target, $here_doc_target, $here_quote_character,
22081                     $saw_error );
22082                 (
22083                     $found_target, $here_doc_target, $here_quote_character, $i,
22084                     $saw_error
22085                   )
22086                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
22087                     $max_token_index );
22088
22089                 if ($found_target) {
22090                     push @{$rhere_target_list},
22091                       [ $here_doc_target, $here_quote_character ];
22092                     $type = 'h';
22093                     if ( length($here_doc_target) > 80 ) {
22094                         my $truncated = substr( $here_doc_target, 0, 80 );
22095                         complain("Long here-target: '$truncated' ...\n");
22096                     }
22097                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
22098                         complain(
22099                             "Unconventional here-target: '$here_doc_target'\n"
22100                         );
22101                     }
22102                 }
22103                 elsif ( $expecting == TERM ) {
22104                     unless ($saw_error) {
22105
22106                         # shouldn't happen..
22107                         warning("Program bug; didn't find here doc target\n");
22108                         report_definite_bug();
22109                     }
22110                 }
22111             }
22112             else {
22113             }
22114         },
22115         '->' => sub {
22116
22117             # if -> points to a bare word, we must scan for an identifier,
22118             # otherwise something like ->y would look like the y operator
22119             scan_identifier();
22120         },
22121
22122         # type = 'pp' for pre-increment, '++' for post-increment
22123         '++' => sub {
22124             if ( $expecting == TERM ) { $type = 'pp' }
22125             elsif ( $expecting == UNKNOWN ) {
22126                 my ( $next_nonblank_token, $i_next ) =
22127                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
22128                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
22129             }
22130         },
22131
22132         '=>' => sub {
22133             if ( $last_nonblank_type eq $tok ) {
22134                 complain("Repeated '=>'s \n");
22135             }
22136
22137             # patch for operator_expected: note if we are in the list (use.t)
22138             # TODO: make version numbers a new token type
22139             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
22140         },
22141
22142         # type = 'mm' for pre-decrement, '--' for post-decrement
22143         '--' => sub {
22144
22145             if ( $expecting == TERM ) { $type = 'mm' }
22146             elsif ( $expecting == UNKNOWN ) {
22147                 my ( $next_nonblank_token, $i_next ) =
22148                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
22149                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
22150             }
22151         },
22152
22153         '&&' => sub {
22154             error_if_expecting_TERM()
22155               if ( $expecting == TERM );
22156         },
22157
22158         '||' => sub {
22159             error_if_expecting_TERM()
22160               if ( $expecting == TERM );
22161         },
22162
22163         '//' => sub {
22164             error_if_expecting_TERM()
22165               if ( $expecting == TERM );
22166         },
22167     };
22168
22169     # ------------------------------------------------------------
22170     # end hash of code for handling individual token types
22171     # ------------------------------------------------------------
22172
22173     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
22174
22175     # These block types terminate statements and do not need a trailing
22176     # semicolon
22177     # patched for SWITCH/CASE:
22178     my %is_zero_continuation_block_type;
22179     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
22180       if elsif else unless while until for foreach switch case given when);
22181     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
22182
22183     my %is_not_zero_continuation_block_type;
22184     @_ = qw(sort grep map do eval);
22185     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
22186
22187     my %is_logical_container;
22188     @_ = qw(if elsif unless while and or err not && !  || for foreach);
22189     @is_logical_container{@_} = (1) x scalar(@_);
22190
22191     my %is_binary_type;
22192     @_ = qw(|| &&);
22193     @is_binary_type{@_} = (1) x scalar(@_);
22194
22195     my %is_binary_keyword;
22196     @_ = qw(and or err eq ne cmp);
22197     @is_binary_keyword{@_} = (1) x scalar(@_);
22198
22199     # 'L' is token for opening { at hash key
22200     my %is_opening_type;
22201     @_ = qw" L { ( [ ";
22202     @is_opening_type{@_} = (1) x scalar(@_);
22203
22204     # 'R' is token for closing } at hash key
22205     my %is_closing_type;
22206     @_ = qw" R } ) ] ";
22207     @is_closing_type{@_} = (1) x scalar(@_);
22208
22209     my %is_redo_last_next_goto;
22210     @_ = qw(redo last next goto);
22211     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
22212
22213     my %is_use_require;
22214     @_ = qw(use require);
22215     @is_use_require{@_} = (1) x scalar(@_);
22216
22217     my %is_sub_package;
22218     @_ = qw(sub package);
22219     @is_sub_package{@_} = (1) x scalar(@_);
22220
22221     # This hash holds the hash key in $tokenizer_self for these keywords:
22222     my %is_format_END_DATA = (
22223         'format'   => '_in_format',
22224         '__END__'  => '_in_end',
22225         '__DATA__' => '_in_data',
22226     );
22227
22228     # ref: camel 3 p 147,
22229     # but perl may accept undocumented flags
22230     my %quote_modifiers = (
22231         's'  => '[cegimosx]',
22232         'y'  => '[cds]',
22233         'tr' => '[cds]',
22234         'm'  => '[cgimosx]',
22235         'qr' => '[imosx]',
22236         'q'  => "",
22237         'qq' => "",
22238         'qw' => "",
22239         'qx' => "",
22240     );
22241
22242     # table showing how many quoted things to look for after quote operator..
22243     # s, y, tr have 2 (pattern and replacement)
22244     # others have 1 (pattern only)
22245     my %quote_items = (
22246         's'  => 2,
22247         'y'  => 2,
22248         'tr' => 2,
22249         'm'  => 1,
22250         'qr' => 1,
22251         'q'  => 1,
22252         'qq' => 1,
22253         'qw' => 1,
22254         'qx' => 1,
22255     );
22256
22257     sub tokenize_this_line {
22258
22259   # This routine breaks a line of perl code into tokens which are of use in
22260   # indentation and reformatting.  One of my goals has been to define tokens
22261   # such that a newline may be inserted between any pair of tokens without
22262   # changing or invalidating the program. This version comes close to this,
22263   # although there are necessarily a few exceptions which must be caught by
22264   # the formatter.  Many of these involve the treatment of bare words.
22265   #
22266   # The tokens and their types are returned in arrays.  See previous
22267   # routine for their names.
22268   #
22269   # See also the array "valid_token_types" in the BEGIN section for an
22270   # up-to-date list.
22271   #
22272   # To simplify things, token types are either a single character, or they
22273   # are identical to the tokens themselves.
22274   #
22275   # As a debugging aid, the -D flag creates a file containing a side-by-side
22276   # comparison of the input string and its tokenization for each line of a file.
22277   # This is an invaluable debugging aid.
22278   #
22279   # In addition to tokens, and some associated quantities, the tokenizer
22280   # also returns flags indication any special line types.  These include
22281   # quotes, here_docs, formats.
22282   #
22283   # -----------------------------------------------------------------------
22284   #
22285   # How to add NEW_TOKENS:
22286   #
22287   # New token types will undoubtedly be needed in the future both to keep up
22288   # with changes in perl and to help adapt the tokenizer to other applications.
22289   #
22290   # Here are some notes on the minimal steps.  I wrote these notes while
22291   # adding the 'v' token type for v-strings, which are things like version
22292   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
22293   # can use your editor to search for the string "NEW_TOKENS" to find the
22294   # appropriate sections to change):
22295   #
22296   # *. Try to talk somebody else into doing it!  If not, ..
22297   #
22298   # *. Make a backup of your current version in case things don't work out!
22299   #
22300   # *. Think of a new, unused character for the token type, and add to
22301   # the array @valid_token_types in the BEGIN section of this package.
22302   # For example, I used 'v' for v-strings.
22303   #
22304   # *. Implement coding to recognize the $type of the token in this routine.
22305   # This is the hardest part, and is best done by immitating or modifying
22306   # some of the existing coding.  For example, to recognize v-strings, I
22307   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
22308   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
22309   #
22310   # *. Update sub operator_expected.  This update is critically important but
22311   # the coding is trivial.  Look at the comments in that routine for help.
22312   # For v-strings, which should behave like numbers, I just added 'v' to the
22313   # regex used to handle numbers and strings (types 'n' and 'Q').
22314   #
22315   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
22316   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
22317   # skip this step and take the default at first, then adjust later to get
22318   # desired results.  For adding type 'v', I looked at sub bond_strength and
22319   # saw that number type 'n' was using default strengths, so I didn't do
22320   # anything.  I may tune it up someday if I don't like the way line
22321   # breaks with v-strings look.
22322   #
22323   # *. Implement a 'whitespace' rule in sub set_white_space_flag in
22324   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
22325   # and saw that type 'n' used spaces on both sides, so I just added 'v'
22326   # to the array @spaces_both_sides.
22327   #
22328   # *. Update HtmlWriter package so that users can colorize the token as
22329   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
22330   # that package.  For v-strings, I initially chose to use a default color
22331   # equal to the default for numbers, but it might be nice to change that
22332   # eventually.
22333   #
22334   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
22335   #
22336   # *. Run lots and lots of debug tests.  Start with special files designed
22337   # to test the new token type.  Run with the -D flag to create a .DEBUG
22338   # file which shows the tokenization.  When these work ok, test as many old
22339   # scripts as possible.  Start with all of the '.t' files in the 'test'
22340   # directory of the distribution file.  Compare .tdy output with previous
22341   # version and updated version to see the differences.  Then include as
22342   # many more files as possible. My own technique has been to collect a huge
22343   # number of perl scripts (thousands!) into one directory and run perltidy
22344   # *, then run diff between the output of the previous version and the
22345   # current version.
22346   #
22347   # *. For another example, search for the smartmatch operator '~~'
22348   # with your editor to see where updates were made for it.
22349   #
22350   # -----------------------------------------------------------------------
22351
22352         my $line_of_tokens = shift;
22353         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
22354
22355         # patch while coding change is underway
22356         # make callers private data to allow access
22357         # $tokenizer_self = $caller_tokenizer_self;
22358
22359         # extract line number for use in error messages
22360         $input_line_number = $line_of_tokens->{_line_number};
22361
22362         # reinitialize for multi-line quote
22363         $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
22364
22365         # check for pod documentation
22366         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
22367
22368             # must not be in multi-line quote
22369             # and must not be in an eqn
22370             if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
22371             {
22372                 $tokenizer_self->{_in_pod} = 1;
22373                 return;
22374             }
22375         }
22376
22377         $input_line = $untrimmed_input_line;
22378
22379         chomp $input_line;
22380
22381         # trim start of this line unless we are continuing a quoted line
22382         # do not trim end because we might end in a quote (test: deken4.pl)
22383         # Perl::Tidy::Formatter will delete needless trailing blanks
22384         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
22385             $input_line =~ s/^\s*//;    # trim left end
22386         }
22387
22388         # update the copy of the line for use in error messages
22389         # This must be exactly what we give the pre_tokenizer
22390         $tokenizer_self->{_line_text} = $input_line;
22391
22392         # re-initialize for the main loop
22393         $routput_token_list     = [];    # stack of output token indexes
22394         $routput_token_type     = [];    # token types
22395         $routput_block_type     = [];    # types of code block
22396         $routput_container_type = [];    # paren types, such as if, elsif, ..
22397         $routput_type_sequence  = [];    # nesting sequential number
22398
22399         $rhere_target_list = [];
22400
22401         $tok             = $last_nonblank_token;
22402         $type            = $last_nonblank_type;
22403         $prototype       = $last_nonblank_prototype;
22404         $last_nonblank_i = -1;
22405         $block_type      = $last_nonblank_block_type;
22406         $container_type  = $last_nonblank_container_type;
22407         $type_sequence   = $last_nonblank_type_sequence;
22408         $indent_flag     = 0;
22409         $peeked_ahead    = 0;
22410
22411         # tokenization is done in two stages..
22412         # stage 1 is a very simple pre-tokenization
22413         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
22414
22415         # a little optimization for a full-line comment
22416         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
22417             $max_tokens_wanted = 1    # no use tokenizing a comment
22418         }
22419
22420         # start by breaking the line into pre-tokens
22421         ( $rtokens, $rtoken_map, $rtoken_type ) =
22422           pre_tokenize( $input_line, $max_tokens_wanted );
22423
22424         $max_token_index = scalar(@$rtokens) - 1;
22425         push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
22426         push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
22427         push( @$rtoken_type, 'b', 'b', 'b' );
22428
22429         # initialize for main loop
22430         for $i ( 0 .. $max_token_index + 3 ) {
22431             $routput_token_type->[$i]     = "";
22432             $routput_block_type->[$i]     = "";
22433             $routput_container_type->[$i] = "";
22434             $routput_type_sequence->[$i]  = "";
22435             $routput_indent_flag->[$i]    = 0;
22436         }
22437         $i     = -1;
22438         $i_tok = -1;
22439
22440         # ------------------------------------------------------------
22441         # begin main tokenization loop
22442         # ------------------------------------------------------------
22443
22444         # we are looking at each pre-token of one line and combining them
22445         # into tokens
22446         while ( ++$i <= $max_token_index ) {
22447
22448             if ($in_quote) {    # continue looking for end of a quote
22449                 $type = $quote_type;
22450
22451                 unless ( @{$routput_token_list} )
22452                 {               # initialize if continuation line
22453                     push( @{$routput_token_list}, $i );
22454                     $routput_token_type->[$i] = $type;
22455
22456                 }
22457                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
22458
22459                 # scan for the end of the quote or pattern
22460                 (
22461                     $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
22462                     $quoted_string_1, $quoted_string_2
22463                   )
22464                   = do_quote(
22465                     $i,               $in_quote,    $quote_character,
22466                     $quote_pos,       $quote_depth, $quoted_string_1,
22467                     $quoted_string_2, $rtokens,     $rtoken_map,
22468                     $max_token_index
22469                   );
22470
22471                 # all done if we didn't find it
22472                 last if ($in_quote);
22473
22474                 # save pattern and replacement text for rescanning
22475                 my $qs1 = $quoted_string_1;
22476                 my $qs2 = $quoted_string_2;
22477
22478                 # re-initialize for next search
22479                 $quote_character = '';
22480                 $quote_pos       = 0;
22481                 $quote_type      = 'Q';
22482                 $quoted_string_1 = "";
22483                 $quoted_string_2 = "";
22484                 last if ( ++$i > $max_token_index );
22485
22486                 # look for any modifiers
22487                 if ($allowed_quote_modifiers) {
22488
22489                     # check for exact quote modifiers
22490                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
22491                         my $str = $$rtokens[$i];
22492                         my $saw_modifier_e;
22493                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
22494                             my $pos = pos($str);
22495                             my $char = substr( $str, $pos - 1, 1 );
22496                             $saw_modifier_e ||= ( $char eq 'e' );
22497                         }
22498
22499                         # For an 'e' quote modifier we must scan the replacement
22500                         # text for here-doc targets.
22501                         if ($saw_modifier_e) {
22502
22503                             my $rht = scan_replacement_text($qs1);
22504
22505                             # Change type from 'Q' to 'h' for quotes with
22506                             # here-doc targets so that the formatter (see sub
22507                             # print_line_of_tokens) will not make any line
22508                             # breaks after this point.
22509                             if ($rht) {
22510                                 push @{$rhere_target_list}, @{$rht};
22511                                 $type = 'h';
22512                                 if ( $i_tok < 0 ) {
22513                                     my $ilast = $routput_token_list->[-1];
22514                                     $routput_token_type->[$ilast] = $type;
22515                                 }
22516                             }
22517                         }
22518
22519                         if ( defined( pos($str) ) ) {
22520
22521                             # matched
22522                             if ( pos($str) == length($str) ) {
22523                                 last if ( ++$i > $max_token_index );
22524                             }
22525
22526                             # Looks like a joined quote modifier
22527                             # and keyword, maybe something like
22528                             # s/xxx/yyy/gefor @k=...
22529                             # Example is "galgen.pl".  Would have to split
22530                             # the word and insert a new token in the
22531                             # pre-token list.  This is so rare that I haven't
22532                             # done it.  Will just issue a warning citation.
22533
22534                             # This error might also be triggered if my quote
22535                             # modifier characters are incomplete
22536                             else {
22537                                 warning(<<EOM);
22538
22539 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
22540 Please put a space between quote modifiers and trailing keywords.
22541 EOM
22542
22543                            # print "token $$rtokens[$i]\n";
22544                            # my $num = length($str) - pos($str);
22545                            # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
22546                            # print "continuing with new token $$rtokens[$i]\n";
22547
22548                                 # skipping past this token does least damage
22549                                 last if ( ++$i > $max_token_index );
22550                             }
22551                         }
22552                         else {
22553
22554                             # example file: rokicki4.pl
22555                             # This error might also be triggered if my quote
22556                             # modifier characters are incomplete
22557                             write_logfile_entry(
22558 "Note: found word $str at quote modifier location\n"
22559                             );
22560                         }
22561                     }
22562
22563                     # re-initialize
22564                     $allowed_quote_modifiers = "";
22565                 }
22566             }
22567
22568             unless ( $tok =~ /^\s*$/ ) {
22569
22570                 # try to catch some common errors
22571                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
22572
22573                     if ( $last_nonblank_token eq 'eq' ) {
22574                         complain("Should 'eq' be '==' here ?\n");
22575                     }
22576                     elsif ( $last_nonblank_token eq 'ne' ) {
22577                         complain("Should 'ne' be '!=' here ?\n");
22578                     }
22579                 }
22580
22581                 $last_last_nonblank_token      = $last_nonblank_token;
22582                 $last_last_nonblank_type       = $last_nonblank_type;
22583                 $last_last_nonblank_block_type = $last_nonblank_block_type;
22584                 $last_last_nonblank_container_type =
22585                   $last_nonblank_container_type;
22586                 $last_last_nonblank_type_sequence =
22587                   $last_nonblank_type_sequence;
22588                 $last_nonblank_token          = $tok;
22589                 $last_nonblank_type           = $type;
22590                 $last_nonblank_prototype      = $prototype;
22591                 $last_nonblank_block_type     = $block_type;
22592                 $last_nonblank_container_type = $container_type;
22593                 $last_nonblank_type_sequence  = $type_sequence;
22594                 $last_nonblank_i              = $i_tok;
22595             }
22596
22597             # store previous token type
22598             if ( $i_tok >= 0 ) {
22599                 $routput_token_type->[$i_tok]     = $type;
22600                 $routput_block_type->[$i_tok]     = $block_type;
22601                 $routput_container_type->[$i_tok] = $container_type;
22602                 $routput_type_sequence->[$i_tok]  = $type_sequence;
22603                 $routput_indent_flag->[$i_tok]    = $indent_flag;
22604             }
22605             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
22606             my $pre_type = $$rtoken_type[$i];    # and type
22607             $tok  = $pre_tok;
22608             $type = $pre_type;                   # to be modified as necessary
22609             $block_type = "";    # blank for all tokens except code block braces
22610             $container_type = "";    # blank for all tokens except some parens
22611             $type_sequence  = "";    # blank for all tokens except ?/:
22612             $indent_flag    = 0;
22613             $prototype = "";    # blank for all tokens except user defined subs
22614             $i_tok     = $i;
22615
22616             # this pre-token will start an output token
22617             push( @{$routput_token_list}, $i_tok );
22618
22619             # continue gathering identifier if necessary
22620             # but do not start on blanks and comments
22621             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
22622
22623                 if ( $id_scan_state =~ /^(sub|package)/ ) {
22624                     scan_id();
22625                 }
22626                 else {
22627                     scan_identifier();
22628                 }
22629
22630                 last if ($id_scan_state);
22631                 next if ( ( $i > 0 ) || $type );
22632
22633                 # didn't find any token; start over
22634                 $type = $pre_type;
22635                 $tok  = $pre_tok;
22636             }
22637
22638             # handle whitespace tokens..
22639             next if ( $type eq 'b' );
22640             my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
22641             my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
22642
22643             # Build larger tokens where possible, since we are not in a quote.
22644             #
22645             # First try to assemble digraphs.  The following tokens are
22646             # excluded and handled specially:
22647             # '/=' is excluded because the / might start a pattern.
22648             # 'x=' is excluded since it might be $x=, with $ on previous line
22649             # '**' and *= might be typeglobs of punctuation variables
22650             # I have allowed tokens starting with <, such as <=,
22651             # because I don't think these could be valid angle operators.
22652             # test file: storrs4.pl
22653             my $test_tok   = $tok . $$rtokens[ $i + 1 ];
22654             my $combine_ok = $is_digraph{$test_tok};
22655
22656             # check for special cases which cannot be combined
22657             if ($combine_ok) {
22658
22659                 # '//' must be defined_or operator if an operator is expected.
22660                 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
22661                 # could be migrated here for clarity
22662                 if ( $test_tok eq '//' ) {
22663                     my $next_type = $$rtokens[ $i + 1 ];
22664                     my $expecting =
22665                       operator_expected( $prev_type, $tok, $next_type );
22666                     $combine_ok = 0 unless ( $expecting == OPERATOR );
22667                 }
22668             }
22669
22670             if (
22671                 $combine_ok
22672                 && ( $test_tok ne '/=' )    # might be pattern
22673                 && ( $test_tok ne 'x=' )    # might be $x
22674                 && ( $test_tok ne '**' )    # typeglob?
22675                 && ( $test_tok ne '*=' )    # typeglob?
22676               )
22677             {
22678                 $tok = $test_tok;
22679                 $i++;
22680
22681                 # Now try to assemble trigraphs.  Note that all possible
22682                 # perl trigraphs can be constructed by appending a character
22683                 # to a digraph.
22684                 $test_tok = $tok . $$rtokens[ $i + 1 ];
22685
22686                 if ( $is_trigraph{$test_tok} ) {
22687                     $tok = $test_tok;
22688                     $i++;
22689                 }
22690             }
22691
22692             $type      = $tok;
22693             $next_tok  = $$rtokens[ $i + 1 ];
22694             $next_type = $$rtoken_type[ $i + 1 ];
22695
22696             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
22697                 local $" = ')(';
22698                 my @debug_list = (
22699                     $last_nonblank_token,      $tok,
22700                     $next_tok,                 $brace_depth,
22701                     $brace_type[$brace_depth], $paren_depth,
22702                     $paren_type[$paren_depth]
22703                 );
22704                 print "TOKENIZE:(@debug_list)\n";
22705             };
22706
22707             # turn off attribute list on first non-blank, non-bareword
22708             if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
22709
22710             ###############################################################
22711             # We have the next token, $tok.
22712             # Now we have to examine this token and decide what it is
22713             # and define its $type
22714             #
22715             # section 1: bare words
22716             ###############################################################
22717
22718             if ( $pre_type eq 'w' ) {
22719                 $expecting = operator_expected( $prev_type, $tok, $next_type );
22720                 my ( $next_nonblank_token, $i_next ) =
22721                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
22722
22723                 # ATTRS: handle sub and variable attributes
22724                 if ($in_attribute_list) {
22725
22726                     # treat bare word followed by open paren like qw(
22727                     if ( $next_nonblank_token eq '(' ) {
22728                         $in_quote                = $quote_items{'q'};
22729                         $allowed_quote_modifiers = $quote_modifiers{'q'};
22730                         $type                    = 'q';
22731                         $quote_type              = 'q';
22732                         next;
22733                     }
22734
22735                     # handle bareword not followed by open paren
22736                     else {
22737                         $type = 'w';
22738                         next;
22739                     }
22740                 }
22741
22742                 # quote a word followed by => operator
22743                 if ( $next_nonblank_token eq '=' ) {
22744
22745                     if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
22746                         if ( $is_constant{$current_package}{$tok} ) {
22747                             $type = 'C';
22748                         }
22749                         elsif ( $is_user_function{$current_package}{$tok} ) {
22750                             $type = 'U';
22751                             $prototype =
22752                               $user_function_prototype{$current_package}{$tok};
22753                         }
22754                         elsif ( $tok =~ /^v\d+$/ ) {
22755                             $type = 'v';
22756                             report_v_string($tok);
22757                         }
22758                         else { $type = 'w' }
22759
22760                         next;
22761                     }
22762                 }
22763
22764                 # quote a bare word within braces..like xxx->{s}; note that we
22765                 # must be sure this is not a structural brace, to avoid
22766                 # mistaking {s} in the following for a quoted bare word:
22767                 #     for(@[){s}bla}BLA}
22768                 if (   ( $last_nonblank_type eq 'L' )
22769                     && ( $next_nonblank_token eq '}' ) )
22770                 {
22771                     $type = 'w';
22772                     next;
22773                 }
22774
22775                 # a bare word immediately followed by :: is not a keyword;
22776                 # use $tok_kw when testing for keywords to avoid a mistake
22777                 my $tok_kw = $tok;
22778                 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
22779                 {
22780                     $tok_kw .= '::';
22781                 }
22782
22783                 # handle operator x (now we know it isn't $x=)
22784                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
22785                     if ( $tok eq 'x' ) {
22786
22787                         if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
22788                             $tok  = 'x=';
22789                             $type = $tok;
22790                             $i++;
22791                         }
22792                         else {
22793                             $type = 'x';
22794                         }
22795                     }
22796
22797                     # FIXME: Patch: mark something like x4 as an integer for now
22798                     # It gets fixed downstream.  This is easier than
22799                     # splitting the pretoken.
22800                     else {
22801                         $type = 'n';
22802                     }
22803                 }
22804
22805                 elsif ( ( $tok eq 'strict' )
22806                     and ( $last_nonblank_token eq 'use' ) )
22807                 {
22808                     $tokenizer_self->{_saw_use_strict} = 1;
22809                     scan_bare_identifier();
22810                 }
22811
22812                 elsif ( ( $tok eq 'warnings' )
22813                     and ( $last_nonblank_token eq 'use' ) )
22814                 {
22815                     $tokenizer_self->{_saw_perl_dash_w} = 1;
22816
22817                     # scan as identifier, so that we pick up something like:
22818                     # use warnings::register
22819                     scan_bare_identifier();
22820                 }
22821
22822                 elsif (
22823                        $tok eq 'AutoLoader'
22824                     && $tokenizer_self->{_look_for_autoloader}
22825                     && (
22826                         $last_nonblank_token eq 'use'
22827
22828                         # these regexes are from AutoSplit.pm, which we want
22829                         # to mimic
22830                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
22831                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
22832                     )
22833                   )
22834                 {
22835                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
22836                     $tokenizer_self->{_saw_autoloader}      = 1;
22837                     $tokenizer_self->{_look_for_autoloader} = 0;
22838                     scan_bare_identifier();
22839                 }
22840
22841                 elsif (
22842                        $tok eq 'SelfLoader'
22843                     && $tokenizer_self->{_look_for_selfloader}
22844                     && (   $last_nonblank_token eq 'use'
22845                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
22846                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
22847                   )
22848                 {
22849                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
22850                     $tokenizer_self->{_saw_selfloader}      = 1;
22851                     $tokenizer_self->{_look_for_selfloader} = 0;
22852                     scan_bare_identifier();
22853                 }
22854
22855                 elsif ( ( $tok eq 'constant' )
22856                     and ( $last_nonblank_token eq 'use' ) )
22857                 {
22858                     scan_bare_identifier();
22859                     my ( $next_nonblank_token, $i_next ) =
22860                       find_next_nonblank_token( $i, $rtokens,
22861                         $max_token_index );
22862
22863                     if ($next_nonblank_token) {
22864
22865                         if ( $is_keyword{$next_nonblank_token} ) {
22866                             warning(
22867 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
22868                             );
22869                         }
22870
22871                         # FIXME: could check for error in which next token is
22872                         # not a word (number, punctuation, ..)
22873                         else {
22874                             $is_constant{$current_package}
22875                               {$next_nonblank_token} = 1;
22876                         }
22877                     }
22878                 }
22879
22880                 # various quote operators
22881                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
22882                     if ( $expecting == OPERATOR ) {
22883
22884                         # patch for paren-less for/foreach glitch, part 1
22885                         # perl will accept this construct as valid:
22886                         #
22887                         #    foreach my $key qw\Uno Due Tres Quadro\ {
22888                         #        print "Set $key\n";
22889                         #    }
22890                         unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
22891                         {
22892                             error_if_expecting_OPERATOR();
22893                         }
22894                     }
22895                     $in_quote                = $quote_items{$tok};
22896                     $allowed_quote_modifiers = $quote_modifiers{$tok};
22897
22898                    # All quote types are 'Q' except possibly qw quotes.
22899                    # qw quotes are special in that they may generally be trimmed
22900                    # of leading and trailing whitespace.  So they are given a
22901                    # separate type, 'q', unless requested otherwise.
22902                     $type =
22903                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
22904                       ? 'q'
22905                       : 'Q';
22906                     $quote_type = $type;
22907                 }
22908
22909                 # check for a statement label
22910                 elsif (
22911                        ( $next_nonblank_token eq ':' )
22912                     && ( $$rtokens[ $i_next + 1 ] ne ':' )
22913                     && ( $i_next <= $max_token_index )    # colon on same line
22914                     && label_ok()
22915                   )
22916                 {
22917                     if ( $tok !~ /A-Z/ ) {
22918                         push @{ $tokenizer_self->{_rlower_case_labels_at} },
22919                           $input_line_number;
22920                     }
22921                     $type = 'J';
22922                     $tok .= ':';
22923                     $i = $i_next;
22924                     next;
22925                 }
22926
22927                 #      'sub' || 'package'
22928                 elsif ( $is_sub_package{$tok_kw} ) {
22929                     error_if_expecting_OPERATOR()
22930                       if ( $expecting == OPERATOR );
22931                     scan_id();
22932                 }
22933
22934                 # Note on token types for format, __DATA__, __END__:
22935                 # It simplifies things to give these type ';', so that when we
22936                 # start rescanning we will be expecting a token of type TERM.
22937                 # We will switch to type 'k' before outputting the tokens.
22938                 elsif ( $is_format_END_DATA{$tok_kw} ) {
22939                     $type = ';';    # make tokenizer look for TERM next
22940                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
22941                     last;
22942                 }
22943
22944                 elsif ( $is_keyword{$tok_kw} ) {
22945                     $type = 'k';
22946
22947                     # Since for and foreach may not be followed immediately
22948                     # by an opening paren, we have to remember which keyword
22949                     # is associated with the next '('
22950                     if ( $is_for_foreach{$tok} ) {
22951                         if ( new_statement_ok() ) {
22952                             $want_paren = $tok;
22953                         }
22954                     }
22955
22956                     # recognize 'use' statements, which are special
22957                     elsif ( $is_use_require{$tok} ) {
22958                         $statement_type = $tok;
22959                         error_if_expecting_OPERATOR()
22960                           if ( $expecting == OPERATOR );
22961                     }
22962
22963                     # remember my and our to check for trailing ": shared"
22964                     elsif ( $is_my_our{$tok} ) {
22965                         $statement_type = $tok;
22966                     }
22967
22968                     # Check for misplaced 'elsif' and 'else', but allow isolated
22969                     # else or elsif blocks to be formatted.  This is indicated
22970                     # by a last noblank token of ';'
22971                     elsif ( $tok eq 'elsif' ) {
22972                         if (   $last_nonblank_token ne ';'
22973                             && $last_nonblank_block_type !~
22974                             /^(if|elsif|unless)$/ )
22975                         {
22976                             warning(
22977 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
22978                             );
22979                         }
22980                     }
22981                     elsif ( $tok eq 'else' ) {
22982
22983                         # patched for SWITCH/CASE
22984                         if (   $last_nonblank_token ne ';'
22985                             && $last_nonblank_block_type !~
22986                             /^(if|elsif|unless|case|when)$/ )
22987                         {
22988                             warning(
22989 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
22990                             );
22991                         }
22992                     }
22993                     elsif ( $tok eq 'continue' ) {
22994                         if (   $last_nonblank_token ne ';'
22995                             && $last_nonblank_block_type !~
22996                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
22997                         {
22998
22999                             # note: ';' '{' and '}' in list above
23000                             # because continues can follow bare blocks;
23001                             # ':' is labeled block
23002                             warning("'$tok' should follow a block\n");
23003                         }
23004                     }
23005
23006                     # patch for SWITCH/CASE if 'case' and 'when are
23007                     # treated as keywords.
23008                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
23009                         $statement_type = $tok;    # next '{' is block
23010                     }
23011
23012                     # indent trailing if/unless/while/until
23013                     # outdenting will be handled by later indentation loop
23014                     if (   $tok =~ /^(if|unless|while|until)$/
23015                         && $next_nonblank_token ne '(' )
23016                     {
23017                         $indent_flag = 1;
23018                     }
23019                 }
23020
23021                 # check for inline label following
23022                 #         /^(redo|last|next|goto)$/
23023                 elsif (( $last_nonblank_type eq 'k' )
23024                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
23025                 {
23026                     $type = 'j';
23027                     next;
23028                 }
23029
23030                 # something else --
23031                 else {
23032
23033                     scan_bare_identifier();
23034                     if ( $type eq 'w' ) {
23035
23036                         if ( $expecting == OPERATOR ) {
23037
23038                             # don't complain about possible indirect object
23039                             # notation.
23040                             # For example:
23041                             #   package main;
23042                             #   sub new($) { ... }
23043                             #   $b = new A::;  # calls A::new
23044                             #   $c = new A;    # same thing but suspicious
23045                             # This will call A::new but we have a 'new' in
23046                             # main:: which looks like a constant.
23047                             #
23048                             if ( $last_nonblank_type eq 'C' ) {
23049                                 if ( $tok !~ /::$/ ) {
23050                                     complain(<<EOM);
23051 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
23052        Maybe indirectet object notation?
23053 EOM
23054                                 }
23055                             }
23056                             else {
23057                                 error_if_expecting_OPERATOR("bareword");
23058                             }
23059                         }
23060
23061                         # mark bare words immediately followed by a paren as
23062                         # functions
23063                         $next_tok = $$rtokens[ $i + 1 ];
23064                         if ( $next_tok eq '(' ) {
23065                             $type = 'U';
23066                         }
23067
23068                         # underscore after file test operator is file handle
23069                         if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
23070                             $type = 'Z';
23071                         }
23072
23073                         # patch for SWITCH/CASE if 'case' and 'when are
23074                         # not treated as keywords:
23075                         if (
23076                             (
23077                                    $tok                      eq 'case'
23078                                 && $brace_type[$brace_depth] eq 'switch'
23079                             )
23080                             || (   $tok eq 'when'
23081                                 && $brace_type[$brace_depth] eq 'given' )
23082                           )
23083                         {
23084                             $statement_type = $tok;    # next '{' is block
23085                             $type = 'k';    # for keyword syntax coloring
23086                         }
23087
23088                         # patch for SWITCH/CASE if switch and given not keywords
23089                         # Switch is not a perl 5 keyword, but we will gamble
23090                         # and mark switch followed by paren as a keyword.  This
23091                         # is only necessary to get html syntax coloring nice,
23092                         # and does not commit this as being a switch/case.
23093                         if ( $next_nonblank_token eq '('
23094                             && ( $tok eq 'switch' || $tok eq 'given' ) )
23095                         {
23096                             $type = 'k';    # for keyword syntax coloring
23097                         }
23098                     }
23099                 }
23100             }
23101
23102             ###############################################################
23103             # section 2: strings of digits
23104             ###############################################################
23105             elsif ( $pre_type eq 'd' ) {
23106                 $expecting = operator_expected( $prev_type, $tok, $next_type );
23107                 error_if_expecting_OPERATOR("Number")
23108                   if ( $expecting == OPERATOR );
23109                 my $number = scan_number();
23110                 if ( !defined($number) ) {
23111
23112                     # shouldn't happen - we should always get a number
23113                     warning("non-number beginning with digit--program bug\n");
23114                     report_definite_bug();
23115                 }
23116             }
23117
23118             ###############################################################
23119             # section 3: all other tokens
23120             ###############################################################
23121
23122             else {
23123                 last if ( $tok eq '#' );
23124                 my $code = $tokenization_code->{$tok};
23125                 if ($code) {
23126                     $expecting =
23127                       operator_expected( $prev_type, $tok, $next_type );
23128                     $code->();
23129                     redo if $in_quote;
23130                 }
23131             }
23132         }
23133
23134         # -----------------------------
23135         # end of main tokenization loop
23136         # -----------------------------
23137
23138         if ( $i_tok >= 0 ) {
23139             $routput_token_type->[$i_tok]     = $type;
23140             $routput_block_type->[$i_tok]     = $block_type;
23141             $routput_container_type->[$i_tok] = $container_type;
23142             $routput_type_sequence->[$i_tok]  = $type_sequence;
23143             $routput_indent_flag->[$i_tok]    = $indent_flag;
23144         }
23145
23146         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
23147             $last_last_nonblank_token          = $last_nonblank_token;
23148             $last_last_nonblank_type           = $last_nonblank_type;
23149             $last_last_nonblank_block_type     = $last_nonblank_block_type;
23150             $last_last_nonblank_container_type = $last_nonblank_container_type;
23151             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
23152             $last_nonblank_token               = $tok;
23153             $last_nonblank_type                = $type;
23154             $last_nonblank_block_type          = $block_type;
23155             $last_nonblank_container_type      = $container_type;
23156             $last_nonblank_type_sequence       = $type_sequence;
23157             $last_nonblank_prototype           = $prototype;
23158         }
23159
23160         # reset indentation level if necessary at a sub or package
23161         # in an attempt to recover from a nesting error
23162         if ( $level_in_tokenizer < 0 ) {
23163             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
23164                 reset_indentation_level(0);
23165                 brace_warning("resetting level to 0 at $1 $2\n");
23166             }
23167         }
23168
23169         # all done tokenizing this line ...
23170         # now prepare the final list of tokens and types
23171
23172         my @token_type     = ();   # stack of output token types
23173         my @block_type     = ();   # stack of output code block types
23174         my @container_type = ();   # stack of output code container types
23175         my @type_sequence  = ();   # stack of output type sequence numbers
23176         my @tokens         = ();   # output tokens
23177         my @levels         = ();   # structural brace levels of output tokens
23178         my @slevels        = ();   # secondary nesting levels of output tokens
23179         my @nesting_tokens = ();   # string of tokens leading to this depth
23180         my @nesting_types  = ();   # string of token types leading to this depth
23181         my @nesting_blocks = ();   # string of block types leading to this depth
23182         my @nesting_lists  = ();   # string of list types leading to this depth
23183         my @ci_string = ();  # string needed to compute continuation indentation
23184         my @container_environment = ();    # BLOCK or LIST
23185         my $container_environment = '';
23186         my $im                    = -1;    # previous $i value
23187         my $num;
23188         my $ci_string_sum = ones_count($ci_string_in_tokenizer);
23189
23190 # Computing Token Indentation
23191 #
23192 #     The final section of the tokenizer forms tokens and also computes
23193 #     parameters needed to find indentation.  It is much easier to do it
23194 #     in the tokenizer than elsewhere.  Here is a brief description of how
23195 #     indentation is computed.  Perl::Tidy computes indentation as the sum
23196 #     of 2 terms:
23197 #
23198 #     (1) structural indentation, such as if/else/elsif blocks
23199 #     (2) continuation indentation, such as long parameter call lists.
23200 #
23201 #     These are occasionally called primary and secondary indentation.
23202 #
23203 #     Structural indentation is introduced by tokens of type '{', although
23204 #     the actual tokens might be '{', '(', or '['.  Structural indentation
23205 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
23206 #     is 4 characters if the standard indentation scheme is used.
23207 #
23208 #     Continuation indentation is introduced whenever a line at BLOCK level
23209 #     is broken before its termination.  Default continuation indentation
23210 #     is 2 characters in the standard indentation scheme.
23211 #
23212 #     Both types of indentation may be nested arbitrarily deep and
23213 #     interlaced.  The distinction between the two is somewhat arbitrary.
23214 #
23215 #     For each token, we will define two variables which would apply if
23216 #     the current statement were broken just before that token, so that
23217 #     that token started a new line:
23218 #
23219 #     $level = the structural indentation level,
23220 #     $ci_level = the continuation indentation level
23221 #
23222 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
23223 #     assuming defaults.  However, in some special cases it is customary
23224 #     to modify $ci_level from this strict value.
23225 #
23226 #     The total structural indentation is easy to compute by adding and
23227 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
23228 #     running value of this variable is $level_in_tokenizer.
23229 #
23230 #     The total continuation is much more difficult to compute, and requires
23231 #     several variables.  These veriables are:
23232 #
23233 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
23234 #       each indentation level, if there are intervening open secondary
23235 #       structures just prior to that level.
23236 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
23237 #       if the last token at that level is "continued", meaning that it
23238 #       is not the first token of an expression.
23239 #     $nesting_block_string = a string of 1's and 0's indicating, for each
23240 #       indentation level, if the level is of type BLOCK or not.
23241 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
23242 #     $nesting_list_string = a string of 1's and 0's indicating, for each
23243 #       indentation level, if it is is appropriate for list formatting.
23244 #       If so, continuation indentation is used to indent long list items.
23245 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
23246 #     @{$rslevel_stack} = a stack of total nesting depths at each
23247 #       structural indentation level, where "total nesting depth" means
23248 #       the nesting depth that would occur if every nesting token -- '{', '[',
23249 #       and '(' -- , regardless of context, is used to compute a nesting
23250 #       depth.
23251
23252         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
23253         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
23254
23255         my ( $ci_string_i, $level_i, $nesting_block_string_i,
23256             $nesting_list_string_i, $nesting_token_string_i,
23257             $nesting_type_string_i, );
23258
23259         foreach $i ( @{$routput_token_list} )
23260         {    # scan the list of pre-tokens indexes
23261
23262             # self-checking for valid token types
23263             my $type                    = $routput_token_type->[$i];
23264             my $forced_indentation_flag = $routput_indent_flag->[$i];
23265
23266             # See if we should undo the $forced_indentation_flag.
23267             # Forced indentation after 'if', 'unless', 'while' and 'until'
23268             # expressions without trailing parens is optional and doesn't
23269             # always look good.  It is usually okay for a trailing logical
23270             # expression, but if the expression is a function call, code block,
23271             # or some kind of list it puts in an unwanted extra indentation
23272             # level which is hard to remove.
23273             #
23274             # Example where extra indentation looks ok:
23275             # return 1
23276             #   if $det_a < 0 and $det_b > 0
23277             #       or $det_a > 0 and $det_b < 0;
23278             #
23279             # Example where extra indentation is not needed because
23280             # the eval brace also provides indentation:
23281             # print "not " if defined eval {
23282             #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
23283             # };
23284             #
23285             # The following rule works fairly well:
23286             #   Undo the flag if the end of this line, or start of the next
23287             #   line, is an opening container token or a comma.
23288             # This almost always works, but if not after another pass it will
23289             # be stable.
23290             if ( $forced_indentation_flag && $type eq 'k' ) {
23291                 my $ixlast  = -1;
23292                 my $ilast   = $routput_token_list->[$ixlast];
23293                 my $toklast = $routput_token_type->[$ilast];
23294                 if ( $toklast eq '#' ) {
23295                     $ixlast--;
23296                     $ilast   = $routput_token_list->[$ixlast];
23297                     $toklast = $routput_token_type->[$ilast];
23298                 }
23299                 if ( $toklast eq 'b' ) {
23300                     $ixlast--;
23301                     $ilast   = $routput_token_list->[$ixlast];
23302                     $toklast = $routput_token_type->[$ilast];
23303                 }
23304                 if ( $toklast =~ /^[\{,]$/ ) {
23305                     $forced_indentation_flag = 0;
23306                 }
23307                 else {
23308                     ( $toklast, my $i_next ) =
23309                       find_next_nonblank_token( $max_token_index, $rtokens,
23310                         $max_token_index );
23311                     if ( $toklast =~ /^[\{,]$/ ) {
23312                         $forced_indentation_flag = 0;
23313                     }
23314                 }
23315             }
23316
23317             # if we are already in an indented if, see if we should outdent
23318             if ($indented_if_level) {
23319
23320                 # don't try to nest trailing if's - shouldn't happen
23321                 if ( $type eq 'k' ) {
23322                     $forced_indentation_flag = 0;
23323                 }
23324
23325                 # check for the normal case - outdenting at next ';'
23326                 elsif ( $type eq ';' ) {
23327                     if ( $level_in_tokenizer == $indented_if_level ) {
23328                         $forced_indentation_flag = -1;
23329                         $indented_if_level       = 0;
23330                     }
23331                 }
23332
23333                 # handle case of missing semicolon
23334                 elsif ( $type eq '}' ) {
23335                     if ( $level_in_tokenizer == $indented_if_level ) {
23336                         $indented_if_level = 0;
23337
23338                         # TBD: This could be a subroutine call
23339                         $level_in_tokenizer--;
23340                         if ( @{$rslevel_stack} > 1 ) {
23341                             pop( @{$rslevel_stack} );
23342                         }
23343                         if ( length($nesting_block_string) > 1 )
23344                         {    # true for valid script
23345                             chop $nesting_block_string;
23346                             chop $nesting_list_string;
23347                         }
23348
23349                     }
23350                 }
23351             }
23352
23353             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
23354             $level_i = $level_in_tokenizer;
23355
23356             # This can happen by running perltidy on non-scripts
23357             # although it could also be bug introduced by programming change.
23358             # Perl silently accepts a 032 (^Z) and takes it as the end
23359             if ( !$is_valid_token_type{$type} ) {
23360                 my $val = ord($type);
23361                 warning(
23362                     "unexpected character decimal $val ($type) in script\n");
23363                 $tokenizer_self->{_in_error} = 1;
23364             }
23365
23366             # ----------------------------------------------------------------
23367             # TOKEN TYPE PATCHES
23368             #  output __END__, __DATA__, and format as type 'k' instead of ';'
23369             # to make html colors correct, etc.
23370             my $fix_type = $type;
23371             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
23372
23373             # output anonymous 'sub' as keyword
23374             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
23375
23376             # -----------------------------------------------------------------
23377
23378             $nesting_token_string_i = $nesting_token_string;
23379             $nesting_type_string_i  = $nesting_type_string;
23380             $nesting_block_string_i = $nesting_block_string;
23381             $nesting_list_string_i  = $nesting_list_string;
23382
23383             # set primary indentation levels based on structural braces
23384             # Note: these are set so that the leading braces have a HIGHER
23385             # level than their CONTENTS, which is convenient for indentation
23386             # Also, define continuation indentation for each token.
23387             if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
23388             {
23389
23390                 # use environment before updating
23391                 $container_environment =
23392                     $nesting_block_flag ? 'BLOCK'
23393                   : $nesting_list_flag  ? 'LIST'
23394                   :                       "";
23395
23396                 # if the difference between total nesting levels is not 1,
23397                 # there are intervening non-structural nesting types between
23398                 # this '{' and the previous unclosed '{'
23399                 my $intervening_secondary_structure = 0;
23400                 if ( @{$rslevel_stack} ) {
23401                     $intervening_secondary_structure =
23402                       $slevel_in_tokenizer - $rslevel_stack->[-1];
23403                 }
23404
23405      # Continuation Indentation
23406      #
23407      # Having tried setting continuation indentation both in the formatter and
23408      # in the tokenizer, I can say that setting it in the tokenizer is much,
23409      # much easier.  The formatter already has too much to do, and can't
23410      # make decisions on line breaks without knowing what 'ci' will be at
23411      # arbitrary locations.
23412      #
23413      # But a problem with setting the continuation indentation (ci) here
23414      # in the tokenizer is that we do not know where line breaks will actually
23415      # be.  As a result, we don't know if we should propagate continuation
23416      # indentation to higher levels of structure.
23417      #
23418      # For nesting of only structural indentation, we never need to do this.
23419      # For example, in a long if statement, like this
23420      #
23421      #   if ( !$output_block_type[$i]
23422      #     && ($in_statement_continuation) )
23423      #   {           <--outdented
23424      #       do_something();
23425      #   }
23426      #
23427      # the second line has ci but we do normally give the lines within the BLOCK
23428      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
23429      #
23430      # But consider something like this, where we have created a break after
23431      # an opening paren on line 1, and the paren is not (currently) a
23432      # structural indentation token:
23433      #
23434      # my $file = $menubar->Menubutton(
23435      #   qw/-text File -underline 0 -menuitems/ => [
23436      #       [
23437      #           Cascade    => '~View',
23438      #           -menuitems => [
23439      #           ...
23440      #
23441      # The second line has ci, so it would seem reasonable to propagate it
23442      # down, giving the third line 1 ci + 1 indentation.  This suggests the
23443      # following rule, which is currently used to propagating ci down: if there
23444      # are any non-structural opening parens (or brackets, or braces), before
23445      # an opening structural brace, then ci is propagated down, and otherwise
23446      # not.  The variable $intervening_secondary_structure contains this
23447      # information for the current token, and the string
23448      # "$ci_string_in_tokenizer" is a stack of previous values of this
23449      # variable.
23450
23451                 # save the current states
23452                 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
23453                 $level_in_tokenizer++;
23454
23455                 if ($forced_indentation_flag) {
23456
23457                     # break BEFORE '?' when there is forced indentation
23458                     if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
23459                     if ( $type eq 'k' ) {
23460                         $indented_if_level = $level_in_tokenizer;
23461                     }
23462                 }
23463
23464                 if ( $routput_block_type->[$i] ) {
23465                     $nesting_block_flag = 1;
23466                     $nesting_block_string .= '1';
23467                 }
23468                 else {
23469                     $nesting_block_flag = 0;
23470                     $nesting_block_string .= '0';
23471                 }
23472
23473                 # we will use continuation indentation within containers
23474                 # which are not blocks and not logical expressions
23475                 my $bit = 0;
23476                 if ( !$routput_block_type->[$i] ) {
23477
23478                     # propagate flag down at nested open parens
23479                     if ( $routput_container_type->[$i] eq '(' ) {
23480                         $bit = 1 if $nesting_list_flag;
23481                     }
23482
23483                   # use list continuation if not a logical grouping
23484                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
23485                     else {
23486                         $bit = 1
23487                           unless
23488                             $is_logical_container{ $routput_container_type->[$i]
23489                               };
23490                     }
23491                 }
23492                 $nesting_list_string .= $bit;
23493                 $nesting_list_flag = $bit;
23494
23495                 $ci_string_in_tokenizer .=
23496                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
23497                 $ci_string_sum = ones_count($ci_string_in_tokenizer);
23498                 $continuation_string_in_tokenizer .=
23499                   ( $in_statement_continuation > 0 ) ? '1' : '0';
23500
23501    #  Sometimes we want to give an opening brace continuation indentation,
23502    #  and sometimes not.  For code blocks, we don't do it, so that the leading
23503    #  '{' gets outdented, like this:
23504    #
23505    #   if ( !$output_block_type[$i]
23506    #     && ($in_statement_continuation) )
23507    #   {           <--outdented
23508    #
23509    #  For other types, we will give them continuation indentation.  For example,
23510    #  here is how a list looks with the opening paren indented:
23511    #
23512    #     @LoL =
23513    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
23514    #         [ "homer", "marge", "bart" ], );
23515    #
23516    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
23517
23518                 my $total_ci = $ci_string_sum;
23519                 if (
23520                     !$routput_block_type->[$i]    # patch: skip for BLOCK
23521                     && ($in_statement_continuation)
23522                     && !( $forced_indentation_flag && $type eq ':' )
23523                   )
23524                 {
23525                     $total_ci += $in_statement_continuation
23526                       unless ( $ci_string_in_tokenizer =~ /1$/ );
23527                 }
23528
23529                 $ci_string_i               = $total_ci;
23530                 $in_statement_continuation = 0;
23531             }
23532
23533             elsif ($type eq '}'
23534                 || $type eq 'R'
23535                 || $forced_indentation_flag < 0 )
23536             {
23537
23538                 # only a nesting error in the script would prevent popping here
23539                 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
23540
23541                 $level_i = --$level_in_tokenizer;
23542
23543                 # restore previous level values
23544                 if ( length($nesting_block_string) > 1 )
23545                 {    # true for valid script
23546                     chop $nesting_block_string;
23547                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
23548                     chop $nesting_list_string;
23549                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
23550
23551                     chop $ci_string_in_tokenizer;
23552                     $ci_string_sum = ones_count($ci_string_in_tokenizer);
23553
23554                     $in_statement_continuation =
23555                       chop $continuation_string_in_tokenizer;
23556
23557                     # zero continuation flag at terminal BLOCK '}' which
23558                     # ends a statement.
23559                     if ( $routput_block_type->[$i] ) {
23560
23561                         # ...These include non-anonymous subs
23562                         # note: could be sub ::abc { or sub 'abc
23563                         if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
23564
23565                          # note: older versions of perl require the /gc modifier
23566                          # here or else the \G does not work.
23567                             if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
23568                             {
23569                                 $in_statement_continuation = 0;
23570                             }
23571                         }
23572
23573 # ...and include all block types except user subs with
23574 # block prototypes and these: (sort|grep|map|do|eval)
23575 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
23576                         elsif (
23577                             $is_zero_continuation_block_type{
23578                                 $routput_block_type->[$i] } )
23579                         {
23580                             $in_statement_continuation = 0;
23581                         }
23582
23583                         # ..but these are not terminal types:
23584                         #     /^(sort|grep|map|do|eval)$/ )
23585                         elsif (
23586                             $is_not_zero_continuation_block_type{
23587                                 $routput_block_type->[$i] } )
23588                         {
23589                         }
23590
23591                         # ..and a block introduced by a label
23592                         # /^\w+\s*:$/gc ) {
23593                         elsif ( $routput_block_type->[$i] =~ /:$/ ) {
23594                             $in_statement_continuation = 0;
23595                         }
23596
23597                         # user function with block prototype
23598                         else {
23599                             $in_statement_continuation = 0;
23600                         }
23601                     }
23602
23603                     # If we are in a list, then
23604                     # we must set continuatoin indentation at the closing
23605                     # paren of something like this (paren after $check):
23606                     #     assert(
23607                     #         __LINE__,
23608                     #         ( not defined $check )
23609                     #           or ref $check
23610                     #           or $check eq "new"
23611                     #           or $check eq "old",
23612                     #     );
23613                     elsif ( $tok eq ')' ) {
23614                         $in_statement_continuation = 1
23615                           if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
23616                     }
23617
23618                     elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
23619                 }
23620
23621                 # use environment after updating
23622                 $container_environment =
23623                     $nesting_block_flag ? 'BLOCK'
23624                   : $nesting_list_flag  ? 'LIST'
23625                   :                       "";
23626                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
23627                 $nesting_block_string_i = $nesting_block_string;
23628                 $nesting_list_string_i  = $nesting_list_string;
23629             }
23630
23631             # not a structural indentation type..
23632             else {
23633
23634                 $container_environment =
23635                     $nesting_block_flag ? 'BLOCK'
23636                   : $nesting_list_flag  ? 'LIST'
23637                   :                       "";
23638
23639                 # zero the continuation indentation at certain tokens so
23640                 # that they will be at the same level as its container.  For
23641                 # commas, this simplifies the -lp indentation logic, which
23642                 # counts commas.  For ?: it makes them stand out.
23643                 if ($nesting_list_flag) {
23644                     if ( $type =~ /^[,\?\:]$/ ) {
23645                         $in_statement_continuation = 0;
23646                     }
23647                 }
23648
23649                 # be sure binary operators get continuation indentation
23650                 if (
23651                     $container_environment
23652                     && (   $type eq 'k' && $is_binary_keyword{$tok}
23653                         || $is_binary_type{$type} )
23654                   )
23655                 {
23656                     $in_statement_continuation = 1;
23657                 }
23658
23659                 # continuation indentation is sum of any open ci from previous
23660                 # levels plus the current level
23661                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
23662
23663                 # update continuation flag ...
23664                 # if this isn't a blank or comment..
23665                 if ( $type ne 'b' && $type ne '#' ) {
23666
23667                     # and we are in a BLOCK
23668                     if ($nesting_block_flag) {
23669
23670                         # the next token after a ';' and label starts a new stmt
23671                         if ( $type eq ';' || $type eq 'J' ) {
23672                             $in_statement_continuation = 0;
23673                         }
23674
23675                         # otherwise, we are continuing the current statement
23676                         else {
23677                             $in_statement_continuation = 1;
23678                         }
23679                     }
23680
23681                     # if we are not in a BLOCK..
23682                     else {
23683
23684                         # do not use continuation indentation if not list
23685                         # environment (could be within if/elsif clause)
23686                         if ( !$nesting_list_flag ) {
23687                             $in_statement_continuation = 0;
23688                         }
23689
23690                        # otherwise, the next token after a ',' starts a new term
23691                         elsif ( $type eq ',' ) {
23692                             $in_statement_continuation = 0;
23693                         }
23694
23695                         # otherwise, we are continuing the current term
23696                         else {
23697                             $in_statement_continuation = 1;
23698                         }
23699                     }
23700                 }
23701             }
23702
23703             if ( $level_in_tokenizer < 0 ) {
23704                 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
23705                     $tokenizer_self->{_saw_negative_indentation} = 1;
23706                     warning("Starting negative indentation\n");
23707                 }
23708             }
23709
23710             # set secondary nesting levels based on all continment token types
23711             # Note: these are set so that the nesting depth is the depth
23712             # of the PREVIOUS TOKEN, which is convenient for setting
23713             # the stength of token bonds
23714             my $slevel_i = $slevel_in_tokenizer;
23715
23716             #    /^[L\{\(\[]$/
23717             if ( $is_opening_type{$type} ) {
23718                 $slevel_in_tokenizer++;
23719                 $nesting_token_string .= $tok;
23720                 $nesting_type_string  .= $type;
23721             }
23722
23723             #       /^[R\}\)\]]$/
23724             elsif ( $is_closing_type{$type} ) {
23725                 $slevel_in_tokenizer--;
23726                 my $char = chop $nesting_token_string;
23727
23728                 if ( $char ne $matching_start_token{$tok} ) {
23729                     $nesting_token_string .= $char . $tok;
23730                     $nesting_type_string  .= $type;
23731                 }
23732                 else {
23733                     chop $nesting_type_string;
23734                 }
23735             }
23736
23737             push( @block_type,            $routput_block_type->[$i] );
23738             push( @ci_string,             $ci_string_i );
23739             push( @container_environment, $container_environment );
23740             push( @container_type,        $routput_container_type->[$i] );
23741             push( @levels,                $level_i );
23742             push( @nesting_tokens,        $nesting_token_string_i );
23743             push( @nesting_types,         $nesting_type_string_i );
23744             push( @slevels,               $slevel_i );
23745             push( @token_type,            $fix_type );
23746             push( @type_sequence,         $routput_type_sequence->[$i] );
23747             push( @nesting_blocks,        $nesting_block_string );
23748             push( @nesting_lists,         $nesting_list_string );
23749
23750             # now form the previous token
23751             if ( $im >= 0 ) {
23752                 $num =
23753                   $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
23754
23755                 if ( $num > 0 ) {
23756                     push( @tokens,
23757                         substr( $input_line, $$rtoken_map[$im], $num ) );
23758                 }
23759             }
23760             $im = $i;
23761         }
23762
23763         $num = length($input_line) - $$rtoken_map[$im];    # make the last token
23764         if ( $num > 0 ) {
23765             push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
23766         }
23767
23768         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
23769         $tokenizer_self->{_in_quote}          = $in_quote;
23770         $tokenizer_self->{_quote_target} =
23771           $in_quote ? matching_end_token($quote_character) : "";
23772         $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
23773
23774         $line_of_tokens->{_rtoken_type}            = \@token_type;
23775         $line_of_tokens->{_rtokens}                = \@tokens;
23776         $line_of_tokens->{_rblock_type}            = \@block_type;
23777         $line_of_tokens->{_rcontainer_type}        = \@container_type;
23778         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
23779         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
23780         $line_of_tokens->{_rlevels}                = \@levels;
23781         $line_of_tokens->{_rslevels}               = \@slevels;
23782         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
23783         $line_of_tokens->{_rci_levels}             = \@ci_string;
23784         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
23785
23786         return;
23787     }
23788 }    # end tokenize_this_line
23789
23790 #########i#############################################################
23791 # Tokenizer routines which assist in identifying token types
23792 #######################################################################
23793
23794 sub operator_expected {
23795
23796     # Many perl symbols have two or more meanings.  For example, '<<'
23797     # can be a shift operator or a here-doc operator.  The
23798     # interpretation of these symbols depends on the current state of
23799     # the tokenizer, which may either be expecting a term or an
23800     # operator.  For this example, a << would be a shift if an operator
23801     # is expected, and a here-doc if a term is expected.  This routine
23802     # is called to make this decision for any current token.  It returns
23803     # one of three possible values:
23804     #
23805     #     OPERATOR - operator expected (or at least, not a term)
23806     #     UNKNOWN  - can't tell
23807     #     TERM     - a term is expected (or at least, not an operator)
23808     #
23809     # The decision is based on what has been seen so far.  This
23810     # information is stored in the "$last_nonblank_type" and
23811     # "$last_nonblank_token" variables.  For example, if the
23812     # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
23813     # if $last_nonblank_type is 'n' (numeric), we are expecting an
23814     # OPERATOR.
23815     #
23816     # If a UNKNOWN is returned, the calling routine must guess. A major
23817     # goal of this tokenizer is to minimize the possiblity of returning
23818     # UNKNOWN, because a wrong guess can spoil the formatting of a
23819     # script.
23820     #
23821     # adding NEW_TOKENS: it is critically important that this routine be
23822     # updated to allow it to determine if an operator or term is to be
23823     # expected after the new token.  Doing this simply involves adding
23824     # the new token character to one of the regexes in this routine or
23825     # to one of the hash lists
23826     # that it uses, which are initialized in the BEGIN section.
23827     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
23828     # $statement_type
23829
23830     my ( $prev_type, $tok, $next_type ) = @_;
23831
23832     my $op_expected = UNKNOWN;
23833
23834 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
23835
23836 # Note: function prototype is available for token type 'U' for future
23837 # program development.  It contains the leading and trailing parens,
23838 # and no blanks.  It might be used to eliminate token type 'C', for
23839 # example (prototype = '()'). Thus:
23840 # if ($last_nonblank_type eq 'U') {
23841 #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
23842 # }
23843
23844     # A possible filehandle (or object) requires some care...
23845     if ( $last_nonblank_type eq 'Z' ) {
23846
23847         # angle.t
23848         if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
23849             $op_expected = UNKNOWN;
23850         }
23851
23852         # For possible file handle like "$a", Perl uses weird parsing rules.
23853         # For example:
23854         # print $a/2,"/hi";   - division
23855         # print $a / 2,"/hi"; - division
23856         # print $a/ 2,"/hi";  - division
23857         # print $a /2,"/hi";  - pattern (and error)!
23858         elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
23859             $op_expected = TERM;
23860         }
23861
23862         # Note when an operation is being done where a
23863         # filehandle might be expected, since a change in whitespace
23864         # could change the interpretation of the statement.
23865         else {
23866             if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
23867                 complain("operator in print statement not recommended\n");
23868                 $op_expected = OPERATOR;
23869             }
23870         }
23871     }
23872
23873     # handle something after 'do' and 'eval'
23874     elsif ( $is_block_operator{$last_nonblank_token} ) {
23875
23876         # something like $a = eval "expression";
23877         #                          ^
23878         if ( $last_nonblank_type eq 'k' ) {
23879             $op_expected = TERM;    # expression or list mode following keyword
23880         }
23881
23882         # something like $a = do { BLOCK } / 2;
23883         #                                  ^
23884         else {
23885             $op_expected = OPERATOR;    # block mode following }
23886         }
23887     }
23888
23889     # handle bare word..
23890     elsif ( $last_nonblank_type eq 'w' ) {
23891
23892         # unfortunately, we can't tell what type of token to expect next
23893         # after most bare words
23894         $op_expected = UNKNOWN;
23895     }
23896
23897     # operator, but not term possible after these types
23898     # Note: moved ')' from type to token because parens in list context
23899     # get marked as '{' '}' now.  This is a minor glitch in the following:
23900     #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
23901     #
23902     elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
23903         || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
23904     {
23905         $op_expected = OPERATOR;
23906
23907         # in a 'use' statement, numbers and v-strings are not true
23908         # numbers, so to avoid incorrect error messages, we will
23909         # mark them as unknown for now (use.t)
23910         # TODO: it would be much nicer to create a new token V for VERSION
23911         # number in a use statement.  Then this could be a check on type V
23912         # and related patches which change $statement_type for '=>'
23913         # and ',' could be removed.  Further, it would clean things up to
23914         # scan the 'use' statement with a separate subroutine.
23915         if (   ( $statement_type eq 'use' )
23916             && ( $last_nonblank_type =~ /^[nv]$/ ) )
23917         {
23918             $op_expected = UNKNOWN;
23919         }
23920     }
23921
23922     # no operator after many keywords, such as "die", "warn", etc
23923     elsif ( $expecting_term_token{$last_nonblank_token} ) {
23924
23925         # patch for dor.t (defined or).
23926         # perl functions which may be unary operators
23927         # TODO: This list is incomplete, and these should be put
23928         # into a hash.
23929         if (   $tok eq '/'
23930             && $next_type          eq '/'
23931             && $last_nonblank_type eq 'k'
23932             && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
23933         {
23934             $op_expected = OPERATOR;
23935         }
23936         else {
23937             $op_expected = TERM;
23938         }
23939     }
23940
23941     # no operator after things like + - **  (i.e., other operators)
23942     elsif ( $expecting_term_types{$last_nonblank_type} ) {
23943         $op_expected = TERM;
23944     }
23945
23946     # a few operators, like "time", have an empty prototype () and so
23947     # take no parameters but produce a value to operate on
23948     elsif ( $expecting_operator_token{$last_nonblank_token} ) {
23949         $op_expected = OPERATOR;
23950     }
23951
23952     # post-increment and decrement produce values to be operated on
23953     elsif ( $expecting_operator_types{$last_nonblank_type} ) {
23954         $op_expected = OPERATOR;
23955     }
23956
23957     # no value to operate on after sub block
23958     elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
23959
23960     # a right brace here indicates the end of a simple block.
23961     # all non-structural right braces have type 'R'
23962     # all braces associated with block operator keywords have been given those
23963     # keywords as "last_nonblank_token" and caught above.
23964     # (This statement is order dependent, and must come after checking
23965     # $last_nonblank_token).
23966     elsif ( $last_nonblank_type eq '}' ) {
23967
23968         # patch for dor.t (defined or).
23969         if (   $tok eq '/'
23970             && $next_type           eq '/'
23971             && $last_nonblank_token eq ']' )
23972         {
23973             $op_expected = OPERATOR;
23974         }
23975         else {
23976             $op_expected = TERM;
23977         }
23978     }
23979
23980     # something else..what did I forget?
23981     else {
23982
23983         # collecting diagnostics on unknown operator types..see what was missed
23984         $op_expected = UNKNOWN;
23985         write_diagnostics(
23986 "OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
23987         );
23988     }
23989
23990     TOKENIZER_DEBUG_FLAG_EXPECT && do {
23991         print
23992 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
23993     };
23994     return $op_expected;
23995 }
23996
23997 sub new_statement_ok {
23998
23999     # return true if the current token can start a new statement
24000     # USES GLOBAL VARIABLES: $last_nonblank_type
24001
24002     return label_ok()    # a label would be ok here
24003
24004       || $last_nonblank_type eq 'J';    # or we follow a label
24005
24006 }
24007
24008 sub label_ok {
24009
24010     # Decide if a bare word followed by a colon here is a label
24011     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
24012     # $brace_depth, @brace_type
24013
24014     # if it follows an opening or closing code block curly brace..
24015     if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
24016         && $last_nonblank_type eq $last_nonblank_token )
24017     {
24018
24019         # it is a label if and only if the curly encloses a code block
24020         return $brace_type[$brace_depth];
24021     }
24022
24023     # otherwise, it is a label if and only if it follows a ';'
24024     # (real or fake)
24025     else {
24026         return ( $last_nonblank_type eq ';' );
24027     }
24028 }
24029
24030 sub code_block_type {
24031
24032     # Decide if this is a block of code, and its type.
24033     # Must be called only when $type = $token = '{'
24034     # The problem is to distinguish between the start of a block of code
24035     # and the start of an anonymous hash reference
24036     # Returns "" if not code block, otherwise returns 'last_nonblank_token'
24037     # to indicate the type of code block.  (For example, 'last_nonblank_token'
24038     # might be 'if' for an if block, 'else' for an else block, etc).
24039     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
24040     # $last_nonblank_block_type, $brace_depth, @brace_type
24041
24042     # handle case of multiple '{'s
24043
24044 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
24045
24046     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
24047     if (   $last_nonblank_token eq '{'
24048         && $last_nonblank_type eq $last_nonblank_token )
24049     {
24050
24051         # opening brace where a statement may appear is probably
24052         # a code block but might be and anonymous hash reference
24053         if ( $brace_type[$brace_depth] ) {
24054             return decide_if_code_block( $i, $rtokens, $rtoken_type,
24055                 $max_token_index );
24056         }
24057
24058         # cannot start a code block within an anonymous hash
24059         else {
24060             return "";
24061         }
24062     }
24063
24064     elsif ( $last_nonblank_token eq ';' ) {
24065
24066         # an opening brace where a statement may appear is probably
24067         # a code block but might be and anonymous hash reference
24068         return decide_if_code_block( $i, $rtokens, $rtoken_type,
24069             $max_token_index );
24070     }
24071
24072     # handle case of '}{'
24073     elsif ($last_nonblank_token eq '}'
24074         && $last_nonblank_type eq $last_nonblank_token )
24075     {
24076
24077         # a } { situation ...
24078         # could be hash reference after code block..(blktype1.t)
24079         if ($last_nonblank_block_type) {
24080             return decide_if_code_block( $i, $rtokens, $rtoken_type,
24081                 $max_token_index );
24082         }
24083
24084         # must be a block if it follows a closing hash reference
24085         else {
24086             return $last_nonblank_token;
24087         }
24088     }
24089
24090     # NOTE: braces after type characters start code blocks, but for
24091     # simplicity these are not identified as such.  See also
24092     # sub is_non_structural_brace.
24093     # elsif ( $last_nonblank_type eq 't' ) {
24094     #    return $last_nonblank_token;
24095     # }
24096
24097     # brace after label:
24098     elsif ( $last_nonblank_type eq 'J' ) {
24099         return $last_nonblank_token;
24100     }
24101
24102 # otherwise, look at previous token.  This must be a code block if
24103 # it follows any of these:
24104 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
24105     elsif ( $is_code_block_token{$last_nonblank_token} ) {
24106         return $last_nonblank_token;
24107     }
24108
24109     # or a sub definition
24110     elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
24111         && $last_nonblank_token =~ /^sub\b/ )
24112     {
24113         return $last_nonblank_token;
24114     }
24115
24116     # user-defined subs with block parameters (like grep/map/eval)
24117     elsif ( $last_nonblank_type eq 'G' ) {
24118         return $last_nonblank_token;
24119     }
24120
24121     # check bareword
24122     elsif ( $last_nonblank_type eq 'w' ) {
24123         return decide_if_code_block( $i, $rtokens, $rtoken_type,
24124             $max_token_index );
24125     }
24126
24127     # anything else must be anonymous hash reference
24128     else {
24129         return "";
24130     }
24131 }
24132
24133 sub decide_if_code_block {
24134
24135     # USES GLOBAL VARIABLES: $last_nonblank_token
24136     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
24137     my ( $next_nonblank_token, $i_next ) =
24138       find_next_nonblank_token( $i, $rtokens, $max_token_index );
24139
24140     # we are at a '{' where a statement may appear.
24141     # We must decide if this brace starts an anonymous hash or a code
24142     # block.
24143     # return "" if anonymous hash, and $last_nonblank_token otherwise
24144
24145     # initialize to be code BLOCK
24146     my $code_block_type = $last_nonblank_token;
24147
24148     # Check for the common case of an empty anonymous hash reference:
24149     # Maybe something like sub { { } }
24150     if ( $next_nonblank_token eq '}' ) {
24151         $code_block_type = "";
24152     }
24153
24154     else {
24155
24156         # To guess if this '{' is an anonymous hash reference, look ahead
24157         # and test as follows:
24158         #
24159         # it is a hash reference if next come:
24160         #   - a string or digit followed by a comma or =>
24161         #   - bareword followed by =>
24162         # otherwise it is a code block
24163         #
24164         # Examples of anonymous hash ref:
24165         # {'aa',};
24166         # {1,2}
24167         #
24168         # Examples of code blocks:
24169         # {1; print "hello\n", 1;}
24170         # {$a,1};
24171
24172         # We are only going to look ahead one more (nonblank/comment) line.
24173         # Strange formatting could cause a bad guess, but that's unlikely.
24174         my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
24175         my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
24176         my ( $rpre_tokens, $rpre_types ) =
24177           peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
24178                                                        # generous, and prevents
24179                                                        # wasting lots of
24180                                                        # time in mangled files
24181         if ( defined($rpre_types) && @$rpre_types ) {
24182             push @pre_types,  @$rpre_types;
24183             push @pre_tokens, @$rpre_tokens;
24184         }
24185
24186         # put a sentinal token to simplify stopping the search
24187         push @pre_types, '}';
24188
24189         my $jbeg = 0;
24190         $jbeg = 1 if $pre_types[0] eq 'b';
24191
24192         # first look for one of these
24193         #  - bareword
24194         #  - bareword with leading -
24195         #  - digit
24196         #  - quoted string
24197         my $j = $jbeg;
24198         if ( $pre_types[$j] =~ /^[\'\"]/ ) {
24199
24200             # find the closing quote; don't worry about escapes
24201             my $quote_mark = $pre_types[$j];
24202             for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
24203                 if ( $pre_types[$k] eq $quote_mark ) {
24204                     $j = $k + 1;
24205                     my $next = $pre_types[$j];
24206                     last;
24207                 }
24208             }
24209         }
24210         elsif ( $pre_types[$j] eq 'd' ) {
24211             $j++;
24212         }
24213         elsif ( $pre_types[$j] eq 'w' ) {
24214             unless ( $is_keyword{ $pre_tokens[$j] } ) {
24215                 $j++;
24216             }
24217         }
24218         elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
24219             $j++;
24220         }
24221         if ( $j > $jbeg ) {
24222
24223             $j++ if $pre_types[$j] eq 'b';
24224
24225             # it's a hash ref if a comma or => follow next
24226             if ( $pre_types[$j] eq ','
24227                 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
24228             {
24229                 $code_block_type = "";
24230             }
24231         }
24232     }
24233
24234     return $code_block_type;
24235 }
24236
24237 sub unexpected {
24238
24239     # report unexpected token type and show where it is
24240     # USES GLOBAL VARIABLES: $tokenizer_self
24241     my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
24242         $rpretoken_type, $input_line )
24243       = @_;
24244
24245     if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
24246         my $msg = "found $found where $expecting expected";
24247         my $pos = $$rpretoken_map[$i_tok];
24248         interrupt_logfile();
24249         my $input_line_number = $tokenizer_self->{_last_line_number};
24250         my ( $offset, $numbered_line, $underline ) =
24251           make_numbered_line( $input_line_number, $input_line, $pos );
24252         $underline = write_on_underline( $underline, $pos - $offset, '^' );
24253
24254         my $trailer = "";
24255         if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
24256             my $pos_prev = $$rpretoken_map[$last_nonblank_i];
24257             my $num;
24258             if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
24259                 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
24260             }
24261             else {
24262                 $num = $pos - $pos_prev;
24263             }
24264             if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
24265
24266             $underline =
24267               write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
24268             $trailer = " (previous token underlined)";
24269         }
24270         warning( $numbered_line . "\n" );
24271         warning( $underline . "\n" );
24272         warning( $msg . $trailer . "\n" );
24273         resume_logfile();
24274     }
24275 }
24276
24277 sub is_non_structural_brace {
24278
24279     # Decide if a brace or bracket is structural or non-structural
24280     # by looking at the previous token and type
24281     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
24282
24283     # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
24284     # Tentatively deactivated because it caused the wrong operator expectation
24285     # for this code:
24286     #      $user = @vars[1] / 100;
24287     # Must update sub operator_expected before re-implementing.
24288     # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
24289     #    return 0;
24290     # }
24291
24292     # NOTE: braces after type characters start code blocks, but for
24293     # simplicity these are not identified as such.  See also
24294     # sub code_block_type
24295     # if ($last_nonblank_type eq 't') {return 0}
24296
24297     # otherwise, it is non-structural if it is decorated
24298     # by type information.
24299     # For example, the '{' here is non-structural:   ${xxx}
24300     (
24301         $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
24302
24303           # or if we follow a hash or array closing curly brace or bracket
24304           # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
24305           # because the first '}' would have been given type 'R'
24306           || $last_nonblank_type =~ /^([R\]])$/
24307     );
24308 }
24309
24310 #########i#############################################################
24311 # Tokenizer routines for tracking container nesting depths
24312 #######################################################################
24313
24314 # The following routines keep track of nesting depths of the nesting
24315 # types, ( [ { and ?.  This is necessary for determining the indentation
24316 # level, and also for debugging programs.  Not only do they keep track of
24317 # nesting depths of the individual brace types, but they check that each
24318 # of the other brace types is balanced within matching pairs.  For
24319 # example, if the program sees this sequence:
24320 #
24321 #         {  ( ( ) }
24322 #
24323 # then it can determine that there is an extra left paren somewhere
24324 # between the { and the }.  And so on with every other possible
24325 # combination of outer and inner brace types.  For another
24326 # example:
24327 #
24328 #         ( [ ..... ]  ] )
24329 #
24330 # which has an extra ] within the parens.
24331 #
24332 # The brace types have indexes 0 .. 3 which are indexes into
24333 # the matrices.
24334 #
24335 # The pair ? : are treated as just another nesting type, with ? acting
24336 # as the opening brace and : acting as the closing brace.
24337 #
24338 # The matrix
24339 #
24340 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
24341 #
24342 # saves the nesting depth of brace type $b (where $b is either of the other
24343 # nesting types) when brace type $a enters a new depth.  When this depth
24344 # decreases, a check is made that the current depth of brace types $b is
24345 # unchanged, or otherwise there must have been an error.  This can
24346 # be very useful for localizing errors, particularly when perl runs to
24347 # the end of a large file (such as this one) and announces that there
24348 # is a problem somewhere.
24349 #
24350 # A numerical sequence number is maintained for every nesting type,
24351 # so that each matching pair can be uniquely identified in a simple
24352 # way.
24353
24354 sub increase_nesting_depth {
24355     my ( $aa, $pos ) = @_;
24356
24357     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
24358     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
24359     my $bb;
24360     $current_depth[$aa]++;
24361     $total_depth++;
24362     $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
24363     my $input_line_number = $tokenizer_self->{_last_line_number};
24364     my $input_line        = $tokenizer_self->{_line_text};
24365
24366     # Sequence numbers increment by number of items.  This keeps
24367     # a unique set of numbers but still allows the relative location
24368     # of any type to be determined.
24369     $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
24370     my $seqno = $nesting_sequence_number[$aa];
24371     $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
24372
24373     $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
24374       [ $input_line_number, $input_line, $pos ];
24375
24376     for $bb ( 0 .. $#closing_brace_names ) {
24377         next if ( $bb == $aa );
24378         $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
24379     }
24380
24381     # set a flag for indenting a nested ternary statement
24382     my $indent = 0;
24383     if ( $aa == QUESTION_COLON ) {
24384         $nested_ternary_flag[ $current_depth[$aa] ] = 0;
24385         if ( $current_depth[$aa] > 1 ) {
24386             if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
24387                 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
24388                 if ( $pdepth == $total_depth - 1 ) {
24389                     $indent = 1;
24390                     $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
24391                 }
24392             }
24393         }
24394     }
24395     return ( $seqno, $indent );
24396 }
24397
24398 sub decrease_nesting_depth {
24399
24400     my ( $aa, $pos ) = @_;
24401
24402     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
24403     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
24404     my $bb;
24405     my $seqno             = 0;
24406     my $input_line_number = $tokenizer_self->{_last_line_number};
24407     my $input_line        = $tokenizer_self->{_line_text};
24408
24409     my $outdent = 0;
24410     $total_depth--;
24411     if ( $current_depth[$aa] > 0 ) {
24412
24413         # set a flag for un-indenting after seeing a nested ternary statement
24414         $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
24415         if ( $aa == QUESTION_COLON ) {
24416             $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
24417         }
24418
24419         # check that any brace types $bb contained within are balanced
24420         for $bb ( 0 .. $#closing_brace_names ) {
24421             next if ( $bb == $aa );
24422
24423             unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
24424                 $current_depth[$bb] )
24425             {
24426                 my $diff =
24427                   $current_depth[$bb] -
24428                   $depth_array[$aa][$bb][ $current_depth[$aa] ];
24429
24430                 # don't whine too many times
24431                 my $saw_brace_error = get_saw_brace_error();
24432                 if (
24433                     $saw_brace_error <= MAX_NAG_MESSAGES
24434
24435                     # if too many closing types have occured, we probably
24436                     # already caught this error
24437                     && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
24438                   )
24439                 {
24440                     interrupt_logfile();
24441                     my $rsl =
24442                       $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
24443                     my $sl  = $$rsl[0];
24444                     my $rel = [ $input_line_number, $input_line, $pos ];
24445                     my $el  = $$rel[0];
24446                     my ($ess);
24447
24448                     if ( $diff == 1 || $diff == -1 ) {
24449                         $ess = '';
24450                     }
24451                     else {
24452                         $ess = 's';
24453                     }
24454                     my $bname =
24455                       ( $diff > 0 )
24456                       ? $opening_brace_names[$bb]
24457                       : $closing_brace_names[$bb];
24458                     write_error_indicator_pair( @$rsl, '^' );
24459                     my $msg = <<"EOM";
24460 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
24461 EOM
24462
24463                     if ( $diff > 0 ) {
24464                         my $rml =
24465                           $starting_line_of_current_depth[$bb]
24466                           [ $current_depth[$bb] ];
24467                         my $ml = $$rml[0];
24468                         $msg .=
24469 "    The most recent un-matched $bname is on line $ml\n";
24470                         write_error_indicator_pair( @$rml, '^' );
24471                     }
24472                     write_error_indicator_pair( @$rel, '^' );
24473                     warning($msg);
24474                     resume_logfile();
24475                 }
24476                 increment_brace_error();
24477             }
24478         }
24479         $current_depth[$aa]--;
24480     }
24481     else {
24482
24483         my $saw_brace_error = get_saw_brace_error();
24484         if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
24485             my $msg = <<"EOM";
24486 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
24487 EOM
24488             indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
24489         }
24490         increment_brace_error();
24491     }
24492     return ( $seqno, $outdent );
24493 }
24494
24495 sub check_final_nesting_depths {
24496     my ($aa);
24497
24498     # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
24499
24500     for $aa ( 0 .. $#closing_brace_names ) {
24501
24502         if ( $current_depth[$aa] ) {
24503             my $rsl = $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
24504             my $sl  = $$rsl[0];
24505             my $msg = <<"EOM";
24506 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
24507 The most recent un-matched $opening_brace_names[$aa] is on line $sl
24508 EOM
24509             indicate_error( $msg, @$rsl, '^' );
24510             increment_brace_error();
24511         }
24512     }
24513 }
24514
24515 #########i#############################################################
24516 # Tokenizer routines for looking ahead in input stream
24517 #######################################################################
24518
24519 sub peek_ahead_for_n_nonblank_pre_tokens {
24520
24521     # returns next n pretokens if they exist
24522     # returns undef's if hits eof without seeing any pretokens
24523     # USES GLOBAL VARIABLES: $tokenizer_self
24524     my $max_pretokens = shift;
24525     my $line;
24526     my $i = 0;
24527     my ( $rpre_tokens, $rmap, $rpre_types );
24528
24529     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
24530     {
24531         $line =~ s/^\s*//;    # trim leading blanks
24532         next if ( length($line) <= 0 );    # skip blank
24533         next if ( $line =~ /^#/ );         # skip comment
24534         ( $rpre_tokens, $rmap, $rpre_types ) =
24535           pre_tokenize( $line, $max_pretokens );
24536         last;
24537     }
24538     return ( $rpre_tokens, $rpre_types );
24539 }
24540
24541 # look ahead for next non-blank, non-comment line of code
24542 sub peek_ahead_for_nonblank_token {
24543
24544     # USES GLOBAL VARIABLES: $tokenizer_self
24545     my ( $rtokens, $max_token_index ) = @_;
24546     my $line;
24547     my $i = 0;
24548
24549     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
24550     {
24551         $line =~ s/^\s*//;    # trim leading blanks
24552         next if ( length($line) <= 0 );    # skip blank
24553         next if ( $line =~ /^#/ );         # skip comment
24554         my ( $rtok, $rmap, $rtype ) =
24555           pre_tokenize( $line, 2 );        # only need 2 pre-tokens
24556         my $j = $max_token_index + 1;
24557         my $tok;
24558
24559         foreach $tok (@$rtok) {
24560             last if ( $tok =~ "\n" );
24561             $$rtokens[ ++$j ] = $tok;
24562         }
24563         last;
24564     }
24565     return $rtokens;
24566 }
24567
24568 #########i#############################################################
24569 # Tokenizer guessing routines for ambiguous situations
24570 #######################################################################
24571
24572 sub guess_if_pattern_or_conditional {
24573
24574     # this routine is called when we have encountered a ? following an
24575     # unknown bareword, and we must decide if it starts a pattern or not
24576     # input parameters:
24577     #   $i - token index of the ? starting possible pattern
24578     # output parameters:
24579     #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
24580     #   msg = a warning or diagnostic message
24581     # USES GLOBAL VARIABLES: $last_nonblank_token
24582     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
24583     my $is_pattern = 0;
24584     my $msg        = "guessing that ? after $last_nonblank_token starts a ";
24585
24586     if ( $i >= $max_token_index ) {
24587         $msg .= "conditional (no end to pattern found on the line)\n";
24588     }
24589     else {
24590         my $ibeg = $i;
24591         $i = $ibeg + 1;
24592         my $next_token = $$rtokens[$i];    # first token after ?
24593
24594         # look for a possible ending ? on this line..
24595         my $in_quote        = 1;
24596         my $quote_depth     = 0;
24597         my $quote_character = '';
24598         my $quote_pos       = 0;
24599         my $quoted_string;
24600         (
24601             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
24602             $quoted_string
24603           )
24604           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
24605             $quote_pos, $quote_depth, $max_token_index );
24606
24607         if ($in_quote) {
24608
24609             # we didn't find an ending ? on this line,
24610             # so we bias towards conditional
24611             $is_pattern = 0;
24612             $msg .= "conditional (no ending ? on this line)\n";
24613
24614             # we found an ending ?, so we bias towards a pattern
24615         }
24616         else {
24617
24618             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
24619                 $is_pattern = 1;
24620                 $msg .= "pattern (found ending ? and pattern expected)\n";
24621             }
24622             else {
24623                 $msg .= "pattern (uncertain, but found ending ?)\n";
24624             }
24625         }
24626     }
24627     return ( $is_pattern, $msg );
24628 }
24629
24630 sub guess_if_pattern_or_division {
24631
24632     # this routine is called when we have encountered a / following an
24633     # unknown bareword, and we must decide if it starts a pattern or is a
24634     # division
24635     # input parameters:
24636     #   $i - token index of the / starting possible pattern
24637     # output parameters:
24638     #   $is_pattern = 0 if probably division,  =1 if probably a pattern
24639     #   msg = a warning or diagnostic message
24640     # USES GLOBAL VARIABLES: $last_nonblank_token
24641     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
24642     my $is_pattern = 0;
24643     my $msg        = "guessing that / after $last_nonblank_token starts a ";
24644
24645     if ( $i >= $max_token_index ) {
24646         "division (no end to pattern found on the line)\n";
24647     }
24648     else {
24649         my $ibeg = $i;
24650         my $divide_expected =
24651           numerator_expected( $i, $rtokens, $max_token_index );
24652         $i = $ibeg + 1;
24653         my $next_token = $$rtokens[$i];    # first token after slash
24654
24655         # look for a possible ending / on this line..
24656         my $in_quote        = 1;
24657         my $quote_depth     = 0;
24658         my $quote_character = '';
24659         my $quote_pos       = 0;
24660         my $quoted_string;
24661         (
24662             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
24663             $quoted_string
24664           )
24665           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
24666             $quote_pos, $quote_depth, $max_token_index );
24667
24668         if ($in_quote) {
24669
24670             # we didn't find an ending / on this line,
24671             # so we bias towards division
24672             if ( $divide_expected >= 0 ) {
24673                 $is_pattern = 0;
24674                 $msg .= "division (no ending / on this line)\n";
24675             }
24676             else {
24677                 $msg        = "multi-line pattern (division not possible)\n";
24678                 $is_pattern = 1;
24679             }
24680
24681         }
24682
24683         # we found an ending /, so we bias towards a pattern
24684         else {
24685
24686             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
24687
24688                 if ( $divide_expected >= 0 ) {
24689
24690                     if ( $i - $ibeg > 60 ) {
24691                         $msg .= "division (matching / too distant)\n";
24692                         $is_pattern = 0;
24693                     }
24694                     else {
24695                         $msg .= "pattern (but division possible too)\n";
24696                         $is_pattern = 1;
24697                     }
24698                 }
24699                 else {
24700                     $is_pattern = 1;
24701                     $msg .= "pattern (division not possible)\n";
24702                 }
24703             }
24704             else {
24705
24706                 if ( $divide_expected >= 0 ) {
24707                     $is_pattern = 0;
24708                     $msg .= "division (pattern not possible)\n";
24709                 }
24710                 else {
24711                     $is_pattern = 1;
24712                     $msg .=
24713                       "pattern (uncertain, but division would not work here)\n";
24714                 }
24715             }
24716         }
24717     }
24718     return ( $is_pattern, $msg );
24719 }
24720
24721 # try to resolve here-doc vs. shift by looking ahead for
24722 # non-code or the end token (currently only looks for end token)
24723 # returns 1 if it is probably a here doc, 0 if not
24724 sub guess_if_here_doc {
24725
24726     # This is how many lines we will search for a target as part of the
24727     # guessing strategy.  It is a constant because there is probably
24728     # little reason to change it.
24729     # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
24730     # %is_constant,
24731     use constant HERE_DOC_WINDOW => 40;
24732
24733     my $next_token        = shift;
24734     my $here_doc_expected = 0;
24735     my $line;
24736     my $k   = 0;
24737     my $msg = "checking <<";
24738
24739     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
24740     {
24741         chomp $line;
24742
24743         if ( $line =~ /^$next_token$/ ) {
24744             $msg .= " -- found target $next_token ahead $k lines\n";
24745             $here_doc_expected = 1;    # got it
24746             last;
24747         }
24748         last if ( $k >= HERE_DOC_WINDOW );
24749     }
24750
24751     unless ($here_doc_expected) {
24752
24753         if ( !defined($line) ) {
24754             $here_doc_expected = -1;    # hit eof without seeing target
24755             $msg .= " -- must be shift; target $next_token not in file\n";
24756
24757         }
24758         else {                          # still unsure..taking a wild guess
24759
24760             if ( !$is_constant{$current_package}{$next_token} ) {
24761                 $here_doc_expected = 1;
24762                 $msg .=
24763                   " -- guessing it's a here-doc ($next_token not a constant)\n";
24764             }
24765             else {
24766                 $msg .=
24767                   " -- guessing it's a shift ($next_token is a constant)\n";
24768             }
24769         }
24770     }
24771     write_logfile_entry($msg);
24772     return $here_doc_expected;
24773 }
24774
24775 #########i#############################################################
24776 # Tokenizer Routines for scanning identifiers and related items
24777 #######################################################################
24778
24779 sub scan_bare_identifier_do {
24780
24781     # this routine is called to scan a token starting with an alphanumeric
24782     # variable or package separator, :: or '.
24783     # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
24784     # $last_nonblank_type,@paren_type, $paren_depth
24785
24786     my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
24787         $max_token_index )
24788       = @_;
24789     my $i_begin = $i;
24790     my $package = undef;
24791
24792     my $i_beg = $i;
24793
24794     # we have to back up one pretoken at a :: since each : is one pretoken
24795     if ( $tok eq '::' ) { $i_beg-- }
24796     if ( $tok eq '->' ) { $i_beg-- }
24797     my $pos_beg = $$rtoken_map[$i_beg];
24798     pos($input_line) = $pos_beg;
24799
24800     #  Examples:
24801     #   A::B::C
24802     #   A::
24803     #   ::A
24804     #   A'B
24805     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
24806
24807         my $pos  = pos($input_line);
24808         my $numc = $pos - $pos_beg;
24809         $tok = substr( $input_line, $pos_beg, $numc );
24810
24811         # type 'w' includes anything without leading type info
24812         # ($,%,@,*) including something like abc::def::ghi
24813         $type = 'w';
24814
24815         my $sub_name = "";
24816         if ( defined($2) ) { $sub_name = $2; }
24817         if ( defined($1) ) {
24818             $package = $1;
24819
24820             # patch: don't allow isolated package name which just ends
24821             # in the old style package separator (single quote).  Example:
24822             #   use CGI':all';
24823             if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
24824                 $pos--;
24825             }
24826
24827             $package =~ s/\'/::/g;
24828             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
24829             $package =~ s/::$//;
24830         }
24831         else {
24832             $package = $current_package;
24833
24834             if ( $is_keyword{$tok} ) {
24835                 $type = 'k';
24836             }
24837         }
24838
24839         # if it is a bareword..
24840         if ( $type eq 'w' ) {
24841
24842             # check for v-string with leading 'v' type character
24843             # (This seems to have presidence over filehandle, type 'Y')
24844             if ( $tok =~ /^v\d[_\d]*$/ ) {
24845
24846                 # we only have the first part - something like 'v101' -
24847                 # look for more
24848                 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
24849                     $pos  = pos($input_line);
24850                     $numc = $pos - $pos_beg;
24851                     $tok  = substr( $input_line, $pos_beg, $numc );
24852                 }
24853                 $type = 'v';
24854
24855                 # warn if this version can't handle v-strings
24856                 report_v_string($tok);
24857             }
24858
24859             elsif ( $is_constant{$package}{$sub_name} ) {
24860                 $type = 'C';
24861             }
24862
24863             # bareword after sort has implied empty prototype; for example:
24864             # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
24865             # This has priority over whatever the user has specified.
24866             elsif ($last_nonblank_token eq 'sort'
24867                 && $last_nonblank_type eq 'k' )
24868             {
24869                 $type = 'Z';
24870             }
24871
24872             # Note: strangely, perl does not seem to really let you create
24873             # functions which act like eval and do, in the sense that eval
24874             # and do may have operators following the final }, but any operators
24875             # that you create with prototype (&) apparently do not allow
24876             # trailing operators, only terms.  This seems strange.
24877             # If this ever changes, here is the update
24878             # to make perltidy behave accordingly:
24879
24880             # elsif ( $is_block_function{$package}{$tok} ) {
24881             #    $tok='eval'; # patch to do braces like eval  - doesn't work
24882             #    $type = 'k';
24883             #}
24884             # FIXME: This could become a separate type to allow for different
24885             # future behavior:
24886             elsif ( $is_block_function{$package}{$sub_name} ) {
24887                 $type = 'G';
24888             }
24889
24890             elsif ( $is_block_list_function{$package}{$sub_name} ) {
24891                 $type = 'G';
24892             }
24893             elsif ( $is_user_function{$package}{$sub_name} ) {
24894                 $type      = 'U';
24895                 $prototype = $user_function_prototype{$package}{$sub_name};
24896             }
24897
24898             # check for indirect object
24899             elsif (
24900
24901                 # added 2001-03-27: must not be followed immediately by '('
24902                 # see fhandle.t
24903                 ( $input_line !~ m/\G\(/gc )
24904
24905                 # and
24906                 && (
24907
24908                     # preceded by keyword like 'print', 'printf' and friends
24909                     $is_indirect_object_taker{$last_nonblank_token}
24910
24911                     # or preceded by something like 'print(' or 'printf('
24912                     || (
24913                         ( $last_nonblank_token eq '(' )
24914                         && $is_indirect_object_taker{ $paren_type[$paren_depth]
24915                         }
24916
24917                     )
24918                 )
24919               )
24920             {
24921
24922                 # may not be indirect object unless followed by a space
24923                 if ( $input_line =~ m/\G\s+/gc ) {
24924                     $type = 'Y';
24925
24926                     # Abandon Hope ...
24927                     # Perl's indirect object notation is a very bad
24928                     # thing and can cause subtle bugs, especially for
24929                     # beginning programmers.  And I haven't even been
24930                     # able to figure out a sane warning scheme which
24931                     # doesn't get in the way of good scripts.
24932
24933                     # Complain if a filehandle has any lower case
24934                     # letters.  This is suggested good practice, but the
24935                     # main reason for this warning is that prior to
24936                     # release 20010328, perltidy incorrectly parsed a
24937                     # function call after a print/printf, with the
24938                     # result that a space got added before the opening
24939                     # paren, thereby converting the function name to a
24940                     # filehandle according to perl's weird rules.  This
24941                     # will not usually generate a syntax error, so this
24942                     # is a potentially serious bug.  By warning
24943                     # of filehandles with any lower case letters,
24944                     # followed by opening parens, we will help the user
24945                     # find almost all of these older errors.
24946                     # use 'sub_name' because something like
24947                     # main::MYHANDLE is ok for filehandle
24948                     if ( $sub_name =~ /[a-z]/ ) {
24949
24950                         # could be bug caused by older perltidy if
24951                         # followed by '('
24952                         if ( $input_line =~ m/\G\s*\(/gc ) {
24953                             complain(
24954 "Caution: unknown word '$tok' in indirect object slot\n"
24955                             );
24956                         }
24957                     }
24958                 }
24959
24960                 # bareword not followed by a space -- may not be filehandle
24961                 # (may be function call defined in a 'use' statement)
24962                 else {
24963                     $type = 'Z';
24964                 }
24965             }
24966         }
24967
24968         # Now we must convert back from character position
24969         # to pre_token index.
24970         # I don't think an error flag can occur here ..but who knows
24971         my $error;
24972         ( $i, $error ) =
24973           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
24974         if ($error) {
24975             warning("scan_bare_identifier: Possibly invalid tokenization\n");
24976         }
24977     }
24978
24979     # no match but line not blank - could be syntax error
24980     # perl will take '::' alone without complaint
24981     else {
24982         $type = 'w';
24983
24984         # change this warning to log message if it becomes annoying
24985         warning("didn't find identifier after leading ::\n");
24986     }
24987     return ( $i, $tok, $type, $prototype );
24988 }
24989
24990 sub scan_id_do {
24991
24992 # This is the new scanner and will eventually replace scan_identifier.
24993 # Only type 'sub' and 'package' are implemented.
24994 # Token types $ * % @ & -> are not yet implemented.
24995 #
24996 # Scan identifier following a type token.
24997 # The type of call depends on $id_scan_state: $id_scan_state = ''
24998 # for starting call, in which case $tok must be the token defining
24999 # the type.
25000 #
25001 # If the type token is the last nonblank token on the line, a value
25002 # of $id_scan_state = $tok is returned, indicating that further
25003 # calls must be made to get the identifier.  If the type token is
25004 # not the last nonblank token on the line, the identifier is
25005 # scanned and handled and a value of '' is returned.
25006 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
25007 # $statement_type, $tokenizer_self
25008
25009     my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
25010         $max_token_index )
25011       = @_;
25012     my $type = '';
25013     my ( $i_beg, $pos_beg );
25014
25015     #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
25016     #my ($a,$b,$c) = caller;
25017     #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
25018
25019     # on re-entry, start scanning at first token on the line
25020     if ($id_scan_state) {
25021         $i_beg = $i;
25022         $type  = '';
25023     }
25024
25025     # on initial entry, start scanning just after type token
25026     else {
25027         $i_beg         = $i + 1;
25028         $id_scan_state = $tok;
25029         $type          = 't';
25030     }
25031
25032     # find $i_beg = index of next nonblank token,
25033     # and handle empty lines
25034     my $blank_line          = 0;
25035     my $next_nonblank_token = $$rtokens[$i_beg];
25036     if ( $i_beg > $max_token_index ) {
25037         $blank_line = 1;
25038     }
25039     else {
25040
25041         # only a '#' immediately after a '$' is not a comment
25042         if ( $next_nonblank_token eq '#' ) {
25043             unless ( $tok eq '$' ) {
25044                 $blank_line = 1;
25045             }
25046         }
25047
25048         if ( $next_nonblank_token =~ /^\s/ ) {
25049             ( $next_nonblank_token, $i_beg ) =
25050               find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
25051                 $max_token_index );
25052             if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
25053                 $blank_line = 1;
25054             }
25055         }
25056     }
25057
25058     # handle non-blank line; identifier, if any, must follow
25059     unless ($blank_line) {
25060
25061         if ( $id_scan_state eq 'sub' ) {
25062             ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
25063                 $input_line, $i,             $i_beg,
25064                 $tok,        $type,          $rtokens,
25065                 $rtoken_map, $id_scan_state, $max_token_index
25066             );
25067         }
25068
25069         elsif ( $id_scan_state eq 'package' ) {
25070             ( $i, $tok, $type ) =
25071               do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
25072                 $rtoken_map, $max_token_index );
25073             $id_scan_state = '';
25074         }
25075
25076         else {
25077             warning("invalid token in scan_id: $tok\n");
25078             $id_scan_state = '';
25079         }
25080     }
25081
25082     if ( $id_scan_state && ( !defined($type) || !$type ) ) {
25083
25084         # shouldn't happen:
25085         warning(
25086 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
25087         );
25088         report_definite_bug();
25089     }
25090
25091     TOKENIZER_DEBUG_FLAG_NSCAN && do {
25092         print
25093           "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
25094     };
25095     return ( $i, $tok, $type, $id_scan_state );
25096 }
25097
25098 sub check_prototype {
25099     my ( $proto, $package, $subname ) = @_;
25100     return unless ( defined($package) && defined($subname) );
25101     if ( defined($proto) ) {
25102         $proto =~ s/^\s*\(\s*//;
25103         $proto =~ s/\s*\)$//;
25104         if ($proto) {
25105             $is_user_function{$package}{$subname}        = 1;
25106             $user_function_prototype{$package}{$subname} = "($proto)";
25107
25108             # prototypes containing '&' must be treated specially..
25109             if ( $proto =~ /\&/ ) {
25110
25111                 # right curly braces of prototypes ending in
25112                 # '&' may be followed by an operator
25113                 if ( $proto =~ /\&$/ ) {
25114                     $is_block_function{$package}{$subname} = 1;
25115                 }
25116
25117                 # right curly braces of prototypes NOT ending in
25118                 # '&' may NOT be followed by an operator
25119                 elsif ( $proto !~ /\&$/ ) {
25120                     $is_block_list_function{$package}{$subname} = 1;
25121                 }
25122             }
25123         }
25124         else {
25125             $is_constant{$package}{$subname} = 1;
25126         }
25127     }
25128     else {
25129         $is_user_function{$package}{$subname} = 1;
25130     }
25131 }
25132
25133 sub do_scan_package {
25134
25135     # do_scan_package parses a package name
25136     # it is called with $i_beg equal to the index of the first nonblank
25137     # token following a 'package' token.
25138     # USES GLOBAL VARIABLES: $current_package,
25139
25140     my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
25141         $max_token_index )
25142       = @_;
25143     my $package = undef;
25144     my $pos_beg = $$rtoken_map[$i_beg];
25145     pos($input_line) = $pos_beg;
25146
25147     # handle non-blank line; package name, if any, must follow
25148     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
25149         $package = $1;
25150         $package = ( defined($1) && $1 ) ? $1 : 'main';
25151         $package =~ s/\'/::/g;
25152         if ( $package =~ /^\:/ ) { $package = 'main' . $package }
25153         $package =~ s/::$//;
25154         my $pos  = pos($input_line);
25155         my $numc = $pos - $pos_beg;
25156         $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
25157         $type = 'i';
25158
25159         # Now we must convert back from character position
25160         # to pre_token index.
25161         # I don't think an error flag can occur here ..but ?
25162         my $error;
25163         ( $i, $error ) =
25164           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25165         if ($error) { warning("Possibly invalid package\n") }
25166         $current_package = $package;
25167
25168         # check for error
25169         my ( $next_nonblank_token, $i_next ) =
25170           find_next_nonblank_token( $i, $rtokens, $max_token_index );
25171         if ( $next_nonblank_token !~ /^[;\}]$/ ) {
25172             warning(
25173                 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
25174             );
25175         }
25176     }
25177
25178     # no match but line not blank --
25179     # could be a label with name package, like package:  , for example.
25180     else {
25181         $type = 'k';
25182     }
25183
25184     return ( $i, $tok, $type );
25185 }
25186
25187 sub scan_identifier_do {
25188
25189     # This routine assembles tokens into identifiers.  It maintains a
25190     # scan state, id_scan_state.  It updates id_scan_state based upon
25191     # current id_scan_state and token, and returns an updated
25192     # id_scan_state and the next index after the identifier.
25193     # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
25194     # $last_nonblank_type
25195
25196     my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
25197     my $i_begin   = $i;
25198     my $type      = '';
25199     my $tok_begin = $$rtokens[$i_begin];
25200     if ( $tok_begin eq ':' ) { $tok_begin = '::' }
25201     my $id_scan_state_begin = $id_scan_state;
25202     my $identifier_begin    = $identifier;
25203     my $tok                 = $tok_begin;
25204     my $message             = "";
25205
25206     # these flags will be used to help figure out the type:
25207     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
25208     my $saw_type;
25209
25210     # allow old package separator (') except in 'use' statement
25211     my $allow_tick = ( $last_nonblank_token ne 'use' );
25212
25213     # get started by defining a type and a state if necessary
25214     unless ($id_scan_state) {
25215         $context = UNKNOWN_CONTEXT;
25216
25217         # fixup for digraph
25218         if ( $tok eq '>' ) {
25219             $tok       = '->';
25220             $tok_begin = $tok;
25221         }
25222         $identifier = $tok;
25223
25224         if ( $tok eq '$' || $tok eq '*' ) {
25225             $id_scan_state = '$';
25226             $context       = SCALAR_CONTEXT;
25227         }
25228         elsif ( $tok eq '%' || $tok eq '@' ) {
25229             $id_scan_state = '$';
25230             $context       = LIST_CONTEXT;
25231         }
25232         elsif ( $tok eq '&' ) {
25233             $id_scan_state = '&';
25234         }
25235         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
25236             $saw_alpha     = 0;     # 'sub' is considered type info here
25237             $id_scan_state = '$';
25238             $identifier .= ' ';     # need a space to separate sub from sub name
25239         }
25240         elsif ( $tok eq '::' ) {
25241             $id_scan_state = 'A';
25242         }
25243         elsif ( $tok =~ /^[A-Za-z_]/ ) {
25244             $id_scan_state = ':';
25245         }
25246         elsif ( $tok eq '->' ) {
25247             $id_scan_state = '$';
25248         }
25249         else {
25250
25251             # shouldn't happen
25252             my ( $a, $b, $c ) = caller;
25253             warning("Program Bug: scan_identifier given bad token = $tok \n");
25254             warning("   called from sub $a  line: $c\n");
25255             report_definite_bug();
25256         }
25257         $saw_type = !$saw_alpha;
25258     }
25259     else {
25260         $i--;
25261         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
25262     }
25263
25264     # now loop to gather the identifier
25265     my $i_save = $i;
25266
25267     while ( $i < $max_token_index ) {
25268         $i_save = $i unless ( $tok =~ /^\s*$/ );
25269         $tok = $$rtokens[ ++$i ];
25270
25271         if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
25272             $tok = '::';
25273             $i++;
25274         }
25275
25276         if ( $id_scan_state eq '$' ) {    # starting variable name
25277
25278             if ( $tok eq '$' ) {
25279
25280                 $identifier .= $tok;
25281
25282                 # we've got a punctuation variable if end of line (punct.t)
25283                 if ( $i == $max_token_index ) {
25284                     $type          = 'i';
25285                     $id_scan_state = '';
25286                     last;
25287                 }
25288             }
25289             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
25290                 $saw_alpha     = 1;
25291                 $id_scan_state = ':';           # now need ::
25292                 $identifier .= $tok;
25293             }
25294             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
25295                 $saw_alpha     = 1;
25296                 $id_scan_state = ':';                 # now need ::
25297                 $identifier .= $tok;
25298
25299                 # Perl will accept leading digits in identifiers,
25300                 # although they may not always produce useful results.
25301                 # Something like $main::0 is ok.  But this also works:
25302                 #
25303                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
25304                 #  howdy::123::bubba();
25305                 #
25306             }
25307             elsif ( $tok =~ /^[0-9]/ ) {              # numeric
25308                 $saw_alpha     = 1;
25309                 $id_scan_state = ':';                 # now need ::
25310                 $identifier .= $tok;
25311             }
25312             elsif ( $tok eq '::' ) {
25313                 $id_scan_state = 'A';
25314                 $identifier .= $tok;
25315             }
25316             elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
25317                 $identifier .= $tok;    # keep same state, a $ could follow
25318             }
25319             elsif ( $tok eq '{' ) {
25320
25321                 # check for something like ${#} or ${©}
25322                 if (   $identifier eq '$'
25323                     && $i + 2 <= $max_token_index
25324                     && $$rtokens[ $i + 2 ] eq '}'
25325                     && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
25326                 {
25327                     my $next2 = $$rtokens[ $i + 2 ];
25328                     my $next1 = $$rtokens[ $i + 1 ];
25329                     $identifier .= $tok . $next1 . $next2;
25330                     $i += 2;
25331                     $id_scan_state = '';
25332                     last;
25333                 }
25334
25335                 # skip something like ${xxx} or ->{
25336                 $id_scan_state = '';
25337
25338                 # if this is the first token of a line, any tokens for this
25339                 # identifier have already been accumulated
25340                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
25341                 $i = $i_save;
25342                 last;
25343             }
25344
25345             # space ok after leading $ % * & @
25346             elsif ( $tok =~ /^\s*$/ ) {
25347
25348                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
25349
25350                     if ( length($identifier) > 1 ) {
25351                         $id_scan_state = '';
25352                         $i             = $i_save;
25353                         $type          = 'i';    # probably punctuation variable
25354                         last;
25355                     }
25356                     else {
25357
25358                         # spaces after $'s are common, and space after @
25359                         # is harmless, so only complain about space
25360                         # after other type characters. Space after $ and
25361                         # @ will be removed in formatting.  Report space
25362                         # after % and * because they might indicate a
25363                         # parsing error.  In other words '% ' might be a
25364                         # modulo operator.  Delete this warning if it
25365                         # gets annoying.
25366                         if ( $identifier !~ /^[\@\$]$/ ) {
25367                             $message =
25368                               "Space in identifier, following $identifier\n";
25369                         }
25370                     }
25371                 }
25372
25373                 # else:
25374                 # space after '->' is ok
25375             }
25376             elsif ( $tok eq '^' ) {
25377
25378                 # check for some special variables like $^W
25379                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
25380                     $identifier .= $tok;
25381                     $id_scan_state = 'A';
25382
25383                     # Perl accepts '$^]' or '@^]', but
25384                     # there must not be a space before the ']'.
25385                     my $next1 = $$rtokens[ $i + 1 ];
25386                     if ( $next1 eq ']' ) {
25387                         $i++;
25388                         $identifier .= $next1;
25389                         $id_scan_state = "";
25390                         last;
25391                     }
25392                 }
25393                 else {
25394                     $id_scan_state = '';
25395                 }
25396             }
25397             else {    # something else
25398
25399                 # check for various punctuation variables
25400                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
25401                     $identifier .= $tok;
25402                 }
25403
25404                 elsif ( $identifier eq '$#' ) {
25405
25406                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
25407
25408                     # perl seems to allow just these: $#: $#- $#+
25409                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
25410                         $type = 'i';
25411                         $identifier .= $tok;
25412                     }
25413                     else {
25414                         $i = $i_save;
25415                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
25416                     }
25417                 }
25418                 elsif ( $identifier eq '$$' ) {
25419
25420                     # perl does not allow references to punctuation
25421                     # variables without braces.  For example, this
25422                     # won't work:
25423                     #  $:=\4;
25424                     #  $a = $$:;
25425                     # You would have to use
25426                     #  $a = ${$:};
25427
25428                     $i = $i_save;
25429                     if   ( $tok eq '{' ) { $type = 't' }
25430                     else                 { $type = 'i' }
25431                 }
25432                 elsif ( $identifier eq '->' ) {
25433                     $i = $i_save;
25434                 }
25435                 else {
25436                     $i = $i_save;
25437                     if ( length($identifier) == 1 ) { $identifier = ''; }
25438                 }
25439                 $id_scan_state = '';
25440                 last;
25441             }
25442         }
25443         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
25444
25445             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
25446                 $id_scan_state = ':';          # now need ::
25447                 $saw_alpha     = 1;
25448                 $identifier .= $tok;
25449             }
25450             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
25451                 $id_scan_state = ':';                 # now need ::
25452                 $saw_alpha     = 1;
25453                 $identifier .= $tok;
25454             }
25455             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
25456                 $id_scan_state = ':';       # now need ::
25457                 $saw_alpha     = 1;
25458                 $identifier .= $tok;
25459             }
25460             elsif ( $tok =~ /^\s*$/ ) {     # allow space
25461             }
25462             elsif ( $tok eq '::' ) {        # leading ::
25463                 $id_scan_state = 'A';       # accept alpha next
25464                 $identifier .= $tok;
25465             }
25466             elsif ( $tok eq '{' ) {
25467                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
25468                 $i             = $i_save;
25469                 $id_scan_state = '';
25470                 last;
25471             }
25472             else {
25473
25474                 # punctuation variable?
25475                 # testfile: cunningham4.pl
25476                 if ( $identifier eq '&' ) {
25477                     $identifier .= $tok;
25478                 }
25479                 else {
25480                     $identifier = '';
25481                     $i          = $i_save;
25482                     $type       = '&';
25483                 }
25484                 $id_scan_state = '';
25485                 last;
25486             }
25487         }
25488         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
25489
25490             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
25491                 $identifier .= $tok;
25492                 $id_scan_state = ':';        # now need ::
25493                 $saw_alpha     = 1;
25494             }
25495             elsif ( $tok eq "'" && $allow_tick ) {
25496                 $identifier .= $tok;
25497                 $id_scan_state = ':';        # now need ::
25498                 $saw_alpha     = 1;
25499             }
25500             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
25501                 $identifier .= $tok;
25502                 $id_scan_state = ':';        # now need ::
25503                 $saw_alpha     = 1;
25504             }
25505             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
25506                 $id_scan_state = '(';
25507                 $identifier .= $tok;
25508             }
25509             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
25510                 $id_scan_state = ')';
25511                 $identifier .= $tok;
25512             }
25513             else {
25514                 $id_scan_state = '';
25515                 $i             = $i_save;
25516                 last;
25517             }
25518         }
25519         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
25520
25521             if ( $tok eq '::' ) {            # got it
25522                 $identifier .= $tok;
25523                 $id_scan_state = 'A';        # now require alpha
25524             }
25525             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
25526                 $identifier .= $tok;
25527                 $id_scan_state = ':';           # now need ::
25528                 $saw_alpha     = 1;
25529             }
25530             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
25531                 $identifier .= $tok;
25532                 $id_scan_state = ':';           # now need ::
25533                 $saw_alpha     = 1;
25534             }
25535             elsif ( $tok eq "'" && $allow_tick ) {    # tick
25536
25537                 if ( $is_keyword{$identifier} ) {
25538                     $id_scan_state = '';              # that's all
25539                     $i             = $i_save;
25540                 }
25541                 else {
25542                     $identifier .= $tok;
25543                 }
25544             }
25545             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
25546                 $id_scan_state = '(';
25547                 $identifier .= $tok;
25548             }
25549             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
25550                 $id_scan_state = ')';
25551                 $identifier .= $tok;
25552             }
25553             else {
25554                 $id_scan_state = '';        # that's all
25555                 $i             = $i_save;
25556                 last;
25557             }
25558         }
25559         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
25560
25561             if ( $tok eq '(' ) {             # got it
25562                 $identifier .= $tok;
25563                 $id_scan_state = ')';        # now find the end of it
25564             }
25565             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
25566                 $identifier .= $tok;
25567             }
25568             else {
25569                 $id_scan_state = '';         # that's all - no prototype
25570                 $i             = $i_save;
25571                 last;
25572             }
25573         }
25574         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
25575
25576             if ( $tok eq ')' ) {             # got it
25577                 $identifier .= $tok;
25578                 $id_scan_state = '';         # all done
25579                 last;
25580             }
25581             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
25582                 $identifier .= $tok;
25583             }
25584             else {    # probable error in script, but keep going
25585                 warning("Unexpected '$tok' while seeking end of prototype\n");
25586                 $identifier .= $tok;
25587             }
25588         }
25589         else {        # can get here due to error in initialization
25590             $id_scan_state = '';
25591             $i             = $i_save;
25592             last;
25593         }
25594     }
25595
25596     if ( $id_scan_state eq ')' ) {
25597         warning("Hit end of line while seeking ) to end prototype\n");
25598     }
25599
25600     # once we enter the actual identifier, it may not extend beyond
25601     # the end of the current line
25602     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
25603         $id_scan_state = '';
25604     }
25605     if ( $i < 0 ) { $i = 0 }
25606
25607     unless ($type) {
25608
25609         if ($saw_type) {
25610
25611             if ($saw_alpha) {
25612                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
25613                     $type = 'w';
25614                 }
25615                 else { $type = 'i' }
25616             }
25617             elsif ( $identifier eq '->' ) {
25618                 $type = '->';
25619             }
25620             elsif (
25621                 ( length($identifier) > 1 )
25622
25623                 # In something like '@$=' we have an identifier '@$'
25624                 # In something like '$${' we have type '$$' (and only
25625                 # part of an identifier)
25626                 && !( $identifier =~ /\$$/ && $tok eq '{' )
25627                 && ( $identifier !~ /^(sub |package )$/ )
25628               )
25629             {
25630                 $type = 'i';
25631             }
25632             else { $type = 't' }
25633         }
25634         elsif ($saw_alpha) {
25635
25636             # type 'w' includes anything without leading type info
25637             # ($,%,@,*) including something like abc::def::ghi
25638             $type = 'w';
25639         }
25640         else {
25641             $type = '';
25642         }    # this can happen on a restart
25643     }
25644
25645     if ($identifier) {
25646         $tok = $identifier;
25647         if ($message) { write_logfile_entry($message) }
25648     }
25649     else {
25650         $tok = $tok_begin;
25651         $i   = $i_begin;
25652     }
25653
25654     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
25655         my ( $a, $b, $c ) = caller;
25656         print
25657 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
25658         print
25659 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
25660     };
25661     return ( $i, $tok, $type, $id_scan_state, $identifier );
25662 }
25663
25664 {
25665
25666     # saved package and subnames in case prototype is on separate line
25667     my ( $package_saved, $subname_saved );
25668
25669     sub do_scan_sub {
25670
25671         # do_scan_sub parses a sub name and prototype
25672         # it is called with $i_beg equal to the index of the first nonblank
25673         # token following a 'sub' token.
25674
25675         # TODO: add future error checks to be sure we have a valid
25676         # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
25677         # a name is given if and only if a non-anonymous sub is
25678         # appropriate.
25679         # USES GLOBAL VARS: $current_package, $last_nonblank_token,
25680         # $in_attribute_list, %saw_function_definition,
25681         # $statement_type
25682
25683         my (
25684             $input_line, $i,             $i_beg,
25685             $tok,        $type,          $rtokens,
25686             $rtoken_map, $id_scan_state, $max_token_index
25687         ) = @_;
25688         $id_scan_state = "";    # normally we get everything in one call
25689         my $subname = undef;
25690         my $package = undef;
25691         my $proto   = undef;
25692         my $attrs   = undef;
25693         my $match;
25694
25695         my $pos_beg = $$rtoken_map[$i_beg];
25696         pos($input_line) = $pos_beg;
25697
25698         # sub NAME PROTO ATTRS
25699         if (
25700             $input_line =~ m/\G\s*
25701         ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
25702         (\w+)               # NAME    - required
25703         (\s*\([^){]*\))?    # PROTO   - something in parens
25704         (\s*:)?             # ATTRS   - leading : of attribute list
25705         /gcx
25706           )
25707         {
25708             $match   = 1;
25709             $subname = $2;
25710             $proto   = $3;
25711             $attrs   = $4;
25712
25713             $package = ( defined($1) && $1 ) ? $1 : $current_package;
25714             $package =~ s/\'/::/g;
25715             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
25716             $package =~ s/::$//;
25717             my $pos  = pos($input_line);
25718             my $numc = $pos - $pos_beg;
25719             $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
25720             $type = 'i';
25721         }
25722
25723         # Look for prototype/attributes not preceded on this line by subname;
25724         # This might be an anonymous sub with attributes,
25725         # or a prototype on a separate line from its sub name
25726         elsif (
25727             $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
25728             (\s*:)?                              # ATTRS leading ':'
25729             /gcx
25730             && ( $1 || $2 )
25731           )
25732         {
25733             $match = 1;
25734             $proto = $1;
25735             $attrs = $2;
25736
25737             # Handle prototype on separate line from subname
25738             if ($subname_saved) {
25739                 $package = $package_saved;
25740                 $subname = $subname_saved;
25741                 $tok     = $last_nonblank_token;
25742             }
25743             $type = 'i';
25744         }
25745
25746         if ($match) {
25747
25748             # ATTRS: if there are attributes, back up and let the ':' be
25749             # found later by the scanner.
25750             my $pos = pos($input_line);
25751             if ($attrs) {
25752                 $pos -= length($attrs);
25753             }
25754
25755             my $next_nonblank_token = $tok;
25756
25757             # catch case of line with leading ATTR ':' after anonymous sub
25758             if ( $pos == $pos_beg && $tok eq ':' ) {
25759                 $type              = 'A';
25760                 $in_attribute_list = 1;
25761             }
25762
25763             # We must convert back from character position
25764             # to pre_token index.
25765             else {
25766
25767                 # I don't think an error flag can occur here ..but ?
25768                 my $error;
25769                 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
25770                     $max_token_index );
25771                 if ($error) { warning("Possibly invalid sub\n") }
25772
25773                 # check for multiple definitions of a sub
25774                 ( $next_nonblank_token, my $i_next ) =
25775                   find_next_nonblank_token_on_this_line( $i, $rtokens,
25776                     $max_token_index );
25777             }
25778
25779             if ( $next_nonblank_token =~ /^(\s*|#)$/ )
25780             {    # skip blank or side comment
25781                 my ( $rpre_tokens, $rpre_types ) =
25782                   peek_ahead_for_n_nonblank_pre_tokens(1);
25783                 if ( defined($rpre_tokens) && @$rpre_tokens ) {
25784                     $next_nonblank_token = $rpre_tokens->[0];
25785                 }
25786                 else {
25787                     $next_nonblank_token = '}';
25788                 }
25789             }
25790             $package_saved = "";
25791             $subname_saved = "";
25792             if ( $next_nonblank_token eq '{' ) {
25793                 if ($subname) {
25794
25795                     # Check for multiple definitions of a sub, but
25796                     # it is ok to have multiple sub BEGIN, etc,
25797                     # so we do not complain if name is all caps
25798                     if (   $saw_function_definition{$package}{$subname}
25799                         && $subname !~ /^[A-Z]+$/ )
25800                     {
25801                         my $lno = $saw_function_definition{$package}{$subname};
25802                         warning(
25803 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
25804                         );
25805                     }
25806                     $saw_function_definition{$package}{$subname} =
25807                       $tokenizer_self->{_last_line_number};
25808                 }
25809             }
25810             elsif ( $next_nonblank_token eq ';' ) {
25811             }
25812             elsif ( $next_nonblank_token eq '}' ) {
25813             }
25814
25815             # ATTRS - if an attribute list follows, remember the name
25816             # of the sub so the next opening brace can be labeled.
25817             # Setting 'statement_type' causes any ':'s to introduce
25818             # attributes.
25819             elsif ( $next_nonblank_token eq ':' ) {
25820                 $statement_type = $tok;
25821             }
25822
25823             # see if PROTO follows on another line:
25824             elsif ( $next_nonblank_token eq '(' ) {
25825                 if ( $attrs || $proto ) {
25826                     warning(
25827 "unexpected '(' after definition or declaration of sub '$subname'\n"
25828                     );
25829                 }
25830                 else {
25831                     $id_scan_state  = 'sub';    # we must come back to get proto
25832                     $statement_type = $tok;
25833                     $package_saved  = $package;
25834                     $subname_saved  = $subname;
25835                 }
25836             }
25837             elsif ($next_nonblank_token) {      # EOF technically ok
25838                 warning(
25839 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
25840                 );
25841             }
25842             check_prototype( $proto, $package, $subname );
25843         }
25844
25845         # no match but line not blank
25846         else {
25847         }
25848         return ( $i, $tok, $type, $id_scan_state );
25849     }
25850 }
25851
25852 #########i###############################################################
25853 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
25854 #########################################################################
25855
25856 sub find_next_nonblank_token {
25857     my ( $i, $rtokens, $max_token_index ) = @_;
25858
25859     if ( $i >= $max_token_index ) {
25860         if ( !peeked_ahead() ) {
25861             peeked_ahead(1);
25862             $rtokens =
25863               peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
25864         }
25865     }
25866     my $next_nonblank_token = $$rtokens[ ++$i ];
25867
25868     if ( $next_nonblank_token =~ /^\s*$/ ) {
25869         $next_nonblank_token = $$rtokens[ ++$i ];
25870     }
25871     return ( $next_nonblank_token, $i );
25872 }
25873
25874 sub numerator_expected {
25875
25876     # this is a filter for a possible numerator, in support of guessing
25877     # for the / pattern delimiter token.
25878     # returns -
25879     #   1 - yes
25880     #   0 - can't tell
25881     #  -1 - no
25882     # Note: I am using the convention that variables ending in
25883     # _expected have these 3 possible values.
25884     my ( $i, $rtokens, $max_token_index ) = @_;
25885     my $next_token = $$rtokens[ $i + 1 ];
25886     if ( $next_token eq '=' ) { $i++; }    # handle /=
25887     my ( $next_nonblank_token, $i_next ) =
25888       find_next_nonblank_token( $i, $rtokens, $max_token_index );
25889
25890     if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
25891         1;
25892     }
25893     else {
25894
25895         if ( $next_nonblank_token =~ /^\s*$/ ) {
25896             0;
25897         }
25898         else {
25899             -1;
25900         }
25901     }
25902 }
25903
25904 sub pattern_expected {
25905
25906     # This is the start of a filter for a possible pattern.
25907     # It looks at the token after a possbible pattern and tries to
25908     # determine if that token could end a pattern.
25909     # returns -
25910     #   1 - yes
25911     #   0 - can't tell
25912     #  -1 - no
25913     my ( $i, $rtokens, $max_token_index ) = @_;
25914     my $next_token = $$rtokens[ $i + 1 ];
25915     if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
25916     my ( $next_nonblank_token, $i_next ) =
25917       find_next_nonblank_token( $i, $rtokens, $max_token_index );
25918
25919     # list of tokens which may follow a pattern
25920     # (can probably be expanded)
25921     if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
25922     {
25923         1;
25924     }
25925     else {
25926
25927         if ( $next_nonblank_token =~ /^\s*$/ ) {
25928             0;
25929         }
25930         else {
25931             -1;
25932         }
25933     }
25934 }
25935
25936 sub find_next_nonblank_token_on_this_line {
25937     my ( $i, $rtokens, $max_token_index ) = @_;
25938     my $next_nonblank_token;
25939
25940     if ( $i < $max_token_index ) {
25941         $next_nonblank_token = $$rtokens[ ++$i ];
25942
25943         if ( $next_nonblank_token =~ /^\s*$/ ) {
25944
25945             if ( $i < $max_token_index ) {
25946                 $next_nonblank_token = $$rtokens[ ++$i ];
25947             }
25948         }
25949     }
25950     else {
25951         $next_nonblank_token = "";
25952     }
25953     return ( $next_nonblank_token, $i );
25954 }
25955
25956 sub find_angle_operator_termination {
25957
25958     # We are looking at a '<' and want to know if it is an angle operator.
25959     # We are to return:
25960     #   $i = pretoken index of ending '>' if found, current $i otherwise
25961     #   $type = 'Q' if found, '>' otherwise
25962     my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
25963     my $i    = $i_beg;
25964     my $type = '<';
25965     pos($input_line) = 1 + $$rtoken_map[$i];
25966
25967     my $filter;
25968
25969     # we just have to find the next '>' if a term is expected
25970     if ( $expecting == TERM ) { $filter = '[\>]' }
25971
25972     # we have to guess if we don't know what is expected
25973     elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
25974
25975     # shouldn't happen - we shouldn't be here if operator is expected
25976     else { warning("Program Bug in find_angle_operator_termination\n") }
25977
25978     # To illustrate what we might be looking at, in case we are
25979     # guessing, here are some examples of valid angle operators
25980     # (or file globs):
25981     #  <tmp_imp/*>
25982     #  <FH>
25983     #  <$fh>
25984     #  <*.c *.h>
25985     #  <_>
25986     #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
25987     #  <${PREFIX}*img*.$IMAGE_TYPE>
25988     #  <img*.$IMAGE_TYPE>
25989     #  <Timg*.$IMAGE_TYPE>
25990     #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
25991     #
25992     # Here are some examples of lines which do not have angle operators:
25993     #  return undef unless $self->[2]++ < $#{$self->[1]};
25994     #  < 2  || @$t >
25995     #
25996     # the following line from dlister.pl caused trouble:
25997     #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
25998     #
25999     # If the '<' starts an angle operator, it must end on this line and
26000     # it must not have certain characters like ';' and '=' in it.  I use
26001     # this to limit the testing.  This filter should be improved if
26002     # possible.
26003
26004     if ( $input_line =~ /($filter)/g ) {
26005
26006         if ( $1 eq '>' ) {
26007
26008             # We MAY have found an angle operator termination if we get
26009             # here, but we need to do more to be sure we haven't been
26010             # fooled.
26011             my $pos = pos($input_line);
26012
26013             my $pos_beg = $$rtoken_map[$i];
26014             my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
26015
26016             # Reject if the closing '>' follows a '-' as in:
26017             # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
26018             if ( $expecting eq UNKNOWN ) {
26019                 my $check = substr( $input_line, $pos - 2, 1 );
26020                 if ( $check eq '-' ) {
26021                     return ( $i, $type );
26022                 }
26023             }
26024
26025             ######################################debug#####
26026             #write_diagnostics( "ANGLE? :$str\n");
26027             #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
26028             ######################################debug#####
26029             $type = 'Q';
26030             my $error;
26031             ( $i, $error ) =
26032               inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26033
26034             # It may be possible that a quote ends midway in a pretoken.
26035             # If this happens, it may be necessary to split the pretoken.
26036             if ($error) {
26037                 warning(
26038                     "Possible tokinization error..please check this line\n");
26039                 report_possible_bug();
26040             }
26041
26042             # Now let's see where we stand....
26043             # OK if math op not possible
26044             if ( $expecting == TERM ) {
26045             }
26046
26047             # OK if there are no more than 2 pre-tokens inside
26048             # (not possible to write 2 token math between < and >)
26049             # This catches most common cases
26050             elsif ( $i <= $i_beg + 3 ) {
26051                 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
26052             }
26053
26054             # Not sure..
26055             else {
26056
26057                 # Let's try a Brace Test: any braces inside must balance
26058                 my $br = 0;
26059                 while ( $str =~ /\{/g ) { $br++ }
26060                 while ( $str =~ /\}/g ) { $br-- }
26061                 my $sb = 0;
26062                 while ( $str =~ /\[/g ) { $sb++ }
26063                 while ( $str =~ /\]/g ) { $sb-- }
26064                 my $pr = 0;
26065                 while ( $str =~ /\(/g ) { $pr++ }
26066                 while ( $str =~ /\)/g ) { $pr-- }
26067
26068                 # if braces do not balance - not angle operator
26069                 if ( $br || $sb || $pr ) {
26070                     $i    = $i_beg;
26071                     $type = '<';
26072                     write_diagnostics(
26073                         "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
26074                 }
26075
26076                 # we should keep doing more checks here...to be continued
26077                 # Tentatively accepting this as a valid angle operator.
26078                 # There are lots more things that can be checked.
26079                 else {
26080                     write_diagnostics(
26081                         "ANGLE-Guessing yes: $str expecting=$expecting\n");
26082                     write_logfile_entry("Guessing angle operator here: $str\n");
26083                 }
26084             }
26085         }
26086
26087         # didn't find ending >
26088         else {
26089             if ( $expecting == TERM ) {
26090                 warning("No ending > for angle operator\n");
26091             }
26092         }
26093     }
26094     return ( $i, $type );
26095 }
26096
26097 sub scan_number_do {
26098
26099     #  scan a number in any of the formats that Perl accepts
26100     #  Underbars (_) are allowed in decimal numbers.
26101     #  input parameters -
26102     #      $input_line  - the string to scan
26103     #      $i           - pre_token index to start scanning
26104     #    $rtoken_map    - reference to the pre_token map giving starting
26105     #                    character position in $input_line of token $i
26106     #  output parameters -
26107     #    $i            - last pre_token index of the number just scanned
26108     #    number        - the number (characters); or undef if not a number
26109
26110     my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
26111     my $pos_beg = $$rtoken_map[$i];
26112     my $pos;
26113     my $i_begin = $i;
26114     my $number  = undef;
26115     my $type    = $input_type;
26116
26117     my $first_char = substr( $input_line, $pos_beg, 1 );
26118
26119     # Look for bad starting characters; Shouldn't happen..
26120     if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
26121         warning("Program bug - scan_number given character $first_char\n");
26122         report_definite_bug();
26123         return ( $i, $type, $number );
26124     }
26125
26126     # handle v-string without leading 'v' character ('Two Dot' rule)
26127     # (vstring.t)
26128     # TODO: v-strings may contain underscores
26129     pos($input_line) = $pos_beg;
26130     if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
26131         $pos = pos($input_line);
26132         my $numc = $pos - $pos_beg;
26133         $number = substr( $input_line, $pos_beg, $numc );
26134         $type = 'v';
26135         report_v_string($number);
26136     }
26137
26138     # handle octal, hex, binary
26139     if ( !defined($number) ) {
26140         pos($input_line) = $pos_beg;
26141         if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
26142         {
26143             $pos = pos($input_line);
26144             my $numc = $pos - $pos_beg;
26145             $number = substr( $input_line, $pos_beg, $numc );
26146             $type = 'n';
26147         }
26148     }
26149
26150     # handle decimal
26151     if ( !defined($number) ) {
26152         pos($input_line) = $pos_beg;
26153
26154         if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
26155             $pos = pos($input_line);
26156
26157             # watch out for things like 0..40 which would give 0. by this;
26158             if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
26159                 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
26160             {
26161                 $pos--;
26162             }
26163             my $numc = $pos - $pos_beg;
26164             $number = substr( $input_line, $pos_beg, $numc );
26165             $type = 'n';
26166         }
26167     }
26168
26169     # filter out non-numbers like e + - . e2  .e3 +e6
26170     # the rule: at least one digit, and any 'e' must be preceded by a digit
26171     if (
26172         $number !~ /\d/    # no digits
26173         || (   $number =~ /^(.*)[eE]/
26174             && $1 !~ /\d/ )    # or no digits before the 'e'
26175       )
26176     {
26177         $number = undef;
26178         $type   = $input_type;
26179         return ( $i, $type, $number );
26180     }
26181
26182     # Found a number; now we must convert back from character position
26183     # to pre_token index. An error here implies user syntax error.
26184     # An example would be an invalid octal number like '009'.
26185     my $error;
26186     ( $i, $error ) =
26187       inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26188     if ($error) { warning("Possibly invalid number\n") }
26189
26190     return ( $i, $type, $number );
26191 }
26192
26193 sub inverse_pretoken_map {
26194
26195     # Starting with the current pre_token index $i, scan forward until
26196     # finding the index of the next pre_token whose position is $pos.
26197     my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
26198     my $error = 0;
26199
26200     while ( ++$i <= $max_token_index ) {
26201
26202         if ( $pos <= $$rtoken_map[$i] ) {
26203
26204             # Let the calling routine handle errors in which we do not
26205             # land on a pre-token boundary.  It can happen by running
26206             # perltidy on some non-perl scripts, for example.
26207             if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
26208             $i--;
26209             last;
26210         }
26211     }
26212     return ( $i, $error );
26213 }
26214
26215 sub find_here_doc {
26216
26217     # find the target of a here document, if any
26218     # input parameters:
26219     #   $i - token index of the second < of <<
26220     #   ($i must be less than the last token index if this is called)
26221     # output parameters:
26222     #   $found_target = 0 didn't find target; =1 found target
26223     #   HERE_TARGET - the target string (may be empty string)
26224     #   $i - unchanged if not here doc,
26225     #    or index of the last token of the here target
26226     #   $saw_error - flag noting unbalanced quote on here target
26227     my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
26228     my $ibeg                 = $i;
26229     my $found_target         = 0;
26230     my $here_doc_target      = '';
26231     my $here_quote_character = '';
26232     my $saw_error            = 0;
26233     my ( $next_nonblank_token, $i_next_nonblank, $next_token );
26234     $next_token = $$rtokens[ $i + 1 ];
26235
26236     # perl allows a backslash before the target string (heredoc.t)
26237     my $backslash = 0;
26238     if ( $next_token eq '\\' ) {
26239         $backslash  = 1;
26240         $next_token = $$rtokens[ $i + 2 ];
26241     }
26242
26243     ( $next_nonblank_token, $i_next_nonblank ) =
26244       find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
26245
26246     if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
26247
26248         my $in_quote    = 1;
26249         my $quote_depth = 0;
26250         my $quote_pos   = 0;
26251         my $quoted_string;
26252
26253         (
26254             $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
26255             $quoted_string
26256           )
26257           = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
26258             $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
26259
26260         if ($in_quote) {    # didn't find end of quote, so no target found
26261             $i = $ibeg;
26262             if ( $expecting == TERM ) {
26263                 warning(
26264 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
26265                 );
26266                 $saw_error = 1;
26267             }
26268         }
26269         else {              # found ending quote
26270             my $j;
26271             $found_target = 1;
26272
26273             my $tokj;
26274             for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
26275                 $tokj = $$rtokens[$j];
26276
26277                 # we have to remove any backslash before the quote character
26278                 # so that the here-doc-target exactly matches this string
26279                 next
26280                   if ( $tokj eq "\\"
26281                     && $j < $i - 1
26282                     && $$rtokens[ $j + 1 ] eq $here_quote_character );
26283                 $here_doc_target .= $tokj;
26284             }
26285         }
26286     }
26287
26288     elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
26289         $found_target = 1;
26290         write_logfile_entry(
26291             "found blank here-target after <<; suggest using \"\"\n");
26292         $i = $ibeg;
26293     }
26294     elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
26295
26296         my $here_doc_expected;
26297         if ( $expecting == UNKNOWN ) {
26298             $here_doc_expected = guess_if_here_doc($next_token);
26299         }
26300         else {
26301             $here_doc_expected = 1;
26302         }
26303
26304         if ($here_doc_expected) {
26305             $found_target    = 1;
26306             $here_doc_target = $next_token;
26307             $i               = $ibeg + 1;
26308         }
26309
26310     }
26311     else {
26312
26313         if ( $expecting == TERM ) {
26314             $found_target = 1;
26315             write_logfile_entry("Note: bare here-doc operator <<\n");
26316         }
26317         else {
26318             $i = $ibeg;
26319         }
26320     }
26321
26322     # patch to neglect any prepended backslash
26323     if ( $found_target && $backslash ) { $i++ }
26324
26325     return ( $found_target, $here_doc_target, $here_quote_character, $i,
26326         $saw_error );
26327 }
26328
26329 sub do_quote {
26330
26331     # follow (or continue following) quoted string(s)
26332     # $in_quote return code:
26333     #   0 - ok, found end
26334     #   1 - still must find end of quote whose target is $quote_character
26335     #   2 - still looking for end of first of two quotes
26336     #
26337     # Returns updated strings:
26338     #  $quoted_string_1 = quoted string seen while in_quote=1
26339     #  $quoted_string_2 = quoted string seen while in_quote=2
26340     my (
26341         $i,               $in_quote,    $quote_character,
26342         $quote_pos,       $quote_depth, $quoted_string_1,
26343         $quoted_string_2, $rtokens,     $rtoken_map,
26344         $max_token_index
26345     ) = @_;
26346
26347     my $in_quote_starting = $in_quote;
26348
26349     my $quoted_string;
26350     if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
26351         my $ibeg = $i;
26352         (
26353             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
26354             $quoted_string
26355           )
26356           = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
26357             $quote_pos, $quote_depth, $max_token_index );
26358         $quoted_string_2 .= $quoted_string;
26359         if ( $in_quote == 1 ) {
26360             if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
26361             $quote_character = '';
26362         }
26363         else {
26364             $quoted_string_2 .= "\n";
26365         }
26366     }
26367
26368     if ( $in_quote == 1 ) {    # one (more) quote to follow
26369         my $ibeg = $i;
26370         (
26371             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
26372             $quoted_string
26373           )
26374           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
26375             $quote_pos, $quote_depth, $max_token_index );
26376         $quoted_string_1 .= $quoted_string;
26377         if ( $in_quote == 1 ) {
26378             $quoted_string_1 .= "\n";
26379         }
26380     }
26381     return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
26382         $quoted_string_1, $quoted_string_2 );
26383 }
26384
26385 sub follow_quoted_string {
26386
26387     # scan for a specific token, skipping escaped characters
26388     # if the quote character is blank, use the first non-blank character
26389     # input parameters:
26390     #   $rtokens = reference to the array of tokens
26391     #   $i = the token index of the first character to search
26392     #   $in_quote = number of quoted strings being followed
26393     #   $beginning_tok = the starting quote character
26394     #   $quote_pos = index to check next for alphanumeric delimiter
26395     # output parameters:
26396     #   $i = the token index of the ending quote character
26397     #   $in_quote = decremented if found end, unchanged if not
26398     #   $beginning_tok = the starting quote character
26399     #   $quote_pos = index to check next for alphanumeric delimiter
26400     #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
26401     #   $quoted_string = the text of the quote (without quotation tokens)
26402     my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
26403         $max_token_index )
26404       = @_;
26405     my ( $tok, $end_tok );
26406     my $i             = $i_beg - 1;
26407     my $quoted_string = "";
26408
26409     TOKENIZER_DEBUG_FLAG_QUOTE && do {
26410         print
26411 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
26412     };
26413
26414     # get the corresponding end token
26415     if ( $beginning_tok !~ /^\s*$/ ) {
26416         $end_tok = matching_end_token($beginning_tok);
26417     }
26418
26419     # a blank token means we must find and use the first non-blank one
26420     else {
26421         my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
26422
26423         while ( $i < $max_token_index ) {
26424             $tok = $$rtokens[ ++$i ];
26425
26426             if ( $tok !~ /^\s*$/ ) {
26427
26428                 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
26429                     $i = $max_token_index;
26430                 }
26431                 else {
26432
26433                     if ( length($tok) > 1 ) {
26434                         if ( $quote_pos <= 0 ) { $quote_pos = 1 }
26435                         $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
26436                     }
26437                     else {
26438                         $beginning_tok = $tok;
26439                         $quote_pos     = 0;
26440                     }
26441                     $end_tok     = matching_end_token($beginning_tok);
26442                     $quote_depth = 1;
26443                     last;
26444                 }
26445             }
26446             else {
26447                 $allow_quote_comments = 1;
26448             }
26449         }
26450     }
26451
26452     # There are two different loops which search for the ending quote
26453     # character.  In the rare case of an alphanumeric quote delimiter, we
26454     # have to look through alphanumeric tokens character-by-character, since
26455     # the pre-tokenization process combines multiple alphanumeric
26456     # characters, whereas for a non-alphanumeric delimiter, only tokens of
26457     # length 1 can match.
26458
26459     ###################################################################
26460     # Case 1 (rare): loop for case of alphanumeric quote delimiter..
26461     # "quote_pos" is the position the current word to begin searching
26462     ###################################################################
26463     if ( $beginning_tok =~ /\w/ ) {
26464
26465         # Note this because it is not recommended practice except
26466         # for obfuscated perl contests
26467         if ( $in_quote == 1 ) {
26468             write_logfile_entry(
26469                 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
26470         }
26471
26472         while ( $i < $max_token_index ) {
26473
26474             if ( $quote_pos == 0 || ( $i < 0 ) ) {
26475                 $tok = $$rtokens[ ++$i ];
26476
26477                 if ( $tok eq '\\' ) {
26478
26479                     # retain backslash unless it hides the end token
26480                     $quoted_string .= $tok
26481                       unless $$rtokens[ $i + 1 ] eq $end_tok;
26482                     $quote_pos++;
26483                     last if ( $i >= $max_token_index );
26484                     $tok = $$rtokens[ ++$i ];
26485                 }
26486             }
26487             my $old_pos = $quote_pos;
26488
26489             unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
26490             {
26491
26492             }
26493             $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
26494
26495             if ( $quote_pos > 0 ) {
26496
26497                 $quoted_string .=
26498                   substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
26499
26500                 $quote_depth--;
26501
26502                 if ( $quote_depth == 0 ) {
26503                     $in_quote--;
26504                     last;
26505                 }
26506             }
26507             else {
26508                 $quoted_string .= substr( $tok, $old_pos );
26509             }
26510         }
26511     }
26512
26513     ########################################################################
26514     # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
26515     ########################################################################
26516     else {
26517
26518         while ( $i < $max_token_index ) {
26519             $tok = $$rtokens[ ++$i ];
26520
26521             if ( $tok eq $end_tok ) {
26522                 $quote_depth--;
26523
26524                 if ( $quote_depth == 0 ) {
26525                     $in_quote--;
26526                     last;
26527                 }
26528             }
26529             elsif ( $tok eq $beginning_tok ) {
26530                 $quote_depth++;
26531             }
26532             elsif ( $tok eq '\\' ) {
26533
26534                 # retain backslash unless it hides the beginning or end token
26535                 $tok = $$rtokens[ ++$i ];
26536                 $quoted_string .= '\\'
26537                   unless ( $tok eq $end_tok || $tok eq $beginning_tok );
26538             }
26539             $quoted_string .= $tok;
26540         }
26541     }
26542     if ( $i > $max_token_index ) { $i = $max_token_index }
26543     return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
26544         $quoted_string );
26545 }
26546
26547 sub indicate_error {
26548     my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
26549     interrupt_logfile();
26550     warning($msg);
26551     write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
26552     resume_logfile();
26553 }
26554
26555 sub write_error_indicator_pair {
26556     my ( $line_number, $input_line, $pos, $carrat ) = @_;
26557     my ( $offset, $numbered_line, $underline ) =
26558       make_numbered_line( $line_number, $input_line, $pos );
26559     $underline = write_on_underline( $underline, $pos - $offset, $carrat );
26560     warning( $numbered_line . "\n" );
26561     $underline =~ s/\s*$//;
26562     warning( $underline . "\n" );
26563 }
26564
26565 sub make_numbered_line {
26566
26567     #  Given an input line, its line number, and a character position of
26568     #  interest, create a string not longer than 80 characters of the form
26569     #     $lineno: sub_string
26570     #  such that the sub_string of $str contains the position of interest
26571     #
26572     #  Here is an example of what we want, in this case we add trailing
26573     #  '...' because the line is long.
26574     #
26575     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
26576     #
26577     #  Here is another example, this time in which we used leading '...'
26578     #  because of excessive length:
26579     #
26580     # 2: ... er of the World Wide Web Consortium's
26581     #
26582     #  input parameters are:
26583     #   $lineno = line number
26584     #   $str = the text of the line
26585     #   $pos = position of interest (the error) : 0 = first character
26586     #
26587     #   We return :
26588     #     - $offset = an offset which corrects the position in case we only
26589     #       display part of a line, such that $pos-$offset is the effective
26590     #       position from the start of the displayed line.
26591     #     - $numbered_line = the numbered line as above,
26592     #     - $underline = a blank 'underline' which is all spaces with the same
26593     #       number of characters as the numbered line.
26594
26595     my ( $lineno, $str, $pos ) = @_;
26596     my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
26597     my $excess = length($str) - $offset - 68;
26598     my $numc   = ( $excess > 0 ) ? 68 : undef;
26599
26600     if ( defined($numc) ) {
26601         if ( $offset == 0 ) {
26602             $str = substr( $str, $offset, $numc - 4 ) . " ...";
26603         }
26604         else {
26605             $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
26606         }
26607     }
26608     else {
26609
26610         if ( $offset == 0 ) {
26611         }
26612         else {
26613             $str = "... " . substr( $str, $offset + 4 );
26614         }
26615     }
26616
26617     my $numbered_line = sprintf( "%d: ", $lineno );
26618     $offset -= length($numbered_line);
26619     $numbered_line .= $str;
26620     my $underline = " " x length($numbered_line);
26621     return ( $offset, $numbered_line, $underline );
26622 }
26623
26624 sub write_on_underline {
26625
26626     # The "underline" is a string that shows where an error is; it starts
26627     # out as a string of blanks with the same length as the numbered line of
26628     # code above it, and we have to add marking to show where an error is.
26629     # In the example below, we want to write the string '--^' just below
26630     # the line of bad code:
26631     #
26632     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
26633     #                 ---^
26634     # We are given the current underline string, plus a position and a
26635     # string to write on it.
26636     #
26637     # In the above example, there will be 2 calls to do this:
26638     # First call:  $pos=19, pos_chr=^
26639     # Second call: $pos=16, pos_chr=---
26640     #
26641     # This is a trivial thing to do with substr, but there is some
26642     # checking to do.
26643
26644     my ( $underline, $pos, $pos_chr ) = @_;
26645
26646     # check for error..shouldn't happen
26647     unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
26648         return $underline;
26649     }
26650     my $excess = length($pos_chr) + $pos - length($underline);
26651     if ( $excess > 0 ) {
26652         $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
26653     }
26654     substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
26655     return ($underline);
26656 }
26657
26658 sub pre_tokenize {
26659
26660     # Break a string, $str, into a sequence of preliminary tokens.  We
26661     # are interested in these types of tokens:
26662     #   words       (type='w'),            example: 'max_tokens_wanted'
26663     #   digits      (type = 'd'),          example: '0755'
26664     #   whitespace  (type = 'b'),          example: '   '
26665     #   any other single character (i.e. punct; type = the character itself).
26666     # We cannot do better than this yet because we might be in a quoted
26667     # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
26668     # tokens.
26669     my ( $str, $max_tokens_wanted ) = @_;
26670
26671     # we return references to these 3 arrays:
26672     my @tokens    = ();     # array of the tokens themselves
26673     my @token_map = (0);    # string position of start of each token
26674     my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
26675
26676     do {
26677
26678         # whitespace
26679         if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
26680
26681         # numbers
26682         # note that this must come before words!
26683         elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
26684
26685         # words
26686         elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
26687
26688         # single-character punctuation
26689         elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
26690
26691         # that's all..
26692         else {
26693             return ( \@tokens, \@token_map, \@type );
26694         }
26695
26696         push @tokens,    $1;
26697         push @token_map, pos($str);
26698
26699     } while ( --$max_tokens_wanted != 0 );
26700
26701     return ( \@tokens, \@token_map, \@type );
26702 }
26703
26704 sub show_tokens {
26705
26706     # this is an old debug routine
26707     my ( $rtokens, $rtoken_map ) = @_;
26708     my $num = scalar(@$rtokens);
26709     my $i;
26710
26711     for ( $i = 0 ; $i < $num ; $i++ ) {
26712         my $len = length( $$rtokens[$i] );
26713         print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
26714     }
26715 }
26716
26717 sub matching_end_token {
26718
26719     # find closing character for a pattern
26720     my $beginning_token = shift;
26721
26722     if ( $beginning_token eq '{' ) {
26723         '}';
26724     }
26725     elsif ( $beginning_token eq '[' ) {
26726         ']';
26727     }
26728     elsif ( $beginning_token eq '<' ) {
26729         '>';
26730     }
26731     elsif ( $beginning_token eq '(' ) {
26732         ')';
26733     }
26734     else {
26735         $beginning_token;
26736     }
26737 }
26738
26739 sub dump_token_types {
26740     my $class = shift;
26741     my $fh    = shift;
26742
26743     # This should be the latest list of token types in use
26744     # adding NEW_TOKENS: add a comment here
26745     print $fh <<'END_OF_LIST';
26746
26747 Here is a list of the token types currently used for lines of type 'CODE'.
26748 For the following tokens, the "type" of a token is just the token itself.
26749
26750 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
26751 ( ) <= >= == =~ !~ != ++ -- /= x=
26752 ... **= <<= >>= &&= ||= //= <=>
26753 , + - / * | % ! x ~ = \ ? : . < > ^ &
26754
26755 The following additional token types are defined:
26756
26757  type    meaning
26758     b    blank (white space)
26759     {    indent: opening structural curly brace or square bracket or paren
26760          (code block, anonymous hash reference, or anonymous array reference)
26761     }    outdent: right structural curly brace or square bracket or paren
26762     [    left non-structural square bracket (enclosing an array index)
26763     ]    right non-structural square bracket
26764     (    left non-structural paren (all but a list right of an =)
26765     )    right non-structural parena
26766     L    left non-structural curly brace (enclosing a key)
26767     R    right non-structural curly brace
26768     ;    terminal semicolon
26769     f    indicates a semicolon in a "for" statement
26770     h    here_doc operator <<
26771     #    a comment
26772     Q    indicates a quote or pattern
26773     q    indicates a qw quote block
26774     k    a perl keyword
26775     C    user-defined constant or constant function (with void prototype = ())
26776     U    user-defined function taking parameters
26777     G    user-defined function taking block parameter (like grep/map/eval)
26778     M    (unused, but reserved for subroutine definition name)
26779     P    (unused, but -html uses it to label pod text)
26780     t    type indicater such as %,$,@,*,&,sub
26781     w    bare word (perhaps a subroutine call)
26782     i    identifier of some type (with leading %, $, @, *, &, sub, -> )
26783     n    a number
26784     v    a v-string
26785     F    a file test operator (like -e)
26786     Y    File handle
26787     Z    identifier in indirect object slot: may be file handle, object
26788     J    LABEL:  code block label
26789     j    LABEL after next, last, redo, goto
26790     p    unary +
26791     m    unary -
26792     pp   pre-increment operator ++
26793     mm   pre-decrement operator --
26794     A    : used as attribute separator
26795
26796     Here are the '_line_type' codes used internally:
26797     SYSTEM         - system-specific code before hash-bang line
26798     CODE           - line of perl code (including comments)
26799     POD_START      - line starting pod, such as '=head'
26800     POD            - pod documentation text
26801     POD_END        - last line of pod section, '=cut'
26802     HERE           - text of here-document
26803     HERE_END       - last line of here-doc (target word)
26804     FORMAT         - format section
26805     FORMAT_END     - last line of format section, '.'
26806     DATA_START     - __DATA__ line
26807     DATA           - unidentified text following __DATA__
26808     END_START      - __END__ line
26809     END            - unidentified text following __END__
26810     ERROR          - we are in big trouble, probably not a perl script
26811 END_OF_LIST
26812 }
26813
26814 BEGIN {
26815
26816     # These names are used in error messages
26817     @opening_brace_names = qw# '{' '[' '(' '?' #;
26818     @closing_brace_names = qw# '}' ']' ')' ':' #;
26819
26820     my @digraphs = qw(
26821       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
26822       <= >= == =~ !~ != ++ -- /= x= ~~
26823     );
26824     @is_digraph{@digraphs} = (1) x scalar(@digraphs);
26825
26826     my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
26827     @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
26828
26829     # make a hash of all valid token types for self-checking the tokenizer
26830     # (adding NEW_TOKENS : select a new character and add to this list)
26831     my @valid_token_types = qw#
26832       A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
26833       { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
26834       #;
26835     push( @valid_token_types, @digraphs );
26836     push( @valid_token_types, @trigraphs );
26837     push( @valid_token_types, '#' );
26838     push( @valid_token_types, ',' );
26839     @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
26840
26841     # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
26842     my @file_test_operators =
26843       qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
26844     @is_file_test_operator{@file_test_operators} =
26845       (1) x scalar(@file_test_operators);
26846
26847     # these functions have prototypes of the form (&), so when they are
26848     # followed by a block, that block MAY BE followed by an operator.
26849     @_ = qw( do eval );
26850     @is_block_operator{@_} = (1) x scalar(@_);
26851
26852     # these functions allow an identifier in the indirect object slot
26853     @_ = qw( print printf sort exec system say);
26854     @is_indirect_object_taker{@_} = (1) x scalar(@_);
26855
26856     # These tokens may precede a code block
26857     # patched for SWITCH/CASE
26858     @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
26859       unless do while until eval for foreach map grep sort
26860       switch case given when);
26861     @is_code_block_token{@_} = (1) x scalar(@_);
26862
26863     # I'll build the list of keywords incrementally
26864     my @Keywords = ();
26865
26866     # keywords and tokens after which a value or pattern is expected,
26867     # but not an operator.  In other words, these should consume terms
26868     # to their right, or at least they are not expected to be followed
26869     # immediately by operators.
26870     my @value_requestor = qw(
26871       AUTOLOAD
26872       BEGIN
26873       CHECK
26874       DESTROY
26875       END
26876       EQ
26877       GE
26878       GT
26879       INIT
26880       LE
26881       LT
26882       NE
26883       abs
26884       accept
26885       alarm
26886       and
26887       atan2
26888       bind
26889       binmode
26890       bless
26891       caller
26892       chdir
26893       chmod
26894       chomp
26895       chop
26896       chown
26897       chr
26898       chroot
26899       close
26900       closedir
26901       cmp
26902       connect
26903       continue
26904       cos
26905       crypt
26906       dbmclose
26907       dbmopen
26908       defined
26909       delete
26910       die
26911       dump
26912       each
26913       else
26914       elsif
26915       eof
26916       eq
26917       exec
26918       exists
26919       exit
26920       exp
26921       fcntl
26922       fileno
26923       flock
26924       for
26925       foreach
26926       formline
26927       ge
26928       getc
26929       getgrgid
26930       getgrnam
26931       gethostbyaddr
26932       gethostbyname
26933       getnetbyaddr
26934       getnetbyname
26935       getpeername
26936       getpgrp
26937       getpriority
26938       getprotobyname
26939       getprotobynumber
26940       getpwnam
26941       getpwuid
26942       getservbyname
26943       getservbyport
26944       getsockname
26945       getsockopt
26946       glob
26947       gmtime
26948       goto
26949       grep
26950       gt
26951       hex
26952       if
26953       index
26954       int
26955       ioctl
26956       join
26957       keys
26958       kill
26959       last
26960       lc
26961       lcfirst
26962       le
26963       length
26964       link
26965       listen
26966       local
26967       localtime
26968       lock
26969       log
26970       lstat
26971       lt
26972       map
26973       mkdir
26974       msgctl
26975       msgget
26976       msgrcv
26977       msgsnd
26978       my
26979       ne
26980       next
26981       no
26982       not
26983       oct
26984       open
26985       opendir
26986       or
26987       ord
26988       our
26989       pack
26990       pipe
26991       pop
26992       pos
26993       print
26994       printf
26995       prototype
26996       push
26997       quotemeta
26998       rand
26999       read
27000       readdir
27001       readlink
27002       readline
27003       readpipe
27004       recv
27005       redo
27006       ref
27007       rename
27008       require
27009       reset
27010       return
27011       reverse
27012       rewinddir
27013       rindex
27014       rmdir
27015       scalar
27016       seek
27017       seekdir
27018       select
27019       semctl
27020       semget
27021       semop
27022       send
27023       sethostent
27024       setnetent
27025       setpgrp
27026       setpriority
27027       setprotoent
27028       setservent
27029       setsockopt
27030       shift
27031       shmctl
27032       shmget
27033       shmread
27034       shmwrite
27035       shutdown
27036       sin
27037       sleep
27038       socket
27039       socketpair
27040       sort
27041       splice
27042       split
27043       sprintf
27044       sqrt
27045       srand
27046       stat
27047       study
27048       substr
27049       symlink
27050       syscall
27051       sysopen
27052       sysread
27053       sysseek
27054       system
27055       syswrite
27056       tell
27057       telldir
27058       tie
27059       tied
27060       truncate
27061       uc
27062       ucfirst
27063       umask
27064       undef
27065       unless
27066       unlink
27067       unpack
27068       unshift
27069       untie
27070       until
27071       use
27072       utime
27073       values
27074       vec
27075       waitpid
27076       warn
27077       while
27078       write
27079       xor
27080
27081       switch
27082       case
27083       given
27084       when
27085       err
27086       say
27087     );
27088
27089     # patched above for SWITCH/CASE given/when err say
27090     # 'err' is a fairly safe addition.
27091     # TODO: 'default' still needed if appropriate
27092     # 'use feature' seen, but perltidy works ok without it.
27093     # Concerned that 'default' could break code.
27094     push( @Keywords, @value_requestor );
27095
27096     # These are treated the same but are not keywords:
27097     my @extra_vr = qw(
27098       constant
27099       vars
27100     );
27101     push( @value_requestor, @extra_vr );
27102
27103     @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
27104
27105     # this list contains keywords which do not look for arguments,
27106     # so that they might be followed by an operator, or at least
27107     # not a term.
27108     my @operator_requestor = qw(
27109       endgrent
27110       endhostent
27111       endnetent
27112       endprotoent
27113       endpwent
27114       endservent
27115       fork
27116       getgrent
27117       gethostent
27118       getlogin
27119       getnetent
27120       getppid
27121       getprotoent
27122       getpwent
27123       getservent
27124       setgrent
27125       setpwent
27126       time
27127       times
27128       wait
27129       wantarray
27130     );
27131
27132     push( @Keywords, @operator_requestor );
27133
27134     # These are treated the same but are not considered keywords:
27135     my @extra_or = qw(
27136       STDERR
27137       STDIN
27138       STDOUT
27139     );
27140
27141     push( @operator_requestor, @extra_or );
27142
27143     @expecting_operator_token{@operator_requestor} =
27144       (1) x scalar(@operator_requestor);
27145
27146     # these token TYPES expect trailing operator but not a term
27147     # note: ++ and -- are post-increment and decrement, 'C' = constant
27148     my @operator_requestor_types = qw( ++ -- C <> q );
27149     @expecting_operator_types{@operator_requestor_types} =
27150       (1) x scalar(@operator_requestor_types);
27151
27152     # these token TYPES consume values (terms)
27153     # note: pp and mm are pre-increment and decrement
27154     # f=semicolon in for,  F=file test operator
27155     my @value_requestor_type = qw#
27156       L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
27157       **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
27158       <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
27159       f F pp mm Y p m U J G j >> << ^ t
27160       #;
27161     push( @value_requestor_type, ',' )
27162       ;    # (perl doesn't like a ',' in a qw block)
27163     @expecting_term_types{@value_requestor_type} =
27164       (1) x scalar(@value_requestor_type);
27165
27166     # Note: the following valid token types are not assigned here to
27167     # hashes requesting to be followed by values or terms, but are
27168     # instead currently hard-coded into sub operator_expected:
27169     # ) -> :: Q R Z ] b h i k n v w } #
27170
27171     # For simple syntax checking, it is nice to have a list of operators which
27172     # will really be unhappy if not followed by a term.  This includes most
27173     # of the above...
27174     %really_want_term = %expecting_term_types;
27175
27176     # with these exceptions...
27177     delete $really_want_term{'U'}; # user sub, depends on prototype
27178     delete $really_want_term{'F'}; # file test works on $_ if no following term
27179     delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
27180                                    # let perl do it
27181
27182     @_ = qw(q qq qw qx qr s y tr m);
27183     @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
27184
27185     # These keywords are handled specially in the tokenizer code:
27186     my @special_keywords = qw(
27187       do
27188       eval
27189       format
27190       m
27191       package
27192       q
27193       qq
27194       qr
27195       qw
27196       qx
27197       s
27198       sub
27199       tr
27200       y
27201     );
27202     push( @Keywords, @special_keywords );
27203
27204     # Keywords after which list formatting may be used
27205     # WARNING: do not include |map|grep|eval or perl may die on
27206     # syntax errors (map1.t).
27207     my @keyword_taking_list = qw(
27208       and
27209       chmod
27210       chomp
27211       chop
27212       chown
27213       dbmopen
27214       die
27215       elsif
27216       exec
27217       fcntl
27218       for
27219       foreach
27220       formline
27221       getsockopt
27222       if
27223       index
27224       ioctl
27225       join
27226       kill
27227       local
27228       msgctl
27229       msgrcv
27230       msgsnd
27231       my
27232       open
27233       or
27234       our
27235       pack
27236       print
27237       printf
27238       push
27239       read
27240       readpipe
27241       recv
27242       return
27243       reverse
27244       rindex
27245       seek
27246       select
27247       semctl
27248       semget
27249       send
27250       setpriority
27251       setsockopt
27252       shmctl
27253       shmget
27254       shmread
27255       shmwrite
27256       socket
27257       socketpair
27258       sort
27259       splice
27260       split
27261       sprintf
27262       substr
27263       syscall
27264       sysopen
27265       sysread
27266       sysseek
27267       system
27268       syswrite
27269       tie
27270       unless
27271       unlink
27272       unpack
27273       unshift
27274       until
27275       vec
27276       warn
27277       while
27278     );
27279     @is_keyword_taking_list{@keyword_taking_list} =
27280       (1) x scalar(@keyword_taking_list);
27281
27282     # These are not used in any way yet
27283     #    my @unused_keywords = qw(
27284     #      CORE
27285     #     __FILE__
27286     #     __LINE__
27287     #     __PACKAGE__
27288     #     );
27289
27290     #  The list of keywords was extracted from function 'keyword' in
27291     #  perl file toke.c version 5.005.03, using this utility, plus a
27292     #  little editing: (file getkwd.pl):
27293     #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
27294     #  Add 'get' prefix where necessary, then split into the above lists.
27295     #  This list should be updated as necessary.
27296     #  The list should not contain these special variables:
27297     #  ARGV DATA ENV SIG STDERR STDIN STDOUT
27298     #  __DATA__ __END__
27299
27300     @is_keyword{@Keywords} = (1) x scalar(@Keywords);
27301 }
27302 1;
27303 __END__
27304
27305 =head1 NAME
27306
27307 Perl::Tidy - Parses and beautifies perl source
27308
27309 =head1 SYNOPSIS
27310
27311     use Perl::Tidy;
27312
27313     Perl::Tidy::perltidy(
27314         source            => $source,
27315         destination       => $destination,
27316         stderr            => $stderr,
27317         argv              => $argv,
27318         perltidyrc        => $perltidyrc,
27319         logfile           => $logfile,
27320         errorfile         => $errorfile,
27321         formatter         => $formatter,           # callback object (see below)
27322         dump_options      => $dump_options,
27323         dump_options_type => $dump_options_type,
27324     );
27325
27326 =head1 DESCRIPTION
27327
27328 This module makes the functionality of the perltidy utility available to perl
27329 scripts.  Any or all of the input parameters may be omitted, in which case the
27330 @ARGV array will be used to provide input parameters as described
27331 in the perltidy(1) man page.
27332
27333 For example, the perltidy script is basically just this:
27334
27335     use Perl::Tidy;
27336     Perl::Tidy::perltidy();
27337
27338 The module accepts input and output streams by a variety of methods.
27339 The following list of parameters may be any of a the following: a
27340 filename, an ARRAY reference, a SCALAR reference, or an object with
27341 either a B<getline> or B<print> method, as appropriate.
27342
27343         source            - the source of the script to be formatted
27344         destination       - the destination of the formatted output
27345         stderr            - standard error output
27346         perltidyrc        - the .perltidyrc file
27347         logfile           - the .LOG file stream, if any
27348         errorfile         - the .ERR file stream, if any
27349         dump_options      - ref to a hash to receive parameters (see below),
27350         dump_options_type - controls contents of dump_options
27351         dump_getopt_flags - ref to a hash to receive Getopt flags
27352         dump_options_category - ref to a hash giving category of options
27353         dump_abbreviations    - ref to a hash giving all abbreviations
27354
27355 The following chart illustrates the logic used to decide how to
27356 treat a parameter.
27357
27358    ref($param)  $param is assumed to be:
27359    -----------  ---------------------
27360    undef        a filename
27361    SCALAR       ref to string
27362    ARRAY        ref to array
27363    (other)      object with getline (if source) or print method
27364
27365 If the parameter is an object, and the object has a B<close> method, that
27366 close method will be called at the end of the stream.
27367
27368 =over 4
27369
27370 =item source
27371
27372 If the B<source> parameter is given, it defines the source of the
27373 input stream.
27374
27375 =item destination
27376
27377 If the B<destination> parameter is given, it will be used to define the
27378 file or memory location to receive output of perltidy.
27379
27380 =item stderr
27381
27382 The B<stderr> parameter allows the calling program to capture the output
27383 to what would otherwise go to the standard error output device.
27384
27385 =item perltidyrc
27386
27387 If the B<perltidyrc> file is given, it will be used instead of any
27388 F<.perltidyrc> configuration file that would otherwise be used.
27389
27390 =item argv
27391
27392 If the B<argv> parameter is given, it will be used instead of the
27393 B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
27394 string, or a reference to an array.  If it is a string or reference to a
27395 string, it will be parsed into an array of items just as if it were a
27396 command line string.
27397
27398 =item dump_options
27399
27400 If the B<dump_options> parameter is given, it must be the reference to a hash.
27401 In this case, the parameters contained in any perltidyrc configuration file
27402 will be placed in this hash and perltidy will return immediately.  This is
27403 equivalent to running perltidy with --dump-options, except that the perameters
27404 are returned in a hash rather than dumped to standard output.  Also, by default
27405 only the parameters in the perltidyrc file are returned, but this can be
27406 changed (see the next parameter).  This parameter provides a convenient method
27407 for external programs to read a perltidyrc file.  An example program using
27408 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
27409
27410 Any combination of the B<dump_> parameters may be used together.
27411
27412 =item dump_options_type
27413
27414 This parameter is a string which can be used to control the parameters placed
27415 in the hash reference supplied by B<dump_options>.  The possible values are
27416 'perltidyrc' (default) and 'full'.  The 'full' parameter causes both the
27417 default options plus any options found in a perltidyrc file to be returned.
27418
27419 =item dump_getopt_flags
27420
27421 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
27422 hash.  This hash will receive all of the parameters that perltidy understands
27423 and flags that are passed to Getopt::Long.  This parameter may be
27424 used alone or with the B<dump_options> flag.  Perltidy will
27425 exit immediately after filling this hash.  See the demo program
27426 F<perltidyrc_dump.pl> for example usage.
27427
27428 =item dump_options_category
27429
27430 If the B<dump_options_category> parameter is given, it must be the reference to a
27431 hash.  This hash will receive a hash with keys equal to all long parameter names
27432 and values equal to the title of the corresponding section of the perltidy manual.
27433 See the demo program F<perltidyrc_dump.pl> for example usage.
27434
27435 =item dump_abbreviations
27436
27437 If the B<dump_abbreviations> parameter is given, it must be the reference to a
27438 hash.  This hash will receive all abbreviations used by Perl::Tidy.  See the
27439 demo program F<perltidyrc_dump.pl> for example usage.
27440
27441 =back
27442
27443 =head1 EXAMPLE
27444
27445 The following example passes perltidy a snippet as a reference
27446 to a string and receives the result back in a reference to
27447 an array.
27448
27449  use Perl::Tidy;
27450
27451  # some messy source code to format
27452  my $source = <<'EOM';
27453  use strict;
27454  my @editors=('Emacs', 'Vi   '); my $rand = rand();
27455  print "A poll of 10 random programmers gave these results:\n";
27456  foreach(0..10) {
27457  my $i=int ($rand+rand());
27458  print " $editors[$i] users are from Venus" . ", " .
27459  "$editors[1-$i] users are from Mars" .
27460  "\n";
27461  }
27462  EOM
27463
27464  # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
27465  my @dest;
27466  perltidy( source => \$source, destination => \@dest );
27467  foreach (@dest) {print}
27468
27469 =head1 Using the B<formatter> Callback Object
27470
27471 The B<formatter> parameter is an optional callback object which allows
27472 the calling program to receive tokenized lines directly from perltidy for
27473 further specialized processing.  When this parameter is used, the two
27474 formatting options which are built into perltidy (beautification or
27475 html) are ignored.  The following diagram illustrates the logical flow:
27476
27477                     |-- (normal route)   -> code beautification
27478   caller->perltidy->|-- (-html flag )    -> create html
27479                     |-- (formatter given)-> callback to write_line
27480
27481 This can be useful for processing perl scripts in some way.  The
27482 parameter C<$formatter> in the perltidy call,
27483
27484         formatter   => $formatter,
27485
27486 is an object created by the caller with a C<write_line> method which
27487 will accept and process tokenized lines, one line per call.  Here is
27488 a simple example of a C<write_line> which merely prints the line number,
27489 the line type (as determined by perltidy), and the text of the line:
27490
27491  sub write_line {
27492
27493      # This is called from perltidy line-by-line
27494      my $self              = shift;
27495      my $line_of_tokens    = shift;
27496      my $line_type         = $line_of_tokens->{_line_type};
27497      my $input_line_number = $line_of_tokens->{_line_number};
27498      my $input_line        = $line_of_tokens->{_line_text};
27499      print "$input_line_number:$line_type:$input_line";
27500  }
27501
27502 The complete program, B<perllinetype>, is contained in the examples section of
27503 the source distribution.  As this example shows, the callback method
27504 receives a parameter B<$line_of_tokens>, which is a reference to a hash
27505 of other useful information.  This example uses these hash entries:
27506
27507  $line_of_tokens->{_line_number} - the line number (1,2,...)
27508  $line_of_tokens->{_line_text}   - the text of the line
27509  $line_of_tokens->{_line_type}   - the type of the line, one of:
27510
27511     SYSTEM         - system-specific code before hash-bang line
27512     CODE           - line of perl code (including comments)
27513     POD_START      - line starting pod, such as '=head'
27514     POD            - pod documentation text
27515     POD_END        - last line of pod section, '=cut'
27516     HERE           - text of here-document
27517     HERE_END       - last line of here-doc (target word)
27518     FORMAT         - format section
27519     FORMAT_END     - last line of format section, '.'
27520     DATA_START     - __DATA__ line
27521     DATA           - unidentified text following __DATA__
27522     END_START      - __END__ line
27523     END            - unidentified text following __END__
27524     ERROR          - we are in big trouble, probably not a perl script
27525
27526 Most applications will be only interested in lines of type B<CODE>.  For
27527 another example, let's write a program which checks for one of the
27528 so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
27529 can slow down processing.  Here is a B<write_line>, from the example
27530 program B<find_naughty.pl>, which does that:
27531
27532  sub write_line {
27533
27534      # This is called back from perltidy line-by-line
27535      # We're looking for $`, $&, and $'
27536      my ( $self, $line_of_tokens ) = @_;
27537
27538      # pull out some stuff we might need
27539      my $line_type         = $line_of_tokens->{_line_type};
27540      my $input_line_number = $line_of_tokens->{_line_number};
27541      my $input_line        = $line_of_tokens->{_line_text};
27542      my $rtoken_type       = $line_of_tokens->{_rtoken_type};
27543      my $rtokens           = $line_of_tokens->{_rtokens};
27544      chomp $input_line;
27545
27546      # skip comments, pod, etc
27547      return if ( $line_type ne 'CODE' );
27548
27549      # loop over tokens looking for $`, $&, and $'
27550      for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
27551
27552          # we only want to examine token types 'i' (identifier)
27553          next unless $$rtoken_type[$j] eq 'i';
27554
27555          # pull out the actual token text
27556          my $token = $$rtokens[$j];
27557
27558          # and check it
27559          if ( $token =~ /^\$[\`\&\']$/ ) {
27560              print STDERR
27561                "$input_line_number: $token\n";
27562          }
27563      }
27564  }
27565
27566 This example pulls out these tokenization variables from the $line_of_tokens
27567 hash reference:
27568
27569      $rtoken_type = $line_of_tokens->{_rtoken_type};
27570      $rtokens     = $line_of_tokens->{_rtokens};
27571
27572 The variable C<$rtoken_type> is a reference to an array of token type codes,
27573 and C<$rtokens> is a reference to a corresponding array of token text.
27574 These are obviously only defined for lines of type B<CODE>.
27575 Perltidy classifies tokens into types, and has a brief code for each type.
27576 You can get a complete list at any time by running perltidy from the
27577 command line with
27578
27579      perltidy --dump-token-types
27580
27581 In the present example, we are only looking for tokens of type B<i>
27582 (identifiers), so the for loop skips past all other types.  When an
27583 identifier is found, its actual text is checked to see if it is one
27584 being sought.  If so, the above write_line prints the token and its
27585 line number.
27586
27587 The B<formatter> feature is relatively new in perltidy, and further
27588 documentation needs to be written to complete its description.  However,
27589 several example programs have been written and can be found in the
27590 B<examples> section of the source distribution.  Probably the best way
27591 to get started is to find one of the examples which most closely matches
27592 your application and start modifying it.
27593
27594 For help with perltidy's pecular way of breaking lines into tokens, you
27595 might run, from the command line,
27596
27597  perltidy -D filename
27598
27599 where F<filename> is a short script of interest.  This will produce
27600 F<filename.DEBUG> with interleaved lines of text and their token types.
27601 The B<-D> flag has been in perltidy from the beginning for this purpose.
27602 If you want to see the code which creates this file, it is
27603 C<write_debug_entry> in Tidy.pm.
27604
27605 =head1 EXPORT
27606
27607   &perltidy
27608
27609 =head1 CREDITS
27610
27611 Thanks to Hugh Myers who developed the initial modular interface
27612 to perltidy.
27613
27614 =head1 VERSION
27615
27616 This man page documents Perl::Tidy version 20070801.
27617
27618 =head1 AUTHOR
27619
27620  Steve Hancock
27621  perltidy at users.sourceforge.net
27622
27623 =head1 SEE ALSO
27624
27625 The perltidy(1) man page describes all of the features of perltidy.  It
27626 can be found at http://perltidy.sourceforge.net.
27627
27628 =cut