lib/Perl/Tidy.pm

   1 #
   2 ############################################################
   3 #
   4 #    perltidy - a perl script indenter and formatter
   5 #
   6 #    Copyright (c) 2000-2012 by Steve Hancock
   7 #    Distributed under the GPL license agreement; see file COPYING
   8 #
   9 #    This program is free software; you can redistribute it and/or modify
  10 #    it under the terms of the GNU General Public License as published by
  11 #    the Free Software Foundation; either version 2 of the License, or
  12 #    (at your option) any later version.
  13 #
  14 #    This program is distributed in the hope that it will be useful,
  15 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 #    GNU General Public License for more details.
  18 #
  19 #    You should have received a copy of the GNU General Public License
  20 #    along with this program; if not, write to the Free Software
  21 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22 #
  23 #    For brief instructions, try 'perltidy -h'.
  24 #    For more complete documentation, try 'man perltidy'
  25 #    or visit http://perltidy.sourceforge.net
  26 #
  27 #    This script is an example of the default style.  It was formatted with:
  28 #
  29 #      perltidy Tidy.pm
  30 #
  31 #    Code Contributions: See ChangeLog.html for a complete history.
  32 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  33 #        v-strings.
  34 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  35 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  36 #      Yves Orton supplied coding to help detect Windows versions.
  37 #      Axel Rose supplied a patch for MacPerl.
  38 #      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
  39 #      Dan Tyrell contributed a patch for binary I/O.
  40 #      Ueli Hugenschmidt contributed a patch for -fpsc
  41 #      Sam Kington supplied a patch to identify the initial indentation of
  42 #      entabbed code.
  43 #      jonathan swartz supplied patches for:
  44 #      * .../ pattern, which looks upwards from directory
  45 #      * --notidy, to be used in directories where we want to avoid
  46 #        accidentally tidying
  47 #      * prefilter and postfilter
  48 #      * iterations option
  49 #
  50 #      Many others have supplied key ideas, suggestions, and bug reports;
  51 #        see the CHANGES file.
  52 #
  53 ############################################################
  54
  55 package Perl::Tidy;
  56 use 5.004;    # need IO::File from 5.004 or later
  57 BEGIN { $^W = 1; }    # turn on warnings
  58
  59 use strict;
  60 use Exporter;
  61 use Carp;
  62 $|++;
  63
  64 use vars qw{
  65   $VERSION
  66   @ISA
  67   @EXPORT
  68   $missing_file_spec
  69 };
  70
  71 @ISA    = qw( Exporter );
  72 @EXPORT = qw( &perltidy );
  73
  74 use Cwd;
  75 use IO::File;
  76 use File::Basename;
  77 use File::Copy;
  78
  79 BEGIN {
  80     ( $VERSION = q($Id: Tidy.pm,v 1.74 2012/07/01 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  81 }
  82
  83 sub streamhandle {
  84
  85     # given filename and mode (r or w), create an object which:
  86     #   has a 'getline' method if mode='r', and
  87     #   has a 'print' method if mode='w'.
  88     # The objects also need a 'close' method.
  89     #
  90     # How the object is made:
  91     #
  92     # if $filename is:     Make object using:
  93     # ----------------     -----------------
  94     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
  95     # string               IO::File
  96     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
  97     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
  98     # object               object
  99     #                      (check for 'print' method for 'w' mode)
 100     #                      (check for 'getline' method for 'r' mode)
 101     my $ref = ref( my $filename = shift );
 102     my $mode = shift;
 103     my $New;
 104     my $fh;
 105
 106     # handle a reference
 107     if ($ref) {
 108         if ( $ref eq 'ARRAY' ) {
 109             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
 110         }
 111         elsif ( $ref eq 'SCALAR' ) {
 112             $New = sub { Perl::Tidy::IOScalar->new(@_) };
 113         }
 114         else {
 115
 116             # Accept an object with a getline method for reading. Note:
 117             # IO::File is built-in and does not respond to the defined
 118             # operator.  If this causes trouble, the check can be
 119             # skipped and we can just let it crash if there is no
 120             # getline.
 121             if ( $mode =~ /[rR]/ ) {
 122                 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 123                     $New = sub { $filename };
 124                 }
 125                 else {
 126                     $New = sub { undef };
 127                     confess <<EOM;
 128 ------------------------------------------------------------------------
 129 No 'getline' method is defined for object of class $ref
 130 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 131 ------------------------------------------------------------------------
 132 EOM
 133                 }
 134             }
 135
 136             # Accept an object with a print method for writing.
 137             # See note above about IO::File
 138             if ( $mode =~ /[wW]/ ) {
 139                 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 140                     $New = sub { $filename };
 141                 }
 142                 else {
 143                     $New = sub { undef };
 144                     confess <<EOM;
 145 ------------------------------------------------------------------------
 146 No 'print' method is defined for object of class $ref
 147 Please check your call to Perl::Tidy::perltidy. Trace follows.
 148 ------------------------------------------------------------------------
 149 EOM
 150                 }
 151             }
 152         }
 153     }
 154
 155     # handle a string
 156     else {
 157         if ( $filename eq '-' ) {
 158             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 159         }
 160         else {
 161             $New = sub { IO::File->new(@_) };
 162         }
 163     }
 164     $fh = $New->( $filename, $mode )
 165       or warn "Couldn't open file:$filename in mode:$mode : $!\n";
 166     return $fh, ( $ref or $filename );
 167 }
 168
 169 sub find_input_line_ending {
 170
 171     # Peek at a file and return first line ending character.
 172     # Quietly return undef in case of any trouble.
 173     my ($input_file) = @_;
 174     my $ending;
 175
 176     # silently ignore input from object or stdin
 177     if ( ref($input_file) || $input_file eq '-' ) {
 178         return $ending;
 179     }
 180     open( INFILE, $input_file ) || return $ending;
 181
 182     binmode INFILE;
 183     my $buf;
 184     read( INFILE, $buf, 1024 );
 185     close INFILE;
 186     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 187         my $test = $1;
 188
 189         # dos
 190         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 191
 192         # mac
 193         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 194
 195         # unix
 196         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 197
 198         # unknown
 199         else { }
 200     }
 201
 202     # no ending seen
 203     else { }
 204
 205     return $ending;
 206 }
 207
 208 sub catfile {
 209
 210     # concatenate a path and file basename
 211     # returns undef in case of error
 212
 213     BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 214
 215     # use File::Spec if we can
 216     unless ($missing_file_spec) {
 217         return File::Spec->catfile(@_);
 218     }
 219
 220     # Perl 5.004 systems may not have File::Spec so we'll make
 221     # a simple try.  We assume File::Basename is available.
 222     # return undef if not successful.
 223     my $name      = pop @_;
 224     my $path      = join '/', @_;
 225     my $test_file = $path . $name;
 226     my ( $test_name, $test_path ) = fileparse($test_file);
 227     return $test_file if ( $test_name eq $name );
 228     return undef if ( $^O eq 'VMS' );
 229
 230     # this should work at least for Windows and Unix:
 231     $test_file = $path . '/' . $name;
 232     ( $test_name, $test_path ) = fileparse($test_file);
 233     return $test_file if ( $test_name eq $name );
 234     return undef;
 235 }
 236
 237 sub make_temporary_filename {
 238
 239     # Make a temporary filename.
 240     # FIXME: return both a name and opened filehandle
 241     #
 242     # The POSIX tmpnam() function tends to be unreliable for non-unix systems
 243     # (at least for the win32 systems that I've tested), so use a pre-defined
 244     # name for them.  A disadvantage of this is that two perltidy
 245     # runs in the same working directory may conflict.  However, the chance of
 246     # that is small and managable by the user, especially on systems for which
 247     # the POSIX tmpnam function doesn't work.
 248     my $name = "perltidy.TMP";
 249     if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
 250         return $name;
 251     }
 252     eval "use POSIX qw(tmpnam)";
 253     if ($@) { return $name }
 254     use IO::File;
 255
 256     # just make a couple of tries before giving up and using the default
 257     for ( 0 .. 3 ) {
 258         my $tmpname = tmpnam();
 259         my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
 260         if ($fh) {
 261             $fh->close();
 262             return ($tmpname);
 263             last;
 264         }
 265     }
 266     return ($name);
 267 }
 268
 269 # Here is a map of the flow of data from the input source to the output
 270 # line sink:
 271 #
 272 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 273 #       input                         groups                 output
 274 #       lines   tokens      lines       of          lines    lines
 275 #                                      lines
 276 #
 277 # The names correspond to the package names responsible for the unit processes.
 278 #
 279 # The overall process is controlled by the "main" package.
 280 #
 281 # LineSource is the stream of input lines
 282 #
 283 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 284 # if necessary.  A token is any section of the input line which should be
 285 # manipulated as a single entity during formatting.  For example, a single
 286 # ',' character is a token, and so is an entire side comment.  It handles
 287 # the complexities of Perl syntax, such as distinguishing between '<<' as
 288 # a shift operator and as a here-document, or distinguishing between '/'
 289 # as a divide symbol and as a pattern delimiter.
 290 #
 291 # Formatter inserts and deletes whitespace between tokens, and breaks
 292 # sequences of tokens at appropriate points as output lines.  It bases its
 293 # decisions on the default rules as modified by any command-line options.
 294 #
 295 # VerticalAligner collects groups of lines together and tries to line up
 296 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 297 #
 298 # FileWriter simply writes lines to the output stream.
 299 #
 300 # The Logger package, not shown, records significant events and warning
 301 # messages.  It writes a .LOG file, which may be saved with a
 302 # '-log' or a '-g' flag.
 303
 304 {
 305
 306     # variables needed by interrupt handler:
 307     my $tokenizer;
 308     my $input_file;
 309
 310     # this routine may be called to give a status report if interrupted.  If a
 311     # parameter is given, it will call exit with that parameter.  This is no
 312     # longer used because it works under Unix but not under Windows.
 313     sub interrupt_handler {
 314
 315         my $exit_flag = shift;
 316         print STDERR "perltidy interrupted";
 317         if ($tokenizer) {
 318             my $input_line_number =
 319               Perl::Tidy::Tokenizer::get_input_line_number();
 320             print STDERR " at line $input_line_number";
 321         }
 322         if ($input_file) {
 323
 324             if   ( ref $input_file ) { print STDERR " of reference to:" }
 325             else                     { print STDERR " of file:" }
 326             print STDERR " $input_file";
 327         }
 328         print STDERR "\n";
 329         exit $exit_flag if defined($exit_flag);
 330     }
 331
 332     sub perltidy {
 333
 334         my %defaults = (
 335             argv                  => undef,
 336             destination           => undef,
 337             formatter             => undef,
 338             logfile               => undef,
 339             errorfile             => undef,
 340             perltidyrc            => undef,
 341             source                => undef,
 342             stderr                => undef,
 343             dump_options          => undef,
 344             dump_options_type     => undef,
 345             dump_getopt_flags     => undef,
 346             dump_options_category => undef,
 347             dump_options_range    => undef,
 348             dump_abbreviations    => undef,
 349             prefilter             => undef,
 350             postfilter            => undef,
 351         );
 352
 353         # don't overwrite callers ARGV
 354         local @ARGV = @ARGV;
 355
 356         my %input_hash = @_;
 357
 358         if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 359             local $" = ')(';
 360             my @good_keys = sort keys %defaults;
 361             @bad_keys = sort @bad_keys;
 362             confess <<EOM;
 363 ------------------------------------------------------------------------
 364 Unknown perltidy parameter : (@bad_keys)
 365 perltidy only understands : (@good_keys)
 366 ------------------------------------------------------------------------
 367
 368 EOM
 369         }
 370
 371         my $get_hash_ref = sub {
 372             my ($key) = @_;
 373             my $hash_ref = $input_hash{$key};
 374             if ( defined($hash_ref) ) {
 375                 unless ( ref($hash_ref) eq 'HASH' ) {
 376                     my $what = ref($hash_ref);
 377                     my $but_is =
 378                       $what ? "but is ref to $what" : "but is not a reference";
 379                     croak <<EOM;
 380 ------------------------------------------------------------------------
 381 error in call to perltidy:
 382 -$key must be reference to HASH $but_is
 383 ------------------------------------------------------------------------
 384 EOM
 385                 }
 386             }
 387             return $hash_ref;
 388         };
 389
 390         %input_hash = ( %defaults, %input_hash );
 391         my $argv               = $input_hash{'argv'};
 392         my $destination_stream = $input_hash{'destination'};
 393         my $errorfile_stream   = $input_hash{'errorfile'};
 394         my $logfile_stream     = $input_hash{'logfile'};
 395         my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 396         my $source_stream      = $input_hash{'source'};
 397         my $stderr_stream      = $input_hash{'stderr'};
 398         my $user_formatter     = $input_hash{'formatter'};
 399         my $prefilter          = $input_hash{'prefilter'};
 400         my $postfilter         = $input_hash{'postfilter'};
 401
 402         # various dump parameters
 403         my $dump_options_type     = $input_hash{'dump_options_type'};
 404         my $dump_options          = $get_hash_ref->('dump_options');
 405         my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
 406         my $dump_options_category = $get_hash_ref->('dump_options_category');
 407         my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
 408         my $dump_options_range    = $get_hash_ref->('dump_options_range');
 409
 410         # validate dump_options_type
 411         if ( defined($dump_options) ) {
 412             unless ( defined($dump_options_type) ) {
 413                 $dump_options_type = 'perltidyrc';
 414             }
 415             unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
 416                 croak <<EOM;
 417 ------------------------------------------------------------------------
 418 Please check value of -dump_options_type in call to perltidy;
 419 saw: '$dump_options_type'
 420 expecting: 'perltidyrc' or 'full'
 421 ------------------------------------------------------------------------
 422 EOM
 423
 424             }
 425         }
 426         else {
 427             $dump_options_type = "";
 428         }
 429
 430         if ($user_formatter) {
 431
 432             # if the user defines a formatter, there is no output stream,
 433             # but we need a null stream to keep coding simple
 434             $destination_stream = Perl::Tidy::DevNull->new();
 435         }
 436
 437         # see if ARGV is overridden
 438         if ( defined($argv) ) {
 439
 440             my $rargv = ref $argv;
 441             if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
 442
 443             # ref to ARRAY
 444             if ($rargv) {
 445                 if ( $rargv eq 'ARRAY' ) {
 446                     @ARGV = @$argv;
 447                 }
 448                 else {
 449                     croak <<EOM;
 450 ------------------------------------------------------------------------
 451 Please check value of -argv in call to perltidy;
 452 it must be a string or ref to ARRAY but is: $rargv
 453 ------------------------------------------------------------------------
 454 EOM
 455                 }
 456             }
 457
 458             # string
 459             else {
 460                 my ( $rargv, $msg ) = parse_args($argv);
 461                 if ($msg) {
 462                     die <<EOM;
 463 Error parsing this string passed to to perltidy with 'argv':
 464 $msg
 465 EOM
 466                 }
 467                 @ARGV = @{$rargv};
 468             }
 469         }
 470
 471         # redirect STDERR if requested
 472         if ($stderr_stream) {
 473             my $ref_type = ref($stderr_stream);
 474             if ( $ref_type eq 'SCALAR' or $ref_type eq 'ARRAY' ) {
 475                 croak <<EOM;
 476 ------------------------------------------------------------------------
 477 You are trying to redirect STDERR to a reference of type $ref_type
 478 It can only be redirected to a file
 479 Please check value of -stderr in call to perltidy
 480 ------------------------------------------------------------------------
 481 EOM
 482             }
 483             my ( $fh_stderr, $stderr_file ) =
 484               Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 485             if ($fh_stderr) { *STDERR = $fh_stderr }
 486             else {
 487                 croak <<EOM;
 488 ------------------------------------------------------------------------
 489 Unable to redirect STDERR to $stderr_stream
 490 Please check value of -stderr in call to perltidy
 491 ------------------------------------------------------------------------
 492 EOM
 493             }
 494         }
 495
 496         my $rpending_complaint;
 497         $$rpending_complaint = "";
 498         my $rpending_logfile_message;
 499         $$rpending_logfile_message = "";
 500
 501         my ( $is_Windows, $Windows_type ) =
 502           look_for_Windows($rpending_complaint);
 503
 504         # VMS file names are restricted to a 40.40 format, so we append _tdy
 505         # instead of .tdy, etc. (but see also sub check_vms_filename)
 506         my $dot;
 507         my $dot_pattern;
 508         if ( $^O eq 'VMS' ) {
 509             $dot         = '_';
 510             $dot_pattern = '_';
 511         }
 512         else {
 513             $dot         = '.';
 514             $dot_pattern = '\.';    # must escape for use in regex
 515         }
 516
 517         #---------------------------------------------------------------
 518         # get command line options
 519         #---------------------------------------------------------------
 520         my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
 521             $rexpansion, $roption_category, $roption_range )
 522           = process_command_line(
 523             $perltidyrc_stream,  $is_Windows, $Windows_type,
 524             $rpending_complaint, $dump_options_type,
 525           );
 526
 527         #---------------------------------------------------------------
 528         # Handle requests to dump information
 529         #---------------------------------------------------------------
 530
 531         # return or exit immediately after all dumps
 532         my $quit_now = 0;
 533
 534         # Getopt parameters and their flags
 535         if ( defined($dump_getopt_flags) ) {
 536             $quit_now = 1;
 537             foreach my $op ( @{$roption_string} ) {
 538                 my $opt  = $op;
 539                 my $flag = "";
 540
 541                 # Examples:
 542                 #  some-option=s
 543                 #  some-option=i
 544                 #  some-option:i
 545                 #  some-option!
 546                 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
 547                     $opt  = $1;
 548                     $flag = $2;
 549                 }
 550                 $dump_getopt_flags->{$opt} = $flag;
 551             }
 552         }
 553
 554         if ( defined($dump_options_category) ) {
 555             $quit_now = 1;
 556             %{$dump_options_category} = %{$roption_category};
 557         }
 558
 559         if ( defined($dump_options_range) ) {
 560             $quit_now = 1;
 561             %{$dump_options_range} = %{$roption_range};
 562         }
 563
 564         if ( defined($dump_abbreviations) ) {
 565             $quit_now = 1;
 566             %{$dump_abbreviations} = %{$rexpansion};
 567         }
 568
 569         if ( defined($dump_options) ) {
 570             $quit_now = 1;
 571             %{$dump_options} = %{$rOpts};
 572         }
 573
 574         return if ($quit_now);
 575
 576         # make printable string of options for this run as possible diagnostic
 577         my $readable_options = readable_options( $rOpts, $roption_string );
 578
 579         # dump from command line
 580         if ( $rOpts->{'dump-options'} ) {
 581             print STDOUT $readable_options;
 582             exit 0;
 583         }
 584
 585         #---------------------------------------------------------------
 586         # check parameters and their interactions
 587         #---------------------------------------------------------------
 588         check_options( $rOpts, $is_Windows, $Windows_type,
 589             $rpending_complaint );
 590
 591         if ($user_formatter) {
 592             $rOpts->{'format'} = 'user';
 593         }
 594
 595         # there must be one entry here for every possible format
 596         my %default_file_extension = (
 597             tidy => 'tdy',
 598             html => 'html',
 599             user => '',
 600         );
 601
 602         # be sure we have a valid output format
 603         unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 604             my $formats = join ' ',
 605               sort map { "'" . $_ . "'" } keys %default_file_extension;
 606             my $fmt = $rOpts->{'format'};
 607             die "-format='$fmt' but must be one of: $formats\n";
 608         }
 609
 610         my $output_extension =
 611           make_extension( $rOpts->{'output-file-extension'},
 612             $default_file_extension{ $rOpts->{'format'} }, $dot );
 613
 614         # If the backup extension contains a / character then the backup should
 615         # be deleted when the -b option is used.   On older versions of
 616         # perltidy this will generate an error message due to an illegal
 617         # file name.
 618         #
 619         # A backup file will still be generated but will be deleted
 620         # at the end.  If -bext='/' then this extension will be
 621         # the default 'bak'.  Otherwise it will be whatever characters
 622         # remains after all '/' characters are removed.  For example:
 623         # -bext         extension     slashes
 624         #  '/'          bak           1
 625         #  '/delete'    delete        1
 626         #  'delete/'    delete        1
 627         #  '/dev/null'  devnull       2    (Currently not allowed)
 628         my $bext = $rOpts->{'backup-file-extension'};
 629         my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
 630
 631         # At present only one forward slash is allowed.  In the future multiple
 632         # slashes may be allowed to allow for other options
 633         if ( $delete_backup > 1 ) {
 634             die "-bext=$bext contains more than one '/'\n";
 635         }
 636
 637         my $backup_extension =
 638           make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 639
 640         my $html_toc_extension =
 641           make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 642
 643         my $html_src_extension =
 644           make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 645
 646         # check for -b option;
 647         # silently ignore unless beautify mode
 648         my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 649           && $rOpts->{'format'} eq 'tidy';
 650
 651         # turn off -b with warnings in case of conflicts with other options
 652         if ($in_place_modify) {
 653             if ( $rOpts->{'standard-output'} ) {
 654                 warn "Ignoring -b; you may not use -b and -st together\n";
 655                 $in_place_modify = 0;
 656             }
 657             if ($destination_stream) {
 658                 warn
 659 "Ignoring -b; you may not specify a destination stream and -b together\n";
 660                 $in_place_modify = 0;
 661             }
 662             if ( ref($source_stream) ) {
 663                 warn
 664 "Ignoring -b; you may not specify a source array and -b together\n";
 665                 $in_place_modify = 0;
 666             }
 667             if ( $rOpts->{'outfile'} ) {
 668                 warn "Ignoring -b; you may not use -b and -o together\n";
 669                 $in_place_modify = 0;
 670             }
 671             if ( defined( $rOpts->{'output-path'} ) ) {
 672                 warn "Ignoring -b; you may not use -b and -opath together\n";
 673                 $in_place_modify = 0;
 674             }
 675         }
 676
 677         Perl::Tidy::Formatter::check_options($rOpts);
 678         if ( $rOpts->{'format'} eq 'html' ) {
 679             Perl::Tidy::HtmlWriter->check_options($rOpts);
 680         }
 681
 682         # make the pattern of file extensions that we shouldn't touch
 683         my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 684         if ($output_extension) {
 685             my $ext = quotemeta($output_extension);
 686             $forbidden_file_extensions .= "|$ext";
 687         }
 688         if ( $in_place_modify && $backup_extension ) {
 689             my $ext = quotemeta($backup_extension);
 690             $forbidden_file_extensions .= "|$ext";
 691         }
 692         $forbidden_file_extensions .= ')$';
 693
 694         # Create a diagnostics object if requested;
 695         # This is only useful for code development
 696         my $diagnostics_object = undef;
 697         if ( $rOpts->{'DIAGNOSTICS'} ) {
 698             $diagnostics_object = Perl::Tidy::Diagnostics->new();
 699         }
 700
 701         # no filenames should be given if input is from an array
 702         if ($source_stream) {
 703             if ( @ARGV > 0 ) {
 704                 die
 705 "You may not specify any filenames when a source array is given\n";
 706             }
 707
 708             # we'll stuff the source array into ARGV
 709             unshift( @ARGV, $source_stream );
 710
 711             # No special treatment for source stream which is a filename.
 712             # This will enable checks for binary files and other bad stuff.
 713             $source_stream = undef unless ref($source_stream);
 714         }
 715
 716         # use stdin by default if no source array and no args
 717         else {
 718             unshift( @ARGV, '-' ) unless @ARGV;
 719         }
 720
 721         #---------------------------------------------------------------
 722         # Ready to go...
 723         # main loop to process all files in argument list
 724         #---------------------------------------------------------------
 725         my $number_of_files = @ARGV;
 726         my $formatter       = undef;
 727         $tokenizer = undef;
 728         while ( $input_file = shift @ARGV ) {
 729             my $fileroot;
 730             my $input_file_permissions;
 731
 732             #---------------------------------------------------------------
 733             # prepare this input stream
 734             #---------------------------------------------------------------
 735             if ($source_stream) {
 736                 $fileroot = "perltidy";
 737             }
 738             elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 739                 $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
 740                 $in_place_modify = 0;
 741             }
 742             else {
 743                 $fileroot = $input_file;
 744                 unless ( -e $input_file ) {
 745
 746                     # file doesn't exist - check for a file glob
 747                     if ( $input_file =~ /([\?\*\[\{])/ ) {
 748
 749                         # Windows shell may not remove quotes, so do it
 750                         my $input_file = $input_file;
 751                         if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 752                         if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 753                         my $pattern = fileglob_to_re($input_file);
 754                         ##eval "/$pattern/";
 755                         if ( !$@ && opendir( DIR, './' ) ) {
 756                             my @files =
 757                               grep { /$pattern/ && !-d $_ } readdir(DIR);
 758                             closedir(DIR);
 759                             if (@files) {
 760                                 unshift @ARGV, @files;
 761                                 next;
 762                             }
 763                         }
 764                     }
 765                     print "skipping file: '$input_file': no matches found\n";
 766                     next;
 767                 }
 768
 769                 unless ( -f $input_file ) {
 770                     print "skipping file: $input_file: not a regular file\n";
 771                     next;
 772                 }
 773
 774                 # As a safety precaution, skip zero length files.
 775                 # If for example a source file got clobberred somehow,
 776                 # the old .tdy or .bak files might still exist so we
 777                 # shouldn't overwrite them with zero length files.
 778                 unless ( -s $input_file ) {
 779                     print "skipping file: $input_file: Zero size\n";
 780                     next;
 781                 }
 782
 783                 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 784                     print
 785 "skipping file: $input_file: Non-text (override with -f)\n";
 786                     next;
 787                 }
 788
 789                 # we should have a valid filename now
 790                 $fileroot               = $input_file;
 791                 $input_file_permissions = ( stat $input_file )[2] & 07777;
 792
 793                 if ( $^O eq 'VMS' ) {
 794                     ( $fileroot, $dot ) = check_vms_filename($fileroot);
 795                 }
 796
 797                 # add option to change path here
 798                 if ( defined( $rOpts->{'output-path'} ) ) {
 799
 800                     my ( $base, $old_path ) = fileparse($fileroot);
 801                     my $new_path = $rOpts->{'output-path'};
 802                     unless ( -d $new_path ) {
 803                         unless ( mkdir $new_path, 0777 ) {
 804                             die "unable to create directory $new_path: $!\n";
 805                         }
 806                     }
 807                     my $path = $new_path;
 808                     $fileroot = catfile( $path, $base );
 809                     unless ($fileroot) {
 810                         die <<EOM;
 811 ------------------------------------------------------------------------
 812 Problem combining $new_path and $base to make a filename; check -opath
 813 ------------------------------------------------------------------------
 814 EOM
 815                     }
 816                 }
 817             }
 818
 819             # Skip files with same extension as the output files because
 820             # this can lead to a messy situation with files like
 821             # script.tdy.tdy.tdy ... or worse problems ...  when you
 822             # rerun perltidy over and over with wildcard input.
 823             if (
 824                 !$source_stream
 825                 && (   $input_file =~ /$forbidden_file_extensions/o
 826                     || $input_file eq 'DIAGNOSTICS' )
 827               )
 828             {
 829                 print "skipping file: $input_file: wrong extension\n";
 830                 next;
 831             }
 832
 833             # the 'source_object' supplies a method to read the input file
 834             my $source_object =
 835               Perl::Tidy::LineSource->new( $input_file, $rOpts,
 836                 $rpending_logfile_message );
 837             next unless ($source_object);
 838
 839             # Prefilters and postfilters: The prefilter is a code reference
 840             # that will be applied to the source before tidying, and the
 841             # postfilter is a code reference applied to the result before outputting.
 842             if ($prefilter) {
 843                 my $buf = '';
 844                 while ( my $line = $source_object->get_line() ) {
 845                     $buf .= $line;
 846                 }
 847                 $buf = $prefilter->($buf);
 848
 849                 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
 850                     $rpending_logfile_message );
 851             }
 852
 853             # register this file name with the Diagnostics package
 854             $diagnostics_object->set_input_file($input_file)
 855               if $diagnostics_object;
 856
 857             #---------------------------------------------------------------
 858             # prepare the output stream
 859             #---------------------------------------------------------------
 860             my $output_file = undef;
 861             my $actual_output_extension;
 862
 863             if ( $rOpts->{'outfile'} ) {
 864
 865                 if ( $number_of_files <= 1 ) {
 866
 867                     if ( $rOpts->{'standard-output'} ) {
 868                         die "You may not use -o and -st together\n";
 869                     }
 870                     elsif ($destination_stream) {
 871                         die
 872 "You may not specify a destination array and -o together\n";
 873                     }
 874                     elsif ( defined( $rOpts->{'output-path'} ) ) {
 875                         die "You may not specify -o and -opath together\n";
 876                     }
 877                     elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 878                         die "You may not specify -o and -oext together\n";
 879                     }
 880                     $output_file = $rOpts->{outfile};
 881
 882                     # make sure user gives a file name after -o
 883                     if ( $output_file =~ /^-/ ) {
 884                         die "You must specify a valid filename after -o\n";
 885                     }
 886
 887                     # do not overwrite input file with -o
 888                     if ( defined($input_file_permissions)
 889                         && ( $output_file eq $input_file ) )
 890                     {
 891                         die
 892                           "Use 'perltidy -b $input_file' to modify in-place\n";
 893                     }
 894                 }
 895                 else {
 896                     die "You may not use -o with more than one input file\n";
 897                 }
 898             }
 899             elsif ( $rOpts->{'standard-output'} ) {
 900                 if ($destination_stream) {
 901                     die
 902 "You may not specify a destination array and -st together\n";
 903                 }
 904                 $output_file = '-';
 905
 906                 if ( $number_of_files <= 1 ) {
 907                 }
 908                 else {
 909                     die "You may not use -st with more than one input file\n";
 910                 }
 911             }
 912             elsif ($destination_stream) {
 913                 $output_file = $destination_stream;
 914             }
 915             elsif ($source_stream) {  # source but no destination goes to stdout
 916                 $output_file = '-';
 917             }
 918             elsif ( $input_file eq '-' ) {
 919                 $output_file = '-';
 920             }
 921             else {
 922                 if ($in_place_modify) {
 923                     $output_file = IO::File->new_tmpfile()
 924                       or die "cannot open temp file for -b option: $!\n";
 925                 }
 926                 else {
 927                     $actual_output_extension = $output_extension;
 928                     $output_file             = $fileroot . $output_extension;
 929                 }
 930             }
 931
 932             # the 'sink_object' knows how to write the output file
 933             my $tee_file = $fileroot . $dot . "TEE";
 934
 935             my $line_separator = $rOpts->{'output-line-ending'};
 936             if ( $rOpts->{'preserve-line-endings'} ) {
 937                 $line_separator = find_input_line_ending($input_file);
 938             }
 939
 940             # Eventually all I/O may be done with binmode, but for now it is
 941             # only done when a user requests a particular line separator
 942             # through the -ple or -ole flags
 943             my $binmode = 0;
 944             if   ( defined($line_separator) ) { $binmode        = 1 }
 945             else                              { $line_separator = "\n" }
 946
 947             my ( $sink_object, $postfilter_buffer );
 948             if ($postfilter) {
 949                 $sink_object =
 950                   Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
 951                     $line_separator, $rOpts, $rpending_logfile_message,
 952                     $binmode );
 953             }
 954             else {
 955                 $sink_object =
 956                   Perl::Tidy::LineSink->new( $output_file, $tee_file,
 957                     $line_separator, $rOpts, $rpending_logfile_message,
 958                     $binmode );
 959             }
 960
 961             #---------------------------------------------------------------
 962             # initialize the error logger
 963             #---------------------------------------------------------------
 964             my $warning_file = $fileroot . $dot . "ERR";
 965             if ($errorfile_stream) { $warning_file = $errorfile_stream }
 966             my $log_file = $fileroot . $dot . "LOG";
 967             if ($logfile_stream) { $log_file = $logfile_stream }
 968
 969             my $logger_object =
 970               Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 971                 $saw_extrude );
 972             write_logfile_header(
 973                 $rOpts,        $logger_object, $config_file,
 974                 $rraw_options, $Windows_type,  $readable_options,
 975             );
 976             if ($$rpending_logfile_message) {
 977                 $logger_object->write_logfile_entry($$rpending_logfile_message);
 978             }
 979             if ($$rpending_complaint) {
 980                 $logger_object->complain($$rpending_complaint);
 981             }
 982
 983             #---------------------------------------------------------------
 984             # initialize the debug object, if any
 985             #---------------------------------------------------------------
 986             my $debugger_object = undef;
 987             if ( $rOpts->{DEBUG} ) {
 988                 $debugger_object =
 989                   Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 990             }
 991
 992             #---------------------------------------------------------------
 993             # loop over iterations for one source stream
 994             #---------------------------------------------------------------
 995
 996             # We will do a convergence test if 3 or more iterations are allowed.
 997             # It would be pointless for fewer because we have to make at least
 998             # two passes before we can see if we are converged, and the test
 999             # would just slow things down.
1000             my $max_iterations = $rOpts->{'iterations'};
1001             my $convergence_log_message;
1002             my %saw_md5;
1003             my $do_convergence_test = $max_iterations > 2;
1004             if ($do_convergence_test) {
1005                 eval "use Digest::MD5 qw(md5_hex)";
1006                 $do_convergence_test = !$@;
1007             }
1008
1009             # save objects to allow redirecting output during iterations
1010             my $sink_object_final     = $sink_object;
1011             my $debugger_object_final = $debugger_object;
1012             my $logger_object_final   = $logger_object;
1013
1014             for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
1015
1016                 # send output stream to temp buffers until last iteration
1017                 my $sink_buffer;
1018                 if ( $iter < $max_iterations ) {
1019                     $sink_object =
1020                       Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
1021                         $line_separator, $rOpts, $rpending_logfile_message,
1022                         $binmode );
1023                 }
1024                 else {
1025                     $sink_object = $sink_object_final;
1026                 }
1027
1028                 # Save logger, debugger output only on pass 1 because:
1029                 # (1) line number references must be to the starting
1030                 # source, not an intermediate result, and
1031                 # (2) we need to know if there are errors so we can stop the
1032                 # iterations early if necessary.
1033                 if ( $iter > 1 ) {
1034                     $debugger_object = undef;
1035                     $logger_object   = undef;
1036                 }
1037
1038                 #------------------------------------------------------------
1039                 # create a formatter for this file : html writer or
1040                 # pretty printer
1041                 #------------------------------------------------------------
1042
1043                 # we have to delete any old formatter because, for safety,
1044                 # the formatter will check to see that there is only one.
1045                 $formatter = undef;
1046
1047                 if ($user_formatter) {
1048                     $formatter = $user_formatter;
1049                 }
1050                 elsif ( $rOpts->{'format'} eq 'html' ) {
1051                     $formatter =
1052                       Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1053                         $actual_output_extension, $html_toc_extension,
1054                         $html_src_extension );
1055                 }
1056                 elsif ( $rOpts->{'format'} eq 'tidy' ) {
1057                     $formatter = Perl::Tidy::Formatter->new(
1058                         logger_object      => $logger_object,
1059                         diagnostics_object => $diagnostics_object,
1060                         sink_object        => $sink_object,
1061                     );
1062                 }
1063                 else {
1064                     die "I don't know how to do -format=$rOpts->{'format'}\n";
1065                 }
1066
1067                 unless ($formatter) {
1068                     die
1069                       "Unable to continue with $rOpts->{'format'} formatting\n";
1070                 }
1071
1072                 #---------------------------------------------------------------
1073                 # create the tokenizer for this file
1074                 #---------------------------------------------------------------
1075                 $tokenizer = undef;    # must destroy old tokenizer
1076                 $tokenizer = Perl::Tidy::Tokenizer->new(
1077                     source_object      => $source_object,
1078                     logger_object      => $logger_object,
1079                     debugger_object    => $debugger_object,
1080                     diagnostics_object => $diagnostics_object,
1081                     starting_level => $rOpts->{'starting-indentation-level'},
1082                     tabs           => $rOpts->{'tabs'},
1083                     entab_leading_space => $rOpts->{'entab-leading-whitespace'},
1084                     indent_columns      => $rOpts->{'indent-columns'},
1085                     look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
1086                     look_for_autoloader => $rOpts->{'look-for-autoloader'},
1087                     look_for_selfloader => $rOpts->{'look-for-selfloader'},
1088                     trim_qw             => $rOpts->{'trim-qw'},
1089                 );
1090
1091                 #---------------------------------------------------------------
1092                 # now we can do it
1093                 #---------------------------------------------------------------
1094                 process_this_file( $tokenizer, $formatter );
1095
1096                 #---------------------------------------------------------------
1097                 # close the input source and report errors
1098                 #---------------------------------------------------------------
1099                 $source_object->close_input_file();
1100
1101                 # line source for next iteration (if any) comes from the current
1102                 # temporary output buffer
1103                 if ( $iter < $max_iterations ) {
1104
1105                     $sink_object->close_output_file();
1106                     $source_object =
1107                       Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1108                         $rpending_logfile_message );
1109
1110                     # stop iterations if errors or converged
1111                     my $stop_now = $logger_object->{_warning_count};
1112                     if ($stop_now) {
1113                         $convergence_log_message = <<EOM;
1114 Stopping iterations because of errors.
1115 EOM
1116                     }
1117                     elsif ($do_convergence_test) {
1118                         my $digest = md5_hex($sink_buffer);
1119                         if ( !$saw_md5{$digest} ) {
1120                             $saw_md5{$digest} = $iter;
1121                         }
1122                         else {
1123
1124                             # Saw this result before, stop iterating
1125                             $stop_now = 1;
1126                             my $iterm = $iter - 1;
1127                             if ( $saw_md5{$digest} != $iterm ) {
1128
1129                                 # Blinking (oscillating) between two stable
1130                                 # end states.  This has happened in the past
1131                                 # but at present there are no known instances.
1132                                 $convergence_log_message = <<EOM;
1133 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1134 EOM
1135                                 $diagnostics_object->write_diagnostics(
1136                                     $convergence_log_message)
1137                                   if $diagnostics_object;
1138                             }
1139                             else {
1140                                 $convergence_log_message = <<EOM;
1141 Converged.  Output for iteration $iter same as for iter $iterm.
1142 EOM
1143                                 $diagnostics_object->write_diagnostics(
1144                                     $convergence_log_message)
1145                                   if $diagnostics_object && $iterm > 2;
1146                             }
1147                         }
1148                     } ## end if ($do_convergence_test)
1149
1150                     if ($stop_now) {
1151
1152                         # we are stopping the iterations early;
1153                         # copy the output stream to its final destination
1154                         $sink_object = $sink_object_final;
1155                         while ( my $line = $source_object->get_line() ) {
1156                             $sink_object->write_line($line);
1157                         }
1158                         $source_object->close_input_file();
1159                         last;
1160                     }
1161                 } ## end if ( $iter < $max_iterations)
1162             }    # end loop over iterations for one source file
1163
1164             # restore objects which have been temporarily undefined
1165             # for second and higher iterations
1166             $debugger_object = $debugger_object_final;
1167             $logger_object   = $logger_object_final;
1168
1169             $logger_object->write_logfile_entry($convergence_log_message)
1170               if $convergence_log_message;
1171
1172             #---------------------------------------------------------------
1173             # Perform any postfilter operation
1174             #---------------------------------------------------------------
1175             if ($postfilter) {
1176                 $sink_object->close_output_file();
1177                 $sink_object =
1178                   Perl::Tidy::LineSink->new( $output_file, $tee_file,
1179                     $line_separator, $rOpts, $rpending_logfile_message,
1180                     $binmode );
1181                 my $buf = $postfilter->($postfilter_buffer);
1182                 $source_object =
1183                   Perl::Tidy::LineSource->new( \$buf, $rOpts,
1184                     $rpending_logfile_message );
1185                 ##chomp $buf;
1186                 ##foreach my $line ( split( "\n", $buf , -1) ) {
1187                 while ( my $line = $source_object->get_line() ) {
1188                     $sink_object->write_line($line);
1189                 }
1190                 $source_object->close_input_file();
1191             }
1192
1193             # Save names of the input and output files for syntax check
1194             my $ifname = $input_file;
1195             my $ofname = $output_file;
1196
1197             #---------------------------------------------------------------
1198             # handle the -b option (backup and modify in-place)
1199             #---------------------------------------------------------------
1200             if ($in_place_modify) {
1201                 unless ( -f $input_file ) {
1202
1203                     # oh, oh, no real file to backup ..
1204                     # shouldn't happen because of numerous preliminary checks
1205                     die
1206 "problem with -b backing up input file '$input_file': not a file\n";
1207                 }
1208                 my $backup_name = $input_file . $backup_extension;
1209                 if ( -f $backup_name ) {
1210                     unlink($backup_name)
1211                       or die
1212 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1213                 }
1214
1215                 # backup the input file
1216                 # we use copy for symlinks, move for regular files
1217                 if ( -l $input_file ) {
1218                     File::Copy::copy( $input_file, $backup_name )
1219                       or die "File::Copy failed trying to backup source: $!";
1220                 }
1221                 else {
1222                     rename( $input_file, $backup_name )
1223                       or die
1224 "problem renaming $input_file to $backup_name for -b option: $!\n";
1225                 }
1226                 $ifname = $backup_name;
1227
1228                 # copy the output to the original input file
1229                 # NOTE: it would be nice to just close $output_file and use
1230                 # File::Copy::copy here, but in this case $output_file is the
1231                 # handle of an open nameless temporary file so we would lose
1232                 # everything if we closed it.
1233                 seek( $output_file, 0, 0 )
1234                   or die
1235                   "unable to rewind a temporary file for -b option: $!\n";
1236                 my $fout = IO::File->new("> $input_file")
1237                   or die
1238 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1239                 binmode $fout;
1240                 my $line;
1241                 while ( $line = $output_file->getline() ) {
1242                     $fout->print($line);
1243                 }
1244                 $fout->close();
1245                 $output_file = $input_file;
1246                 $ofname      = $input_file;
1247             }
1248
1249             #---------------------------------------------------------------
1250             # clean up and report errors
1251             #---------------------------------------------------------------
1252             $sink_object->close_output_file()    if $sink_object;
1253             $debugger_object->close_debug_file() if $debugger_object;
1254
1255             # set output file permissions
1256             if ( $output_file && -f $output_file && !-l $output_file ) {
1257                 if ($input_file_permissions) {
1258
1259                     # give output script same permissions as input script, but
1260                     # make it user-writable or else we can't run perltidy again.
1261                     # Thus we retain whatever executable flags were set.
1262                     if ( $rOpts->{'format'} eq 'tidy' ) {
1263                         chmod( $input_file_permissions | 0600, $output_file );
1264                     }
1265
1266                     # else use default permissions for html and any other format
1267                 }
1268             }
1269
1270             #---------------------------------------------------------------
1271             # Do syntax check if requested and possible
1272             #---------------------------------------------------------------
1273             my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1274             if (   $logger_object
1275                 && $rOpts->{'check-syntax'}
1276                 && $ifname
1277                 && $ofname )
1278             {
1279                 $infile_syntax_ok =
1280                   check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1281             }
1282
1283             #---------------------------------------------------------------
1284             # remove the original file for in-place modify as follows:
1285             #   $delete_backup=0 never
1286             #   $delete_backup=1 only if no errors
1287             #   $delete_backup>1 always  : CURRENTLY NOT ALLOWED, see above
1288             #---------------------------------------------------------------
1289             if (   $in_place_modify
1290                 && $delete_backup
1291                 && -f $ifname
1292                 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1293             {
1294
1295                 # As an added safety precaution, do not delete the source file
1296                 # if its size has dropped from positive to zero, since this
1297                 # could indicate a disaster of some kind, including a hardware
1298                 # failure.  Actually, this could happen if you had a file of
1299                 # all comments (or pod) and deleted everything with -dac (-dap)
1300                 # for some reason.
1301                 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1302                     warn(
1303 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1304                     );
1305                 }
1306                 else {
1307                     unlink($ifname)
1308                       or die
1309 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1310                 }
1311             }
1312
1313             $logger_object->finish( $infile_syntax_ok, $formatter )
1314               if $logger_object;
1315         }    # end of main loop to process all files
1316     }    # end of main program perltidy
1317 }
1318
sub get_stream_as_named_file {

    # Return the name of a file containing a stream of data, creating
    # a temporary file if necessary.
    # Given:
    #  $stream - the name of a file, or a reference which is handed to
    #            Perl::Tidy::streamhandle to be opened for reading
    # Returns:
    #  $fname = name of file if possible, or undef
    #  $is_tmpfile = true if temp file, undef if not temp file
    #
    # This routine is needed for passing actual files to Perl for
    # a syntax check.
    my ($stream) = @_;
    my $is_tmpfile;
    my $fname;
    if ($stream) {
        if ( ref($stream) ) {

            # only the handle is needed; any further return values of
            # streamhandle are ignored
            my ($fh_stream) = Perl::Tidy::streamhandle( $stream, 'r' );
            if ($fh_stream) {

                # FIXME: fix the tmpnam routine to return an open filehandle
                my $tmpnam = Perl::Tidy::make_temporary_filename();
                my $fout   = IO::File->new( $tmpnam, 'w' );

                # if the temporary file cannot be opened we quietly return
                # an undefined name
                if ($fout) {
                    $fname      = $tmpnam;
                    $is_tmpfile = 1;
                    binmode $fout;

                    # Test for definedness rather than truth.  A method call
                    # in a 'while' condition does not get the implicit
                    # defined() test that '<$fh>' gets, so a final line
                    # consisting of '0' without a newline would otherwise
                    # end the copy early and be lost.
                    while ( defined( my $line = $fh_stream->getline() ) ) {
                        $fout->print($line);
                    }
                    $fout->close();
                }
                $fh_stream->close();
            }
        }

        # a plain name is returned as-is, but only if it is a real file;
        # '-' (standard input) has no name which could be returned
        elsif ( $stream ne '-' && -f $stream ) {
            $fname = $stream;
        }
    }
    return ( $fname, $is_tmpfile );
}
1363
sub fileglob_to_re {

    # Convert a simple file glob into the text of an anchored regular
    # expression.  Modified (corrected) from version in find2perl.
    # Given:
    #  $x - a file glob, such as '*.pl'
    # Returns:
    #  a pattern string, anchored at both ends so that it only matches
    #  complete names
    #
    # Only '*' and '?' are translated.  Any other character which is not
    # escaped below, such as a '[...]' character class, is passed through
    # to the regular expression unchanged.
    my $x = shift;

    # Escape characters which are literal in a glob but special in a
    # regex.  This includes '+' and '|': left alone they would act as a
    # quantifier and as alternation, so that 'c++*.txt' would match
    # 'cmain.txt' and 'a|b*' would match any name starting with 'a'.
    $x =~ s#([./^\$()+|])#\\$1#g;
    $x =~ s#\*#.*#g;    # '*' -> '.*'
    $x =~ s#\?#.#g;     # '?' -> '.'
    return "^$x\\z";    # match whole word
}
1373
sub make_extension {

    # Build a file extension from a user request.
    # Given:
    #  $extension - the requested extension; the default is used if false
    #  $default   - the extension to fall back on
    #  $dot       - the separator to put in front ('.', or '_' under VMS)
    # Returns:
    #  the extension, with the separator in front only if the extension
    #  begins with a letter or digit; anything else is returned untouched,
    #  which gives the user some freedom
    my ( $extension, $default, $dot ) = @_;

    my $chosen = $extension ? $extension : $default;

    return $chosen =~ /^[a-zA-Z0-9]/ ? $dot . $chosen : $chosen;
}
1390
sub write_logfile_header {

    # Write the standard opening lines of a log file.
    # Given:
    #  $rOpts            - reference to the hash of options
    #  $logger_object    - object receiving the entries through its
    #                      write_logfile_entry method
    #  $config_file      - name of the configuration file, if any
    #  $rraw_options     - reference to the list of raw option strings
    #  $Windows_type     - Windows type string, if any
    #  $readable_options - text of the final parameter set
    # Side effect: sets $rOpts->{'logfile'} when DEBUG or show-options
    # is in effect, to force the logfile to be saved.
    my (
        $rOpts,        $logger_object, $config_file,
        $rraw_options, $Windows_type,  $readable_options
    ) = @_;

    # identify the program version and the system
    my $banner =
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n";
    $logger_object->write_logfile_entry($banner);
    $logger_object->write_logfile_entry("Windows type is $Windows_type\n")
      if ($Windows_type);

    # show where the parameters came from and what they were
    my $raw_options_line = join( ' ', @$rraw_options ) . "\n";
    $logger_object->write_logfile_entry(
        "Found Configuration File >>> $config_file \n")
      if ($config_file);
    foreach my $entry (
        "Configuration and command line parameters for this run:\n",
        $raw_options_line )
    {
        $logger_object->write_logfile_entry($entry);
    }

    # optionally dump the complete final parameter set between two rules
    my $want_full_dump = $rOpts->{'DEBUG'} || $rOpts->{'show-options'};
    if ($want_full_dump) {
        $rOpts->{'logfile'} = 1;    # force logfile to be saved
        my $rule = "------------------------------------\n";
        foreach my $entry (
            "Final parameter set for this run\n",
            $rule, $readable_options, $rule
          )
        {
            $logger_object->write_logfile_entry($entry);
        }
    }

    # closing hint; this must remain the last statement
    $logger_object->write_logfile_entry(
        "To find error messages search for 'WARNING' with your editor\n");
}
1427
sub generate_options {

    ######################################################################
    # Generate and return references to:
    #  @option_string - the list of options to be passed to Getopt::Long
    #  @defaults - the list of default options
    #  %expansion - a hash showing how all abbreviations are expanded
    #  %option_category - a hash giving the general category of each option
    #  %option_range - a hash giving the valid ranges of certain options
    #
    # This routine takes no arguments.  The five references are returned
    # as a list, in the order shown above.  It dies if two of the options
    # installed below try to claim the same abbreviation.

    # Note: a few options are not documented in the man page and usage
    # message. This is because these are experimental or debug options and
    # may or may not be retained in future versions.
    #
    # Here are the undocumented flags as far as I know.  Any of them
    # may disappear at any time.  They are mainly for fine-tuning
    # and debugging.
    #
    # fll --> fuzzy-line-length           # a trivial parameter which gets
    #                                       turned off for the extrude option
    #                                       which is mainly for debugging
    # chk --> check-multiline-quotes      # check for old bug; to be deleted
    # scl --> short-concatenation-item-length   # helps break at '.'
    # recombine                           # for debugging line breaks
    # valign                              # for debugging vertical alignment
    # I   --> DIAGNOSTICS                 # for debugging
    ######################################################################

    # here is a summary of the Getopt codes:
    # <none> does not take an argument
    # =s takes a mandatory string
    # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
    # =i takes a mandatory integer
    # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
    # ! does not take an argument and may be negated
    #  i.e., -foo and -nofoo are allowed
    # a double dash signals the end of the options list
    #
    #---------------------------------------------------------------
    # Define the option string passed to GetOptions.
    #---------------------------------------------------------------

    my @option_string   = ();
    my %expansion       = ();
    my %option_category = ();
    my %option_range    = ();

    # names of categories in manual
    # leading integers will allow sorting
    my @category_name = (
        '0. I/O control',
        '1. Basic formatting options',
        '2. Code indentation control',
        '3. Whitespace control',
        '4. Comment controls',
        '5. Linebreak controls',
        '6. Controlling list formatting',
        '7. Retaining or ignoring existing line breaks',
        '8. Blank line control',
        '9. Other controls',
        '10. HTML options',
        '11. pod2html options',
        '12. Controlling HTML properties',
        '13. Debugging',
    );

    #  These options are parsed directly by perltidy:
    #    help h
    #    version v
    #  However, they are included in the option set so that they will
    #  be seen in the options dump.

    # These long option names have no abbreviations or are treated specially
    @option_string = qw(
      html!
      noprofile
      no-profile
      npro
      recombine!
      valign!
      notidy
    );

    # Everything in the initial list is filed under 'Debugging' first;
    # 'html' is then re-filed just below.
    my $category = 13;    # Debugging
    foreach my $flag (@option_string) {

        # work on a copy: we must avoid changing the actual flag
        ( my $opt = $flag ) =~ s/!$//;
        $option_category{$opt} = $category_name[$category];
    }

    # NOTE(review): index 11 is '11. pod2html options' in @category_name,
    # although the comment says HTML; '10. HTML options' may have been
    # intended - confirm before changing.
    $category = 11;                                       # HTML
    $option_category{html} = $category_name[$category];

    # routine to install and check options
    #   $long_name  - the long option name, without any Getopt code
    #   $short_name - its abbreviation (a false value means none)
    #   $flag       - the Getopt code ('', '!', '=s', '=i', ':i')
    # The option is filed under whatever $category is when the call is made.
    # For '!' options with an abbreviation, the negated abbreviation
    # ('n' . short) is also mapped to the negated long name ('no' . long).
    my $add_option = sub {
        my ( $long_name, $short_name, $flag ) = @_;
        push @option_string, $long_name . $flag;
        $option_category{$long_name} = $category_name[$category];
        if ($short_name) {
            if ( $expansion{$short_name} ) {
                my $existing_name = $expansion{$short_name}[0];
                die
"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
            }
            $expansion{$short_name} = [$long_name];
            if ( $flag eq '!' ) {
                my $nshort_name = 'n' . $short_name;
                my $nolong_name = 'no' . $long_name;
                if ( $expansion{$nshort_name} ) {
                    my $existing_name = $expansion{$nshort_name}[0];
                    die
"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
                }
                $expansion{$nshort_name} = [$nolong_name];
            }
        }
    };

    # Install long option names which have a simple abbreviation.
    # Options with code '!' get standard negation ('no' for long names,
    # 'n' for abbreviations).  Categories follow the manual.

    ###########################
    $category = 0;    # I/O_Control
    ###########################
    $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
    $add_option->( 'backup-file-extension',      'bext',  '=s' );
    $add_option->( 'force-read-binary',          'f',     '!' );
    $add_option->( 'format',                     'fmt',   '=s' );
    $add_option->( 'iterations',                 'it',    '=i' );
    $add_option->( 'logfile',                    'log',   '!' );
    $add_option->( 'logfile-gap',                'g',     ':i' );
    $add_option->( 'outfile',                    'o',     '=s' );
    $add_option->( 'output-file-extension',      'oext',  '=s' );
    $add_option->( 'output-path',                'opath', '=s' );
    $add_option->( 'profile',                    'pro',   '=s' );
    $add_option->( 'quiet',                      'q',     '!' );
    $add_option->( 'standard-error-output',      'se',    '!' );
    $add_option->( 'standard-output',            'st',    '!' );
    $add_option->( 'warning-output',             'w',     '!' );

    # options which are both toggle switches and values moved here
    # to hide from tidyview (which does not show category 0 flags):
    # -ole moved here from category 1
    # -sil moved here from category 2
    $add_option->( 'output-line-ending',         'ole', '=s' );
    $add_option->( 'starting-indentation-level', 'sil', '=i' );

    ########################################
    $category = 1;    # Basic formatting options
    ########################################
    $add_option->( 'check-syntax',             'syn',  '!' );
    $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
    $add_option->( 'indent-columns',           'i',    '=i' );
    $add_option->( 'maximum-line-length',      'l',    '=i' );
    $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
    $add_option->( 'preserve-line-endings',    'ple',  '!' );
    $add_option->( 'tabs',                     't',    '!' );

    ########################################
    $category = 2;    # Code indentation control
    ########################################
    $add_option->( 'continuation-indentation',           'ci',   '=i' );
    $add_option->( 'line-up-parentheses',                'lp',   '!' );
    $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
    $add_option->( 'outdent-keywords',                   'okw',  '!' );
    $add_option->( 'outdent-labels',                     'ola',  '!' );
    $add_option->( 'outdent-long-quotes',                'olq',  '!' );
    $add_option->( 'indent-closing-brace',               'icb',  '!' );
    $add_option->( 'closing-token-indentation',          'cti',  '=i' );
    $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
    $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
    $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
    $add_option->( 'brace-left-and-indent',              'bli',  '!' );
    $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );

    ########################################
    $category = 3;    # Whitespace control
    ########################################
    $add_option->( 'add-semicolons',                            'asc',   '!' );
    $add_option->( 'add-whitespace',                            'aws',   '!' );
    $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
    $add_option->( 'brace-tightness',                           'bt',    '=i' );
    $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
    $add_option->( 'delete-semicolons',                         'dsm',   '!' );
    $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
    $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
    $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
    $add_option->( 'paren-tightness',                           'pt',    '=i' );
    $add_option->( 'space-after-keyword',                       'sak',   '=s' );
    $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
    $add_option->( 'space-function-paren',                      'sfp',   '!' );
    $add_option->( 'space-keyword-paren',                       'skp',   '!' );
    $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
    $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
    $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
    $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
    $add_option->( 'trim-qw',                                   'tqw',   '!' );
    $add_option->( 'want-left-space',                           'wls',   '=s' );
    $add_option->( 'want-right-space',                          'wrs',   '=s' );

    ########################################
    $category = 4;    # Comment controls
    ########################################
    $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
    $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
    $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
    $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
    $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
    $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
    $add_option->( 'closing-side-comments',             'csc',  '!' );
    $add_option->( 'closing-side-comments-balanced',    'cscb', '!' );
    $add_option->( 'format-skipping',                   'fs',   '!' );
    $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
    $add_option->( 'format-skipping-end',               'fse',  '=s' );
    $add_option->( 'hanging-side-comments',             'hsc',  '!' );
    $add_option->( 'indent-block-comments',             'ibc',  '!' );
    $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
    $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
    $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
    $add_option->( 'outdent-long-comments',             'olc',  '!' );
    $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
    $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
    $add_option->( 'static-block-comments',             'sbc',  '!' );
    $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
    $add_option->( 'static-side-comments',              'ssc',  '!' );

    ########################################
    $category = 5;    # Linebreak controls
    ########################################
    $add_option->( 'add-newlines',                            'anl',   '!' );
    $add_option->( 'block-brace-vertical-tightness',          'bbvt',  '=i' );
    $add_option->( 'block-brace-vertical-tightness-list',     'bbvtl', '=s' );
    $add_option->( 'brace-vertical-tightness',                'bvt',   '=i' );
    $add_option->( 'brace-vertical-tightness-closing',        'bvtc',  '=i' );
    $add_option->( 'cuddled-else',                            'ce',    '!' );
    $add_option->( 'delete-old-newlines',                     'dnl',   '!' );
    $add_option->( 'opening-brace-always-on-right',           'bar',   '!' );
    $add_option->( 'opening-brace-on-new-line',               'bl',    '!' );
    $add_option->( 'opening-hash-brace-right',                'ohbr',  '!' );
    $add_option->( 'opening-paren-right',                     'opr',   '!' );
    $add_option->( 'opening-square-bracket-right',            'osbr',  '!' );
    $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl',  '!' );
    $add_option->( 'opening-sub-brace-on-new-line',           'sbl',   '!' );
    $add_option->( 'paren-vertical-tightness',                'pvt',   '=i' );
    $add_option->( 'paren-vertical-tightness-closing',        'pvtc',  '=i' );
    $add_option->( 'stack-closing-hash-brace',                'schb',  '!' );
    $add_option->( 'stack-closing-paren',                     'scp',   '!' );
    $add_option->( 'stack-closing-square-bracket',            'scsb',  '!' );
    $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
    $add_option->( 'stack-opening-paren',                     'sop',   '!' );
    $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
    $add_option->( 'vertical-tightness',                      'vt',    '=i' );
    $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
    $add_option->( 'want-break-after',                        'wba',   '=s' );
    $add_option->( 'want-break-before',                       'wbb',   '=s' );
    $add_option->( 'break-after-all-operators',               'baao',  '!' );
    $add_option->( 'break-before-all-operators',              'bbao',  '!' );
    $add_option->( 'keep-interior-semicolons',                'kis',   '!' );

    ########################################
    $category = 6;    # Controlling list formatting
    ########################################
    $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
    $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
    $add_option->( 'maximum-fields-per-table',       'mft', '=i' );

    ########################################
    $category = 7;    # Retaining or ignoring existing line breaks
    ########################################
    $add_option->( 'break-at-old-keyword-breakpoints',   'bok', '!' );
    $add_option->( 'break-at-old-logical-breakpoints',   'bol', '!' );
    $add_option->( 'break-at-old-ternary-breakpoints',   'bot', '!' );
    $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
    $add_option->( 'ignore-old-breakpoints',             'iob', '!' );

    ########################################
    $category = 8;    # Blank line control
    ########################################
    $add_option->( 'blanks-before-blocks',            'bbb',  '!' );
    $add_option->( 'blanks-before-comments',          'bbc',  '!' );
    $add_option->( 'blank-lines-before-subs',         'blbs', '=i' );
    $add_option->( 'blank-lines-before-packages',     'blbp', '=i' );
    $add_option->( 'long-block-line-count',           'lbl',  '=i' );
    $add_option->( 'maximum-consecutive-blank-lines', 'mbl',  '=i' );
    $add_option->( 'keep-old-blank-lines',            'kbl',  '=i' );

    ########################################
    $category = 9;    # Other controls
    ########################################
    $add_option->( 'delete-block-comments',        'dbc',  '!' );
    $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
    $add_option->( 'delete-pod',                   'dp',   '!' );
    $add_option->( 'delete-side-comments',         'dsc',  '!' );
    $add_option->( 'tee-block-comments',           'tbc',  '!' );
    $add_option->( 'tee-pod',                      'tp',   '!' );
    $add_option->( 'tee-side-comments',            'tsc',  '!' );
    $add_option->( 'look-for-autoloader',          'lal',  '!' );
    $add_option->( 'look-for-hash-bang',           'x',    '!' );
    $add_option->( 'look-for-selfloader',          'lsl',  '!' );
    $add_option->( 'pass-version-line',            'pvl',  '!' );

    ########################################
    $category = 13;    # Debugging
    ########################################
    $add_option->( 'DEBUG',                           'D',    '!' );
    $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
    $add_option->( 'check-multiline-quotes',          'chk',  '!' );
    $add_option->( 'dump-defaults',                   'ddf',  '!' );
    $add_option->( 'dump-long-names',                 'dln',  '!' );
    $add_option->( 'dump-options',                    'dop',  '!' );
    $add_option->( 'dump-profile',                    'dpro', '!' );
    $add_option->( 'dump-short-names',                'dsn',  '!' );
    $add_option->( 'dump-token-types',                'dtt',  '!' );
    $add_option->( 'dump-want-left-space',            'dwls', '!' );
    $add_option->( 'dump-want-right-space',           'dwrs', '!' );
    $add_option->( 'fuzzy-line-length',               'fll',  '!' );
    $add_option->( 'help',                            'h',    '' );
    $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
    $add_option->( 'show-options',                    'opt',  '!' );
    $add_option->( 'version',                         'v',    '' );

    #---------------------------------------------------------------------

    # The Perl::Tidy::HtmlWriter will add its own options to the string
    Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );

    ########################################
    # Set categories 10, 11, 12
    ########################################
    # Based on their known order: any option which still has no category
    # gets the current one.  The category is 'sticky': it starts at 12 and
    # changes only when a name starting with 'html-linked' or 'pod2html'
    # is seen, so the result depends on the order of @option_string.
    $category = 12;    # HTML properties
    foreach my $opt (@option_string) {
        my $long_name = $opt;

        # strip the trailing Getopt code to recover the bare long name
        $long_name =~ s/(!|=.*|:.*)$//;
        unless ( defined( $option_category{$long_name} ) ) {
            if ( $long_name =~ /^html-linked/ ) {
                $category = 10;    # HTML options
            }
            elsif ( $long_name =~ /^pod2html/ ) {
                $category = 11;    # Pod2html
            }
            $option_category{$long_name} = $category_name[$category];
        }
    }

    #---------------------------------------------------------------
    # Assign valid ranges to certain options
    #---------------------------------------------------------------
    # In the future, these may be used to make preliminary checks
    # hash keys are long names
    # If key or value is undefined:
    #   strings may have any value
    #   integer ranges are >=0
    # If value is defined:
    #   value is [qw(any valid words)] for strings
    #   value is [min, max] for integers
    #   if min is undefined, there is no lower limit
    #   if max is undefined, there is no upper limit
    # Parameters not listed here have defaults
    %option_range = (
        'format'             => [ 'tidy', 'html', 'user' ],
        'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],

        'block-brace-tightness'    => [ 0, 2 ],
        'brace-tightness'          => [ 0, 2 ],
        'paren-tightness'          => [ 0, 2 ],
        'square-bracket-tightness' => [ 0, 2 ],

        'block-brace-vertical-tightness'            => [ 0, 2 ],
        'brace-vertical-tightness'                  => [ 0, 2 ],
        'brace-vertical-tightness-closing'          => [ 0, 2 ],
        'paren-vertical-tightness'                  => [ 0, 2 ],
        'paren-vertical-tightness-closing'          => [ 0, 2 ],
        'square-bracket-vertical-tightness'         => [ 0, 2 ],
        'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
        'vertical-tightness'                        => [ 0, 2 ],
        'vertical-tightness-closing'                => [ 0, 2 ],

        'closing-brace-indentation'          => [ 0, 3 ],
        'closing-paren-indentation'          => [ 0, 3 ],
        'closing-square-bracket-indentation' => [ 0, 3 ],
        'closing-token-indentation'          => [ 0, 3 ],

        'closing-side-comment-else-flag' => [ 0, 2 ],
        'comma-arrow-breakpoints'        => [ 0, 3 ],
    );

    # Note: we could actually allow negative ci if someone really wants it:
    # $option_range{'continuation-indentation'} = [ undef, undef ];

    #---------------------------------------------------------------
    # Assign default values to the above options here, except
    # for 'outfile' and 'help'.
    # These settings should approximate the perlstyle(1) suggestions.
    #---------------------------------------------------------------
    my @defaults = qw(
      add-newlines
      add-semicolons
      add-whitespace
      blanks-before-blocks
      blanks-before-comments
      blank-lines-before-subs=1
      blank-lines-before-packages=1
      block-brace-tightness=0
      block-brace-vertical-tightness=0
      brace-tightness=1
      brace-vertical-tightness-closing=0
      brace-vertical-tightness=0
      break-at-old-logical-breakpoints
      break-at-old-ternary-breakpoints
      break-at-old-attribute-breakpoints
      break-at-old-keyword-breakpoints
      comma-arrow-breakpoints=1
      nocheck-syntax
      closing-side-comment-interval=6
      closing-side-comment-maximum-text=20
      closing-side-comment-else-flag=0
      closing-side-comments-balanced
      closing-paren-indentation=0
      closing-brace-indentation=0
      closing-square-bracket-indentation=0
      continuation-indentation=2
      delete-old-newlines
      delete-semicolons
      fuzzy-line-length
      hanging-side-comments
      indent-block-comments
      indent-columns=4
      iterations=1
      keep-old-blank-lines=1
      long-block-line-count=8
      look-for-autoloader
      look-for-selfloader
      maximum-consecutive-blank-lines=1
      maximum-fields-per-table=0
      maximum-line-length=80
      minimum-space-to-comment=4
      nobrace-left-and-indent
      nocuddled-else
      nodelete-old-whitespace
      nohtml
      nologfile
      noquiet
      noshow-options
      nostatic-side-comments
      notabs
      nowarning-output
      outdent-labels
      outdent-long-quotes
      outdent-long-comments
      paren-tightness=1
      paren-vertical-tightness-closing=0
      paren-vertical-tightness=0
      pass-version-line
      recombine
      valign
      short-concatenation-item-length=8
      space-for-semicolon
      square-bracket-tightness=1
      square-bracket-vertical-tightness-closing=0
      square-bracket-vertical-tightness=0
      static-block-comments
      trim-qw
      format=tidy
      backup-file-extension=bak
      format-skipping

      pod2html
      html-table-of-contents
      html-entities
    );

    # this default contains a space, so it cannot go in the qw() list above
    push @defaults, "perl-syntax-check-flags=-c -T";

    #---------------------------------------------------------------
    # Define abbreviations which will be expanded into the above primitives.
    # These may be defined recursively.
    #---------------------------------------------------------------
    # The abbreviations already installed by $add_option are kept by
    # including %expansion first; a key repeated later in the list replaces
    # the earlier value.
    %expansion = (
        %expansion,
        'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
        'fnl'               => [qw(freeze-newlines)],
        'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
        'fws'               => [qw(freeze-whitespace)],
        'freeze-blank-lines' =>
          [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
        'fbl'                => [qw(freeze-blank-lines)],
        'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
        'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
        'nooutdent-long-lines' =>
          [qw(nooutdent-long-quotes nooutdent-long-comments)],
        'noll' => [qw(nooutdent-long-lines)],
        'io'   => [qw(indent-only)],
        'delete-all-comments' =>
          [qw(delete-block-comments delete-side-comments delete-pod)],
        'nodelete-all-comments' =>
          [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
        'dac'  => [qw(delete-all-comments)],
        'ndac' => [qw(nodelete-all-comments)],
        'gnu'  => [qw(gnu-style)],
        'pbp'  => [qw(perl-best-practices)],
        'tee-all-comments' =>
          [qw(tee-block-comments tee-side-comments tee-pod)],
        'notee-all-comments' =>
          [qw(notee-block-comments notee-side-comments notee-pod)],
        'tac'   => [qw(tee-all-comments)],
        'ntac'  => [qw(notee-all-comments)],
        'html'  => [qw(format=html)],
        'nhtml' => [qw(format=tidy)],
        'tidy'  => [qw(format=tidy)],

        'swallow-optional-blank-lines'   => [qw(kbl=0)],
        'noswallow-optional-blank-lines' => [qw(kbl=1)],
        'sob'                            => [qw(kbl=0)],
        'nsob'                           => [qw(kbl=1)],

        'break-after-comma-arrows'   => [qw(cab=0)],
        'nobreak-after-comma-arrows' => [qw(cab=1)],
        'baa'                        => [qw(cab=0)],
        'nbaa'                       => [qw(cab=1)],

        'blanks-before-subs'   => [qw(blbs=1 blbp=1)],
        'bbs'                  => [qw(blbs=1 blbp=1)],
        'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
        'nbbs'                 => [qw(blbs=0 blbp=0)],

        'break-at-old-trinary-breakpoints' => [qw(bot)],

        'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
        'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
        'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
        'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
        'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],

        'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
        'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
        'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
        'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
        'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],

        'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
        'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
        'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],

        'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
        'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
        'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],

        'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
        'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
        'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],

        'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
        'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
        'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],

        'otr'                   => [qw(opr ohbr osbr)],
        'opening-token-right'   => [qw(opr ohbr osbr)],
        'notr'                  => [qw(nopr nohbr nosbr)],
        'noopening-token-right' => [qw(nopr nohbr nosbr)],

        'sot'                    => [qw(sop sohb sosb)],
        'nsot'                   => [qw(nsop nsohb nsosb)],
        'stack-opening-tokens'   => [qw(sop sohb sosb)],
        'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],

        # The negated long form for closing tokens must have its own key;
        # reusing 'nostack-opening-tokens' here would replace the entry
        # just above with the closing-token negations.
        'sct'                    => [qw(scp schb scsb)],
        'stack-closing-tokens'   => [qw(scp schb scsb)],
        'nsct'                   => [qw(nscp nschb nscsb)],
        'nostack-closing-tokens' => [qw(nscp nschb nscsb)],

        # 'mangle' originally deleted pod and comments, but to keep it
        # reversible, it no longer does.  But if you really want to
        # delete them, just use:
        #   -mangle -dac

        # An interesting use for 'mangle' is to do this:
        #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
        # which will form as many one-line blocks as possible

        'mangle' => [
            qw(
              check-syntax
              keep-old-blank-lines=0
              delete-old-newlines
              delete-old-whitespace
              delete-semicolons
              indent-columns=0
              maximum-consecutive-blank-lines=0
              maximum-line-length=100000
              noadd-newlines
              noadd-semicolons
              noadd-whitespace
              noblanks-before-blocks
              blank-lines-before-subs=0
              blank-lines-before-packages=0
              notabs
              )
        ],

        # 'extrude' originally deleted pod and comments, but to keep it
        # reversible, it no longer does.  But if you really want to
        # delete them, just use
        #   extrude -dac
        #
        # An interesting use for 'extrude' is to do this:
        #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
        # which will break up all one-line blocks.

        'extrude' => [
            qw(
              check-syntax
              ci=0
              delete-old-newlines
              delete-old-whitespace
              delete-semicolons
              indent-columns=0
              maximum-consecutive-blank-lines=0
              maximum-line-length=1
              noadd-semicolons
              noadd-whitespace
              noblanks-before-blocks
              blank-lines-before-subs=0
              blank-lines-before-packages=0
              nofuzzy-line-length
              notabs
              norecombine
              )
        ],

        # this style tries to follow the GNU Coding Standards (which do
        # not really apply to perl but which are followed by some perl
        # programmers).
        'gnu-style' => [
            qw(
              lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
              )
        ],

        # Style suggested in Damian Conway's Perl Best Practices
        # (the 'wbb' value contains spaces, so it is a separate q() string)
        'perl-best-practices' => [
            qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
        ],

        # Additional styles can be added here
    );

    # let the HtmlWriter update the abbreviation table as well
    Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );

    # Uncomment next line to dump all expansions for debugging:
    # dump_short_names(\%expansion);
    return (
        \@option_string,   \@defaults, \%expansion,
        \%option_category, \%option_range
    );

}    # end of generate_options
2088
sub process_command_line {

    # Collect the perltidy options from three sources, in this order:
    #   1. the built-in defaults returned by generate_options,
    #   2. a configuration file, unless -npro was given,
    #   3. the command line in @ARGV.
    # Each source is passed through GetOptions into the same hash %Opts.
    #
    # Parameters:
    #   $perltidyrc_stream  - configuration file given as a call parameter;
    #                         a -pro=filename on the command line wins over it
    #   $is_Windows, $Windows_type, $rpending_complaint
    #                       - handed on to find_config_file
    #   $dump_options_type  - if 'perltidyrc', the defaults are not loaded
    #
    # Returns the list
    #   ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
    #     $rexpansion, $roption_category, $roption_range )
    #
    # Side effects: rewrites @ARGV (leading '--' becomes '-', abbreviations
    # are expanded, options are consumed by GetOptions); exits immediately
    # for the help, version and dump options; dies on invalid options.

    my (
        $perltidyrc_stream,  $is_Windows, $Windows_type,
        $rpending_complaint, $dump_options_type
    ) = @_;

    use Getopt::Long;

    my (
        $roption_string,   $rdefaults, $rexpansion,
        $roption_category, $roption_range
    ) = generate_options();

    #---------------------------------------------------------------
    # set the defaults by passing the above list through GetOptions
    #---------------------------------------------------------------
    my %Opts = ();
    {
        local @ARGV;

        # do not load the defaults if we are just dumping perltidyrc
        my $dumping_perltidyrc = defined($dump_options_type)
          && $dump_options_type eq 'perltidyrc';
        if ( !$dumping_perltidyrc ) {
            foreach my $default (@$rdefaults) { push @ARGV, "--" . $default }
        }

        # Patch to save users Getopt::Long configuration
        # and set to Getopt::Long defaults.  Use eval to avoid
        # breaking old versions of Perl without these routines.
        my $glc;
        eval { $glc = Getopt::Long::Configure() };
        if ($@) {
            $glc = undef;
        }
        else {
            eval { Getopt::Long::ConfigDefaults() };
        }

        if ( !GetOptions( \%Opts, @$roption_string ) ) {
            die "Programming Bug: error in setting default options";
        }

        # Patch to put the previous Getopt::Long configuration back
        eval { Getopt::Long::Configure($glc) } if defined $glc;
    }

    my @raw_options        = ();
    my $config_file        = "";
    my $saw_ignore_profile = 0;
    my $saw_extrude        = 0;
    my $saw_dump_profile   = 0;

    #---------------------------------------------------------------
    # Take a first look at the command-line parameters.  Do as many
    # immediate dumps as possible, which can avoid confusion if the
    # perltidyrc file has an error.
    #---------------------------------------------------------------
    foreach my $arg (@ARGV) {

        # $arg is an alias, so this normalizes the entry in @ARGV itself
        $arg =~ s/^--/-/;
        if ( $arg =~ /^-(npro|noprofile|no-profile)$/ ) {
            $saw_ignore_profile = 1;
        }

        # note: this must come before -pro and -profile, below:
        elsif ( $arg =~ /^-(dump-profile|dpro)$/ ) {
            $saw_dump_profile = 1;
        }
        elsif ( $arg =~ /^-(pro|profile)=(.+)/ ) {

            # copy the capture before any other match can replace it
            my $profile_name = $2;
            if ($config_file) {
                warn
"Only one -pro=filename allowed, using '$profile_name' instead of '$config_file'\n";
            }
            $config_file = $profile_name;

            # resolve <dir>/.../<file>, meaning look upwards from directory
            if ( my ( $start_dir, $search_file ) =
                ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
            {
                $start_dir = '.' if !$start_dir;
                $start_dir = Cwd::realpath($start_dir);
                if ( my $found_file =
                    find_file_upwards( $start_dir, $search_file ) )
                {
                    $config_file = $found_file;
                }
            }
            unless ( -e $config_file ) {
                warn "cannot find file given with -pro=$config_file: $!\n";
                $config_file = "";
            }
        }
        elsif ( $arg =~ /^-(pro|profile)=?$/ ) {
            die "usage: -pro=filename or --profile=filename, no spaces\n";
        }
        elsif ( $arg =~ /^-extrude$/ ) {
            $saw_extrude = 1;
        }
        elsif ( $arg =~ /^-(help|h|HELP|H|\?)$/ ) {
            usage();
            exit 0;
        }
        elsif ( $arg =~ /^-(version|v)$/ ) {
            show_version();
            exit 0;
        }
        elsif ( $arg =~ /^-(dump-defaults|ddf)$/ ) {
            dump_defaults(@$rdefaults);
            exit 0;
        }
        elsif ( $arg =~ /^-(dump-long-names|dln)$/ ) {
            dump_long_names(@$roption_string);
            exit 0;
        }
        elsif ( $arg =~ /^-(dump-short-names|dsn)$/ ) {
            dump_short_names($rexpansion);
            exit 0;
        }
        elsif ( $arg =~ /^-(dump-token-types|dtt)$/ ) {
            Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
            exit 0;
        }
    }

    if ( $saw_dump_profile && $saw_ignore_profile ) {
        warn "No profile to dump because of -npro\n";
        exit 1;
    }

    #---------------------------------------------------------------
    # read any .perltidyrc configuration file
    #---------------------------------------------------------------
    unless ($saw_ignore_profile) {

        # resolve possible conflict between $perltidyrc_stream passed
        # as call parameter to perltidy and -pro=filename on command
        # line.
        if ($perltidyrc_stream) {
            if ($config_file) {
                warn <<EOM;
 Conflict: a perltidyrc configuration file was specified both as this
 perltidy call parameter: $perltidyrc_stream
 and with this -profile=$config_file.
 Using -profile=$config_file.
EOM
            }
            else {
                $config_file = $perltidyrc_stream;
            }
        }

        # look for a config file if we don't have one yet
        my $config_file_chatter  = "";
        my $rconfig_file_chatter = \$config_file_chatter;
        $config_file =
          find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
            $rpending_complaint )
          unless $config_file;

        # open any config file
        my $fh_config;
        if ($config_file) {
            ( $fh_config, $config_file ) =
              Perl::Tidy::streamhandle( $config_file, 'r' );
            unless ($fh_config) {
                $$rconfig_file_chatter .=
                  "# $config_file exists but cannot be opened\n";
            }
        }

        if ($saw_dump_profile) {
            dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
            exit 0;
        }

        if ($fh_config) {

            my ( $rconfig_list, $death_message ) =
              read_config_file( $fh_config, $config_file, $rexpansion );
            die $death_message if ($death_message);

            # process any .perltidyrc parameters right now so we can
            # localize errors
            if (@$rconfig_list) {
                local @ARGV = @$rconfig_list;

                expand_command_abbreviations( $rexpansion, \@raw_options,
                    $config_file );

                if ( !GetOptions( \%Opts, @$roption_string ) ) {
                    die
"Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help\n";
                }

                # Anything left in this local @ARGV is an error and must be
                # invalid bare words from the configuration file.  We cannot
                # check this earlier because bare words may have been valid
                # values for parameters.  We had to wait for GetOptions to have
                # a look at @ARGV.
                if (@ARGV) {
                    my $count = @ARGV;
                    my $str   = "\'" . pop(@ARGV) . "\'";

                    # Loop on the array itself rather than on the popped
                    # value, so that a bare word such as '0' cannot end the
                    # listing early.
                    while (@ARGV) {
                        my $param = pop(@ARGV);
                        if ( length($str) < 70 ) {
                            $str .= ", '$param'";
                        }
                        else {
                            $str .= ", ...";
                            last;
                        }
                    }
                    die <<EOM;
There are $count unrecognized values in the configuration file '$config_file':
$str
Use leading dashes for parameters.  Use -npro to ignore this file.
EOM
                }

                # Undo any options which cause premature exit.  They are not
                # appropriate for a config file, and it could be hard to
                # diagnose the cause of the premature exit.
                my @premature_exit_options = qw{
                  dump-defaults
                  dump-long-names
                  dump-options
                  dump-profile
                  dump-short-names
                  dump-token-types
                  dump-want-left-space
                  dump-want-right-space
                  help
                  stylesheet
                  version
                };
                foreach my $option (@premature_exit_options) {
                    if ( defined( $Opts{$option} ) ) {
                        delete $Opts{$option};
                        warn
                          "ignoring --$option in config file: $config_file\n";
                    }
                }
            }
        }
    }

    #---------------------------------------------------------------
    # now process the command line parameters
    #---------------------------------------------------------------
    expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );

    if ( !GetOptions( \%Opts, @$roption_string ) ) {
        die "Error on command line; for help try 'perltidy -h'\n";
    }

    return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
        $rexpansion, $roption_category, $roption_range );
}    # end of process_command_line
2351
sub check_options {

    # Check the basic options for interactions and unreasonable values,
    # and adjust the option hash in place.
    #
    # Parameters:
    #   $rOpts              - reference to the option hash; modified in place
    #   $is_Windows         - true if running under Windows
    #   $Windows_type       - Windows sub-type string; an empty value or one
    #                         starting with '9' or 'Me' disables syntax checks
    #   $rpending_complaint - reference to a string to which a message is
    #                         appended when the syntax check is turned off
    #                         because the user is root
    #
    # Returns nothing.  Warnings about corrected values go to STDERR.

    my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;

    #---------------------------------------------------------------
    # check and handle any interactions among the basic options..
    #---------------------------------------------------------------

    # Since -vt, -vtc, and -cti are abbreviations, but under
    # msdos, an unquoted input parameter like vtc=1 will be
    # seen as 2 parameters, vtc and 1, so the abbreviations
    # won't be seen.  Therefore, we will catch them here if
    # they get through.
    my %members_of_group = (
        'vertical-tightness' => [
            qw(
              paren-vertical-tightness
              square-bracket-vertical-tightness
              brace-vertical-tightness
              )
        ],
        'vertical-tightness-closing' => [
            qw(
              paren-vertical-tightness-closing
              square-bracket-vertical-tightness-closing
              brace-vertical-tightness-closing
              )
        ],
        'closing-token-indentation' => [
            qw(
              closing-square-bracket-indentation
              closing-brace-indentation
              closing-paren-indentation
              )
        ],
    );
    foreach my $group ( sort keys %members_of_group ) {
        next unless defined $rOpts->{$group};
        foreach my $member ( @{ $members_of_group{$group} } ) {
            $rOpts->{$member} = $rOpts->{$group};
        }
    }

    # In quiet mode, there is no log file and hence no way to report
    # results of syntax check, so don't do it.
    if ( $rOpts->{'quiet'} ) {
        $rOpts->{'check-syntax'} = 0;
    }

    # can't check syntax if no output
    if ( $rOpts->{'format'} ne 'tidy' ) {
        $rOpts->{'check-syntax'} = 0;
    }

    # Never let Windows 9x/Me systems run syntax check -- this will prevent a
    # wide variety of nasty problems on these systems, because they cannot
    # reliably run backticks.  Don't even think about changing this!
    if (   $rOpts->{'check-syntax'}
        && $is_Windows
        && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
    {
        $rOpts->{'check-syntax'} = 0;
    }

    # It's really a bad idea to check syntax as root unless you wrote
    # the script yourself.  FIXME: not sure if this works with VMS
    unless ($is_Windows) {

        if ( $< == 0 && $rOpts->{'check-syntax'} ) {
            $rOpts->{'check-syntax'} = 0;
            $$rpending_complaint .=
"Syntax check deactivated for safety; you shouldn't run this as root\n";
        }
    }

    # check iteration count and quietly fix if necessary:
    # - iterations option only applies to code beautification mode
    # - the convergence check should stop most runs on iteration 2, and
    #   virtually all on iteration 3.  But we'll allow up to 6.
    if ( $rOpts->{'format'} ne 'tidy' ) {
        $rOpts->{'iterations'} = 1;
    }
    elsif ( defined( $rOpts->{'iterations'} ) ) {
        if    ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
        elsif ( $rOpts->{'iterations'} > 6 )  { $rOpts->{'iterations'} = 6 }
    }
    else {
        $rOpts->{'iterations'} = 1;
    }

    # check for reasonable number of blank lines and fix to avoid problems
    foreach my $rpair (
        [ 'blank-lines-before-subs',     '-blbs' ],
        [ 'blank-lines-before-packages', '-blbp' ]
      )
    {
        my ( $long_name, $short_name ) = @{$rpair};
        next unless $rOpts->{$long_name};
        if ( $rOpts->{$long_name} < 0 ) {
            warn "negative value of $short_name, setting 0\n";
            $rOpts->{$long_name} = 0;
        }
        if ( $rOpts->{$long_name} > 100 ) {
            warn "unreasonably large value of $short_name, reducing\n";
            $rOpts->{$long_name} = 100;
        }
    }

    # see if user set a non-negative logfile-gap
    if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {

        # a zero gap will be taken as a 1
        if ( $rOpts->{'logfile-gap'} == 0 ) {
            $rOpts->{'logfile-gap'} = 1;
        }

        # setting a non-negative logfile gap causes logfile to be saved
        $rOpts->{'logfile'} = 1;
    }

    # not setting logfile gap, or setting it negative, causes default of 50
    else {
        $rOpts->{'logfile-gap'} = 50;
    }

    # set short-cut flag when only indentation is to be done.
    # Note that the user may or may not have already set the
    # indent-only flag.
    if (   !$rOpts->{'add-whitespace'}
        && !$rOpts->{'delete-old-whitespace'}
        && !$rOpts->{'add-newlines'}
        && !$rOpts->{'delete-old-newlines'} )
    {
        $rOpts->{'indent-only'} = 1;
    }

    # -isbc implies -ibc
    if ( $rOpts->{'indent-spaced-block-comments'} ) {
        $rOpts->{'indent-block-comments'} = 1;
    }

    # -bli flag implies -bl
    if ( $rOpts->{'brace-left-and-indent'} ) {
        $rOpts->{'opening-brace-on-new-line'} = 1;
    }

    if (   $rOpts->{'opening-brace-always-on-right'}
        && $rOpts->{'opening-brace-on-new-line'} )
    {
        warn <<EOM;
 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
  'opening-brace-on-new-line' (-bl).  Ignoring -bl.
EOM
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # it simplifies things if -bl is 0 rather than undefined
    if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # -sbl defaults to -bl if not defined
    if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
        $rOpts->{'opening-sub-brace-on-new-line'} =
          $rOpts->{'opening-brace-on-new-line'};
    }

    if ( $rOpts->{'entab-leading-whitespace'} ) {
        if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {

            # The invalid -et is dropped; since it is being ignored it must
            # not switch off a 'tabs' option that the user also asked for.
            warn "-et=n must use a positive integer; ignoring -et\n";
            $rOpts->{'entab-leading-whitespace'} = undef;
        }

        # a valid entab-leading-whitespace has priority over the older
        # 'tabs' option
        elsif ( $rOpts->{'tabs'} ) {
            $rOpts->{'tabs'} = 0;
        }
    }
    return;
}
2525
sub find_file_upwards {

    # Look for a plain file named $search_file in $search_dir and then in
    # each of its parent directories in turn.
    #
    # Returns the path of the first match, or nothing (undef in scalar
    # context) if the top of the directory tree is reached without a match.

    my ( $search_dir, $search_file ) = @_;

    $search_dir  =~ s{/+$}{};
    $search_file =~ s{^/+}{};

    while (1) {
        my $try_path = "$search_dir/$search_file";
        return $try_path if -f $try_path;

        # A root directory given as '/' has become '' after the trailing
        # slashes were stripped above; dirname() returns '/' when it
        # reaches the root from below.  In both cases the root has just
        # been tested, so give up.
        return if $search_dir eq '' || $search_dir eq '/';

        # Also give up when dirname() can go no higher (for example '.'
        # for a relative path, or a drive root), otherwise the loop would
        # never end.
        my $parent_dir = dirname($search_dir);
        return if $parent_dir eq $search_dir;

        $search_dir = $parent_dir;
    }
}
2545
sub expand_command_abbreviations {

    # Replace every abbreviation in @ARGV by the words it stands for,
    # repeating until a pass finds no more abbreviations.
    #
    #   $rexpansion   - hash ref: abbreviation => array ref of replacements
    #   $rraw_options - array ref; the dash options seen on the first pass
    #                   are appended to it (shown if expansion never ends)
    #   $config_file  - only used to choose the message if expansion never
    #                   ends
    #
    # The result is left in @ARGV.

    my ( $rexpansion, $rraw_options, $config_file ) = @_;

    # set a pass limit to prevent an infinite loop;
    # 10 should be plenty, but it may be increased to allow deeply
    # nested expansions.
    my $max_passes = 10;

    # one pass per iteration, until nothing is left to expand
    foreach my $pass_count ( 0 .. $max_passes ) {
        my @expanded_args = ();
        my $abbrev_count  = 0;

        foreach my $word (@ARGV) {

            # a leading 'no-' is written as just 'no'
            if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }

            # split a dash flag into its name and whatever follows the name
            my ( $abr, $flags ) = ( $word =~ /^-[-]?([\w\-]+)(.*)/ );

            # anything else (such as a file name) is carried over unchanged
            if ( !defined($abr) ) {
                push( @expanded_args, $word );
                next;
            }

            # remember the input of the first pass for the error report
            push( @$rraw_options, $word ) if ( $pass_count == 0 );

            # an abbreviation may include its argument, such as '-vt=1';
            # if so, treat name and argument together as the abbreviation
            if ( $rexpansion->{ $abr . $flags } ) {
                $abr .= $flags;
                $flags = "";
            }

            # not an abbreviation, so it must be an actual long name
            if ( !$rexpansion->{$abr} ) {
                push( @expanded_args, $word );
                next;
            }

            # an abbreviation: queue its replacements for the next pass,
            # skipping empty entries for safety
            $abbrev_count++;
            push( @expanded_args,
                map { '--' . $_ . $flags }
                grep { $_ } @{ $rexpansion->{$abr} } );
        }

        # the expanded list becomes the new parameter list
        @ARGV = @expanded_args;
        last if ( $abbrev_count <= 0 );

        # still expanding after the last allowed pass: report and give up
        next if ( $pass_count != $max_passes );

        print STDERR
"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
        print STDERR "Here are the raw options\n";
        local $" = ')(';
        print STDERR "(@$rraw_options)\n";
        my $num = @expanded_args;

        if ( $num < 50 ) {
            print STDERR "After $max_passes passes here is ARGV\n";
            print STDERR "(@expanded_args)\n";
        }
        else {
            print STDERR "After $max_passes passes ARGV has $num entries\n";
        }

        if ($config_file) {
            die <<"DIE";
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
        }
        else {
            die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
        }
    }
}
2649
# Debug routine -- print the expansion hash to STDOUT: a fixed header
# followed by one line for each abbreviation, in sorted order, showing the
# words it expands to.
sub dump_short_names {
    my ($rexpansion) = @_;
    print STDOUT <<EOM;
List of short names.  This list shows how all abbreviations are
translated into other abbreviations and, eventually, into long names.
New abbreviations may be defined in a .perltidyrc file.
For a list of all long names, use perltidy --dump-long-names (-dln).
--------------------------------------------------------------------------
EOM
    my @short_names = sort keys %{$rexpansion};
    foreach my $short_name (@short_names) {
        my $rwords = $rexpansion->{$short_name};
        print STDOUT "$short_name --> @{$rwords}\n";
    }
}
2665
sub check_vms_filename {

    # Given a valid filename (the perltidy input file), return a modified
    # filename and a separator character suitable for VMS, as the list
    # ( $path . $base, $separator ).
    #
    # Contributed by Michael Cartmell
    #
    my ($filename) = @_;
    my ( $base, $path ) = fileparse($filename);

    # remove an explicit ; version if there is one
    my $had_semicolon_version = ( $base =~ s/;-?\d*$// );

    # otherwise remove an explicit . version, ie two dots in the filename
    # NB ^ escapes a dot
    if ( !$had_semicolon_version ) {
        $base =~ s/(          # begin capture $1
                  (?:^|[^^])\. # match a dot not preceded by a caret
                  (?:          # followed by nothing
                    |          # or
                    .*[^^]     # anything ending in a non caret
                  )
                )              # end capture $1
                \.-?\d*$       # match . version number
              /$1/x;
    }

    # normalise: a name without any unescaped dot gets one appended
    if ( $base !~ /(?:^|[^^])\./ ) {
        $base .= '.';
    }

    # a name which ends in a dot needs no separator
    my $separator = "_";
    if ( $base =~ /\.$/ ) {
        $separator = "";
    }
    return ( $path . $base, $separator );
}
2697
sub Win_OS_Type {

    # TODO: are these more standard names?
    # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003

    # Return a string naming the MS operating system we are running on:
    #   win32s, 95, 98, Me, NT3.51, NT4, 2000, XP/.Net or Win2003.
    # Return a blank string if this is not an MS system, or if the version
    # numbers are not in the table below; in the latter case a message is
    # appended to the string referenced by the argument.
    # Original code contributed by: Yves Orton
    # We need to know this to decide where to look for config files

    my $rpending_complaint = shift;

    # nothing to report unless this is a MS box
    return "" unless $^O =~ /win32|dos/i;

    # Systems built from Perl source may not have Win32.pm
    # But probably have Win32::GetOSVersion() anyway so the
    # following line is not 'required':
    # return $os unless eval('require Win32');

    # Use the standard API call to determine the version
    my ( $undef, $major, $minor, $build, $id );
    eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };

    # If id==0 then its a win32s box.
    return "win32s" unless $id;

    #
    #    NAME                   ID   MAJOR  MINOR
    #    Windows NT 4           2      4       0
    #    Windows 2000           2      5       0
    #    Windows XP             2      5       1
    #    Windows Server 2003    2      5       2

    # Magic numbers from MSDN documentation of GetOSVersion:
    # first key is the id, second key is the minor version number
    my %name_of = (
        1 => {
            0  => "95",
            10 => "98",
            90 => "Me",
        },
        2 => {
            0  => "2000",       # or NT 4, see below
            1  => "XP/.Net",
            2  => "Win2003",
            51 => "NT3.51",
        },
    );
    my $os = $name_of{$id}->{$minor};

    # If $os is undefined, the above table is out of date.  Suggested
    # updates are welcome.
    if ( !defined($os) ) {
        $os = "";
        $$rpending_complaint .= <<EOS;
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
    }

    # The table is keyed on the minor number only, so NT 4 and 2000 share
    # an entry; the major number is needed to tell them apart.
    if ( $os eq "2000" && $major != 5 ) {
        return "NT4";
    }
    return $os;
}
2759
sub is_unix {

    # True unless $^O names one of the systems which are handled
    # separately: Windows/DOS, VMS, OS2 or MacOS.
    my $is_other_system =
         $^O =~ /win32|dos/i
      || $^O eq 'VMS'
      || $^O eq 'OS2'
      || $^O eq 'MacOS';
    return !$is_other_system;
}
2767
sub look_for_Windows {

    # Determine whether we are running under Windows and, if so, the
    # Windows sub-type as reported by Win_OS_Type.
    #
    # $rpending_complaint is handed on to Win_OS_Type.
    #
    # Returns ( $is_Windows, $Windows_type ); $Windows_type is undefined
    # when not running under Windows.
    my $rpending_complaint = shift;
    my $is_Windows         = ( $^O =~ /win32|dos/i );

    # Declare first and assign conditionally in a separate statement: the
    # behavior of 'my $x = ... if COND;' is undefined when COND is false.
    my $Windows_type;
    $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;

    return ( $is_Windows, $Windows_type );
}
2777
sub find_config_file {

    # Look for a .perltidyrc configuration file.
    # For Windows also look for a file named perltidy.ini
    #
    # Search order: the current directory; the environment variables
    # PERLTIDY (as a file, then as a directory) and HOME (plus USERPROFILE
    # and HOMESHARE when $^O matches win32) as directories; then
    # system-wide locations which depend on the operating system.
    #
    # Parameters:
    #   $is_Windows           - true if running under Windows
    #   $Windows_type         - Windows sub-type, used to find system folders
    #   $rconfig_file_chatter - reference to a string; a record of every
    #                           step of the search is appended to it
    #   $rpending_complaint   - handed on to Win_Config_Locs
    #
    # Returns the name of the first file found, or nothing if none exists.
    my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
        $rpending_complaint ) = @_;

    $$rconfig_file_chatter .= "# Config file search...system reported as:";
    if ($is_Windows) {
        my $type_name = defined($Windows_type) ? $Windows_type : "";
        $$rconfig_file_chatter .= " Windows $type_name\n";
    }
    else {
        $$rconfig_file_chatter .= " $^O\n";
    }

    # sub to check file existence and record all tests
    my $exists_config_file = sub {
        my $candidate = shift;
        return 0 unless $candidate;
        $$rconfig_file_chatter .= "# Testing: $candidate\n";
        return -f $candidate;
    };

    # sub to test one directory for .perltidyrc and, under Windows, for
    # perltidy.ini; an undefined directory means the current directory.
    # Returns the name of the file found, or nothing.
    my $find_in_directory = sub {
        my $dir   = shift;
        my @names = (".perltidyrc");
        push @names, "perltidy.ini" if $is_Windows;
        foreach my $name (@names) {
            my $candidate = defined($dir) ? catfile( $dir, $name ) : $name;
            return $candidate if $exists_config_file->($candidate);
        }
        return;
    };

    my $config_file;

    # look in current directory first
    $config_file = $find_in_directory->();
    return $config_file if $config_file;

    # Default environment vars.
    my @envs = qw(PERLTIDY HOME);

    # Check the NT/2k/XP locations, first a local machine def, then a
    # network def
    push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;

    # Now go through the environment ...
    foreach my $var (@envs) {
        $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
        if ( !defined( $ENV{$var} ) ) {
            $$rconfig_file_chatter .= "\n";
            next;
        }
        $$rconfig_file_chatter .= " = $ENV{$var}\n";

        # test ENV{ PERLTIDY } as file:
        if ( $var eq 'PERLTIDY' ) {
            $config_file = "$ENV{$var}";
            return $config_file if $exists_config_file->($config_file);
        }

        # test ENV as directory:
        $config_file = $find_in_directory->( $ENV{$var} );
        return $config_file if $config_file;
    }

    # then look for a system-wide definition
    # where to look varies with OS
    if ($is_Windows) {

        if ($Windows_type) {
            my ( $os, $system, $allusers ) =
              Win_Config_Locs( $rpending_complaint, $Windows_type );

            # Check All Users directory, if there is one.
            # i.e. C:\Documents and Settings\User\perltidy.ini
            if ($allusers) {
                $config_file = $find_in_directory->($allusers);
                return $config_file if $config_file;
            }

            # Check system directory, if one is known; Win_Config_Locs
            # returns nothing for a Windows type it does not recognize.
            # The .perltidyrc name is still tried in case someone has been
            # able to create a file with a leading period.
            if ($system) {
                $config_file = $find_in_directory->($system);
                return $config_file if $config_file;
            }
        }
    }

    # Place to add customization code for other systems
    elsif ( $^O eq 'OS2' ) {
    }
    elsif ( $^O eq 'MacOS' ) {
    }
    elsif ( $^O eq 'VMS' ) {
    }

    # Assume some kind of Unix
    else {

        $config_file = "/usr/local/etc/perltidyrc";
        return $config_file if $exists_config_file->($config_file);

        $config_file = "/etc/perltidyrc";
        return $config_file if $exists_config_file->($config_file);
    }

    # Couldn't find a config file
    return;
}
2895
sub Win_Config_Locs {

    # In scalar context returns the OS name (95 98 Me NT3.51 NT4 2000 XP),
    # or undef if its not a win32 OS.  In list context returns OS, System
    # Directory, and All Users Directory.  All Users will be empty on a
    # 9x/Me box.  Contributed by: Yves Orton.
    #
    # Parameters:
    #   $rpending_complaint - reference to a string to which a message is
    #                         appended if no location is known for the OS
    #   $os                 - optional OS name; if omitted it is obtained
    #                         from Win_OS_Type
    #
    # Returns nothing if the OS name is empty or not recognized.

    my $rpending_complaint = shift;

    # hand the complaint reference on, so that a message generated by
    # Win_OS_Type is not lost
    my $os = (@_) ? shift : Win_OS_Type($rpending_complaint);
    return unless $os;

    my $system   = "";
    my $allusers = "";

    if ( $os =~ /9[58]|Me/ ) {
        $system = "C:/Windows";
    }
    elsif ( $os =~ /NT|XP|200?/ ) {
        $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
        $allusers =
          ( $os =~ /NT/ )
          ? "C:/WinNT/profiles/All Users/"
          : "C:/Documents and Settings/All Users/";
    }
    else {

        # This currently would only happen on a win32s computer.  I dont have
        # one to test, so I am unsure how to proceed.  Suggestions welcome!
        $$rpending_complaint .=
"I dont know a sensible place to look for config files on an $os system.\n";
        return;
    }
    return wantarray ? ( $os, $system, $allusers ) : $os;
}
2930
sub dump_config_file {

    # Write to STDOUT the accumulated config file chatter followed by the
    # contents of the configuration file, or a note that none was found.
    #
    #   $fh                   - object with getline() and close() methods
    #                           for the config file, or false if no file
    #   $config_file          - name of the file, shown in the dump header
    #   $rconfig_file_chatter - reference to the chatter text
    my ( $fh, $config_file, $rconfig_file_chatter ) = @_;

    print STDOUT "$$rconfig_file_chatter";

    if ( !$fh ) {
        print STDOUT "# ...no config file found\n";
    }
    else {
        print STDOUT "# Dump of file: '$config_file'\n";
        while ( my $text = $fh->getline() ) {
            print STDOUT $text;
        }

        # closing is best-effort; any error is ignored
        eval { $fh->close() };
    }
}
2945
sub read_config_file {

    # Read the lines of a perltidy configuration file.
    #
    # Arguments:
    #   $fh          - object with getline() and close() methods which
    #                  supplies the lines of the file
    #   $config_file - name of the file; used only in error messages
    #   $rexpansion  - reference to the hash of alias expansions; alias
    #                  definitions found in the file are added to it
    #
    # Returns ( \@config_list, $death_message ), where @config_list holds
    # the parameters found outside of alias definitions and $death_message
    # is non-empty if the file is bad.

    my ( $fh, $config_file, $rexpansion ) = @_;
    my @config_list = ();

    # file is bad if non-empty $death_message is returned
    my $death_message = "";

    my $line_no = 0;
    while ( my $line = $fh->getline() ) {
        $line_no++;
        chomp $line;
        ( $line, $death_message ) =
          strip_comment( $line, $config_file, $line_no );
        last if ($death_message);
        next unless $line;
        $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
        next unless $line;

        # look for something of the general form
        #    newname { body }
        # or just
        #    body
        #
        # The pattern requires the closing '}' on the same line, so an
        # alias definition is always complete within one line.  The alias
        # name is therefore scoped to this line only; otherwise every line
        # after the first alias would be added to that alias instead of to
        # the parameter list, and a second alias could not be defined.
        my $body = $line;
        my $name;
        if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
            ( $name, $body ) = ( $2, $3 );
        }
        next unless $body;

        # handle a new alias definition
        if ($name) {
            if ( ${$rexpansion}{$name} ) {
                local $" = ')(';
                my @names = sort keys %$rexpansion;

                # report our own line count; $. depends on which file
                # handle was read last and need not refer to $fh
                $death_message =
                    "Here is a list of all installed aliases\n(@names)\n"
                  . "Attempting to redefine alias ($name) in config file $config_file line $line_no\n";
                last;
            }
            ${$rexpansion}{$name} = [];
        }

        # now do the body
        my ( $rbody_parts, $msg ) = parse_args($body);
        if ($msg) {
            $death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
            last;
        }

        if ($name) {

            # remove leading dashes if this is an alias
            foreach (@$rbody_parts) { s/^\-+//; }
            push @{ ${$rexpansion}{$name} }, @$rbody_parts;
        }
        else {
            push( @config_list, @$rbody_parts );
        }
    }

    # closing is best-effort; any error is ignored
    eval { $fh->close() };
    return ( \@config_list, $death_message );
}
3026
sub strip_comment {

    # Remove any comment from one line of a command (configuration) file.
    #
    # Given the text of the line, plus the file name and line number (both
    # used only in the error message), returns ( $text, $msg ) where $text
    # is the line without its comment and $msg is non-empty only if a
    # quoted string was opened but never closed.
    my ( $instr, $config_file, $line_no ) = @_;
    my $msg = "";

    # a line which begins with '#' is entirely a comment
    return ( "", $msg ) if ( $instr =~ /^\s*#/ );

    # without any '#' there is nothing to remove
    return ( $instr, $msg ) if ( $instr !~ /#/ );

    # Simple case, no quotes:
    # We require a space before the # of a side comment
    # this allows something like:
    #    -sbcp=#
    # Otherwise, it would have to be quoted:
    #    -sbcp='#'
    if ( $instr !~ /['"]/ ) {
        $instr =~ s/\s+\#.*$//;
        return ( $instr, $msg );
    }

    # General case, comments and quotes: scan the line left to right,
    # copying text in chunks and remembering whether we are inside a
    # quoted string.  The first '#' seen outside of quotes ends the scan.
    # Note: the space-before-hash rule is not enforced here.
    my $outstr     = "";
    my $open_quote = "";
  SCAN:
    while (1) {

        # outside of quotes
        if ( !$open_quote ) {

            # a run of ordinary text
            if ( $instr =~ /\G([^'"#\n]+)/gc ) {
                $outstr .= $1;
                next SCAN;
            }

            # start of a quoted string
            if ( $instr =~ /\G(['"])/gc ) {
                $outstr .= $1;
                $open_quote = $1;
                next SCAN;
            }

            # start of a comment, or nothing more to copy
            last SCAN;
        }

        # inside quotes: a run of quoted text
        if ( $instr =~ /\G([^\n$open_quote]+)/gc ) {
            $outstr .= $1;
            next SCAN;
        }

        # inside quotes: the ending quote character
        if ( $instr =~ /\G($open_quote)/gc ) {
            $outstr .= $1;
            $open_quote = "";
            next SCAN;
        }

        # error..we reached the end without seeing the ending quote char
        $msg = <<EOM;
Error reading file $config_file at line number $line_no.
Did not see ending quote character <$open_quote> in this text:
$instr
Please fix this line or use -npro to avoid reading this file
EOM
        last SCAN;
    }
    return ( $outstr, $msg );
}
3104
sub parse_args {

    # Split a command string, which may contain quoted strings, into its
    # individual commands.  For example, the string
    #
    #    -wba=" + - "  -some-thing -wbb='. && ||'
    #
    # holds three commands.  Quote characters group text (including
    # spaces) into one command and are themselves removed.
    #
    # Escaped quote characters are not handled; they are not perltidy
    # tokens, so there is no need for them in these strings.
    #
    # Returns ( \@body_parts, $msg ) where $msg is non-empty only if a
    # quoted string was opened but never closed.

    my ($body)     = @_;
    my @body_parts = ();
    my $open_quote = "";
    my $part       = "";
    my $msg        = "";

    # save the command being accumulated, if any, and start a new one
    my $flush_part = sub {
        push @body_parts, $part if ( length($part) );
        $part = "";
    };

  SCAN:
    while (1) {

        # outside of quotes
        if ( !$open_quote ) {

            # start of a quoted string
            if ( $body =~ /\G(['"])/gc ) {
                $open_quote = $1;
                next SCAN;
            }

            # whitespace ends the current command
            if ( $body =~ /\G\s+/gc ) {
                $flush_part->();
                next SCAN;
            }

            # a run of ordinary text
            if ( $body =~ /\G([^'"\s]+)/gc ) {
                $part .= $1;
                next SCAN;
            }

            # end of the string
            $flush_part->();
            last SCAN;
        }

        # inside quotes: the ending quote character
        if ( $body =~ /\G$open_quote/gc ) {
            $open_quote = "";
            next SCAN;
        }

        # inside quotes: a run of quoted text
        if ( $body =~ /\G([^\n$open_quote]+)/gc ) {
            $part .= $1;
            next SCAN;
        }

        # error..we reached the end without seeing the ending quote char
        $flush_part->();
        $msg = <<EOM;
Did not see ending quote character <$open_quote> in this text:
$body
EOM
        last SCAN;
    }
    return ( \@body_parts, $msg );
}
3162
sub dump_long_names {

    # Print to STDOUT a short key to the Getopt codes, followed by the
    # given long option names in sorted order, one per line.
    my $key_to_codes = <<EOM;
# Command line long names (passed to GetOptions)
#---------------------------------------------------------------
# here is a summary of the Getopt codes:
# <none> does not take an argument
# =s takes a mandatory string
# :s takes an optional string
# =i takes a mandatory integer
# :i takes an optional integer
# ! does not take an argument and may be negated
#  i.e., -foo and -nofoo are allowed
# a double dash signals the end of the options list
#
#---------------------------------------------------------------
EOM
    print STDOUT $key_to_codes;

    for my $long_name ( sort @_ ) {
        print STDOUT "$long_name\n";
    }
}
3184
sub dump_defaults {

    # Print the given default command line options to STDOUT, one per
    # line and in sorted order, below a heading line.
    my @defaults = sort @_;
    print STDOUT "Default command line options:\n";

    # loop over the sorted copy (not the unsorted argument list)
    foreach my $default (@defaults) { print STDOUT "$default\n" }
}
3190
sub readable_options {

    # return options for this run as a string which could be
    # put in a perltidyrc file
    #
    # Arguments:
    #   $rOpts          - reference to the hash of option values, keyed
    #                     by long option name
    #   $roption_string - reference to the list of option specifications
    #                     (long name followed by an optional flag such as
    #                     '!' or '=s').  This list is not modified.
    #
    # Returns the text, one option per line, sorted by option name.
    my ( $rOpts, $roption_string ) = @_;
    my %Getopt_flags;
    my $rGetopt_flags    = \%Getopt_flags;
    my $readable_options = "# Final parameter set for this run.\n";
    $readable_options .=
      "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";

    # Separate each specification into a name and its flag.  The loop
    # variable is an alias to the caller's list element, so the name is
    # kept in a separate variable rather than assigned back to it.
    foreach my $spec ( @{$roption_string} ) {
        my $name = $spec;
        my $flag = "";
        if ( $spec =~ /(.*)(!|=.*)$/ ) {
            $name = $1;
            $flag = $2;
        }
        if ( defined( $rOpts->{$name} ) ) {
            $rGetopt_flags->{$name} = $flag;
        }
    }

    foreach my $key ( sort keys %{$rOpts} ) {
        my $flag   = $rGetopt_flags->{$key};
        my $value  = $rOpts->{$key};
        my $prefix = '--';
        my $suffix = "";
        if ($flag) {

            # option with a value; quote anything but a plain integer
            if ( $flag =~ /^=/ ) {
                if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
                $suffix = "=" . $value;
            }

            # negatable switch; show the 'no' form if it is off
            elsif ( $flag =~ /^!/ ) {
                $prefix .= "no" unless ($value);
            }
            else {

                # shouldn't happen
                $readable_options .=
                  "# ERROR in dump_options: unrecognized flag $flag for $key\n";
            }
        }
        $readable_options .= $prefix . $key . $suffix . "\n";
    }
    return $readable_options;
}
3235
sub show_version {

    # Print the version number, copyright notice and pointers to the
    # documentation on the currently selected output handle.
    my $version_text = <<"EOM";
This is perltidy, v$VERSION

Copyright 2000-2012, Steve Hancock

Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.

Complete documentation for perltidy can be found using 'man perltidy'
or on the internet at http://perltidy.sourceforge.net.
EOM
    print $version_text;
}
3249
sub usage {

    # Print the help text (a summary of the command line and of the short
    # forms of the options) to STDOUT.

    print STDOUT <<EOF;
This is perltidy version $VERSION, a perl script indenter.  Usage:

    perltidy [ options ] file1 file2 file3 ...
            (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
    perltidy [ options ] file1 -o outfile
    perltidy [ options ] file1 -st >outfile
    perltidy [ options ] <infile >outfile

Options have short and long forms. Short forms are shown; see
man pages for long forms.  Note: '=s' indicates a required string,
and '=n' indicates a required integer.

I/O control
 -h      show this help
 -o=file name of the output file (only if single input file)
 -oext=s change output extension from 'tdy' to s
 -opath=path  change path to be 'path' for output files
 -b      backup original to .bak and modify file in-place
 -bext=s change default backup extension from 'bak' to s
 -q      deactivate error messages (for running under editor)
 -w      include non-critical warning messages in the .ERR error output
 -syn    run perl -c to check syntax (default under unix systems)
 -log    save .LOG file, which has useful diagnostics
 -f      force perltidy to read a binary file
 -g      like -log but writes more detailed .LOG file, for debugging scripts
 -opt    write the set of options actually used to a .LOG file
 -npro   ignore .perltidyrc configuration command file
 -pro=file   read configuration commands from file instead of .perltidyrc
 -st     send output to standard output, STDOUT
 -se     send error output to standard error output, STDERR
 -v      display version number to standard output and quit

Basic Options:
 -i=n    use n columns per indentation level (default n=4)
 -t      tabs: use one tab character per indentation level, not recommended
 -nt     no tabs: use n spaces per indentation level (default)
 -et=n   entab leading whitespace n spaces per tab; not recommended
 -io     "indent only": just do indentation, no other formatting.
 -sil=n  set starting indentation level to n;  use if auto detection fails
 -ole=s  specify output line ending (s=dos or win, mac, unix)
 -ple    keep output line endings same as input (input must be filename)

Whitespace Control
 -fws    freeze whitespace; this disables all whitespace changes
           and disables the following switches:
 -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
 -bbt    same as -bt but for code block braces; same as -bt if not given
 -bbvt   block braces vertically tight; use with -bl or -bli
 -bbvtl=s  make -bbvt to apply to selected list of block types
 -pt=n   paren tightness (n=0, 1 or 2)
 -sbt=n  square bracket tightness (n=0, 1, or 2)
 -bvt=n  brace vertical tightness,
         n=(0=open, 1=close unless multiple steps on a line, 2=always close)
 -pvt=n  paren vertical tightness (see -bvt for n)
 -sbvt=n square bracket vertical tightness (see -bvt for n)
 -bvtc=n closing brace vertical tightness:
         n=(0=open, 1=sometimes close, 2=always close)
 -pvtc=n closing paren vertical tightness, see -bvtc for n.
 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
 -ci=n   sets continuation indentation=n,  default is n=2 spaces
 -lp     line up parentheses, brackets, and non-BLOCK braces
 -sfs    add space before semicolon in for( ; ; )
 -aws    allow perltidy to add whitespace (default)
 -dws    delete all old non-essential whitespace
 -icb    indent closing brace of a code block
 -cti=n  closing indentation of paren, square bracket, or non-block brace:
         n=0 none, =1 align with opening, =2 one full indentation level
 -icp    equivalent to -cti=2
 -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
 -wrs=s  want space right of tokens in string;
 -sts    put space before terminal semicolon of a statement
 -sak=s  put space between keywords given in s and '(';
 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'

Line Break Control
 -fnl    freeze newlines; this disables all line break changes
            and disables the following switches:
 -anl    add newlines;  ok to introduce new line breaks
 -bbs    add blank line before subs and packages
 -bbc    add blank line before block comments
 -bbb    add blank line between major blocks
 -kbl=n  keep old blank lines? 0=no, 1=some, 2=all
 -mbl=n  maximum consecutive blank lines to output (default=1)
 -ce     cuddled else; use this style: '} else {'
 -dnl    delete old newlines (default)
 -l=n    maximum line length;  default n=80
 -bl     opening brace on new line
 -sbl    opening sub brace on new line.  value of -bl is used if not given.
 -bli    opening brace on new line and indented
 -bar    opening brace always on right, even for long clauses
 -vt=n   vertical tightness (requires -lp); n controls break after opening
         token: 0=never  1=no break if next line balanced   2=no break
 -vtc=n  vertical tightness of closing container; n controls if closing
         token starts new line: 0=always  1=not unless list  2=never
 -wba=s  want break after tokens in string; i.e. wba=': .'
 -wbb=s  want break before tokens in string

Following Old Breakpoints
 -kis    keep interior semicolons.  Allows multiple statements per line.
 -boc    break at old comma breaks: turns off all automatic list formatting
 -bol    break at old logical breakpoints: or, and, ||, && (default)
 -bok    break at old list keyword breakpoints such as map, sort (default)
 -bot    break at old conditional (ternary ?:) operator breakpoints (default)
 -boa    break at old attribute breakpoints
 -cab=n  break at commas after a comma-arrow (=>):
         n=0 break at all commas after =>
         n=1 stable: break unless this breaks an existing one-line container
         n=2 break only if a one-line container cannot be formed
         n=3 do not treat commas after => specially at all

Comment controls
 -ibc    indent block comments (default)
 -isbc   indent spaced block comments; may indent unless no leading space
 -msc=n  minimum desired spaces to side comment, default 4
 -fpsc=n fix position for side comments; default 0;
 -csc    add or update closing side comments after closing BLOCK brace
 -dcsc   delete closing side comments created by a -csc command
 -cscp=s change closing side comment prefix to be other than '## end'
 -cscl=s change closing side comment to apply to selected list of blocks
 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
 -csct=n maximum number of columns of appended text, default n=20
 -cscw   causes warning if old side comment is overwritten with -csc

 -sbc    use 'static block comments' identified by leading '##' (default)
 -sbcp=s change static block comment identifier to be other than '##'
 -osbc   outdent static block comments

 -ssc    use 'static side comments' identified by leading '##' (default)
 -sscp=s change static side comment identifier to be other than '##'

Delete selected text
 -dac    delete all comments AND pod
 -dbc    delete block comments
 -dsc    delete side comments
 -dp     delete pod

Send selected text to a '.TEE' file
 -tac    tee all comments AND pod
 -tbc    tee block comments
 -tsc    tee side comments
 -tp     tee pod

Outdenting
 -olq    outdent long quoted strings (default)
 -olc    outdent a long block comment line
 -ola    outdent statement labels
 -okw    outdent control keywords (redo, next, last, goto, return)
 -okwl=s specify alternative keywords for -okw command

Other controls
 -mft=n  maximum fields per table; default n=40
 -x      do not format lines before hash-bang line (i.e., for VMS)
 -asc    allows perltidy to add a ';' when missing (default)
 -dsm    allows perltidy to delete an unnecessary ';'  (default)

Combinations of other parameters
 -gnu     attempt to follow GNU Coding Standards as applied to perl
 -mangle  remove as many newlines as possible (but keep comments and pods)
 -extrude  insert as many newlines as possible

Dump and die, debugging
 -dop    dump options used in this run to standard output and quit
 -ddf    dump default options to standard output and quit
 -dsn    dump all option short names to standard output and quit
 -dln    dump option long names to standard output and quit
 -dpro   dump whatever configuration file is in effect to standard output
 -dtt    dump all token types to standard output and quit

HTML
 -html write an html file (see 'man perl2web' for many options)
       Note: when -html is used, no indentation or formatting are done.
       Hint: try perltidy -html -css=mystyle.css filename.pl
       and edit mystyle.css to change the appearance of filename.html.
       -nnn gives line numbers
       -pre only writes out <pre>..</pre> code section
       -toc places a table of contents to subs at the top (default)
       -pod passes pod text through pod2html (default)
       -frm write html as a frame (3 files)
       -text=s extra extension for table of contents if -frm, default='toc'
       -sext=s extra extension for file content if -frm, default='src'

A prefix of "n" negates short form toggle switches, and a prefix of "no"
negates the long forms.  For example, -nasc means don't add missing
semicolons.

If you are unable to see this entire text, try "perltidy -h | more"
For more detailed information, and additional options, try "man perltidy",
or go to the perltidy home page at http://perltidy.sourceforge.net
EOF

}
3444
sub process_this_file {

    # Pass every line of one file from the line source to the formatter.
    #
    #   $truth  - object supplying lines through get_line(); it is also
    #             asked to report_tokenization_errors() at the end
    #   $beauty - object which receives each line through write_line()
    my ( $truth, $beauty ) = @_;

    # loop to process each line of this file; a false value from
    # get_line() marks the end of the input
    while (1) {
        my $line_of_tokens = $truth->get_line();
        last unless ($line_of_tokens);
        $beauty->write_line($line_of_tokens);
    }

    # finish up; an error raised by finish_formatting() is ignored
    eval { $beauty->finish_formatting() };
    $truth->report_tokenization_errors();
}
3458
sub check_syntax {

    # Use 'perl -c' to make sure that we did not create bad syntax.
    # This is a very good independent check for programming errors.
    #
    # Arguments:
    #   $istream, $ostream - the input and output streams; each is handed
    #                        to do_syntax_check()
    #   $logger_object     - receives log file entries, complaints and
    #                        warnings
    #   $rOpts             - options hash; 'perl-syntax-check-flags' and
    #                        'look-for-hash-bang' are used here
    #
    # Procedure:
    # - check syntax of the input file
    # - if bad, all done (could be an incomplete code snippet)
    # - if infile syntax ok, then check syntax of the output file;
    #   - if outfile syntax bad, issue warning; this implies a code bug!
    # - return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good

    my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
    my $line_of_dashes = '-' x 42 . "\n";

    # Be sure we invoke perl with -c.  Perl will accept repeated flags
    # like '-c -c', so it is safest to append another -c rather than to
    # look for an interior bundled c, as in -Tc, because such a 'c' might
    # be in a quoted string, for example.
    my $flags = $rOpts->{'perl-syntax-check-flags'};
    unless ( $flags =~ /(^-c|\s+-c)/ ) { $flags .= " -c" }

    # Likewise be sure we invoke perl with -x if requested.
    if ( $rOpts->{'look-for-hash-bang'} && $flags !~ /(^-x|\s+-x)/ ) {
        $flags .= " -x";
    }

    # this shouldn't happen unless a temporary file couldn't be made
    if ( $istream eq '-' ) {
        $logger_object->write_logfile_entry(
            "Cannot run perl -c on STDIN and STDOUT\n");
        return 0;
    }

    $logger_object->write_logfile_entry(
        "checking input file syntax with perl $flags\n");

    # Not all operating systems/shells support redirection of the standard
    # error output.
    my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';

    # first check the input
    my ( $istream_filename, $input_report ) =
      do_syntax_check( $istream, $flags, $error_redirection );
    $logger_object->write_logfile_entry(
        "Input stream passed to Perl as file $istream_filename\n");
    $logger_object->write_logfile_entry($line_of_dashes);
    $logger_object->write_logfile_entry("$input_report\n");

    # Input is bad: report it and skip the check of the output.
    if ( $input_report !~ /syntax\s*OK/ ) {

        # Only warn of perl -c syntax errors.  Other messages,
        # such as missing modules, are too common.  They can be
        # seen by running with perltidy -w
        $logger_object->complain("A syntax check using perl $flags\n");
        $logger_object->complain(
            "for the output in file $istream_filename gives:\n");
        $logger_object->complain($line_of_dashes);
        $logger_object->complain("$input_report\n");
        $logger_object->complain($line_of_dashes);
        $logger_object->write_logfile_entry($line_of_dashes);
        $logger_object->write_logfile_entry(
"The output file will not be checked because of input file problems\n"
        );
        return -1;
    }

    # Input is good: now check the output
    $logger_object->write_logfile_entry($line_of_dashes);
    $logger_object->write_logfile_entry(
        "checking output file syntax with perl $flags ...\n");
    my ( $ostream_filename, $output_report ) =
      do_syntax_check( $ostream, $flags, $error_redirection );
    $logger_object->write_logfile_entry(
        "Output stream passed to Perl as file $ostream_filename\n");
    $logger_object->write_logfile_entry($line_of_dashes);
    $logger_object->write_logfile_entry("$output_report\n");

    # good input but bad output implies that perltidy made an error
    if ( $output_report !~ /syntax\s*OK/ ) {
        $logger_object->write_logfile_entry($line_of_dashes);
        $logger_object->warning(
"The output file has a syntax error when tested with perl $flags $ostream !\n"
        );
        $logger_object->warning(
            "This implies an error in perltidy; the file $ostream is bad\n"
        );
        $logger_object->report_definite_bug();

        # the perl version number will be helpful for diagnosing the problem
        $logger_object->write_logfile_entry(
            qx/perl -v $error_redirection/ . "\n" );
    }
    return 1;
}
3557
sub do_syntax_check {

    # Run perl with the given flags on one stream and capture its output.
    #
    #   $stream            - the stream to check; it is converted to a
    #                        named file by get_stream_as_named_file()
    #   $flags             - flags for perl, as a single string
    #   $error_redirection - shell text appended to the command to
    #                        redirect the standard error output
    #
    # Returns ( $stream_filename, $msg ), where $msg is the text produced
    # by the command, or "" if no file name could be obtained.
    my ( $stream, $flags, $error_redirection ) = @_;

    # We need a named input file for executing perl
    my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);

    # TODO: Need to add name of file to log somewhere
    # otherwise Perl output is hard to read
    return ( $stream_filename, "" ) unless ($stream_filename);

    # The command has four pieces, joined by single spaces:
    #
    # - the flags go inside double quotes because under VMS something
    #   like -T will become -t (and an error).  Double quotes seem to
    #   work on Unix/Windows/VMS, but this may not work on all systems.
    #   (Single quotes do not work under Windows).  It could become
    #   necessary to put double quotes around each flag, such as:
    #   -"c"  -"T".  We may eventually need some system-dependent coding.
    #
    # - the file name goes inside double quotes in case it has unusual
    #   characters or spaces.  Example: this filename #CM11.pm# gives
    #   trouble.
    #
    # NOTE(review): the file name is only wrapped in double quotes, so a
    # name which itself contains a double quote (or other characters that
    # are special inside double quotes for the shell in use) is not
    # protected - confirm whether callers can pass such names.
    my $command = join ' ', 'perl',
      '"' . $flags . '"',
      '"' . $stream_filename . '"',
      $error_redirection;

    # now wish for luck...
    my $msg = qx/$command/;

    unlink $stream_filename if ($is_tmpfile);
    return $stream_filename, $msg;
}
3586
3587 #####################################################################
3588 #
3589 # This is a stripped down version of IO::Scalar
3590 # Given a reference to a scalar, it supplies either:
3591 # a getline method which reads lines (mode='r'), or
# a print method which writes lines (mode='w')
3593 #
3594 #####################################################################
package Perl::Tidy::IOScalar;
use Carp;

sub new {

    # Create an object which reads lines from, or writes text to, a scalar.
    #
    #   $rscalar - reference to the scalar
    #   $mode    - 'r' to read lines with getline, or
    #              'w' to write with print (the scalar is emptied first)
    #
    # Dies with a stack trace if $rscalar is not a reference to a scalar
    # or if $mode is not 'r' or 'w'.
    #
    # The object is an array reference:
    #   mode 'w': [ $rscalar, $mode ]
    #   mode 'r': [ \@lines, $mode, $index_of_next_line ]
    my ( $package, $rscalar, $mode ) = @_;
    my $ref = ref $rscalar;
    if ( $ref ne 'SCALAR' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to SCALAR but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM

    }
    if ( $mode eq 'w' ) {
        $$rscalar = "";
        return bless [ $rscalar, $mode ], $package;
    }
    elsif ( $mode eq 'r' ) {

        # Convert a scalar to an array.
        # This avoids looking for "\n" on each call to getline
        #
        # NOTES: The -1 count is needed to avoid loss of trailing blank lines
        # (which might be important in a DATA section).
        #
        # The test for content uses defined and length rather than truth
        # so that text consisting of just "0" is not mistaken for empty.
        my @array;
        if ( defined( ${$rscalar} ) && length( ${$rscalar} ) ) {
            @array = map { "$_\n" } split /\n/, ${$rscalar}, -1;

            # remove possible extra blank line introduced with split
            if ( @array && $array[-1] eq "\n" ) { pop @array }
        }
        my $i_next = 0;
        return bless [ \@array, $mode, $i_next ], $package;
    }
    else {
        confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
}

sub getline {

    # Return the next line (with its newline), or undef when there are no
    # more lines.  Dies with a stack trace unless the mode is 'r'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline call requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    my $i = $self->[2]++;
    return $self->[0]->[$i];
}

sub print {

    # Append the first argument to the scalar.  Dies with a stack trace
    # unless the mode is 'w'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print call requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    ${ $self->[0] } .= $_[0];
}

# nothing needs to be done to close; provided so that this object can be
# used like a file handle
sub close { return }
3666
3667 #####################################################################
3668 #
3669 # This is a stripped down version of IO::ScalarArray
3670 # Given a reference to an array, it supplies either:
3671 # a getline method which reads lines (mode='r'), or
# a print method which writes lines (mode='w')
3673 #
# NOTE: this routine assumes that there aren't any embedded
# newlines within any of the array elements.  There are no checks
# for that.
3677 #
3678 #####################################################################
3679 package Perl::Tidy::IOScalarArray;
3680 use Carp;
3681
# Constructor.
#   $rarray - reference to the array that is read from or written to
#   $mode   - 'r' to read the existing elements with getline,
#             'w' to empty the array and fill it with print
# Anything else is a programming error and is reported with a stack trace.
sub new {
    my ( $package, $rarray, $mode ) = @_;

    my $ref = ref $rarray;
    unless ( $ref eq 'ARRAY' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to ARRAY but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM
    }

    # a reader keeps the index of the next line in the third slot
    if ( $mode eq 'r' ) {
        return bless [ $rarray, $mode, 0 ], $package;
    }

    # a writer always starts from an empty array
    if ( $mode eq 'w' ) {
        @{$rarray} = ();
        return bless [ $rarray, $mode ], $package;
    }

    confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
}
3709
# Return the array element at the current read position and advance the
# position; undef is returned once the end of the array has been passed.
# Only objects created in mode 'r' may be read.
sub getline {
    my ($this) = @_;
    my ( $rarray, $mode ) = @{$this}[ 0, 1 ];

    unless ( $mode eq 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }

    # slot 2 holds the index of the next element to hand out
    my $position = $this->[2]++;
    return $rarray->[$position];
}
3723
# Add the first argument as a new element at the end of the array that
# the object wraps.  Only objects created in mode 'w' may be written;
# any further arguments are ignored.
sub print {
    my ( $this, $line ) = @_;
    my ( $rarray, $mode ) = @{$this}[ 0, 1 ];

    unless ( $mode eq 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }

    return push @{$rarray}, $line;
}
# Closing is a no-op for an in-memory array; the method is provided so
# the object can be used wherever a closable handle is expected.
sub close {
    return;
}
3737
3738 #####################################################################
3739 #
3740 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3741 # which returns the next line to be parsed
3742 #
3743 #####################################################################
3744
3745 package Perl::Tidy::LineSource;
3746
sub new {

    # Constructor.
    #   $input_file               - the input, as accepted by
    #                               Perl::Tidy::streamhandle
    #   $rOpts                    - reference to the options hash
    #   $rpending_logfile_message - reference to a string collecting notes
    #                               for the logfile
    # Returns the new object, or undef if no input stream could be opened.
    my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;

    # look up the line ending of the input only if it has to be preserved
    my $input_line_ending;
    $input_line_ending = Perl::Tidy::find_input_line_ending($input_file)
      if $rOpts->{'preserve-line-endings'};

    # streamhandle also returns the name under which the input is known
    my $fh;
    ( $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
    unless ($fh) { return undef }

    # Checking the syntax of input that arrives on standard input or as an
    # object would require a temporary copy, and temporary files cause
    # problems on many systems, so the check is switched off instead.
    my $input_is_not_a_named_file = $input_file eq '-' || ref $input_file;
    if ( $input_is_not_a_named_file && $rOpts->{'check-syntax'} ) {
        $rOpts->{'check-syntax'} = 0;
        ${$rpending_logfile_message} .= <<EOM;
Note: --syntax check will be skipped because standard input is used
EOM
    }

    my %state = (
        _fh                => $fh,
        _filename          => $input_file,
        _input_line_ending => $input_line_ending,
        _rinput_buffer     => [],
        _started           => 0,
    );
    return bless \%state, $class;
}
3783
# Close the input stream.  The call is wrapped in eval so that a stream
# which cannot be closed does not abort the run.
sub close_input_file {
    my ($this) = @_;
    return eval { $this->{_fh}->close() };
}
3788
# Return the next line of input, or undef at the end of the input.
sub get_line {
    my ($this) = @_;
    my $rpending = $this->{_rinput_buffer};

    # lines split off an earlier read are handed out first
    return shift @{$rpending} if @{$rpending};

    my $line = $this->{_fh}->getline();

    # only the first non-empty read is examined further
    return $line if !$line || $this->{_started};

    # Patch to read raw mac files under unix, dos: a carriage return that
    # is followed by ordinary text means the whole file arrived as one
    # "line".  Break it up at the carriage returns and keep the pieces
    # for the following calls.
    if ( $line =~ /[\015][^\015\012]/ ) {
        @{$rpending} = map { $_ . "\n" } split /\015/, $line;
        $line = shift @{$rpending};
    }
    $this->{_started}++;
    return $line;
}
3816
3817 #####################################################################
3818 #
3819 # the Perl::Tidy::LineSink class supplies a write_line method for
3820 # actual file writing
3821 #
3822 #####################################################################
3823
3824 package Perl::Tidy::LineSink;
3825
sub new {

    # Constructor.
    #   $output_file              - the output destination, as accepted by
    #                               Perl::Tidy::streamhandle
    #   $tee_file                 - name of the tee file, opened on demand
    #   $line_separator           - string that ends every output line
    #   $rOpts                    - reference to the options hash
    #   $rpending_logfile_message - reference to a string collecting notes
    #                               for the logfile
    #   $binmode                  - true if output is to be written in
    #                               binary mode
    my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
        $rpending_logfile_message, $binmode )
      = @_;

    my $fh;
    my $output_file_open = 0;

    # an output stream is opened only for the 'tidy' format
    if ( $rOpts->{'format'} eq 'tidy' ) {
        ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
        die "Cannot write to output stream\n" unless $fh;
        $output_file_open = 1;

        if ($binmode) {
            binmode $fh    if ref($fh) eq 'IO::File';
            binmode STDOUT if $output_file eq '-';
        }
    }

    # Checking the syntax of output that goes to standard output or to an
    # object would require a temporary copy, and temporary files cause
    # problems on many systems, so the check is switched off instead.
    my $output_is_not_a_named_file = $output_file eq '-' || ref $output_file;
    if ( $output_is_not_a_named_file && $rOpts->{'check-syntax'} ) {
        $rOpts->{'check-syntax'} = 0;
        ${$rpending_logfile_message} .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM
    }

    my %state = (
        _fh               => $fh,
        _fh_tee           => undef,
        _output_file      => $output_file,
        _output_file_open => $output_file_open,
        _tee_flag         => 0,
        _tee_file         => $tee_file,
        _tee_file_opened  => 0,
        _line_separator   => $line_separator,
        _binmode          => $binmode,
    );
    return bless \%state, $class;
}
3876
sub write_line {

    # Write one line to the output stream and, while the tee flag is set,
    # to the tee file as well.
    #   $line - the text to write; any trailing input record separator is
    #           replaced by the line separator of this object
    #
    # The line is copied out of @_ before it is changed.  Working on $_[0]
    # directly would alter the caller's own variable and would die with
    # "Modification of a read-only value" when a constant is passed.
    my ( $self, $line ) = @_;

    chomp $line;
    $line .= $self->{_line_separator};

    $self->{_fh}->print($line) if ( $self->{_output_file_open} );

    if ( $self->{_tee_flag} ) {

        # the tee file is not opened until something is written to it
        unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
        my $fh_tee = $self->{_fh_tee};
        print $fh_tee $line;
    }
}
3894
# Start copying every line written by write_line to the tee file.
sub tee_on {
    my ($this) = @_;
    return $this->{_tee_flag} = 1;
}
3899
# Stop copying the lines written by write_line to the tee file.
sub tee_off {
    my ($this) = @_;
    return $this->{_tee_flag} = 0;
}
3904
# Open the tee file for writing and remember the handle in the object.
# Dies if no file name was configured or if the file cannot be opened.
sub really_open_tee_file {
    my $self     = shift;
    my $tee_file = $self->{_tee_file};

    # An undefined name handed to a three-argument open would silently
    # create an anonymous temporary file, so a missing name is rejected
    # here.
    defined($tee_file) && length($tee_file)
      or die("couldn't open TEE file: no file name was given\n");

    # The mode is passed separately from the name.  When it is glued to
    # the front of the name, whitespace around the name is stripped and
    # characters such as '>' or '|' at its start are read as part of the
    # mode, so the wrong file could be opened.
    my $fh_tee = IO::File->new( $tee_file, '>' )
      or die("couldn't open TEE file $tee_file: $!\n");

    binmode $fh_tee if $self->{_binmode};
    $self->{_tee_file_opened} = 1;
    $self->{_fh_tee}          = $fh_tee;
}
3915
# Close the output stream, if one was opened, and then the tee file.
# A failure to close the output stream is ignored.
sub close_output_file {
    my ($this) = @_;
    if ( $this->{_output_file_open} ) {
        eval { $this->{_fh}->close() };
    }
    return $this->close_tee_file();
}
3921
# Close the tee file if it has been opened and mark it as closed.
# A failure to close the handle is ignored.
sub close_tee_file {
    my ($this) = @_;

    if ( $this->{_tee_file_opened} ) {
        my $fh_tee = $this->{_fh_tee};
        eval { $fh_tee->close() };
        $this->{_tee_file_opened} = 0;
    }
}
3930
3931 #####################################################################
3932 #
3933 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3934 # useful for program development.
3935 #
3936 # Only one such file is created regardless of the number of input
3937 # files processed.  This allows the results of processing many files
3938 # to be summarized in a single file.
3939 #
3940 #####################################################################
3941
3942 package Perl::Tidy::Diagnostics;
3943
sub new {

    # Constructor; takes no arguments besides the class name.
    my ($class) = @_;

    my %state = (
        _write_diagnostics_count => 0,     # number of messages written
        _last_diagnostic_file    => "",    # input file of the last message
        _input_file              => "",    # input file being processed
        _fh                      => undef,
    );
    return bless \%state, $class;
}
3954
# Record the name of the input file that following diagnostic messages
# belong to.
sub set_input_file {
    my ( $this, $input_file ) = @_;
    return $this->{_input_file} = $input_file;
}
3959
# This is a diagnostic routine which is useful for program development.
# Output from debug messages goes to a file named DIAGNOSTICS, where
# it is labeled by file and line.  This allows many files to be
# scanned at once for some particular condition of interest.
#
# The arguments are the pieces of the message; they are joined with
# spaces and prefixed with the current input line number.  Dies if the
# DIAGNOSTICS file cannot be opened.
sub write_diagnostics {
    my $self = shift;

    # The file is opened by the first message and the handle is kept in
    # the object for all later messages.
    unless ( $self->{_write_diagnostics_count} ) {
        open( my $fh_new, '>', 'DIAGNOSTICS' )
          or die("couldn't open DIAGNOSTICS: $!\n");
        $self->{_fh} = $fh_new;
    }
    my $fh = $self->{_fh};

    # start a new section whenever the input file changes
    my $last_diagnostic_file = $self->{_last_diagnostic_file};
    my $input_file           = $self->{_input_file};
    if ( $last_diagnostic_file ne $input_file ) {
        print {$fh} "\nFILE:$input_file\n";
    }
    $self->{_last_diagnostic_file} = $input_file;

    my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
    print {$fh} "$input_line_number:\t@_";
    $self->{_write_diagnostics_count}++;
}
3982
3983 #####################################################################
3984 #
3985 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3986 #
3987 #####################################################################
3988
3989 package Perl::Tidy::Logger;
3990
# Constructor.
#   $rOpts        - reference to the options hash
#   $log_file     - destination of the log
#   $warning_file - destination of the warnings; either a file name or a
#                   reference
#   $saw_extrude  - stored for the wording of bug reports
sub new {
    my ( $class, $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;

    # an error file left over from an earlier run is removed; this only
    # applies when the destination is a file name
    if ( !ref($warning_file) ) {
        unlink($warning_file) if -e $warning_file;
    }

    my %state = (
        _log_file                      => $log_file,
        _rOpts                         => $rOpts,
        _fh_warnings                   => undef,
        _last_input_line_written       => 0,
        _at_end_of_file                => 0,
        _use_prefix                    => 1,
        _block_log_output              => 0,
        _line_of_tokens                => undef,
        _output_line_number            => undef,
        _wrote_line_information_string => 0,
        _wrote_column_headings         => 0,
        _warning_file                  => $warning_file,
        _warning_count                 => 0,
        _complaint_count               => 0,
        _saw_code_bug                  => -1,    # -1=no 0=maybe 1=for sure
        _saw_brace_error               => 0,
        _saw_extrude                   => $saw_extrude,
        _output_array                  => [],
    );
    return bless \%state, $class;
}
4022
sub close_log_file {

    # Close the stream that receives the warnings, if it was ever opened,
    # and forget the handle.  A failure to close it is ignored.
    my ($this) = @_;
    if ( $this->{_fh_warnings} ) {
        my $fh_warnings = $this->{_fh_warnings};
        eval { $fh_warnings->close() };
        $this->{_fh_warnings} = undef;
    }
}
4031
# Accessor: the value of the warning counter.
sub get_warning_count {
    my ($this) = @_;
    return $this->{_warning_count};
}
4036
# Accessor: the flag that controls whether messages are prefixed with
# line information.
sub get_use_prefix {
    my ($this) = @_;
    return $this->{_use_prefix};
}
4041
# Make logfile_output discard everything until unblock_log_output is
# called.
sub block_log_output {
    my ($this) = @_;
    return $this->{_block_log_output} = 1;
}
4046
# Let logfile_output store messages again after block_log_output.
sub unblock_log_output {
    my ($this) = @_;
    return $this->{_block_log_output} = 0;
}
4051
# Open a warning section: switch the line prefix off, emit an empty
# warning and write a banner line to the log.
sub interrupt_logfile {
    my ($this) = @_;
    $this->{_use_prefix} = 0;
    $this->warning("\n");

    my $banner = ( '#' x 24 ) . "  WARNING  " . ( '#' x 25 ) . "\n";
    return $this->write_logfile_entry($banner);
}
4058
# Close a warning section: write a closing rule to the log and switch the
# line prefix back on.
sub resume_logfile {
    my ($this) = @_;
    my $rule = ( '#' x 60 ) . "\n";
    $this->write_logfile_entry($rule);
    return $this->{_use_prefix} = 1;
}
4064
# Note that the end of the input has been reached.  A "Last line" entry
# is logged first unless line information was already written for the
# current line.
sub we_are_at_the_last_line {
    my ($this) = @_;
    $this->write_logfile_entry("Last line\n\n")
      if !$this->{_wrote_line_information_string};
    return $this->{_at_end_of_file} = 1;
}
4072
4073 # record some stuff in case we go down in flames
sub black_box {
    my ( $this, $line_of_tokens, $output_line_number ) = @_;
    my $input_line        = $line_of_tokens->{_line_text};
    my $input_line_number = $line_of_tokens->{_line_number};

    # keep the details of this line at hand in case a logfile message has
    # to be written for it
    $this->{_line_of_tokens}                = $line_of_tokens;
    $this->{_output_line_number}            = $output_line_number;
    $this->{_wrote_line_information_string} = 0;

    # A shortened copy of the line goes to the log when enough input lines
    # have passed since the last copy, or when the line starts a sub or a
    # package.
    my $lines_since_last_copy =
      $input_line_number - $this->{_last_input_line_written};
    my $logfile_gap = $this->{_rOpts}->{'logfile-gap'};

    if (   $lines_since_last_copy >= $logfile_gap
        || $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
    {
        $this->{_last_input_line_written} = $input_line_number;

        my $rlevels = $line_of_tokens->{_rlevels};
        my $level   = $rlevels->[0];

        # drop the original indentation and show one '.' per level instead
        my $out_str = $input_line;
        $out_str =~ s/^\s*//;
        chomp $out_str;
        $out_str = ( '.' x $level ) . $out_str;

        # long lines are cut off and marked
        $out_str = substr( $out_str, 0, 35 ) . " ...."
          if length($out_str) > 35;

        $this->logfile_output( "", "$out_str\n" );
    }
}
4109
sub write_logfile_entry {
    my ( $this, @parts ) = @_;

    # messages are marked with a leading >>> so that they cannot be
    # confused with the code shown in the log
    return $this->logfile_output( ">>>", "@parts" );
}
4116
# Add the explanation of the log columns and the column headings to the
# stored log output, and note that this has been done.
sub write_column_headings {
    my ($this) = @_;

    $this->{_wrote_column_headings} = 1;

    my $headings = <<EOM;
The nesting depths in the table below are at the start of the lines.
The indicated output line numbers are not always exact.
ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.

in:out indent c b  nesting   code + messages; (messages begin with >>>)
lines  levels i k            (code begins with one '.' per indent level)
------  ----- - - --------   -------------------------------------------
EOM
    return push @{ $this->{_output_array} }, $headings;
}
4132
sub make_line_information_string {

    # Build the columns of line information that go in front of a logfile
    # message.  The result is empty when no numbered input line has been
    # recorded yet.
    my ($this)            = @_;
    my $line_of_tokens    = $this->{_line_of_tokens};
    my $input_line_number = $line_of_tokens->{_line_number};
    return "" unless $input_line_number;

    my $output_line_number = $this->{_output_line_number};
    my $python_indentation_level =
      $line_of_tokens->{_python_indentation_level};

    # only the first entry of each per-token list is reported
    my $rlevels         = $line_of_tokens->{_rlevels};
    my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
    my $rci_levels      = $line_of_tokens->{_rci_levels};
    my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
    my $structural_indentation_level = $rlevels->[0];

    $this->write_column_headings() unless $this->{_wrote_column_headings};

    # keep logfile columns aligned for scripts up to 999 lines;
    # for longer scripts it doesn't really matter
    my $padding_for = sub {
        my ($number) = @_;
        return
            ( $number < 10 )  ? "  "
          : ( $number < 100 ) ? " "
          :                     "";
    };
    my $extra_space =
      $padding_for->($input_line_number) . $padding_for->($output_line_number);

    # There are 2 possible nesting strings:
    #   the depth counts, which look like this:  (0 [1 {2
    #   the actual tokens, which look like this: {{[
    # The tokens are easier to read and show the order, but could be
    # arbitrarily long, so they are used only if they fit in 8 columns.
    my $paren_depth          = $line_of_tokens->{_paren_depth};
    my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
    my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
    my $nesting_string =
      "($paren_depth [$square_bracket_depth {$brace_depth";
    my $nesting_tokens = $rnesting_tokens->[0];
    my $token_width    = length($nesting_tokens);
    if ( $token_width <= 8 ) {
        $nesting_string = $nesting_tokens . " " x ( 8 - $token_width );
    }

    # a continuation indentation too wide for its column is shown as '*'
    my $ci_level = $rci_levels->[0];
    $ci_level = '*' if $ci_level > 9;

    my $bk = $rnesting_blocks->[0] =~ /1$/ ? '1' : '0';

    $python_indentation_level = 0 if $python_indentation_level < 0;

    return
"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
}
4192
# Store one message for the logfile.
#   $prompt - marker placed between the line information and the message
#   $msg    - the message text
# Nothing is stored while log output is blocked.  The line information
# and the prompt are left out at the end of the file, while the prefix is
# switched off, and when no line information is available.
sub logfile_output {
    my ( $this, $prompt, $msg ) = @_;
    return if $this->{_block_log_output};

    my $entry;
    if ( $this->{_at_end_of_file} || !$this->{_use_prefix} ) {
        $entry = "$msg";
    }
    else {
        my $line_information_string = $this->make_line_information_string();
        $this->{_wrote_line_information_string} = 1;
        $entry =
            $line_information_string
          ? "$line_information_string   $prompt$msg"
          : "$msg";
    }
    return push @{ $this->{_output_array} }, $entry;
}
4214
# Accessor: the value of the brace error counter.
sub get_saw_brace_error {
    my ($this) = @_;
    return $this->{_saw_brace_error};
}
4219
# Count one more brace error.
sub increment_brace_error {
    my ($this) = @_;
    return $this->{_saw_brace_error}++;
}
4224
# Issue a warning about a brace error and count it.  Once
# BRACE_WARNING_LIMIT brace errors have been counted, a closing notice is
# given and further brace warnings are suppressed (but still counted).
sub brace_warning {
    my ( $this, @message ) = @_;
    use constant BRACE_WARNING_LIMIT => 10;

    my $brace_error_count = $this->{_saw_brace_error};
    $this->warning(@message) if $brace_error_count < BRACE_WARNING_LIMIT;

    $brace_error_count++;
    $this->{_saw_brace_error} = $brace_error_count;

    if ( $brace_error_count == BRACE_WARNING_LIMIT ) {
        $this->warning("No further warnings of this type will be given\n");
    }
}
4240
sub complain {

    # Handle a non-critical message.  With the 'warning-output' option it
    # is treated as a warning; otherwise it is counted as a complaint and
    # only written to the log.
    my ( $this, @message ) = @_;

    if ( $this->{_rOpts}->{'warning-output'} ) {
        $this->warning(@message);
    }
    else {
        $this->{_complaint_count}++;
        $this->write_logfile_entry(@message);
    }
}
4258
sub warning {

    # Report a problem to the warning stream and to the log.  Nothing is
    # done with the 'quiet' option.  The stream is opened by the first
    # warning; after WARNING_LIMIT warnings a closing notice is printed
    # and further warnings are only counted.
    my ( $this, @message ) = @_;
    use constant WARNING_LIMIT => 50;

    my $rOpts = $this->{_rOpts};
    unless ( $rOpts->{'quiet'} ) {

        my $warning_count = $this->{_warning_count};

        # first warning: decide where the warnings go
        if ( !$warning_count ) {
            my $fh_new;
            if ( $rOpts->{'standard-error-output'} ) {
                $fh_new = *STDERR;
            }
            else {
                my $warning_file = $this->{_warning_file};
                ( $fh_new, my $filename ) =
                  Perl::Tidy::streamhandle( $warning_file, 'w' );
                $fh_new or die("couldn't open $filename $!\n");

                # point the user to the file unless the destination was
                # passed in as a reference
                warn "## Please see file $filename\n" unless ref($warning_file);
            }
            $this->{_fh_warnings} = $fh_new;
        }

        my $fh_warnings = $this->{_fh_warnings};
        if ( $warning_count < WARNING_LIMIT ) {
            if ( $this->get_use_prefix() > 0 ) {

                # label the message with the current input line number
                my $input_line_number =
                  Perl::Tidy::Tokenizer::get_input_line_number();
                $fh_warnings->print("$input_line_number:\t@message");
                $this->write_logfile_entry("WARNING: @message");
            }
            else {
                $fh_warnings->print(@message);
                $this->write_logfile_entry(@message);
            }
        }
        $warning_count++;
        $this->{_warning_count} = $warning_count;

        if ( $warning_count == WARNING_LIMIT ) {
            $fh_warnings->print("No further warnings will be given\n");
        }
    }
}
4305
4306 # programming bug codes:
4307 #   -1 = no bug
4308 #    0 = maybe, not sure.
4309 #    1 = definitely
# Raise the bug code from "no bug" to "maybe"; a code that is already
# "maybe" or "definitely" is left as it is.
sub report_possible_bug {
    my ($this)  = @_;
    my $current = $this->{_saw_code_bug};
    my $updated = $current < 0 ? 0 : $current;
    return $this->{_saw_code_bug} = $updated;
}
4315
# Set the bug code to "definitely".
sub report_definite_bug {
    my ($this) = @_;
    return $this->{_saw_code_bug} = 1;
}
4320
# Ask the user to report a bug in perltidy, depending on the bug code:
#   1 (definitely) - always ask; the wording depends on whether the
#                    extrude flag was stored, and the number of added
#                    semicolons is mentioned if the formatter reports any
#   0 (maybe)      - ask only if $infile_syntax_ok is 1
# Nothing is written for any other bug code.
sub ask_user_for_bug_report {
    my ( $this, $infile_syntax_ok, $formatter ) = @_;
    my $saw_code_bug = $this->{_saw_code_bug};

    if ( $saw_code_bug == 1 ) {
        if ( $this->{_saw_extrude} ) {
            $this->warning(<<EOM);

You may have encountered a bug in perltidy.  However, since you are using the
-extrude option, the problem may be with perl or one of its modules, which have
occasional problems with this type of file.  If you believe that the
problem is with perltidy, and the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be corrected.
Include the smallest possible script which has the problem, along with the .LOG
file. See the manual pages for contact information.
Thank you!
EOM
        }
        else {
            $this->warning(<<EOM);

Oops, you seem to have encountered a bug in perltidy.  Please check the
BUGS file at http://perltidy.sourceforge.net.  If the problem is not
listed there, please report it so that it can be corrected.  Include the
smallest possible script which produces this message, along with the
.LOG file if appropriate.  See the manual pages for contact information.
Your efforts are appreciated.
Thank you!
EOM

            # the count stays at 0 if the formatter cannot be asked
            my $added_semicolon_count = 0;
            eval {
                $added_semicolon_count =
                  $formatter->get_added_semicolon_count();
            };
            if ( $added_semicolon_count > 0 ) {
                $this->warning(<<EOM);

The log file shows that perltidy added $added_semicolon_count semicolons.
Please rerun with -nasc to see if that is the cause of the syntax error.  Even
if that is the problem, please report it so that it can be fixed.
EOM
            }
        }
    }
    elsif ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) {
        $this->warning(<<EOM);

You may have encountered a code bug in perltidy.  If you think so, and
the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be
corrected.  Include the smallest possible script which has the problem,
along with the .LOG file. See the manual pages for contact information.
Thank you!
EOM
    }
}
4380
sub finish {

    # Called after all formatting to summarize errors and, when it is
    # wanted, to write the stored log output.
    my ( $this, $infile_syntax_ok, $formatter ) = @_;

    my $rOpts         = $this->{_rOpts};
    my $warning_count = $this->{_warning_count};
    my $saw_code_bug  = $this->{_saw_code_bug};
    my $log_file      = $this->{_log_file};

    # the log is kept for a possible bug in otherwise good input, for a
    # definite bug, and whenever the 'logfile' option is set
    my $save_logfile =
         ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
      || $saw_code_bug == 1
      || $rOpts->{'logfile'};

    # closing hints are added only if there were warnings
    if ($warning_count) {
        if ($save_logfile) {

            # blocked so that this hint is not echoed to the logfile
            $this->block_log_output();
            $this->warning(
                "The logfile $log_file may contain useful information\n");
            $this->unblock_log_output();
        }

        my $complaint_count = $this->{_complaint_count};
        if ( $complaint_count > 0 ) {
            $this->warning(
"To see $complaint_count non-critical warnings rerun with -w\n"
            );
        }

        my $log_is_incomplete = $rOpts->{'logfile-gap'} > 1 || !$save_logfile;
        if ( $this->{_saw_brace_error} && $log_is_incomplete ) {
            $this->warning("To save a full .LOG file rerun with -g\n");
        }
    }
    $this->ask_user_for_bug_report( $infile_syntax_ok, $formatter );

    # write out everything that was collected for the log
    if ($save_logfile) {
        my ($fh) = Perl::Tidy::streamhandle( $log_file, 'w' );
        if ($fh) {
            $fh->print($_) for @{ $this->{_output_array} };
            eval { $fh->close() };
        }
    }
}
4428
4429 #####################################################################
4430 #
4431 # The Perl::Tidy::DevNull class supplies a dummy print method
4432 #
4433 #####################################################################
4434
package Perl::Tidy::DevNull;

# constructor; the object carries no data
sub new {
    my ($class) = @_;
    return bless {}, $class;
}

# whatever is printed is discarded
sub print {
    return;
}

# there is nothing to close
sub close {
    return;
}
4439
4440 #####################################################################
4441 #
4442 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4443 #
4444 #####################################################################
4445
4446 package Perl::Tidy::HtmlWriter;
4447
4448 use File::Basename;
4449
# variables shared by all routines of this package
use vars qw(
  %html_color
  %html_bold
  %html_italic
  %token_short_names
  %short_to_long_names
  $rOpts
  $css_filename
  $css_linkname
  $missing_html_entities
);

# HTML::Entities is optional.  Try to load it and keep the outcome:
# $missing_html_entities is empty if the module was loaded and holds the
# error message otherwise.
{
    eval "use HTML::Entities";
    $missing_html_entities = $@;
}
4466
sub new {

    # Constructor.
    #   $input_file         - the input; replaced by "NONAME" when it is
    #                         empty, '-' or a reference
    #   $html_file          - destination of the html output
    #   $extension          - output extension; needed for frames
    #   $html_toc_extension - inserted into the name of the toc file
    #   $html_src_extension - inserted into the name of the source file
    # Returns the new object, or undef if the output cannot be opened.
    my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
        $html_src_extension )
      = @_;

    my ($html_fh) = Perl::Tidy::streamhandle( $html_file, 'w' );
    if ( !$html_fh ) {
        warn("can't open $html_file: $!\n");
        return undef;
    }
    my $html_file_opened = 1;

    if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
        $input_file = "NONAME";
    }

    # the table of contents is collected in a string
    my $toc_string;
    my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );

    my $html_pre_fh;
    my @pre_string_stack;
    if ( $rOpts->{'html-pre-only'} ) {

        # the pre section goes directly to the output stream
        $html_pre_fh = $html_fh;
        $html_pre_fh->print("<pre>\n");
    }
    else {

        # the pre section goes out to a temporary string
        my $pre_string;
        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
        push @pre_string_stack, \$pre_string;
    }

    # pod text gets diverted to a string if the 'pod2html' option is used;
    # the option is dropped together with 'html-pre-only' and when
    # Pod::Html cannot be loaded
    my $html_pod_fh;
    my $pod_string;
    if ( $rOpts->{'pod2html'} ) {
        if ( $rOpts->{'html-pre-only'} ) {
            undef $rOpts->{'pod2html'};
        }
        else {
            eval "use Pod::Html";
            if ($@) {
                warn
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
                undef $rOpts->{'pod2html'};
            }
            else {
                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
            }
        }
    }

    # the names of the frame files are built from the output extension
    my $toc_filename;
    my $src_filename;
    if ( $rOpts->{'frames'} ) {
        if ($extension) {
            $toc_filename = $input_file . $html_toc_extension . $extension;
            $src_filename = $input_file . $html_src_extension . $extension;
        }
        else {
            warn
"cannot use frames without a specified output extension; ignoring -frm\n";
            undef $rOpts->{'frames'};
        }
    }

    # ----------------------------------------------------------
    # Output is now directed as follows:
    # html_toc_fh <-- table of contents items
    # html_pre_fh <-- the <pre> section of formatted code, except:
    # html_pod_fh <-- pod goes here with the pod2html option
    # ----------------------------------------------------------

    # without a 'title' option the title is taken from the input file name
    my $title = $rOpts->{'title'};
    if ( !$title ) {
        ($title) = fileparse($input_file);
    }

    my $toc_item_count = 0;
    my $in_toc_package = "";
    my $last_level     = 0;

    my %state = (
        _input_file        => $input_file,          # name of input file
        _title             => $title,               # title, unescaped
        _html_file         => $html_file,           # name of .html output file
        _toc_filename      => $toc_filename,        # for frames option
        _src_filename      => $src_filename,        # for frames option
        _html_file_opened  => $html_file_opened,    # a flag
        _html_fh           => $html_fh,             # the output stream
        _html_pre_fh       => $html_pre_fh,         # pre section goes here
        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
        _rpod_string       => \$pod_string,         # string holding pod
        _pod_cut_count     => 0,                    # how many =cut's?
        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
        _rtoc_string       => \$toc_string,         # string holding toc
        _rtoc_item_count   => \$toc_item_count,     # how many toc items
        _rin_toc_package   => \$in_toc_package,     # package name
        _rtoc_name_count   => {},                   # hash to track unique names
        _rpackage_stack    => [],                   # stack to check for package
                                                    # name changes
        _rlast_level       => \$last_level,         # brace indentation level
    );
    return bless \%state, $class;
}
4580
sub add_toc_item {

    # Record one entry in the html table of contents and write the
    # matching anchor into the <pre> stream.
    #
    # Arguments:
    #   $name - the anchor text (a package name, a sub name, or a marker)
    #   $type - one of 'package', 'sub', '__END__', '__DATA__', 'EOF'
    #
    # This routine runs even when no table of contents will be written,
    # because the anchors are still wanted in the <pre> text.  The caller
    # must finish with an 'EOF' item so that the list gets closed.
    my ( $self, $name, $type ) = @_;
    my $toc_fh         = $self->{_html_toc_fh};
    my $pre_fh         = $self->{_html_pre_fh};
    my $rname_count    = $self->{_rtoc_name_count};
    my $ritem_count    = $self->{_rtoc_item_count};
    my $rlevel         = $self->{_rlast_level};
    my $rcurrent_pkg   = $self->{_rin_toc_package};
    my $rpackage_stack = $self->{_rpackage_stack};

    my $is_package = $type eq 'package';
    my $is_sub     = $type eq 'sub';
    my $is_eof     = $type eq 'EOF';

    # Subs are listed in a sublist under their package.  These two
    # helpers are the only code that opens or closes such a sublist, so
    # the nesting in the output stays balanced.
    my $close_sublist = sub {
        return unless $$rcurrent_pkg;
        $toc_fh->print("</ul>\n</li>\n");
        $$rcurrent_pkg = "";
    };

    my $open_sublist = sub {
        my ( $anchor, $package ) = @_;

        # does nothing unless a sublist is currently open
        $close_sublist->();
        $toc_fh->print(
            "<li><a href=\"#$anchor\">package $package</a>\n<ul>\n");
        $$rcurrent_pkg = $package;
    };

    # The first item opens the table of contents ...
    if ( !$$rtoc_item_count_placeholder_never_used ) { }
    if ( !$$ritem_count ) {

        # ... unless the very first item is EOF, in which case there is
        # nothing to index and no toc is produced
        return if $is_eof;
        $toc_fh->print(
            "<!-- BEGIN CODE INDEX --><a name=\"code-index\"></a>\n<ul>\n");
    }
    ++$$ritem_count;

    # Build the anchor name: packages get a 'package-' prefix, everything
    # else uses its own name.
    my $anchor = $is_package ? "package-$name" : $name;

    # A repeated name (compared without regard to case) gets '-1', '-2',
    # ... appended.  The result is unique because sub and package names
    # cannot themselves contain a '-'.
    my $times_seen = $rname_count->{ lc $anchor }++;
    $anchor .= "-$times_seen" if $times_seen;

    # With pod2html every anchor gets a trailing '-' so that it cannot
    # collide with an anchor name created by pod2html.
    $anchor .= '-' if $rOpts->{'pod2html'};

    my $plain_entry = "<li><a href=\"#$anchor\">$name</a></li>\n";

    if ($is_package) {
        $open_sublist->( $anchor, $name );
    }
    elsif ($is_sub) {

        # a sub belongs to the package at the current brace level, or to
        # 'main' when none is recorded there
        my $package = $rpackage_stack->[$$rlevel] || 'main';

        # leave a sublist that belongs to some other package
        if ( $$rcurrent_pkg && $$rcurrent_pkg ne $package ) {
            $close_sublist->();
        }

        # and make sure a sublist for this package is open
        $open_sublist->( $anchor, $package ) unless $$rcurrent_pkg;

        $toc_fh->print($plain_entry);
    }
    else {

        # markers such as __END__, __DATA__ and EOF sit at the top level
        $close_sublist->();
        $toc_fh->print($plain_entry);
    }

    # the anchor itself goes into the <pre> section
    $pre_fh->print("<a name=\"$anchor\"></a>");

    # the EOF item also closes the table of contents
    if ($is_eof) {
        $toc_fh->print("</ul>\n<!-- END CODE INDEX -->\n");
    }
}
4687
BEGIN {

    # Official list of the token classes which the user may configure.
    # The long names are used as getopt keys; the short names are the
    # abbreviations accepted on input.  Short names loosely follow the
    # token type characters but must be alphanumeric so that they can be
    # typed on a command line.  Because html is case insensitive, the
    # table must use a single case (all lower case was chosen).
    # When adding NEW_TOKENS: update this hash table
    # short name => long name
    %short_to_long_names = (

        # literals and words
        'n' => 'numeric',
        'v' => 'v-string',
        'q' => 'quote',
        'w' => 'bareword',
        'i' => 'identifier',
        'k' => 'keyword',
        'j' => 'label',
        'm' => 'subroutine',

        # punctuation and containers
        'p'  => 'paren',
        's'  => 'structure',
        'cm' => 'comma',
        'co' => 'colon',
        'sc' => 'semicolon',
        'pu' => 'punctuation',

        # comments, here-docs and pod
        'c'  => 'comment',
        'h'  => 'here-doc-target',
        'hh' => 'here-doc-text',
        'pd' => 'pod-text',
    );

    # Map actual token types onto the short names above.  Any token type
    # which is not mapped here gets the 'punctuation' properties.
    #
    # The values of this table are keys of the previous table.  The keys
    # are token types; they can be listed by running with
    # --dump-token-types (-dtt).
    # When adding NEW_TOKENS: update this hash table
    # $type => $short_name
    %token_short_names = (
        '#'  => 'c',
        'P'  => 'pd',
        'n'  => 'n',
        'v'  => 'v',
        'k'  => 'k',
        'F'  => 'k',
        'Q'  => 'q',
        'q'  => 'q',
        'J'  => 'j',
        'j'  => 'j',
        'h'  => 'h',
        'H'  => 'hh',
        'w'  => 'w',
        'M'  => 'm',
        ','  => 'cm',
        '=>' => 'cm',
        ';'  => 'sc',
        'f'  => 'sc',
        ':'  => 'co',
        'A'  => 'co',
        '('  => 'p',
        ')'  => 'p',
    );

    # These token types are all treated as identifiers for now
    # FIXME: need to separate user defined modules as separate type
    foreach my $type (qw( i t U C Y Z G :: )) {
        $token_short_names{$type} = 'i';
    }

    # These token types are treated as 'structure'
    foreach my $type (qw( { } )) {
        $token_short_names{$type} = 's';
    }

    # OLD NOTES: save for reference
    # Any of these could be added later if it would be useful.
    # For now, they will by default become punctuation
    #    my @list = qw" L R [ ] ";
    #    @token_long_names{@list} = ('non-structure') x scalar(@list);
    #
    #    my @list = qw"
    #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
    #      ";
    #    @token_long_names{@list} = ('math') x scalar(@list);
    #
    #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
    #    @token_long_names{@list} = ('bit') x scalar(@list);
    #
    #    my @list = qw" == != < > <= <=> ";
    #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
    #
    #    my @list = qw" && || ! &&= ||= //= ";
    #    @token_long_names{@list} = ('logical') x scalar(@list);
    #
    #    my @list = qw" . .= =~ !~ x x= ";
    #    @token_long_names{@list} = ('string-operators') x scalar(@list);
    #
    #    # Incomplete..
    #    my @list = qw" .. -> <> ... \ ? ";
    #    @token_long_names{@list} = ('misc-operators') x scalar(@list);

}
4794
sub make_getopt_long_names {

    # Append the getopt specification of every html-related option to
    # the array referenced by $rgetopt_names.
    my ( $class, $rgetopt_names ) = @_;

    # a color, an italic and a bold option for each token class
    foreach my $long_name ( values %short_to_long_names ) {
        push @$rgetopt_names,
          "html-color-$long_name=s",
          "html-italic-$long_name!",
          "html-bold-$long_name!";
    }

    # general html options
    push @$rgetopt_names, qw(
      html-color-background=s
      html-linked-style-sheet=s
      nohtml-style-sheets
      html-pre-only
      html-line-numbers
      html-entities!
      stylesheet
      html-table-of-contents!
      pod2html!
      frames!
      html-toc-extension=s
      html-src-extension=s
    );

    # Pod::Html parameters:
    push @$rgetopt_names, qw(
      backlink=s
      cachedir=s
      htmlroot=s
      libpods=s
      podpath=s
      podroot=s
      title=s
    );

    # Pod::Html parameters with leading 'pod' which will be removed
    # before the call to Pod::Html
    push @$rgetopt_names, qw(
      podquiet!
      podverbose!
      podrecurse!
      podflush
      podheader!
      podindex!
    );
}
4834
sub make_abbreviated_names {

    # Add the short forms of the html options to the expansion table
    # referenced by $rexpansion.  Each entry maps an abbreviation to a
    # one-element list holding the long option name, for example:
    #      'hcc'    => [qw(html-color-comment)],
    #      'hck'    => [qw(html-color-keyword)],
    my ( $class, $rexpansion ) = @_;

    # color/bold/italic abbreviations, one set per token class
    foreach my $short_name ( keys %short_to_long_names ) {
        my $long_name = $short_to_long_names{$short_name};
        $rexpansion->{"hc$short_name"}  = ["html-color-$long_name"];
        $rexpansion->{"hb$short_name"}  = ["html-bold-$long_name"];
        $rexpansion->{"hi$short_name"}  = ["html-italic-$long_name"];
        $rexpansion->{"nhb$short_name"} = ["nohtml-bold-$long_name"];
        $rexpansion->{"nhi$short_name"} = ["nohtml-italic-$long_name"];
    }

    # abbreviations for the remaining html options
    $rexpansion->{"hcbg"}  = ["html-color-background"];
    $rexpansion->{"pre"}   = ["html-pre-only"];
    $rexpansion->{"toc"}   = ["html-table-of-contents"];
    $rexpansion->{"ntoc"}  = ["nohtml-table-of-contents"];
    $rexpansion->{"nnn"}   = ["html-line-numbers"];
    $rexpansion->{"hent"}  = ["html-entities"];
    $rexpansion->{"nhent"} = ["nohtml-entities"];
    $rexpansion->{"css"}   = ["html-linked-style-sheet"];
    $rexpansion->{"nss"}   = ["nohtml-style-sheets"];
    $rexpansion->{"ss"}    = ["stylesheet"];
    $rexpansion->{"pod"}   = ["pod2html"];
    $rexpansion->{"npod"}  = ["nopod2html"];
    $rexpansion->{"frm"}   = ["frames"];
    $rexpansion->{"nfrm"}  = ["noframes"];
    $rexpansion->{"text"}  = ["html-toc-extension"];
    $rexpansion->{"sext"}  = ["html-src-extension"];
}
4871
sub check_options {

    # Called once after the options have been parsed.  Stores the options
    # hash reference in $rOpts, fills in default colors and font
    # properties, builds the per-token lookup tables, and handles the
    # style sheet options.
    ( my $class, $rOpts ) = @_;

    # X11 color names for default settings that seemed to look ok
    # (the names only make the code below readable; it is the hex
    # numbers which are actually written)
    use constant ForestGreen   => "#228B22";
    use constant SaddleBrown   => "#8B4513";
    use constant magenta4      => "#8B008B";
    use constant IndianRed3    => "#CD5555";
    use constant DeepSkyBlue4  => "#00688B";
    use constant MediumOrchid3 => "#B452CD";
    use constant black         => "#000000";
    use constant white         => "#FFFFFF";
    use constant red           => "#FF0000";

    # Default color, bold and italic settings by short token name.
    # Anything missing from this table gets the default (punctuation)
    # color -- currently these types: ws pu s sc cm co p
    # When adding NEW_TOKENS: add an entry here if you don't want defaults
    my @default_properties = (

        # [ $short_name, default_color, bold?, italic? ]
        [ 'c',  ForestGreen,   0, 0 ],
        [ 'pd', ForestGreen,   0, 1 ],
        [ 'k',  magenta4,      1, 0 ],    # was SaddleBrown
        [ 'q',  IndianRed3,    0, 0 ],
        [ 'hh', IndianRed3,    0, 1 ],
        [ 'h',  IndianRed3,    1, 0 ],
        [ 'i',  DeepSkyBlue4,  0, 0 ],
        [ 'w',  black,         0, 0 ],
        [ 'n',  MediumOrchid3, 0, 0 ],
        [ 'v',  MediumOrchid3, 0, 0 ],
        [ 'j',  IndianRed3,    1, 0 ],
        [ 'm',  red,           1, 0 ],
    );
    foreach my $rdefault (@default_properties) {
        set_default_properties( @{$rdefault} );
    }

    set_default_color( 'html-color-background',  white );
    set_default_color( 'html-color-punctuation', black );

    # Build the property lookup tables keyed by short token name; the
    # html markup of every token goes through these tables.
    foreach my $short_name ( keys %short_to_long_names ) {
        my $long_name = $short_to_long_names{$short_name};
        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
    }

    # -ss: write the style sheet to STDOUT and exit
    if ( defined( $rOpts->{'stylesheet'} ) ) {
        write_style_sheet_file('-');
        exit 0;
    }

    # -css must be followed by a file name, not by another option
    my $linked_sheet = $rOpts->{'html-linked-style-sheet'};
    if ( defined($linked_sheet) ) {
        $css_linkname = $linked_sheet;
        if ( $css_linkname =~ /^-/ ) {
            die "You must specify a valid filename after -css\n";
        }
    }

    if ($css_linkname) {

        # -css and -nss conflict; -css wins
        if ( $rOpts->{'nohtml-style-sheets'} ) {
            $rOpts->{'nohtml-style-sheets'} = 0;
            warning("You can't specify both -css and -nss; -nss ignored\n");
        }

        # Write the style sheet file only if it does not exist yet: the
        # user may have edited an existing one by hand.  This also avoids
        # a potential disaster when the style sheet name is forgotten:
        #    perltidy -html -css myfile1.pl myfile2.pl
        # Here myfile1.pl would be taken as the style sheet name by GetOpts
        my $css_filename = $css_linkname;
        write_style_sheet_file($css_filename) unless -e $css_filename;
    }
    $missing_html_entities = 1 unless $rOpts->{'html-entities'};
}
4958
sub write_style_sheet_file {

    # Write the default style sheet to the file named $css_filename.
    # The special name '-' sends the style sheet to STDOUT.
    # Dies if the file cannot be opened.
    my $css_filename = shift;

    # The name comes from the command line, so it must never be pasted
    # into a two-argument open string where leading or trailing mode
    # characters and whitespace would be interpreted.  Only the fixed
    # string for STDOUT uses that form; a real file name is opened with
    # an explicit write mode.
    my $fh =
        $css_filename eq '-'
      ? IO::File->new('>-')
      : IO::File->new( $css_filename, 'w' );
    unless ($fh) {
        die "can't open $css_filename: $!\n";
    }
    write_style_sheet_data($fh);
    eval { $fh->close };
}
4969
sub write_style_sheet_data {

    # Write the style sheet text to the open file handle $fh: first the
    # body and pre rules, then one rule per token class in sorted order
    # of the short names.
    my ($fh) = @_;

    my $text_color = $rOpts->{'html-color-punctuation'};
    my $bg_color   = $rOpts->{'html-color-background'};

    # pre-bgcolor is new and may not be defined; fall back on the page
    # background
    my $pre_bg_color = $rOpts->{'html-pre-color-background'} || $bg_color;

    $fh->print(<<"EOM");
/* default style sheet generated by perltidy */
body {background: $bg_color; color: $text_color}
pre { color: $text_color;
      background: $pre_bg_color;
      font-family: courier;
    }

EOM

    foreach my $short_name ( sort keys %short_to_long_names ) {

        # one-character names get an extra space so that the braces align
        my $selector =
          length($short_name) == 1 ? ".$short_name " : ".$short_name";

        # a class without its own color uses the text color
        my $color = $html_color{$short_name};
        $color = $text_color unless defined($color);

        my $rule = "$selector \{ color: $color;";
        $rule .= " font-weight:bold;"  if $html_bold{$short_name};
        $rule .= " font-style:italic;" if $html_italic{$short_name};
        $rule .= "} /* $short_to_long_names{$short_name} */\n";
        $fh->print($rule);
    }
}
5011
sub set_default_color {

    # Make sure that $rOpts->{$key} holds a color.  A true value already
    # stored there is kept; otherwise the given default is used.  Either
    # way the value goes through check_RGB before it is stored.
    my ( $key, $default_color ) = @_;
    my $chosen = $rOpts->{$key} ? $rOpts->{$key} : $default_color;
    $rOpts->{$key} = check_RGB($chosen);
}
5019
sub check_RGB {

    # A color given as exactly 6 hex digits is an RGB value and gets a
    # leading '#'.  Anything else is returned unchanged, on the assumption
    # that it is a valid ascii color name.
    my ($color) = @_;
    return ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) ? "#$color" : $color;
}
5028
sub set_default_properties {

    # Fill in the color, bold and italic options of one token class,
    # identified by its short name.  A bold or italic value which is
    # already defined in $rOpts is left alone; the color is handled by
    # set_default_color.
    my ( $short_name, $color, $bold, $italic ) = @_;
    my $long_name = $short_to_long_names{$short_name};

    set_default_color( "html-color-$long_name", $color );

    my $bold_key = "html-bold-$long_name";
    $rOpts->{$bold_key} = $bold unless defined $rOpts->{$bold_key};

    my $italic_key = "html-italic-$long_name";
    $rOpts->{$italic_key} = $italic unless defined $rOpts->{$italic_key};

    # as before, the value of the call is the resulting italic setting
    return $rOpts->{$italic_key};
}
5039
sub pod_to_html {

    # Use Pod::Html to process the pod and make the page, then merge the
    # perltidy sections (css, code index, <pre> code sections) into it
    # while copying the result to the html output stream.
    #
    # Arguments:
    #   $pod_string        - the pod text
    #   $css_string        - style sheet text written just before the
    #                        <body> line; a false value means none
    #   $toc_string        - perltidy's code index; a false value means none
    #   $rpre_string_stack - ref to an array of refs to code sections;
    #                        sections are shifted off as they are written
    #
    # Returns 1 if success, 0 otherwise.  Once the pod2html output has
    # been read, the html output stream is closed before returning, even
    # when the result is 0.  On the earlier failure paths it is left open.
    my $self = shift;
    my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
    my $title        = $self->{_title};
    my $success_flag = 0;

    # don't try to use pod2html if no pod
    unless ($pod_string) {
        return $success_flag;
    }

    # Pod::Html requires a real temporary filename
    # If we are making a frame, we have a name available
    # Otherwise, we have to find one
    my $tmpfile;
    if ( $rOpts->{'frames'} ) {
        $tmpfile = $self->{_toc_filename};
    }
    else {
        $tmpfile = Perl::Tidy::make_temporary_filename();
    }
    my $fh_tmp = IO::File->new( $tmpfile, 'w' );
    unless ($fh_tmp) {
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
        return $success_flag;
    }

    #------------------------------------------------------------------
    # Warning: a temporary file now exists; every return from here on
    # must unlink it first.
    #------------------------------------------------------------------

    # write the pod text to the temporary file
    $fh_tmp->print($pod_string);
    $fh_tmp->close();

    # Hand off the pod to pod2html.
    # Note that we can use the same temporary filename for input and output
    # because of the way pod2html works.
    {

        my @args;
        push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";

        # Flags with string args:
        # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
        # "podpath=s", "podroot=s"
        # Note: -css=s is handled by perltidy itself
        foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot))
        {
            if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
        }

        # Toggle switches; these have extra leading 'pod'
        # "header!", "index!", "recurse!", "quiet!", "verbose!"
        foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
            my $kwd = $kw;    # allows us to strip 'pod'
            if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
            elsif ( defined( $rOpts->{$kw} ) ) {
                $kwd =~ s/^pod//;
                push @args, "--no$kwd";
            }
        }

        # "flush",
        if ( $rOpts->{'podflush'} ) { push @args, "--flush" }

        # Must clean up if pod2html dies (it can);
        # Be careful not to overwrite callers __DIE__ routine
        local $SIG{__DIE__} = sub {
            print $_[0];
            unlink $tmpfile if -e $tmpfile;
            exit 1;
        };

        pod2html(@args);
    }
    $fh_tmp = IO::File->new( $tmpfile, 'r' );
    unless ($fh_tmp) {

        # this error shouldn't happen ... we just used this filename
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";

        # nothing has been written to the html stream yet, so it is left
        # open; only the temporary file needs cleaning up
        unlink $tmpfile if -e $tmpfile;
        return $success_flag;
    }

    my $html_fh = $self->{_html_fh};
    my @toc;
    my $in_toc;
    my $no_print;

    # This routine will write the html selectively and store the toc
    my $html_print = sub {
        foreach (@_) {
            $html_fh->print($_) unless ($no_print);
            if ($in_toc) { push @toc, $_ }
        }
    };

    # loop over lines of html output from pod2html and merge in
    # the necessary perltidy html sections
    my ( $saw_body, $saw_index, $saw_body_end );

    # test for definedness so that a final line of "0" is not mistaken
    # for end of file
    while ( defined( my $line = $fh_tmp->getline() ) ) {

        if ( $line =~ /^\s*<html>\s*$/i ) {
            my $date = localtime;
            $html_print->("<!-- Generated by perltidy on $date -->\n");
            $html_print->($line);
        }

        # Copy the perltidy css, if any, just before the <body> line
        elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
            $saw_body = 1;
            $html_print->($css_string) if $css_string;
            $html_print->($line);

            # add a top anchor and heading
            $html_print->("<a name=\"-top-\"></a>\n");
            $title = escape_html($title);
            $html_print->("<h1>$title</h1>\n");
        }
        elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
            $in_toc = 1;

            # when frames are used, an extra table of contents in the
            # contents panel is confusing, so don't print it
            $no_print = $rOpts->{'frames'}
              || !$rOpts->{'html-table-of-contents'};
            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
            $html_print->($line);
        }

        # Copy the perltidy toc, if any, after the Pod::Html toc
        elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
            $saw_index = 1;
            $html_print->($line);
            if ($toc_string) {
                $html_print->("<hr />\n") if $rOpts->{'frames'};
                $html_print->("<h2>Code Index:</h2>\n");

                # $in_toc is still set here, so these lines are also
                # collected in @toc by $html_print
                my @code_index_lines = map { "$_\n" } split /\n/, $toc_string;
                $html_print->(@code_index_lines);
            }
            $in_toc   = 0;
            $no_print = 0;
        }

        # Copy one perltidy section after each marker
        elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
            my ( $before_marker, $after_marker ) = ( $1, $2 );
            $line = $after_marker;
            $html_print->($before_marker) if $before_marker;

            # Intermingle code and pod sections if we saw multiple =cut's.
            if ( $self->{_pod_cut_count} > 1 ) {
                my $rpre_string = shift(@$rpre_string_stack);

                # the stack may be empty, so check the reference itself
                # before dereferencing it
                if ( $rpre_string && $$rpre_string ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
                else {

                    # shouldn't happen: we stored a string before writing
                    # each marker.
                    warn
"Problem merging html stream with pod2html; order may be wrong\n";
                }
                $html_print->($line);
            }

            # If didn't see multiple =cut lines, we'll put the pod out first
            # and then the code, because it's less confusing.
            else {

                # since we are not intermixing code and pod, we don't need
                # or want any <hr> lines which separated pod and code
                $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
            }
        }

        # Copy any remaining code section before the </body> tag
        elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
            $saw_body_end = 1;
            if (@$rpre_string_stack) {
                unless ( $self->{_pod_cut_count} > 1 ) {
                    $html_print->('<hr />');
                }
                while ( my $rpre_string = shift(@$rpre_string_stack) ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
            }
            $html_print->($line);
        }
        else {
            $html_print->($line);
        }
    }

    # Release the temporary file before it is unlinked (and possibly
    # rewritten as the frames table of contents) below.
    $fh_tmp->close();

    $success_flag = 1;
    unless ($saw_body) {
        warn "Did not see <body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_body_end) {
        warn "Did not see </body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_index) {
        warn "Did not find INDEX END in pod2html output\n";
        $success_flag = 0;
    }

    eval { $html_fh->close() };

    # note that we have to unlink tmpfile before making frames
    # because the tmpfile may be one of the names used for frames
    unlink $tmpfile if -e $tmpfile;
    if ( $success_flag && $rOpts->{'frames'} ) {
        $self->make_frame( \@toc );
    }
    return $success_flag;
}
5270
sub make_frame {

    # Turn the finished no-frames html output into a framed document
    # with the table of contents in the left panel and the text in the
    # right panel.
    #
    # $rtoc is a reference to an array holding the table of contents
    # lines.  Three html files result:
    #   - the table of contents         ($self->{_toc_filename})
    #   - the contents (source code)    ($self->{_src_filename})
    #   - the frame which contains them ($self->{_html_file})
    my ( $self, $rtoc ) = @_;
    my $html_filename = $self->{_html_file};
    my $toc_filename  = $self->{_toc_filename};
    my $src_filename  = $self->{_src_filename};
    my $title         = escape_html( $self->{_title} );

    # FUTURE input parameter:
    my $top_basename = "";

    # name of the frame which displays the source
    my $src_frame_name = 'SRC';

    # the links between the files are relative, so only basenames are used
    my ($toc_basename) = fileparse($toc_filename);
    my ($src_basename) = fileparse($src_filename);

    # Step 1: write the table of contents panel; its anchors are changed
    # to point into the source panel
    write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
        $src_frame_name );

    # Step 2: the existing .html file becomes the contents panel
    rename( $html_filename, $src_filename )
      or die "Cannot rename $html_filename to $src_filename:$!\n";

    # Step 3: the original html filename is reused for the frame itself
    write_frame_html(
        $title,        $html_filename, $top_basename,
        $toc_basename, $src_basename,  $src_frame_name
    );
}
5316
sub write_toc_html {

    # Write a separate html table of contents file for frames.
    #
    # Arguments:
    #   $title          - page title, used as given in the markup
    #   $toc_filename   - name of the file to write
    #   $src_basename   - file which the table of contents links into
    #   $rtoc           - ref to array of toc lines; the anchors in these
    #                     lines are rewritten in place
    #   $src_frame_name - frame name used as the target of every link
    #
    # Returns the first rewritten anchor reported by change_anchor_names
    # (undefined if there was none).  Dies if the file cannot be opened
    # or closed.
    my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
    my $fh = IO::File->new( $toc_filename, 'w' )
      or die "Cannot open $toc_filename:$!\n";
    $fh->print(<<"EOM");
<html>
<head>
<title>$title</title>
</head>
<body>
<h1><a href="$src_basename#-top-" target="$src_frame_name">$title</a></h1>
EOM

    my $first_anchor =
      change_anchor_names( $rtoc, $src_basename, $src_frame_name );
    $fh->print( join "", @$rtoc );

    $fh->print(<<"EOM");
</body>
</html>
EOM

    # buffered write errors only show up when the handle is closed
    $fh->close() or die "Cannot close $toc_filename: $!\n";

    return $first_anchor;
}
5342
sub write_frame_html {

    # Write the html file which defines the frameset: the table of
    # contents in a left panel and the source in the right panel.
    #
    # Arguments:
    #   $title          - page title, used as given in the markup
    #   $frame_filename - name of the file to write
    #   $top_basename   - optional master index file; if true, it gets a
    #                     second left panel above the table of contents
    #   $toc_basename   - table of contents file shown on the left
    #   $src_basename   - source file shown on the right
    #   $src_frame_name - name given to the right-hand frame
    #
    # Dies if the file cannot be opened or closed.
    my (
        $title,        $frame_filename, $top_basename,
        $toc_basename, $src_basename,   $src_frame_name
    ) = @_;

    # report the file which was actually being opened
    my $fh = IO::File->new( $frame_filename, 'w' )
      or die "Cannot open $frame_filename:$!\n";

    $fh->print(<<"EOM");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
<?xml version="1.0" encoding="iso-8859-1" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
</head>
EOM

    # two left panels, one right, if master index file
    if ($top_basename) {
        $fh->print(<<"EOM");
<frameset cols="20%,80%">
<frameset rows="30%,70%">
<frame src = "$top_basename" />
<frame src = "$toc_basename" />
</frameset>
EOM
    }

    # one left panel, one right, if no master index file
    else {
        $fh->print(<<"EOM");
<frameset cols="20%,*">
<frame src = "$toc_basename" />
EOM
    }
    $fh->print(<<"EOM");
<frame src = "$src_basename" name = "$src_frame_name" />
<noframes>
<body>
<p>If you see this message, you are using a non-frame-capable web client.</p>
<p>This document contains:</p>
<ul>
<li><a href="$toc_basename">A table of contents</a></li>
<li><a href="$src_basename">The source code</a></li>
</ul>
</body>
</noframes>
</frameset>
</html>
EOM

    # buffered write errors only show up when the handle is closed
    $fh->close() or die "Cannot close $frame_filename: $!\n";
    return 1;
}
5398
sub change_anchor_names {

    # Rewrite the anchors in a list of html lines so that they point into
    # another file and open in a named frame.
    #
    #   $rlines   - ref to the array of lines; matching lines are changed
    #               in place
    #   $filename - name put in front of each '#anchor'
    #   $target   - value for the 'target' attribute of each link
    #
    # Returns the href of the first anchor which was rewritten, or undef
    # if no line contained one.
    my ( $rlines, $filename, $target ) = @_;
    my $first_anchor;

    for my $text ( @{$rlines} ) {

        #  A line of interest looks like this:
        #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
        #  ----  -       --------  -----------------
        #  $1              $4            $5
        next
          unless $text =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i;

        my ( $before, $anchor, $after ) = ( $1, $4, $5 );
        my $link = "$filename#$anchor";

        # any other attributes of the original tag are dropped
        $text = "$before<a href=\"$link\" target=\"$target\">$after\n";
        $first_anchor = $link unless ($first_anchor);
    }
    return $first_anchor;
}
5422
sub close_html_file {

    # Complete the html output.  Nothing is done unless the html file was
    # opened.  The page is finished in one of three ways:
    #   Path 1: in -pre mode only the closing </pre> tag is written
    #   Path 2: with pod2html, $self->pod_to_html is asked to build the
    #           page; if it returns false we continue with path 3
    #   Path 3: the complete page is written here
    # If frames were requested, $self->make_frame is called at the end.
    my ($self) = @_;
    return unless $self->{_html_file_opened};

    my $html_fh     = $self->{_html_fh};
    my $rtoc_string = $self->{_rtoc_string};

    # ---------------------------------
    # Path 1: finish up if in -pre mode
    # ---------------------------------
    if ( $rOpts->{'html-pre-only'} ) {
        $html_fh->print("</pre>\n");
        eval { $html_fh->close() };
        return;
    }

    # Close out the index
    $self->add_toc_item( 'EOF', 'EOF' );

    my $rpre_string_stack = $self->{_rpre_string_stack};

    # With pod2html and interleaved code/documentation, darken the <pre>
    # background; otherwise the distinction between code and
    # documentation is blurred.  Only done for a '#FFFFFF' background.
    if (   $rOpts->{pod2html}
        && $self->{_pod_cut_count} >= 1
        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
    {
        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
    }

    # Collect the css, or a link to it, in a string (if css is used)
    my $css_string;
    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );

    if ( $rOpts->{'html-linked-style-sheet'} ) {

        # the css lives in another file
        my $link_tag =
          qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />);
        $fh_css->print($link_tag);
    }
    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {

        # the css is embedded in this file
        $fh_css->print(qq(<style type="text/css">\n<!--\n));
        write_style_sheet_data($fh_css);
        $fh_css->print("-->\n</style>\n");
    }

    # -----------------------------------------------------------
    # Path 2: use pod2html if requested
    #         If that fails for some reason, continue on to path 3
    # -----------------------------------------------------------
    if ( $rOpts->{'pod2html'} ) {
        my $rpod_string = $self->{_rpod_string};
        my $pod_ok =
          $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
            $rpre_string_stack );
        return if $pod_ok;
    }

    # --------------------------------------------------
    # Path 3: write code in html, with pod only in italics
    # --------------------------------------------------
    my $input_file = $self->{_input_file};
    my $title      = escape_html($input_file);
    my $date       = localtime;
    $html_fh->print( <<"HTML_START");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- Generated by perltidy on $date -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
HTML_START

    # with css the <body> tag is plain; without it the colors go there
    if ($css_string) {
        $html_fh->print($css_string);
        $html_fh->print("</head>\n<body>\n");
    }
    else {
        my $bgcolor = $rOpts->{'html-color-background'};
        my $fgcolor = $rOpts->{'html-color-punctuation'};
        $html_fh->print(
            qq(</head>\n<body bgcolor="$bgcolor" text="$fgcolor">\n) );
    }

    $html_fh->print(qq(<a name="-top-"></a>\n));
    $html_fh->print("<h1>$title</h1>\n");

    # the table of contents goes inline unless frames will show it
    if (   $$rtoc_string
        && !$rOpts->{'frames'}
        && $rOpts->{'html-table-of-contents'} )
    {
        $html_fh->print($$rtoc_string);
    }

    # now the pre section(s)
    ( my $fname_comment = $input_file ) =~ s/--+/-/g; # protect HTML comment tags
    $html_fh->print( <<"END_PRE");
<hr />
<!-- contents of filename: $fname_comment -->
<pre>
END_PRE

    $html_fh->print( ${$_} ) for @{$rpre_string_stack};

    # and the end of the page
    $html_fh->print("</pre>\n</body>\n</html>\n");
    eval { $html_fh->close() };    # could be object without close method

    if ( $rOpts->{'frames'} ) {
        my @toc = map { "$_\n" } split /\n/, $$rtoc_string;
        $self->make_frame( \@toc );
    }
}
5563
sub markup_tokens {

    # Mark up the tokens of one line of code.
    #
    #   $rtokens     - ref to the array of token text
    #   $rtoken_type - ref to the array of token types
    #   $rlevels     - ref to the array of token levels
    #
    # Along the way the package stack and the table of contents are kept
    # up to date.  Returns a ref to the array of marked-up tokens.
    my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_;
    my $rlast_level    = $self->{_rlast_level};
    my $rpackage_stack = $self->{_rpackage_stack};
    my @colored_tokens;

    foreach my $j ( 0 .. $#{$rtoken_type} ) {
        my $type  = $rtoken_type->[$j];
        my $token = $rtokens->[$j];
        my $level = $rlevels->[$j];
        $level = 0 if ( $level < 0 );

        #-------------------------------------------------------
        # Update the package stack.  It is needed to keep the toc
        # correct, because a package may be declared within a block
        # and go out of scope when we leave the block.
        #-------------------------------------------------------
        my $last_level = ${$rlast_level};
        if ( $level > $last_level ) {

            # going deeper: inherit the package of the enclosing level
            $rpackage_stack->[ $level - 1 ] ||= 'main';
            $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
        }
        elsif ( $level < $last_level ) {
            my $package = $rpackage_stack->[$level] || 'main';

            # a package change caused by leaving a block needs its
            # own entry in the toc
            if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
                $self->add_toc_item( $package, 'package' );
            }
        }
        ${$rlast_level} = $level;

        #-------------------------------------------------------
        # A sub name arrives as one token; split it into the
        # keyword 'sub' and the sub name, and add a toc entry
        #-------------------------------------------------------
        if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
            my ( $keyword, $name ) = ( $1, $2 );
            my $keyword_html = $self->markup_html_element( $keyword, 'k' );
            push @colored_tokens, $keyword_html;
            $token = $name;
            $type  = 'M';

            # sub declarations stay out of the toc; these will have
            # leading token types 'i;'
            my $signature = join "", @{$rtoken_type};
            if ( $signature !~ /^i;/ ) {
                my $subname = $token;
                $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
                $self->add_toc_item( $subname, 'sub' );
            }
        }

        #-------------------------------------------------------
        # Likewise split a package name into the keyword 'package'
        # and the name; add it to the toc and the package stack
        #-------------------------------------------------------
        if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
            my ( $keyword, $name ) = ( $1, $2 );
            my $keyword_html = $self->markup_html_element( $keyword, 'k' );
            push @colored_tokens, $keyword_html;
            $token = $name;
            $self->add_toc_item( "$token", 'package' );
            $rpackage_stack->[$level] = $token;
        }

        $token = $self->markup_html_element( $token, $type );
        push @colored_tokens, $token;
    }
    return \@colored_tokens;
}
5640
sub markup_html_element {

    # Wrap one token in the html markup for its type.
    #
    #   $token - the text to mark up; it is html-escaped first
    #   $type  - the token type, looked up in %token_short_names
    #
    # Blank tokens are returned unchanged.  Returns the marked-up text.
    my ( $self, $token, $type ) = @_;

    # nothing to do for a blank token or a blank line
    return $token if ( $type eq 'b' || $token =~ /^\s*$/ );

    $token = escape_html($token);

    # look up the short abbreviation for this token type;
    # punctuation is the default
    my $short_name = $token_short_names{$type};
    $short_name = "pu" if ( !defined($short_name) );

    # without style sheets the color and font go into the markup itself
    if ( $rOpts->{'nohtml-style-sheets'} ) {
        my $color = $html_color{$short_name};

        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
            $token = qq(<font color="$color">) . $token . "</font>";
        }
        $token = "<i>$token</i>" if ( $html_italic{$short_name} );
        $token = "<b>$token</b>" if ( $html_bold{$short_name} );
    }

    # with style sheets everything except punctuation gets a span
    elsif ( $short_name ne 'pu' ) {
        $token = qq(<span class="$short_name">) . $token . "</span>";
    }
    return $token;
}
5674
sub escape_html {

    # Return a copy of a string with html special characters escaped.
    # HTML::Entities does the work unless $missing_html_entities is set,
    # in which case only & < > and " are replaced here.
    my ($token) = @_;

    if ( !$missing_html_entities ) {
        HTML::Entities::encode_entities($token);
        return $token;
    }

    # the ampersand must go first so that the entities added by the
    # other substitutions are not escaped again
    for ($token) {
        s/\&/&amp;/g;
        s/\</&lt;/g;
        s/\>/&gt;/g;
        s/\"/&quot;/g;
    }
    return $token;
}
5689
sub finish_formatting {

    # To be called after the last line: closes out the html file.
    # Returns nothing.
    my ($self) = @_;
    $self->close_html_file();
    return;
}
5697
sub write_line {

    # Mark up one input line as html and write it to the current <pre>
    # section.
    #
    # $line_of_tokens is a hash ref.  The keys read here are _line_type,
    # _line_text and _line_number, plus _rtoken_type, _rtokens and
    # _rlevels for a line of type 'CODE'.
    #
    # Nothing is done unless the html file has been opened.  When the
    # pod2html option is set, pod lines go to the pod stream instead and
    # nothing is written to the <pre> section for them.
    my $self = shift;
    return unless $self->{_html_file_opened};
    my $html_pre_fh      = $self->{_html_pre_fh};
    my ($line_of_tokens) = @_;
    my $line_type        = $line_of_tokens->{_line_type};
    my $input_line       = $line_of_tokens->{_line_text};
    my $line_number      = $line_of_tokens->{_line_number};
    chomp $input_line;

    # markup line of code..
    my $html_line;
    if ( $line_type eq 'CODE' ) {
        my $rtoken_type = $line_of_tokens->{_rtoken_type};
        my $rtokens     = $line_of_tokens->{_rtokens};
        my $rlevels     = $line_of_tokens->{_rlevels};

        # start with the leading whitespace of the input line; this
        # pattern always matches, possibly with an empty string
        ($html_line) = $input_line =~ /(^\s*)/;

        my ($rcolored_tokens) =
          $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
        $html_line .= join '', @$rcolored_tokens;
    }

    # markup line of non-code..
    else {
        my $line_character;
        if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
        elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
        elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
        elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
        elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
        elsif ( $line_type eq 'END_START' ) {
            $line_character = 'k';
            $self->add_toc_item( '__END__', '__END__' );
        }
        elsif ( $line_type eq 'DATA_START' ) {
            $line_character = 'k';
            $self->add_toc_item( '__DATA__', '__DATA__' );
        }
        elsif ( $line_type =~ /^POD/ ) {
            $line_character = 'P';
            if ( $rOpts->{'pod2html'} ) {
                my $html_pod_fh = $self->{_html_pod_fh};
                if ( $line_type eq 'POD_START' ) {

                    my $rpre_string_stack = $self->{_rpre_string_stack};
                    my $rpre_string       = $rpre_string_stack->[-1];

                    # if we have written any non-blank lines to the
                    # current pre section, start writing to a new output
                    # string
                    if ( $$rpre_string =~ /\S/ ) {
                        my $pre_string;
                        $html_pre_fh =
                          Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
                        $self->{_html_pre_fh} = $html_pre_fh;
                        push @$rpre_string_stack, \$pre_string;

                        # leave a marker in the pod stream so we know
                        # where to put the pre section we just
                        # finished.
                        my $for_html = '=for html';    # don't confuse pod utils
                        $html_pod_fh->print(<<EOM);

$for_html
<!-- pERLTIDY sECTION -->

EOM
                    }

                    # otherwise, just clear the current string and start
                    # over
                    else {
                        $$rpre_string = "";
                        $html_pod_fh->print("\n");
                    }
                }
                $html_pod_fh->print( $input_line . "\n" );
                if ( $line_type eq 'POD_END' ) {
                    $self->{_pod_cut_count}++;
                    $html_pod_fh->print("\n");
                }

                # the pod line has been diverted; nothing more to write
                return;
            }
        }
        else { $line_character = 'Q' }
        $html_line = $self->markup_html_element( $input_line, $line_character );
    }

    # add the line number if requested, padded so that numbers below
    # 1000 are right-justified in four columns
    if ( $rOpts->{'html-line-numbers'} ) {
        my $extra_space =
            ( $line_number < 10 )   ? "   "
          : ( $line_number < 100 )  ? "  "
          : ( $line_number < 1000 ) ? " "
          :                           "";
        $html_line = $extra_space . $line_number . " " . $html_line;
    }

    # write the line
    $html_pre_fh->print("$html_line\n");
}
5806
5807 #####################################################################
5808 #
5809 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5810 # line breaks to the token stream
5811 #
5812 # WARNING: This is not a real class for speed reasons.  Only one
5813 # Formatter may be used.
5814 #
5815 #####################################################################
5816
5817 package Perl::Tidy::Formatter;
5818
BEGIN {

    # Debug switches for the formatter.
    # Caution: these flags produce a lot of output.
    # They should all be 0 except when debugging small scripts
    use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
    use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
    use constant FORMATTER_DEBUG_FLAG_CI      => 0;
    use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
    use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
    use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
    use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
    use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
    use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
    use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
    use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
    use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;

    # Print a notice for each switch which has been turned on
    my @debug_flags = (
        [ 'BOND',    FORMATTER_DEBUG_FLAG_BOND ],
        [ 'BREAK',   FORMATTER_DEBUG_FLAG_BREAK ],
        [ 'CI',      FORMATTER_DEBUG_FLAG_CI ],
        [ 'FLUSH',   FORMATTER_DEBUG_FLAG_FLUSH ],
        [ 'FORCE',   FORMATTER_DEBUG_FLAG_FORCE ],
        [ 'LIST',    FORMATTER_DEBUG_FLAG_LIST ],
        [ 'NOBREAK', FORMATTER_DEBUG_FLAG_NOBREAK ],
        [ 'OUTPUT',  FORMATTER_DEBUG_FLAG_OUTPUT ],
        [ 'SPARSE',  FORMATTER_DEBUG_FLAG_SPARSE ],
        [ 'STORE',   FORMATTER_DEBUG_FLAG_STORE ],
        [ 'UNDOBP',  FORMATTER_DEBUG_FLAG_UNDOBP ],
        [ 'WHITE',   FORMATTER_DEBUG_FLAG_WHITE ],
    );
    foreach my $flag (@debug_flags) {
        my ( $key, $is_on ) = @{$flag};
        print "FORMATTER_DEBUGGING with key $key\n" if ($is_on);
    }
}
5853
5854 use Carp;
5855 use vars qw{
5856
5857   @gnu_stack
5858   $max_gnu_stack_index
5859   $gnu_position_predictor
5860   $line_start_index_to_go
5861   $last_indentation_written
5862   $last_unadjusted_indentation
5863   $last_leading_token
5864   $last_output_short_opening_token
5865
5866   $saw_VERSION_in_this_file
5867   $saw_END_or_DATA_
5868
5869   @gnu_item_list
5870   $max_gnu_item_index
5871   $gnu_sequence_number
5872   $last_output_indentation
5873   %last_gnu_equals
5874   %gnu_comma_count
5875   %gnu_arrow_count
5876
5877   @block_type_to_go
5878   @type_sequence_to_go
5879   @container_environment_to_go
5880   @bond_strength_to_go
5881   @forced_breakpoint_to_go
5882   @lengths_to_go
5883   @levels_to_go
5884   @leading_spaces_to_go
5885   @reduced_spaces_to_go
5886   @matching_token_to_go
5887   @mate_index_to_go
5888   @nesting_blocks_to_go
5889   @ci_levels_to_go
5890   @nesting_depth_to_go
5891   @nobreak_to_go
5892   @old_breakpoint_to_go
5893   @tokens_to_go
5894   @types_to_go
5895
5896   %saved_opening_indentation
5897
5898   $max_index_to_go
5899   $comma_count_in_batch
5900   $old_line_count_in_batch
5901   $last_nonblank_index_to_go
5902   $last_nonblank_type_to_go
5903   $last_nonblank_token_to_go
5904   $last_last_nonblank_index_to_go
5905   $last_last_nonblank_type_to_go
5906   $last_last_nonblank_token_to_go
5907   @nonblank_lines_at_depth
5908   $starting_in_quote
5909   $ending_in_quote
5910
5911   $in_format_skipping_section
5912   $format_skipping_pattern_begin
5913   $format_skipping_pattern_end
5914
5915   $forced_breakpoint_count
5916   $forced_breakpoint_undo_count
5917   @forced_breakpoint_undo_stack
5918   %postponed_breakpoint
5919
5920   $tabbing
5921   $embedded_tab_count
5922   $first_embedded_tab_at
5923   $last_embedded_tab_at
5924   $deleted_semicolon_count
5925   $first_deleted_semicolon_at
5926   $last_deleted_semicolon_at
5927   $added_semicolon_count
5928   $first_added_semicolon_at
5929   $last_added_semicolon_at
5930   $first_tabbing_disagreement
5931   $last_tabbing_disagreement
5932   $in_tabbing_disagreement
5933   $tabbing_disagreement_count
5934   $input_line_tabbing
5935
5936   $last_line_type
5937   $last_line_leading_type
5938   $last_line_leading_level
5939   $last_last_line_leading_level
5940
5941   %block_leading_text
5942   %block_opening_line_number
5943   $csc_new_statement_ok
5944   $csc_last_label
5945   %csc_block_label
5946   $accumulating_text_for_block
5947   $leading_block_text
5948   $rleading_block_if_elsif_text
5949   $leading_block_text_level
5950   $leading_block_text_length_exceeded
5951   $leading_block_text_line_length
5952   $leading_block_text_line_number
5953   $closing_side_comment_prefix_pattern
5954   $closing_side_comment_list_pattern
5955
5956   $last_nonblank_token
5957   $last_nonblank_type
5958   $last_last_nonblank_token
5959   $last_last_nonblank_type
5960   $last_nonblank_block_type
5961   $last_output_level
5962   %is_do_follower
5963   %is_if_brace_follower
5964   %space_after_keyword
5965   $rbrace_follower
5966   $looking_for_else
5967   %is_last_next_redo_return
5968   %is_other_brace_follower
5969   %is_else_brace_follower
5970   %is_anon_sub_brace_follower
5971   %is_anon_sub_1_brace_follower
5972   %is_sort_map_grep
5973   %is_sort_map_grep_eval
5974   %is_sort_map_grep_eval_do
5975   %is_block_without_semicolon
5976   %is_if_unless
5977   %is_and_or
5978   %is_assignment
5979   %is_chain_operator
5980   %is_if_unless_and_or_last_next_redo_return
5981   %is_until_while_for_if_elsif_else
5982
5983   @has_broken_sublist
5984   @dont_align
5985   @want_comma_break
5986
5987   $is_static_block_comment
5988   $index_start_one_line_block
5989   $semicolons_before_block_self_destruct
5990   $index_max_forced_break
5991   $input_line_number
5992   $diagnostics_object
5993   $vertical_aligner_object
5994   $logger_object
5995   $file_writer_object
5996   $formatter_self
5997   @ci_stack
5998   $last_line_had_side_comment
5999   %want_break_before
6000   %outdent_keyword
6001   $static_block_comment_pattern
6002   $static_side_comment_pattern
6003   %opening_vertical_tightness
6004   %closing_vertical_tightness
6005   %closing_token_indentation
6006   $some_closing_token_indentation
6007
6008   %opening_token_right
6009   %stack_opening_token
6010   %stack_closing_token
6011
6012   $block_brace_vertical_tightness_pattern
6013
6014   $rOpts_add_newlines
6015   $rOpts_add_whitespace
6016   $rOpts_block_brace_tightness
6017   $rOpts_block_brace_vertical_tightness
6018   $rOpts_brace_left_and_indent
6019   $rOpts_comma_arrow_breakpoints
6020   $rOpts_break_at_old_keyword_breakpoints
6021   $rOpts_break_at_old_comma_breakpoints
6022   $rOpts_break_at_old_logical_breakpoints
6023   $rOpts_break_at_old_ternary_breakpoints
6024   $rOpts_break_at_old_attribute_breakpoints
6025   $rOpts_closing_side_comment_else_flag
6026   $rOpts_closing_side_comment_maximum_text
6027   $rOpts_continuation_indentation
6028   $rOpts_cuddled_else
6029   $rOpts_delete_old_whitespace
6030   $rOpts_fuzzy_line_length
6031   $rOpts_indent_columns
6032   $rOpts_line_up_parentheses
6033   $rOpts_maximum_fields_per_table
6034   $rOpts_maximum_line_length
6035   $rOpts_short_concatenation_item_length
6036   $rOpts_keep_old_blank_lines
6037   $rOpts_ignore_old_breakpoints
6038   $rOpts_format_skipping
6039   $rOpts_space_function_paren
6040   $rOpts_space_keyword_paren
6041   $rOpts_keep_interior_semicolons
6042
6043   $half_maximum_line_length
6044
6045   %is_opening_type
6046   %is_closing_type
6047   %is_keyword_returning_list
6048   %tightness
6049   %matching_token
6050   $rOpts
6051   %right_bond_strength
6052   %left_bond_strength
6053   %binary_ws_rules
6054   %want_left_space
6055   %want_right_space
6056   %is_digraph
6057   %is_trigraph
6058   $bli_pattern
6059   $bli_list_string
6060   %is_closing_type
6061   %is_opening_type
6062   %is_closing_token
6063   %is_opening_token
6064 };
6065
BEGIN {

    # Set up lookup tables which do not depend on any options.  Each
    # hash maps the listed words or tokens to 1.

    # default list of block types for which -bli would apply
    $bli_list_string = 'if else elsif unless while for foreach do : sub';

    # scratch list for the hash slices below (a lexical, so that @_ is
    # left alone)
    my @q;

    @q = qw(
      .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x=
    );
    @is_digraph{@q} = (1) x scalar(@q);

    @q = qw( ... **= <<= >>= &&= ||= //= <=> );
    @is_trigraph{@q} = (1) x scalar(@q);

    @q = qw(
      = **= += *= &= <<= &&=
      -= /= |= >>= ||= //=
      .= %= ^=
      x=
    );
    @is_assignment{@q} = (1) x scalar(@q);

    @q = qw(
      grep
      keys
      map
      reverse
      sort
      split
    );
    @is_keyword_returning_list{@q} = (1) x scalar(@q);

    @q = qw(is if unless and or err last next redo return);
    @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);

    # always break after a closing curly of these block types:
    @q = qw(until while for if elsif else);
    @is_until_while_for_if_elsif_else{@q} = (1) x scalar(@q);

    @q = qw(last next redo return);
    @is_last_next_redo_return{@q} = (1) x scalar(@q);

    @q = qw(sort map grep);
    @is_sort_map_grep{@q} = (1) x scalar(@q);

    @q = qw(sort map grep eval);
    @is_sort_map_grep_eval{@q} = (1) x scalar(@q);

    @q = qw(sort map grep eval do);
    @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);

    @q = qw(if unless);
    @is_if_unless{@q} = (1) x scalar(@q);

    @q = qw(and or err);
    @is_and_or{@q} = (1) x scalar(@q);

    # Identify certain operators which often occur in chains.
    # Note: the minus (-) causes a side effect of padding of the first line in
    # something like this (by sub set_logical_padding):
    #    Checkbutton => 'Transmission checked',
    #   -variable    => \$TRANS
    # This usually improves appearance so it seems ok.
    @q = qw(&& || and or : ? . + - * /);
    @is_chain_operator{@q} = (1) x scalar(@q);

    # We can remove semicolons after blocks preceded by these keywords
    @q =
      qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
      unless while until for foreach given when default);
    @is_block_without_semicolon{@q} = (1) x scalar(@q);

    # 'L' is token for opening { at hash key
    @q = qw" L { ( [ ";
    @is_opening_type{@q} = (1) x scalar(@q);

    # 'R' is token for closing } at hash key
    @q = qw" R } ) ] ";
    @is_closing_type{@q} = (1) x scalar(@q);

    @q = qw" { ( [ ";
    @is_opening_token{@q} = (1) x scalar(@q);

    @q = qw" } ) ] ";
    @is_closing_token{@q} = (1) x scalar(@q);
}
6152
# Codes for the whitespace rules
use constant WS_NO       => -1;
use constant WS_OPTIONAL => 0;
use constant WS_YES      => 1;

# Token bond strengths, listed from the weakest to the strongest.
use constant VERY_WEAK   => 0.55;
use constant WEAK        => 0.8;
use constant NOMINAL     => 1.1;
use constant STRONG      => 2.1;
use constant VERY_STRONG => 100;
use constant NO_BREAK    => 10000;

# Value for testing indexes in the output array
use constant UNDEFINED_INDEX => -1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

# Increment between sequence numbers for each type.
# For example, ?: pairs might have numbers 7,11,15,...
use constant TYPE_SEQUENCE_INCREMENT => 4;
6175
{

    # A counter of instances, private to the three routines below.
    # get_count returns the current value; the other two change it by
    # one and return the new value.
    my $instance_count = 0;

    sub get_count {
        return $instance_count;
    }

    sub _increment_count {
        $instance_count += 1;
        return $instance_count;
    }

    sub _decrement_count {
        $instance_count -= 1;
        return $instance_count;
    }
}
6184
sub trim {

    # Remove leading and trailing whitespace from a string.  The
    # caller's variable is changed in place (through the @_ alias) and
    # the trimmed string is also returned.
    for ( $_[0] ) {
        s/\s+$//;
        s/^\s+//;
    }
    return $_[0];
}
6192
sub split_words {

    # Break a string of whitespace-separated words into a list of the
    # words.  A false string (undefined, empty, or '0') gives an empty
    # return.
    my $str = shift;
    return if ( !$str );

    # trim both ends first so that split returns no empty leading field
    for ($str) {
        s/^\s+//;
        s/\s+$//;
    }
    return split( /\s+/, $str );
}
6203
6204 # interface to Perl::Tidy::Logger routines
sub warning {

    # Pass a warning along to the logger, if there is one.  All
    # arguments are forwarded; the result is whatever the logger's
    # 'warning' method returns, or a false value without a logger.
    return $logger_object && $logger_object->warning(@_);
}
6210
sub complain {

    # Pass a complaint along to the logger, if there is one.  All
    # arguments are forwarded; the result is whatever the logger's
    # 'complain' method returns, or a false value without a logger.
    return $logger_object && $logger_object->complain(@_);
}
6216
sub write_logfile_entry {

    # Pass text for the log file along to the logger, if there is one.
    # All arguments are forwarded; the result is whatever the logger's
    # 'write_logfile_entry' method returns, or a false value without a
    # logger.
    return $logger_object && $logger_object->write_logfile_entry(@_);
}
6222
sub black_box {

    # Forward a call to the logger's 'black_box' method, if there is a
    # logger.  All arguments are passed along; the result is whatever
    # that method returns, or a false value without a logger.
    return $logger_object && $logger_object->black_box(@_);
}
6228
sub report_definite_bug {

    # Call the logger's 'report_definite_bug' method, if there is a
    # logger.  No arguments are passed along.  The result is whatever
    # that method returns, or a false value without a logger.
    return $logger_object && $logger_object->report_definite_bug();
}
6234
sub get_saw_brace_error {

    # Return the result of the logger's 'get_saw_brace_error' method, or
    # a false value if there is no logger.
    return $logger_object && $logger_object->get_saw_brace_error();
}
6240
sub we_are_at_the_last_line {

    # Call the logger's 'we_are_at_the_last_line' method, if there is a
    # logger.  No arguments are passed along.  The result is whatever
    # that method returns, or a false value without a logger.
    return $logger_object && $logger_object->we_are_at_the_last_line();
}
6246
6247 # interface to Perl::Tidy::Diagnostics routine
sub write_diagnostics {

    # Pass diagnostic text along to the diagnostics object, if there is
    # one.  All arguments are forwarded; the result is whatever its
    # 'write_diagnostics' method returns, or a false value without a
    # diagnostics object.
    return $diagnostics_object
      && $diagnostics_object->write_diagnostics(@_);
}
6254
sub get_added_semicolon_count {

    # Accessor: return the current value of $added_semicolon_count.
    # The invocant is accepted but not used.
    my ($self) = @_;
    return $added_semicolon_count;
}
6259
sub DESTROY {

    # Destructor: one instance fewer, so decrement the instance count
    my ($self) = @_;
    return $self->_decrement_count();
}
6263
6264 sub new {
6265
6266     my $class = shift;
6267
6268     # we are given an object with a write_line() method to take lines
6269     my %defaults = (
6270         sink_object        => undef,
6271         diagnostics_object => undef,
6272         logger_object      => undef,
6273     );
6274     my %args = ( %defaults, @_ );
6275
6276     $logger_object      = $args{logger_object};
6277     $diagnostics_object = $args{diagnostics_object};
6278
6279     # we create another object with a get_line() and peek_ahead() method
6280     my $sink_object = $args{sink_object};
6281     $file_writer_object =
6282       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6283
6284     # initialize the leading whitespace stack to negative levels
6285     # so that we can never run off the end of the stack
6286     $gnu_position_predictor = 0;    # where the current token is predicted to be
6287     $max_gnu_stack_index    = 0;
6288     $max_gnu_item_index     = -1;
6289     $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6290     @gnu_item_list                   = ();
6291     $last_output_indentation         = 0;
6292     $last_indentation_written        = 0;
6293     $last_unadjusted_indentation     = 0;
6294     $last_leading_token              = "";
6295     $last_output_short_opening_token = 0;
6296
6297     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6298     $saw_END_or_DATA_         = 0;
6299
6300     @block_type_to_go            = ();
6301     @type_sequence_to_go         = ();
6302     @container_environment_to_go = ();
6303     @bond_strength_to_go         = ();
6304     @forced_breakpoint_to_go     = ();
6305     @lengths_to_go               = ();    # line length to start of ith token
6306     @levels_to_go                = ();
6307     @matching_token_to_go        = ();
6308     @mate_index_to_go            = ();
6309     @nesting_blocks_to_go        = ();
6310     @ci_levels_to_go             = ();
6311     @nesting_depth_to_go         = (0);
6312     @nobreak_to_go               = ();
6313     @old_breakpoint_to_go        = ();
6314     @tokens_to_go                = ();
6315     @types_to_go                 = ();
6316     @leading_spaces_to_go        = ();
6317     @reduced_spaces_to_go        = ();
6318
6319     @dont_align         = ();
6320     @has_broken_sublist = ();
6321     @want_comma_break   = ();
6322
6323     @ci_stack                   = ("");
6324     $first_tabbing_disagreement = 0;
6325     $last_tabbing_disagreement  = 0;
6326     $tabbing_disagreement_count = 0;
6327     $in_tabbing_disagreement    = 0;
6328     $input_line_tabbing         = undef;
6329
6330     $last_line_type               = "";
6331     $last_last_line_leading_level = 0;
6332     $last_line_leading_level      = 0;
6333     $last_line_leading_type       = '#';
6334
6335     $last_nonblank_token        = ';';
6336     $last_nonblank_type         = ';';
6337     $last_last_nonblank_token   = ';';
6338     $last_last_nonblank_type    = ';';
6339     $last_nonblank_block_type   = "";
6340     $last_output_level          = 0;
6341     $looking_for_else           = 0;
6342     $embedded_tab_count         = 0;
6343     $first_embedded_tab_at      = 0;
6344     $last_embedded_tab_at       = 0;
6345     $deleted_semicolon_count    = 0;
6346     $first_deleted_semicolon_at = 0;
6347     $last_deleted_semicolon_at  = 0;
6348     $added_semicolon_count      = 0;
6349     $first_added_semicolon_at   = 0;
6350     $last_added_semicolon_at    = 0;
6351     $last_line_had_side_comment = 0;
6352     $is_static_block_comment    = 0;
6353     %postponed_breakpoint       = ();
6354
6355     # variables for adding side comments
6356     %block_leading_text        = ();
6357     %block_opening_line_number = ();
6358     $csc_new_statement_ok      = 1;
6359     %csc_block_label           = ();
6360
6361     %saved_opening_indentation  = ();
6362     $in_format_skipping_section = 0;
6363
6364     reset_block_text_accumulator();
6365
6366     prepare_for_new_input_lines();
6367
6368     $vertical_aligner_object =
6369       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6370         $logger_object, $diagnostics_object );
6371
6372     if ( $rOpts->{'entab-leading-whitespace'} ) {
6373         write_logfile_entry(
6374 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6375         );
6376     }
6377     elsif ( $rOpts->{'tabs'} ) {
6378         write_logfile_entry("Indentation will be with a tab character\n");
6379     }
6380     else {
6381         write_logfile_entry(
6382             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6383     }
6384
6385     # This was the start of a formatter referent, but object-oriented
6386     # coding has turned out to be too slow here.
6387     $formatter_self = {};
6388
6389     bless $formatter_self, $class;
6390
6391     # Safety check..this is not a class yet
6392     if ( _increment_count() > 1 ) {
6393         confess
6394 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6395     }
6396     return $formatter_self;
6397 }
6398
sub prepare_for_new_input_lines {

    # Put all of the per-batch state back to its starting values so that
    # a new batch of output tokens can be accumulated.  Takes no arguments.

    # count this batch
    $gnu_sequence_number++;

    # bookkeeping hashes which are filled in by set_leading_whitespace
    %last_gnu_equals = ();
    %gnu_comma_count = ();
    %gnu_arrow_count = ();

    # indexes which have no meaningful value until tokens are stored
    $max_gnu_item_index = $index_max_forced_break = $max_index_to_go =
      $last_nonblank_index_to_go = $last_last_nonblank_index_to_go =
      UNDEFINED_INDEX;

    # no nonblank tokens have been seen yet in this batch
    $last_nonblank_type_to_go  = $last_last_nonblank_type_to_go  = '';
    $last_nonblank_token_to_go = $last_last_nonblank_token_to_go = '';

    # counters which start from zero ..
    $line_start_index_to_go       = 0;
    $forced_breakpoint_count      = 0;
    $forced_breakpoint_undo_count = 0;
    $comma_count_in_batch         = 0;
    $starting_in_quote            = 0;
    $lengths_to_go[0]             = 0;

    # .. and the remaining odds and ends
    $old_line_count_in_batch = 1;
    $rbrace_follower         = undef;

    # finally, forget any one-line block which was under construction
    destroy_one_line_block();
}
6425
sub write_line {

    # Accept one line from the input stream and pass it on toward the
    # output.
    #
    # Given:
    #   $self           - the formatter object (not otherwise used here)
    #   $line_of_tokens - hash ref; the keys _line_type and _line_text are
    #                     read here, and the whole hash is handed on for
    #                     lines of code
    #
    # Lines of code go to print_line_of_tokens; every other kind of line
    # is written without indentation, unless it is pod which the user
    # asked to have deleted.
    my ( $self, $line_of_tokens ) = @_;

    my ( $line_type, $input_line ) =
      @{$line_of_tokens}{qw( _line_type _line_text )};

    # with 'notidy' every line goes straight through
    if ( $rOpts->{notidy} ) {
        write_unindented_line($input_line);
        $last_line_type = $line_type;
        return;
    }

    # The possible values of _line_type are:
    #   SYSTEM         - system-specific code before hash-bang line
    #   CODE           - line of perl code (including comments)
    #   POD_START      - line starting pod, such as '=head'
    #   POD            - pod documentation text
    #   POD_END        - last line of pod section, '=cut'
    #   HERE           - text of here-document
    #   HERE_END       - last line of here-doc (target word)
    #   FORMAT         - format section
    #   FORMAT_END     - last line of format section, '.'
    #   DATA_START     - __DATA__ line
    #   DATA           - unidentified text following __DATA__
    #   END_START      - __END__ line
    #   END            - unidentified text following __END__
    #   ERROR          - we are in big trouble, probably not a perl script

    # podchecker requires a blank line after an =cut, so supply one if
    # necessary -- but only ahead of any __END__ or __DATA__ line
    if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
        $file_writer_object->reset_consecutive_blank_lines();
        unless ( $input_line =~ /^\s*$/ ) { want_blank_line() }
    }

    if ( $line_type eq 'CODE' ) {

        # the logger gets to look at every non-blank line of code
        unless ( $input_line =~ /^\s*$/ ) {
            black_box( $line_of_tokens,
                $vertical_aligner_object->get_output_line_number() );
        }
        print_line_of_tokens($line_of_tokens);
    }
    else {

        # flags controlling how this non-code line is written
        my $skip_line = 0;
        my $tee_line  = 0;

        if ( $line_type =~ /^POD/ ) {
            $skip_line = 1 if ( $rOpts->{'delete-pod'} );
            $tee_line  = 1 if ( $rOpts->{'tee-pod'} );

            # Pod should be preceded by a blank line.  Do not do this in
            # an __END__ or __DATA__ section though, since the user may
            # be using such a section for any purpose whatsoever.
            want_blank_line()
              if ( !$skip_line
                && $line_type eq 'POD_START'
                && !$saw_END_or_DATA_ );
        }

        # start with predictable blank line counters after an __END__
        # or __DATA__ line
        elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
            $file_writer_object->reset_consecutive_blank_lines();
            $saw_END_or_DATA_ = 1;
        }

        # output the line as-is, with the tee switched on around it
        # if requested
        unless ($skip_line) {
            $file_writer_object->tee_on() if ($tee_line);
            write_unindented_line($input_line);
            $file_writer_object->tee_off() if ($tee_line);
        }
    }
    $last_line_type = $line_type;
}
6512
sub create_one_line_block {

    # Record the start of a one-line block.
    # Given:
    #   first arg  - index at which the one-line block starts
    #   second arg - value for the semicolon self-destruct counter
    my ( $i_start, $semicolon_count ) = @_;
    $index_start_one_line_block            = $i_start;
    $semicolons_before_block_self_destruct = $semicolon_count;
}
6517
sub destroy_one_line_block {

    # Forget any one-line block: the start index goes back to the
    # 'undefined' marker and the semicolon counter back to zero.
    ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
      ( UNDEFINED_INDEX, 0 );
    $semicolons_before_block_self_destruct;
}
6522
sub leading_spaces_to_go {

    # Given the index of a token in the output batch, return its number
    # of indentation spaces.  The stored value (see
    # 'set_leading_whitespace') may be a plain number or an indentation
    # object; get_SPACES handles either.
    my ($i) = @_;
    my $indentation = $leading_spaces_to_go[$i];
    return get_SPACES($indentation);
}
6531
sub get_SPACES {

    # Return the number of leading spaces represented by an indentation
    # variable.  The variable is either
    #   - a plain number of spaces, which is returned unchanged, or
    #   - an object, in which case its get_SPACES method supplies the value.
    my ($indentation) = @_;
    return $indentation unless ref($indentation);
    return $indentation->get_SPACES();
}
6540
sub get_RECOVERABLE_SPACES {

    # Return the shift (+ means to the right, - means to the left) that
    # we would like to apply to a group of lines with the same
    # indentation so that they line up with their opening parens.
    # Only an indentation object can carry such a value; for a plain
    # space count the answer is always 0.
    my ($indentation) = @_;
    return 0 unless ref($indentation);
    return $indentation->get_RECOVERABLE_SPACES();
}
6549
sub get_AVAILABLE_SPACES_to_go {

    # Given the index of a token in the output batch, return the number
    # of available leading spaces stored for it.  The stored indentation
    # is either a plain number of spaces, for which nothing is available
    # and 0 is returned, or an object with a get_AVAILABLE_SPACES method.
    my ($i) = @_;
    my $item = $leading_spaces_to_go[$i];
    return 0 unless ref($item);
    return $item->get_AVAILABLE_SPACES();
}
6559
sub new_lp_indentation_item {

    # Interface to the IndentationItem class: create a new indentation
    # item and return it.
    #
    # Given:
    #   $spaces, $level, $ci_level, $available_spaces, $align_paren
    #     - values handed to the IndentationItem constructor, which also
    #       receives the current batch sequence number, stack index and
    #       line start index
    #
    # An item with a non-negative level is also appended to the item
    # list of the current batch.  A negative level means that the item
    # is not to be stored in that list; it is then given index 0.
    my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;

    my $store_in_list = ( $level >= 0 );
    my $index = $store_in_list ? ++$max_gnu_item_index : 0;

    my @constructor_args = (
        $spaces,   $level,               $ci_level,
        $available_spaces,               $index,
        $gnu_sequence_number,            $align_paren,
        $max_gnu_stack_index,            $line_start_index_to_go,
    );
    my $item = Perl::Tidy::IndentationItem->new(@constructor_args);

    $gnu_item_list[$max_gnu_item_index] = $item if ($store_in_list);

    return $item;
}
6583
sub set_leading_whitespace {

    # This routine defines leading whitespace for the token at index
    # $max_index_to_go of the output batch.
    #
    # Given:
    #   $level              - the indentation level of the token
    #   $ci_level           - the continuation indentation level of the token
    #   $in_continued_quote - true if the token is the continuation of a
    #                         multi-line quote or pattern, which must not
    #                         receive any leading whitespace
    #
    # Define: the space count of the leading string which would apply if
    # the token were the first token of a new line.  The result is stored
    # in $leading_spaces_to_go[$max_index_to_go], and a companion value in
    # $reduced_spaces_to_go[$max_index_to_go].  In the standard scheme the
    # stored values are plain numbers; with -lp they are indentation item
    # objects, so that the whitespace can still be adjusted later.
    #
    # There is no useful return value.

    my ( $level, $ci_level, $in_continued_quote ) = @_;

    # modify for -bli, which adds one continuation indentation for
    # opening braces.
    # The pattern is matched without the /o modifier, because /o would
    # freeze the first value of $bli_pattern for the life of the process.
    # NOTE(review): this matters only if $bli_pattern can be rebuilt
    # between formatter runs in one process -- confirm; the change is
    # harmless if the pattern never changes.
    if (   $rOpts_brace_left_and_indent
        && $max_index_to_go == 0
        && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/ )
    {
        $ci_level++;
    }

    # patch to avoid trouble when input file has negative indentation.
    # other logic should catch this error.
    if ( $level < 0 ) { $level = 0 }

    #-------------------------------------------
    # handle the standard indentation scheme
    #-------------------------------------------
    unless ($rOpts_line_up_parentheses) {
        my $space_count =
          $ci_level * $rOpts_continuation_indentation +
          $level * $rOpts_indent_columns;
        my $ci_spaces =
          ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;

        if ($in_continued_quote) {
            $space_count = 0;
            $ci_spaces   = 0;
        }
        $leading_spaces_to_go[$max_index_to_go] = $space_count;
        $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
        return;
    }

    #-------------------------------------------------------------
    # handle case of -lp indentation..
    #-------------------------------------------------------------

    # The continued_quote flag means that this is the first token of a
    # line, and it is the continuation of some kind of multi-line quote
    # or pattern.  It requires special treatment because it must have no
    # added leading whitespace. So we create a special indentation item
    # which is not in the stack.
    if ($in_continued_quote) {
        my $space_count     = 0;
        my $available_space = 0;
        $level = -1;    # flag to prevent storing in item_list
        $leading_spaces_to_go[$max_index_to_go] =
          $reduced_spaces_to_go[$max_index_to_go] =
          new_lp_indentation_item( $space_count, $level, $ci_level,
            $available_space, 0 );
        return;
    }

    # get the top state from the stack
    my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
    my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
    my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();

    my $type        = $types_to_go[$max_index_to_go];
    my $token       = $tokens_to_go[$max_index_to_go];
    my $total_depth = $nesting_depth_to_go[$max_index_to_go];

    if ( $type eq '{' || $type eq '(' ) {

        # start fresh counts for the container which is being opened
        $gnu_comma_count{ $total_depth + 1 } = 0;
        $gnu_arrow_count{ $total_depth + 1 } = 0;

        # If we come to an opening token after an '=' token of some type,
        # see if it would be helpful to 'break' after the '=' to save space
        my $last_equals = $last_gnu_equals{$total_depth};
        if ( $last_equals && $last_equals > $line_start_index_to_go ) {

            # find the position if we break at the '='
            my $i_test = $last_equals;
            if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }

            # TESTING
            ##my $too_close = ($i_test==$max_index_to_go-1);

            my $test_position = total_line_length( $i_test, $max_index_to_go );

            if (

                # the equals is not just before an open paren (testing)
                ##!$too_close &&

                # if we are beyond the midpoint
                $gnu_position_predictor > $half_maximum_line_length

                # or we are beyond the 1/4 point and there was an old
                # break at the equals
                || (
                    $gnu_position_predictor > $half_maximum_line_length / 2
                    && (
                        $old_breakpoint_to_go[$last_equals]
                        || (   $last_equals > 0
                            && $old_breakpoint_to_go[ $last_equals - 1 ] )
                        || (   $last_equals > 1
                            && $types_to_go[ $last_equals - 1 ] eq 'b'
                            && $old_breakpoint_to_go[ $last_equals - 2 ] )
                    )
                )
              )
            {

                # then make the switch -- note that we do not set a real
                # breakpoint here because we may not really need one; sub
                # scan_list will do that if necessary
                $line_start_index_to_go = $i_test + 1;
                $gnu_position_predictor = $test_position;
            }
        }
    }

    # Check for decreasing depth ..
    # Note that one token may have both decreasing and then increasing
    # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
    # in this example we would first go back to (1,0) then up to (2,0)
    # in a single call.
    if ( $level < $current_level || $ci_level < $current_ci_level ) {

        # loop to find the first entry at or completely below this level.
        # The loop variables start out with the values at the top of the
        # stack so that the error message below never has to print an
        # undefined value if the stack is already empty on the first pass.
        my ( $lev, $ci_lev ) = ( $current_level, $current_ci_level );
        while (1) {
            if ($max_gnu_stack_index) {

                # save index of token which closes this level
                $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);

                # Undo any extra indentation if we saw no commas
                my $available_spaces =
                  $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();

                my $comma_count = 0;
                my $arrow_count = 0;
                if ( $type eq '}' || $type eq ')' ) {
                    $comma_count = $gnu_comma_count{$total_depth};
                    $arrow_count = $gnu_arrow_count{$total_depth};
                    $comma_count = 0 unless $comma_count;
                    $arrow_count = 0 unless $arrow_count;
                }
                $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
                $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);

                if ( $available_spaces > 0 ) {

                    if ( $comma_count <= 0 || $arrow_count > 0 ) {

                        my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
                        my $seqno =
                          $gnu_stack[$max_gnu_stack_index]
                          ->get_SEQUENCE_NUMBER();

                        # Be sure this item was created in this batch.  This
                        # should be true because we delete any available
                        # space from open items at the end of each batch.
                        if (   $gnu_sequence_number != $seqno
                            || $i > $max_gnu_item_index )
                        {
                            warning(
"Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
                            );
                            report_definite_bug();
                        }

                        else {

                            # with no arrows the space is given up for
                            # good; otherwise only tentatively
                            if ( $arrow_count == 0 ) {
                                $gnu_item_list[$i]
                                  ->permanently_decrease_AVAILABLE_SPACES(
                                    $available_spaces);
                            }
                            else {
                                $gnu_item_list[$i]
                                  ->tentatively_decrease_AVAILABLE_SPACES(
                                    $available_spaces);
                            }

                            # items created after this one lose the same
                            # number of spaces
                            foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
                                $gnu_item_list[$j]
                                  ->decrease_SPACES($available_spaces);
                            }
                        }
                    }
                }

                # go down one level
                --$max_gnu_stack_index;
                $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
                $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();

                # stop when we reach a level at or below the current level
                if ( $lev <= $level && $ci_lev <= $ci_level ) {
                    $space_count =
                      $gnu_stack[$max_gnu_stack_index]->get_SPACES();
                    $current_level    = $lev;
                    $current_ci_level = $ci_lev;
                    last;
                }
            }

            # reached bottom of stack .. should never happen because
            # only negative levels can get here, and $level was forced
            # to be positive above.
            else {
                warning(
"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
                );
                report_definite_bug();
                last;
            }
        }
    }

    # handle increasing depth
    if ( $level > $current_level || $ci_level > $current_ci_level ) {

        # Compute the standard incremental whitespace.  This will be
        # the minimum incremental whitespace that will be used.  This
        # choice results in a smooth transition between the gnu-style
        # and the standard style.
        my $standard_increment =
          ( $level - $current_level ) * $rOpts_indent_columns +
          ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;

        # Now we have to define how much extra incremental space
        # ("$available_space") we want.  This extra space will be
        # reduced as necessary when long lines are encountered or when
        # it becomes clear that we do not have a good list.
        my $available_space = 0;
        my $align_paren     = 0;
        my $excess          = 0;

        # initialization on empty stack..
        if ( $max_gnu_stack_index == 0 ) {
            $space_count = $level * $rOpts_indent_columns;
        }

        # if this is a BLOCK, add the standard increment
        elsif ($last_nonblank_block_type) {
            $space_count += $standard_increment;
        }

        # if last nonblank token was not structural indentation,
        # just use standard increment
        elsif ( $last_nonblank_type ne '{' ) {
            $space_count += $standard_increment;
        }

        # otherwise use the space to the first non-blank level change token
        else {

            $space_count = $gnu_position_predictor;

            my $min_gnu_indentation =
              $gnu_stack[$max_gnu_stack_index]->get_SPACES();

            $available_space = $space_count - $min_gnu_indentation;
            if ( $available_space >= $standard_increment ) {
                $min_gnu_indentation += $standard_increment;
            }
            elsif ( $available_space > 1 ) {
                $min_gnu_indentation += $available_space + 1;
            }
            elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
                if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
                    $min_gnu_indentation += 2;
                }
                else {
                    $min_gnu_indentation += 1;
                }
            }
            else {
                $min_gnu_indentation += $standard_increment;
            }
            $available_space = $space_count - $min_gnu_indentation;

            if ( $available_space < 0 ) {
                $space_count     = $min_gnu_indentation;
                $available_space = 0;
            }
            $align_paren = 1;
        }

        # update state, but not on a blank token
        if ( $types_to_go[$max_index_to_go] ne 'b' ) {

            $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);

            ++$max_gnu_stack_index;
            $gnu_stack[$max_gnu_stack_index] =
              new_lp_indentation_item( $space_count, $level, $ci_level,
                $available_space, $align_paren );

            # If the opening paren is beyond the half-line length, then
            # we will use the minimum (standard) indentation.  This will
            # help avoid problems associated with running out of space
            # near the end of a line.  As a result, in deeply nested
            # lists, there will be some indentations which are limited
            # to this minimum standard indentation. But the most deeply
            # nested container will still probably be able to shift its
            # parameters to the right for proper alignment, so in most
            # cases this will not be noticeable.
            if (   $available_space > 0
                && $space_count > $half_maximum_line_length )
            {
                $gnu_stack[$max_gnu_stack_index]
                  ->tentatively_decrease_AVAILABLE_SPACES($available_space);
            }
        }
    }

    # Count commas and look for non-list characters.  Once we see a
    # non-list character, we give up and don't look for any more commas.
    if ( $type eq '=>' ) {
        $gnu_arrow_count{$total_depth}++;

        # tentatively treating '=>' like '=' for estimating breaks
        # TODO: this could use some experimentation
        $last_gnu_equals{$total_depth} = $max_index_to_go;
    }

    elsif ( $type eq ',' ) {
        $gnu_comma_count{$total_depth}++;
    }

    elsif ( $is_assignment{$type} ) {
        $last_gnu_equals{$total_depth} = $max_index_to_go;
    }

    # this token might start a new line
    # if this is a non-blank..
    if ( $type ne 'b' ) {

        # and if ..
        if (

            # this is the first nonblank token of the line
            $max_index_to_go == 1 && $types_to_go[0] eq 'b'

            # or previous character was one of these:
            || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/

            # or previous character was opening and this does not close it
            || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
            || ( $last_nonblank_type_to_go eq '(' && $type ne ')' )

            # or this token is one of these:
            || $type =~ /^([\.]|\|\||\&\&)$/

            # or this is a closing structure
            || (   $last_nonblank_type_to_go eq '}'
                && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )

            # or previous token was keyword 'return'
            || ( $last_nonblank_type_to_go eq 'k'
                && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )

            # or starting a new line at certain keywords is fine
            || (   $type eq 'k'
                && $is_if_unless_and_or_last_next_redo_return{$token} )

            # or this is after an assignment after a closing structure
            || (
                $is_assignment{$last_nonblank_type_to_go}
                && (
                    $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/

                    # and it is significantly to the right
                    || $gnu_position_predictor > $half_maximum_line_length
                )
            )
          )
        {
            check_for_long_gnu_style_lines();
            $line_start_index_to_go = $max_index_to_go;

            # back up 1 token if we want to break before that type
            # otherwise, we may strand tokens like '?' or ':' on a line
            if ( $line_start_index_to_go > 0 ) {
                if ( $last_nonblank_type_to_go eq 'k' ) {

                    if ( $want_break_before{$last_nonblank_token_to_go} ) {
                        $line_start_index_to_go--;
                    }
                }
                elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
                    $line_start_index_to_go--;
                }
            }
        }
    }

    # remember the predicted position of this token on the output line
    if ( $max_index_to_go > $line_start_index_to_go ) {
        $gnu_position_predictor =
          total_line_length( $line_start_index_to_go, $max_index_to_go );
    }
    else {
        $gnu_position_predictor = $space_count +
          token_sequence_length( $max_index_to_go, $max_index_to_go );
    }

    # store the indentation object for this token
    # this allows us to manipulate the leading whitespace
    # (in case we have to reduce indentation to fit a line) without
    # having to change any token values
    $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
    $reduced_spaces_to_go[$max_index_to_go] =
      ( $max_gnu_stack_index > 0 && $ci_level )
      ? $gnu_stack[ $max_gnu_stack_index - 1 ]
      : $gnu_stack[$max_gnu_stack_index];
    return;
}
7011
sub check_for_long_gnu_style_lines {

    # look at the current estimated maximum line length, and
    # remove some whitespace if it exceeds the desired maximum
    #
    # Takes no arguments.  Works on the -lp state of the formatter:
    # it may decrease the available spaces and the spaces of the
    # indentation items of the current batch, and it reduces
    # $gnu_position_predictor by the number of spaces removed.
    # There is no useful return value.

    # this is only for the '-lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # see if we have exceeded the maximum desired line length
    # keep 2 extra free because they are needed in some cases
    # (result of trial-and-error testing)
    my $spaces_needed =
      $gnu_position_predictor - $rOpts_maximum_line_length + 2;

    return if ( $spaces_needed <= 0 );

    # We are over the limit, so try to remove a requested number of
    # spaces from leading whitespace.  We are only allowed to remove
    # from whitespace items created on this batch, since others have
    # already been used and cannot be undone.
    my @candidates = ();

    # loop over all whitespace items created for the current batch
    foreach my $i ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$i];

        # item must still be open to be a candidate (otherwise it
        # cannot influence the current token)
        next if ( $item->get_CLOSED() >= 0 );

        my $available_spaces = $item->get_AVAILABLE_SPACES();

        if ( $available_spaces > 0 ) {
            push( @candidates, [ $i, $available_spaces ] );
        }
    }

    return unless (@candidates);

    # sort by available whitespace so that we can remove whitespace
    # from the maximum available first
    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;

    # keep removing whitespace until we are done or have no more
    foreach my $candidate (@candidates) {
        my ( $i_candidate, $available_spaces ) = @{$candidate};
        my $deleted_spaces =
          ( $available_spaces > $spaces_needed )
          ? $spaces_needed
          : $available_spaces;

        # remove the incremental space from this item
        $gnu_item_list[$i_candidate]
          ->decrease_AVAILABLE_SPACES($deleted_spaces);

        # update the leading whitespace of this item and all items
        # that came after it
        foreach my $i ( $i_candidate .. $max_gnu_item_index ) {

            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
            if ( $old_spaces >= $deleted_spaces ) {
                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
            }

            # shouldn't happen except for code bug:
            else {

                # report the levels of the candidate item first and then
                # those of the item which could not be reduced
                my $level        = $gnu_item_list[$i_candidate]->get_LEVEL();
                my $ci_level     = $gnu_item_list[$i_candidate]->get_CI_LEVEL();
                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
                warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$old_ci_level\n"
                );
                report_definite_bug();
            }
        }
        $gnu_position_predictor -= $deleted_spaces;
        $spaces_needed          -= $deleted_spaces;
        last unless ( $spaces_needed > 0 );
    }
    return;
}
7099
sub finish_lp_batch {

    # Called once after each output stream batch is finished.  It undoes
    # the indentation of every -lp indentation level which is still
    # open.  Leaving a level open is too risky, because we could not
    # backtrack if a long line were to follow.  As a consequence,
    # comments and blank lines disrupt this indentation style, although
    # the vertical aligner may be able to get the space back if there
    # are side comments.

    # only the 'lp' style is affected
    return unless ($rOpts_line_up_parentheses);

    # without stack items for this line there is nothing to undo
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # visit every whitespace item created for the current batch
    foreach my $index ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$index];

        # closed items are left alone
        next if ( $item->get_CLOSED() >= 0 );

        # Tentatively give up all of the available space
        # (the vertical aligner will try to get it back later)
        my $excess = $item->get_AVAILABLE_SPACES();
        next unless ( $excess > 0 );

        # remove the incremental space from this item ...
        $item->tentatively_decrease_AVAILABLE_SPACES($excess);

        # ... and from the total indentation of every node that follows;
        # any such node must necessarily be a dependent of this one
        foreach my $follower ( $index + 1 .. $max_gnu_item_index ) {
            $gnu_item_list[$follower]->decrease_SPACES($excess);
        }
    }
    return;
}
7143
sub reduce_lp_indentation {

    # Reduce the leading whitespace at token $i, if possible, by
    # $spaces_wanted (a large value of $spaces_wanted will remove all
    # excess space).  Returns 0 when nothing is removed; otherwise
    # returns whatever tentatively_decrease_AVAILABLE_SPACES reports.
    # NOTE: to be called from scan_list only for a sequence of tokens
    # contained between opening and closing parens/braces/brackets

    my ( $i, $spaces_wanted ) = @_;

    my $item   = $leading_spaces_to_go[$i];
    my $excess = $item->get_AVAILABLE_SPACES();

    # nothing available to give back
    return 0 unless ( $excess > 0 );

    # a request for more than is available is only honored for an item
    # which has no child
    return 0 if ( $spaces_wanted > $excess && $item->get_HAVE_CHILD() );

    # we'll remove these spaces, but mark them as recoverable
    my $deleted_spaces =
      $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
    return $deleted_spaces;
}
7171
sub token_sequence_length {

    # Length of the tokens $ifirst .. $ilast, counting both the first
    # and the last one, taken as a difference of entries in
    # @lengths_to_go.  Returns 0 if $ilast is negative or if
    # $ifirst > $ilast.
    my ( $ifirst, $ilast ) = @_;

    return 0 if ( $ilast < 0 );
    return 0 if ( $ifirst > $ilast );

    my $length_through_last = $lengths_to_go[ $ilast + 1 ];

    # with a negative first index nothing is subtracted
    return $length_through_last if ( $ifirst < 0 );

    return $length_through_last - $lengths_to_go[$ifirst];
}
7182
sub total_line_length {

    # Length of a line made of tokens $ifirst .. $ilast: the leading
    # spaces of the first token plus the length of the token sequence.
    my ( $ifirst, $ilast ) = @_;

    # a negative first index is treated as index 0
    $ifirst = 0 if ( $ifirst < 0 );

    my $indentation = leading_spaces_to_go($ifirst);
    my $text_length = token_sequence_length( $ifirst, $ilast );
    return $indentation + $text_length;
}
7193
sub excess_line_length {

    # Number of characters by which a line of tokens ($ifirst..$ilast)
    # exceeds the allowable line length ($rOpts_maximum_line_length).
    # The result is zero or negative when the line fits.
    my ( $ifirst, $ilast ) = @_;

    # a negative first index is treated as index 0
    $ifirst = 0 if ( $ifirst < 0 );

    my $indentation = leading_spaces_to_go($ifirst);
    my $text_length = token_sequence_length( $ifirst, $ilast );
    return $indentation + $text_length - $rOpts_maximum_line_length;
}
7204
sub finish_formatting {

    # Flush the buffer and write the closing informative messages to the
    # logfile: semicolons added and deleted, quotes or patterns with
    # embedded tabs, and indentation disagreements.  The vertical
    # aligner and the file writer are then asked for their own reports.
    my ($self) = @_;

    flush();

    # fix up line number since it was incremented
    $file_writer_object->decrement_output_line_number();
    we_are_at_the_last_line();

    # semicolons which were added
    if ( $added_semicolon_count > 0 ) {
        my $plural = ( $added_semicolon_count > 1 );
        my $first  = $plural ? "First" : "";
        my $what   = $plural ? "semicolons were" : "semicolon was";

        write_logfile_entry("$added_semicolon_count $what added:\n");
        write_logfile_entry(
            "  $first at input line $first_added_semicolon_at\n");
        write_logfile_entry(
            "   Last at input line $last_added_semicolon_at\n")
          if ($plural);
        write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
        write_logfile_entry("\n");
    }

    # semicolons which were deleted
    if ( $deleted_semicolon_count > 0 ) {
        my $plural = ( $deleted_semicolon_count > 1 );
        my $first  = $plural ? "First" : "";
        my $what   = $plural ? "semicolons were" : "semicolon was";

        write_logfile_entry(
            "$deleted_semicolon_count unnecessary $what deleted:\n");
        write_logfile_entry(
            "  $first at input line $first_deleted_semicolon_at\n");
        write_logfile_entry(
            "   Last at input line $last_deleted_semicolon_at\n")
          if ($plural);
        write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
        write_logfile_entry("\n");
    }

    # quotes or patterns containing tab characters
    if ( $embedded_tab_count > 0 ) {
        my $plural = ( $embedded_tab_count > 1 );
        my $first  = $plural ? "First" : "";
        my $what   = $plural ? "quotes or patterns" : "quote or pattern";

        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
        write_logfile_entry(
"This means the display of this script could vary with device or software\n"
        );
        write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
        write_logfile_entry(
            "   Last at input line $last_embedded_tab_at\n")
          if ($plural);
        write_logfile_entry("\n");
    }

    # indentation disagreements: where the first one was seen ...
    if ($first_tabbing_disagreement) {
        write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
        );
    }

    # ... and whether we are still in one, have left the last one behind,
    # or never saw any
    if ($in_tabbing_disagreement) {
        write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
        );
    }
    elsif ($last_tabbing_disagreement) {
        write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
        );
    }
    else {
        write_logfile_entry("No indentation disagreement seen\n");
    }
    write_logfile_entry("\n");

    $vertical_aligner_object->report_anything_unusual();

    $file_writer_object->report_line_length_errors();
}
7297
sub check_options {

    # This routine is called to check the Opts hash after it is defined.
    #
    # It is given a reference to the options hash, which it stores in
    # $rOpts.  It then
    #  - builds the comment and block-type patterns,
    #  - resolves conflicting options (a warning goes to STDERR and the
    #    offending option is switched off),
    #  - fills the whitespace, keyword, bond strength, brace follower and
    #    tightness tables from the option values, and
    #  - copies frequently used options into $rOpts_... variables.
    #
    # It dies on an unrecognized -ole value and exits after a
    # --dump-want-left-space or --dump-want-right-space request.
    #
    # NOTE: once the argument has been unpacked, @_ is reused below as a
    # scratch list.

    ($rOpts) = @_;

    make_static_block_comment_pattern();
    make_static_side_comment_pattern();
    make_closing_side_comment_prefix();
    make_closing_side_comment_list_pattern();
    $format_skipping_pattern_begin =
      make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
    $format_skipping_pattern_end =
      make_format_skipping_pattern( 'format-skipping-end', '#>>>' );

    # If closing side comments ARE selected, then we can safely
    # delete old closing side comments unless closing side comment
    # warnings are requested.  This is a good idea because it will
    # eliminate any old csc's which fall below the line count threshold.
    # We cannot do this if warnings are turned on, though, because we
    # might delete some text which has been added.  So that must
    # be handled when comments are created.
    if ( $rOpts->{'closing-side-comments'} ) {
        if ( !$rOpts->{'closing-side-comment-warnings'} ) {
            $rOpts->{'delete-closing-side-comments'} = 1;
        }
    }

    # If closing side comments ARE NOT selected, but warnings ARE
    # selected and we ARE DELETING csc's, then we will pretend to be
    # adding with a huge interval.  This will force the comments to be
    # generated for comparison with the old comments, but not added.
    elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
        if ( $rOpts->{'delete-closing-side-comments'} ) {
            $rOpts->{'delete-closing-side-comments'}  = 0;
            $rOpts->{'closing-side-comments'}         = 1;
            $rOpts->{'closing-side-comment-interval'} = 100000000;
        }
    }

    make_bli_pattern();
    make_block_brace_vertical_tightness_pattern();

    if ( $rOpts->{'line-up-parentheses'} ) {

        if (   $rOpts->{'indent-only'}
            || !$rOpts->{'add-newlines'}
            || !$rOpts->{'delete-old-newlines'} )
        {
            warn <<EOM;
-----------------------------------------------------------------------
Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp

The -lp indentation logic requires that perltidy be able to coordinate
arbitrarily large numbers of line breakpoints.  This isn't possible
with these flags. Sometimes an acceptable workaround is to use -wocb=3
-----------------------------------------------------------------------
EOM
            $rOpts->{'line-up-parentheses'} = 0;
        }
    }

    # At present, tabs are not compatible with the line-up-parentheses style
    # (it would be possible to entab the total leading whitespace
    # just prior to writing the line, if desired).
    if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
        warn <<EOM;
Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
EOM
        $rOpts->{'tabs'} = 0;
    }

    # Likewise, tabs are not compatible with outdenting..
    if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
        warn <<EOM;
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
        $rOpts->{'tabs'} = 0;
    }

    if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
        warn <<EOM;
Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
EOM
        $rOpts->{'tabs'} = 0;
    }

    if ( !$rOpts->{'space-for-semicolon'} ) {
        $want_left_space{'f'} = -1;
    }

    if ( $rOpts->{'space-terminal-semicolon'} ) {
        $want_left_space{';'} = 1;
    }

    # implement outdenting preferences for keywords
    %outdent_keyword = ();

    # The perltidy manual documents this option as --outdent-keyword-list
    # (-okwl).  This code used to read only the key 'outdent-keyword-okl',
    # so a user-supplied list was never seen.  The old key is still
    # consulted as a fallback in case some caller sets it directly.
    my @outdent_words = split_words( $rOpts->{'outdent-keyword-list'} );
    unless (@outdent_words) {
        @outdent_words = split_words( $rOpts->{'outdent-keyword-okl'} );
    }
    unless (@outdent_words) {
        @outdent_words = qw(next last redo goto return);    # defaults
    }

    # FUTURE: if not a keyword, assume that it is an identifier
    foreach my $word (@outdent_words) {
        if ( $Perl::Tidy::Tokenizer::is_keyword{$word} ) {
            $outdent_keyword{$word} = 1;
        }
        else {
            warn "ignoring '$word' in -okwl list; not a perl keyword";
        }
    }

    # implement user whitespace preferences
    if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
        @want_left_space{@_} = (1) x scalar(@_);
    }

    if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
        @want_right_space{@_} = (1) x scalar(@_);
    }

    if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
        @want_left_space{@_} = (-1) x scalar(@_);
    }

    if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
        @want_right_space{@_} = (-1) x scalar(@_);
    }
    if ( $rOpts->{'dump-want-left-space'} ) {
        dump_want_left_space(*STDOUT);
        exit 0;
    }

    if ( $rOpts->{'dump-want-right-space'} ) {
        dump_want_right_space(*STDOUT);
        exit 0;
    }

    # default keywords for which space is introduced before an opening paren
    # (at present, including them messes up vertical alignment)
    @_ = qw(my local our and or err eq ne if else elsif until
      unless while for foreach return switch case given when);
    @space_after_keyword{@_} = (1) x scalar(@_);

    # first remove any or all of these if desired
    if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {

        # -nsak='*' selects all the above keywords
        if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
        @space_after_keyword{@_} = (0) x scalar(@_);
    }

    # then allow user to add to these defaults
    if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
        @space_after_keyword{@_} = (1) x scalar(@_);
    }

    # implement user break preferences
    my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
      = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
      . : ? && || and or err xor
    );

    # Swap the left and right bond strengths of each given token if the
    # left one is currently the smaller of the two.
    my $break_after = sub {
        foreach my $tok (@_) {
            if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
            my $lbs = $left_bond_strength{$tok};
            my $rbs = $right_bond_strength{$tok};
            if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
                  ( $lbs, $rbs );
            }
        }
    };

    # Swap the left and right bond strengths of each given token if the
    # right one is currently the smaller of the two.
    my $break_before = sub {
        foreach my $tok (@_) {
            my $lbs = $left_bond_strength{$tok};
            my $rbs = $right_bond_strength{$tok};
            if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
                  ( $lbs, $rbs );
            }
        }
    };

    $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
    $break_before->(@all_operators)
      if ( $rOpts->{'break-before-all-operators'} );

    $break_after->( split_words( $rOpts->{'want-break-after'} ) );
    $break_before->( split_words( $rOpts->{'want-break-before'} ) );

    # make note if breaks are before certain key types
    %want_break_before = ();
    foreach my $tok ( @all_operators, ',' ) {
        $want_break_before{$tok} =
          $left_bond_strength{$tok} < $right_bond_strength{$tok};
    }

    # Coordinate ?/: breaks, which must be similar
    if ( !$want_break_before{':'} ) {
        $want_break_before{'?'}   = $want_break_before{':'};
        $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
        $left_bond_strength{'?'}  = NO_BREAK;
    }

    # Define here tokens which may follow the closing brace of a do statement
    # on the same line, as in:
    #   } while ( $something);
    @_ = qw(until while unless if ; : );
    push @_, ',';
    @is_do_follower{@_} = (1) x scalar(@_);

    # These tokens may follow the closing brace of an if or elsif block.
    # In other words, for cuddled else we want code to look like:
    #   } elsif ( $something) {
    #   } else {
    if ( $rOpts->{'cuddled-else'} ) {
        @_ = qw(else elsif);
        @is_if_brace_follower{@_} = (1) x scalar(@_);
    }
    else {
        %is_if_brace_follower = ();
    }

    # nothing can follow the closing curly of an else { } block:
    %is_else_brace_follower = ();

    # what can follow a multi-line anonymous sub definition closing curly:
    @_ = qw# ; : => or and  && || ~~ !~~ ) #;
    push @_, ',';
    @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);

    # what can follow a one-line anonymous sub closing curly:
    # one-line anonymous subs also have ']' here...
    # see tk3.t and PP.pm
    @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
    push @_, ',';
    @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);

    # What can follow a closing curly of a block
    # which is not an if/elsif/else/do/sort/map/grep/eval/sub
    # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
    @_ = qw#  ; : => or and  && || ) #;
    push @_, ',';

    # allow cuddled continue if cuddled else is specified
    if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }

    @is_other_brace_follower{@_} = (1) x scalar(@_);

    $right_bond_strength{'{'} = WEAK;
    $left_bond_strength{'{'}  = VERY_STRONG;

    # make -l=0  equal to -l=infinite
    if ( !$rOpts->{'maximum-line-length'} ) {
        $rOpts->{'maximum-line-length'} = 1000000;
    }

    # make -lbl=0  equal to -lbl=infinite
    if ( !$rOpts->{'long-block-line-count'} ) {
        $rOpts->{'long-block-line-count'} = 1000000;
    }

    # replace a symbolic -ole name (case-insensitive) by the actual line
    # ending characters
    my $ole = $rOpts->{'output-line-ending'};
    if ($ole) {
        my %endings = (
            dos  => "\015\012",
            win  => "\015\012",
            mac  => "\015",
            unix => "\012",
        );
        $ole = lc $ole;
        unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
            my $str = join " ", keys %endings;
            die <<EOM;
Unrecognized line ending '$ole'; expecting one of: $str
EOM
        }
        if ( $rOpts->{'preserve-line-endings'} ) {
            warn "Ignoring -ple; conflicts with -ole\n";
            $rOpts->{'preserve-line-endings'} = undef;
        }
    }

    # hashes used to simplify setting whitespace
    %tightness = (
        '{' => $rOpts->{'brace-tightness'},
        '}' => $rOpts->{'brace-tightness'},
        '(' => $rOpts->{'paren-tightness'},
        ')' => $rOpts->{'paren-tightness'},
        '[' => $rOpts->{'square-bracket-tightness'},
        ']' => $rOpts->{'square-bracket-tightness'},
    );
    %matching_token = (
        '{' => '}',
        '(' => ')',
        '[' => ']',
        '?' => ':',
    );

    # frequently used parameters
    $rOpts_add_newlines          = $rOpts->{'add-newlines'};
    $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
    $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
    $rOpts_block_brace_vertical_tightness =
      $rOpts->{'block-brace-vertical-tightness'};
    $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
    $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
    $rOpts_break_at_old_ternary_breakpoints =
      $rOpts->{'break-at-old-ternary-breakpoints'};
    $rOpts_break_at_old_attribute_breakpoints =
      $rOpts->{'break-at-old-attribute-breakpoints'};
    $rOpts_break_at_old_comma_breakpoints =
      $rOpts->{'break-at-old-comma-breakpoints'};
    $rOpts_break_at_old_keyword_breakpoints =
      $rOpts->{'break-at-old-keyword-breakpoints'};
    $rOpts_break_at_old_logical_breakpoints =
      $rOpts->{'break-at-old-logical-breakpoints'};
    $rOpts_closing_side_comment_else_flag =
      $rOpts->{'closing-side-comment-else-flag'};
    $rOpts_closing_side_comment_maximum_text =
      $rOpts->{'closing-side-comment-maximum-text'};
    $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
    $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
    $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
    $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
    $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
    $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
    $rOpts_short_concatenation_item_length =
      $rOpts->{'short-concatenation-item-length'};
    $rOpts_keep_old_blank_lines     = $rOpts->{'keep-old-blank-lines'};
    $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
    $rOpts_format_skipping          = $rOpts->{'format-skipping'};
    $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
    $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
    $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
    $half_maximum_line_length       = $rOpts_maximum_line_length / 2;

    # Note that both opening and closing tokens can access the opening
    # and closing flags of their container types.
    %opening_vertical_tightness = (
        '(' => $rOpts->{'paren-vertical-tightness'},
        '{' => $rOpts->{'brace-vertical-tightness'},
        '[' => $rOpts->{'square-bracket-vertical-tightness'},
        ')' => $rOpts->{'paren-vertical-tightness'},
        '}' => $rOpts->{'brace-vertical-tightness'},
        ']' => $rOpts->{'square-bracket-vertical-tightness'},
    );

    %closing_vertical_tightness = (
        '(' => $rOpts->{'paren-vertical-tightness-closing'},
        '{' => $rOpts->{'brace-vertical-tightness-closing'},
        '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
        ')' => $rOpts->{'paren-vertical-tightness-closing'},
        '}' => $rOpts->{'brace-vertical-tightness-closing'},
        ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
    );

    # assume flag for '>' same as ')' for closing qw quotes
    %closing_token_indentation = (
        ')' => $rOpts->{'closing-paren-indentation'},
        '}' => $rOpts->{'closing-brace-indentation'},
        ']' => $rOpts->{'closing-square-bracket-indentation'},
        '>' => $rOpts->{'closing-paren-indentation'},
    );

    # flag indicating if any closing tokens are indented
    $some_closing_token_indentation =
         $rOpts->{'closing-paren-indentation'}
      || $rOpts->{'closing-brace-indentation'}
      || $rOpts->{'closing-square-bracket-indentation'}
      || $rOpts->{'indent-closing-brace'};

    %opening_token_right = (
        '(' => $rOpts->{'opening-paren-right'},
        '{' => $rOpts->{'opening-hash-brace-right'},
        '[' => $rOpts->{'opening-square-bracket-right'},
    );

    %stack_opening_token = (
        '(' => $rOpts->{'stack-opening-paren'},
        '{' => $rOpts->{'stack-opening-hash-brace'},
        '[' => $rOpts->{'stack-opening-square-bracket'},
    );

    %stack_closing_token = (
        ')' => $rOpts->{'stack-closing-paren'},
        '}' => $rOpts->{'stack-closing-hash-brace'},
        ']' => $rOpts->{'stack-closing-square-bracket'},
    );
}
7693
sub make_static_block_comment_pattern {

    # Create the pattern used to identify static block comments and store
    # it in $static_block_comment_pattern.  The default is '^\s*##'; a
    # -sbcp prefix given by the user replaces it.
    # Dies if the prefix does not begin with '#' or '^#', or if the
    # resulting pattern is not a valid regex.
    $static_block_comment_pattern = '^\s*##';

    # allow the user to change it
    if ( $rOpts->{'static-block-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-block-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = $prefix;

        # user may give leading caret to force matching left comments only
        if ( $prefix !~ /^\^#/ ) {
            if ( $prefix !~ /^#/ ) {
                die
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
            }
            $pattern = '^\s*' . $prefix;
        }

        # Check that the pattern compiles, treating it strictly as data.
        # Pasting it into the text of a string eval would compile anything
        # after a '/' in the prefix as Perl code, and would wrongly reject
        # valid prefixes which contain '/' or '@name'.
        my $compiled = eval { qr/$pattern/ };
        if ($@) {
            die
"ERROR: the -sbcp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_block_comment_pattern = $pattern;
    }
}
7721
sub make_format_skipping_pattern {

    # Build the regex which recognizes a format skipping marker comment.
    #   $opt_name - key in $rOpts which may hold a user-defined marker
    #   $default  - marker to use when that option is not set (or false)
    # Returns the pattern as a string: '^' . marker . '\s'
    # Dies if the marker does not begin with '#' or if the pattern is not
    # a valid regex.
    my ( $opt_name, $default ) = @_;
    my $param = $rOpts->{$opt_name};
    unless ($param) { $param = $default }
    $param =~ s/^\s*//;
    if ( $param !~ /^#/ ) {
        die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
    }
    my $pattern = '^' . $param . '\s';

    # Check that the pattern compiles, treating it strictly as data.
    # Pasting it into the text of a string eval would compile anything
    # after a '/' in the marker as Perl code, and would wrongly reject
    # valid markers which contain '/' or '@name'.
    my $compiled = eval { qr/$pattern/ };
    if ($@) {
        die
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
    }
    return $pattern;
}
7738
sub make_closing_side_comment_list_pattern {

    # Set $closing_side_comment_list_pattern, a regex for recognizing
    # selected block types.  It is built from the -cscl list when one is
    # given; otherwise the default '^\w+' is used.
    my $block_list = $rOpts->{'closing-side-comment-list'};

    $closing_side_comment_list_pattern = '^\w+';
    if ( defined($block_list) && $block_list ) {
        $closing_side_comment_list_pattern =
          make_block_pattern( '-cscl', $block_list );
    }
}
7750
sub make_bli_pattern {

    # Set $bli_pattern from the block types listed in $bli_list_string.
    # A non-empty -blil option value replaces that string first.
    my $user_list = $rOpts->{'brace-left-and-indent-list'};
    if ( defined($user_list) && $user_list ) {
        $bli_list_string = $user_list;
    }

    $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
}
7761
sub make_block_brace_vertical_tightness_pattern {

    # Set $block_brace_vertical_tightness_pattern, a regex for
    # recognizing selected block types.  It is built from the -bbvtl list
    # when one is given; otherwise the default pattern below is used.
    my $user_list = $rOpts->{'block-brace-vertical-tightness-list'};

    $block_brace_vertical_tightness_pattern =
      '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';

    if ( defined($user_list) && $user_list ) {
        $block_brace_vertical_tightness_pattern =
          make_block_pattern( '-bbvtl', $user_list );
    }
}
7776
sub make_block_pattern {

    #  Given a string of block-type keywords, return a regex (as a string)
    #  to match them.  Two entries get special treatment: a single ':'
    #  stands for any label, and 'sub' is matched without the trailing '$'
    #  anchor because the 'sub' token text may have additional text after
    #  it (name of sub).
    #
    #    $abbrev - option abbreviation, only used in the warning message
    #    $string - the list of block types
    #
    #  Example:
    #
    #   input string: "if else elsif unless while for foreach do : sub";
    #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';

    my ( $abbrev, $string ) = @_;

    my %seen;
    my @alternatives;
    foreach my $type ( split_words($string) ) {

        # each entry is only considered once
        next if $seen{$type};
        $seen{$type} = 1;

        # 'sub' is added to the pattern after the loop
        next if ( $type eq 'sub' );

        if ( $type eq ':' ) {
            push @alternatives, '\w+:';
        }
        elsif ( $type =~ /^\w/ ) {
            push @alternatives, $type;
        }
        else {
            warn "unrecognized block type $type after $abbrev, ignoring\n";
        }
    }

    my $pattern = '(' . join( '|', @alternatives ) . ')$';
    $pattern = '(' . $pattern . '|sub)' if ( $seen{'sub'} );
    return '^' . $pattern;
}
7815
sub make_static_side_comment_pattern {

    # Create the pattern used to identify static side comments and store
    # it in $static_side_comment_pattern.  The default is '^##'; a -sscp
    # prefix given by the user replaces it.
    # Dies if the resulting pattern is not a valid regex.
    $static_side_comment_pattern = '^##';

    # allow the user to change it
    if ( $rOpts->{'static-side-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-side-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = '^' . $prefix;

        # Check that the pattern compiles, treating it strictly as data.
        # Pasting it into the text of a string eval would compile anything
        # after a '/' in the prefix as Perl code, and would wrongly reject
        # valid prefixes which contain '/' or '@name'.
        my $compiled = eval { qr/$pattern/ };
        if ($@) {
            die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_side_comment_pattern = $pattern;
    }
}
7834
sub make_closing_side_comment_prefix {

    # Be sure we have a valid closing side comment prefix.
    #
    # On return, $rOpts->{'closing-side-comment-prefix'} holds the prefix
    # to use (with a leading '#' added if the user left it off) and
    # $closing_side_comment_prefix_pattern holds the regex which
    # recognizes it.  The defaults are used when no prefix was given, and
    # also when the pattern made from the user's prefix does not compile.
    my $default_prefix         = '## end';
    my $default_prefix_pattern = '^##\s+end';

    my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
    my $csc_prefix_pattern;
    if ( !defined($csc_prefix) ) {
        $csc_prefix         = $default_prefix;
        $csc_prefix_pattern = $default_prefix_pattern;
    }
    else {
        my $test_csc_prefix = $csc_prefix;
        if ( $test_csc_prefix !~ /^#/ ) {
            $test_csc_prefix = '#' . $test_csc_prefix;
        }

        # make a regex to recognize the prefix
        my $test_csc_prefix_pattern = $test_csc_prefix;

        # escape any special characters
        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;

        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;

        # allow exact number of intermediate spaces to vary
        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;

        # make sure we have a good pattern
        # if we fail this we probably have an error in escaping
        # characters.
        my $compiled = eval { qr/$test_csc_prefix_pattern/ };
        if ($@) {

            # shouldn't happen..must have screwed up escaping, above
            report_definite_bug();

            # report the pattern which actually failed
            warn
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$test_csc_prefix_pattern'\n";

            # just warn and keep going with defaults
            warn "Please consider using a simpler -cscp prefix\n";
            warn "Using default -cscp instead; please check output\n";

            # really install the defaults, as the message just promised;
            # otherwise the pattern stored below would be undefined
            $csc_prefix         = $default_prefix;
            $csc_prefix_pattern = $default_prefix_pattern;
        }
        else {
            $csc_prefix         = $test_csc_prefix;
            $csc_prefix_pattern = $test_csc_prefix_pattern;
        }
    }
    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
}
7884
sub dump_want_left_space {

    # Print to filehandle $fh a short explanation followed by one line
    # per entry of %want_left_space, sorted by key, giving the key and
    # its value separated by a tab.
    my ($fh) = @_;
    local $" = "\n";
    print {$fh} <<EOM;
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
 1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
    for my $type ( sort keys %want_left_space ) {
        print {$fh} "$type\t$want_left_space{$type}\n";
    }
}
7900
7901 # Print the %want_right_space table to the filehandle given as the only
7902 # argument: a short explanation, then one "type<TAB>value" line per sorted key.
7903 sub dump_want_right_space {
7904     my ($fh) = @_;
7905     local $" = "\n";
7906     print $fh <<EOM;
7907 These values are the main control of whitespace to the right of a token type;
7908 They may be altered with the -wrs parameter.
7909 For a list of token types, use perltidy --dump-token-types (-dtt)
7910  1 means the token wants a space to its right
7911 -1 means the token does not want a space to its right
7912 ------------------------------------------------------------------------
7913 EOM
7914     print $fh "$_\t$want_right_space{$_}\n" for sort keys %want_right_space;
7915 }
7916
7917 {    # begin is_essential_whitespace
7918
7919     my %is_sort_grep_map;    # true for the words: sort grep map
7920     my %is_for_foreach;      # true for the words: for foreach
7921
7922     BEGIN {
7923         # fill the lookup hashes once, at compile time; each word maps to 1
7924         @_ = qw(sort grep map);
7925         @is_sort_grep_map{@_} = (1) x scalar(@_);
7926
7927         @_ = qw(for foreach);
7928         @is_for_foreach{@_} = (1) x scalar(@_);
7929
7930     }
7931
7932     sub is_essential_whitespace {
7933
7934         # Essential whitespace means whitespace which cannot be safely deleted
7935         # without risking the introduction of a syntax error.
7936         # We are given three tokens and their types, in this argument order:
7937         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7938         # ($tokenl, $typel) is the token to the left of the space in question
7939         # ($tokenr, $typer) is the token to the right of the space in question
7940         # Returns a true value if the space must be kept, a false value if not.
7941         # This is a slow routine but is not needed too often except when -mangle
7942         # is used.
7943         #
7944         # Note: This routine should almost never need to be changed.  It is
7945         # for avoiding syntax problems rather than for formatting.
7946         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7947
7948         my $result =
7949
7950           # never combine two bare words or numbers: keep the space when
7951           # $tokenl ends with, and $tokenr begins with, a word character,
7952           # a single quote, or '::'
7953           # examples:  and ::ok(1)
7954           #            return ::spw(...)
7955           #            for bla::bla:: abc
7956           #            "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7957           #            $input eq"quit" must not become $inputeq"quit"
7958           #            my $size=-s::SINK if $file;  <==OK but we won't do it
7959           ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7960
7961           # do not combine a number with a concatenation dot
7962           # example: pom.caputo:
7963           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7964           || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7965           || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7966
7967           # do not join a minus with a one-character bare word, because you
7968           # might form a file test operator.  Example from Complex.pm:
7969           # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7970           || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7971
7972           # and something like this could become ambiguous without space
7973           # after the '-':
7974           #   use constant III=>1;
7975           #   $a = $b - III;
7976           # and even this:
7977           #   $a = - III;
7978           || ( ( $tokenl eq '-' )
7979             && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7980
7981           # '= -' should not become =- or you will get a warning
7982           # about reversed -=  (this rule is currently disabled)
7983           # || ($tokenr eq '-')
7984
7985           # keep a space between a quote and a bareword to prevent the
7986           # bareword from becoming a quote modifier.
7987           || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7988
7989           # keep a space between a token ending in '$' and any word;
7990           # this caused trouble:  "die @$ if $@"
7991           || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7992             && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7993
7994           # perl is very fussy about spaces before <<
7995           || ( $tokenr =~ /^\<\</ )
7996
7997           # avoid combining tokens to create new meanings. Example:
7998           #     $a+ +$b must not become $a++$b
7999           || ( $is_digraph{ $tokenl . $tokenr } )
8000           || ( $is_trigraph{ $tokenl . $tokenr } )
8001
8002           # another example: do not combine these two &'s:
8003           #     allow_options & &OPT_EXECCGI
8004           || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
8005
8006           # don't combine $$ or $# with any alphanumeric
8007           # (testfile mangle.t with --mangle)
8008           || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
8009
8010           # retain any space after possible filehandle
8011           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
8012           || ( $typel eq 'Z' )
8013
8014           # Perl is sensitive to whitespace after the + here:
8015           #  $b = xvals $a + 0.1 * yvals $a;
8016           || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
8017
8018           # keep paren separate in 'use Foo::Bar ()'
8019           || ( $tokenr eq '('
8020             && $typel   eq 'w'
8021             && $typell  eq 'k'
8022             && $tokenll eq 'use' )
8023
8024           # keep any space between filehandle and paren:
8025           # file mangle.t with --mangle:
8026           || ( $typel eq 'Y' && $tokenr eq '(' )
8027
8028           # retain any space after here doc operator ( hereerr.t)
8029           || ( $typel eq 'h' )
8030
8031           # be careful with a space around ++ and --, to avoid ambiguity as to
8032           # which token it applies to
8033           || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
8034           || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
8035
8036           # need space after foreach my; for example, this will fail in
8037           # older versions of Perl:
8038           # foreach my$ft(@filetypes)...
8039           || (
8040             $tokenl eq 'my'
8041
8042             #  same test as $tokenll =~ /^(for|foreach)$/
8043             && $is_for_foreach{$tokenll}
8044             && $tokenr =~ /^\$/
8045           )
8046
8047           # must have space between grep and left paren; "grep(" will fail
8048           || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
8049
8050           # don't stick numbers next to left parens, as in:
8051           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
8052           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
8053
8054           # We must be sure that a space between a ? and a quoted string
8055           # remains if the space before the ? remains.  [Loca.pm, lockarea]
8056           # ie,
8057           #    $b=join $comma ? ',' : ':', @_;  # ok
8058           #    $b=join $comma?',' : ':', @_;    # ok!
8059           #    $b=join $comma ?',' : ':', @_;   # error!
8060           # Not really required, so this rule is left disabled:
8061           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
8062
8063           # do not remove space between an '&' and a bare word because
8064           # it may turn into a function evaluation, like here
8065           # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
8066           #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
8067           || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8068
8069           ;    # the value of this long logic sequence is the result we want
8070         return $result;
8071     }
8072 }
8073
8074 sub set_white_space_flag {
8075
8076     #    This routine examines each pair of nonblank tokens and
8077     #    sets values for array @white_space_flag.
8078     #
8079     #    $white_space_flag[$j] is a flag indicating whether a white space
8080     #    BEFORE token $j is needed, with the following values:
8081     #
8082     #            -1 do not want a space before token $j
8083     #             0 optional space or $j is a whitespace
8084     #             1 want a space before token $j
8085     #    Arguments: $jmax (index of the last token) and references to the
8086     #    token, token-type and block-type arrays.  Returns \@white_space_flag.
8087     #   The values for the first token will be defined based
8088     #   upon the contents of the "to_go" output array.
8089     #
8090     #   Note: retain debug print statements because they are usually
8091     #   required after adding new token types.
8092
8093     BEGIN {
8094
8095         # initialize these global hashes, which control the use of
8096         # whitespace around tokens:
8097         #
8098         # %binary_ws_rules
8099         # %want_left_space
8100         # %want_right_space
8101         # %space_after_keyword
8102         #
8103         # Many token types are identical to the tokens themselves.
8104         # See the tokenizer for a complete list. Here are some special types:
8105         #   k = perl keyword
8106         #   f = semicolon in for statement
8107         #   m = unary minus
8108         #   p = unary plus
8109         # Note that :: is excluded since it should be contained in an identifier
8110         # Note that '->' is excluded because it never gets space
8111         # parentheses and brackets are excluded since they are handled specially
8112         # curly braces are included but may be overridden by logic, such as
8113         # newline logic.
8114
8115         # NEW_TOKENS: create a whitespace rule here.  This can be as
8116         # simple as adding your new letter to @spaces_both_sides, for
8117         # example.
8118
8119         @_ = qw" L { ( [ ";
8120         @is_opening_type{@_} = (1) x scalar(@_);
8121
8122         @_ = qw" R } ) ] ";
8123         @is_closing_type{@_} = (1) x scalar(@_);
8124
8125         my @spaces_both_sides = qw"
8126           + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
8127           .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
8128           &&= ||= //= <=> A k f w F n C Y U G v
8129           ";
8130
8131         my @spaces_left_side = qw"
8132           t ! ~ m p { \ h pp mm Z j
8133           ";
8134         push( @spaces_left_side, '#' );    # avoids warning message
8135
8136         my @spaces_right_side = qw"
8137           ; } ) ] R J ++ -- **=
8138           ";
8139         push( @spaces_right_side, ',' );    # avoids warning message
8140         @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
8141         @want_right_space{@spaces_both_sides} =
8142           (1) x scalar(@spaces_both_sides);
8143         @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
8144         @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
8145         @want_left_space{@spaces_right_side} =
8146           (-1) x scalar(@spaces_right_side);
8147         @want_right_space{@spaces_right_side} =
8148           (1) x scalar(@spaces_right_side);
8149         $want_left_space{'L'}   = WS_NO;
8150         $want_left_space{'->'}  = WS_NO;
8151         $want_right_space{'->'} = WS_NO;
8152         $want_left_space{'**'}  = WS_NO;
8153         $want_right_space{'**'} = WS_NO;
8154
8155         # hash type information must stay tightly bound
8156         # as in :  ${xxxx}
8157         $binary_ws_rules{'i'}{'L'} = WS_NO;
8158         $binary_ws_rules{'i'}{'{'} = WS_YES;
8159         $binary_ws_rules{'k'}{'{'} = WS_YES;
8160         $binary_ws_rules{'U'}{'{'} = WS_YES;
8161         $binary_ws_rules{'i'}{'['} = WS_NO;
8162         $binary_ws_rules{'R'}{'L'} = WS_NO;
8163         $binary_ws_rules{'R'}{'{'} = WS_NO;
8164         $binary_ws_rules{'t'}{'L'} = WS_NO;
8165         $binary_ws_rules{'t'}{'{'} = WS_NO;
8166         $binary_ws_rules{'}'}{'L'} = WS_NO;
8167         $binary_ws_rules{'}'}{'{'} = WS_NO;
8168         $binary_ws_rules{'$'}{'L'} = WS_NO;
8169         $binary_ws_rules{'$'}{'{'} = WS_NO;
8170         $binary_ws_rules{'@'}{'L'} = WS_NO;
8171         $binary_ws_rules{'@'}{'{'} = WS_NO;
8172         $binary_ws_rules{'='}{'L'} = WS_YES;
8173
8174         # the following includes ') {'
8175         # as in :    if ( xxx ) { yyy }
8176         $binary_ws_rules{']'}{'L'} = WS_NO;
8177         $binary_ws_rules{']'}{'{'} = WS_NO;
8178         $binary_ws_rules{')'}{'{'} = WS_YES;
8179         $binary_ws_rules{')'}{'['} = WS_NO;
8180         $binary_ws_rules{']'}{'['} = WS_NO;
8181         $binary_ws_rules{']'}{'{'} = WS_NO;    # repeats the rule set above
8182         $binary_ws_rules{'}'}{'['} = WS_NO;
8183         $binary_ws_rules{'R'}{'['} = WS_NO;
8184
8185         $binary_ws_rules{']'}{'++'} = WS_NO;
8186         $binary_ws_rules{']'}{'--'} = WS_NO;
8187         $binary_ws_rules{')'}{'++'} = WS_NO;
8188         $binary_ws_rules{')'}{'--'} = WS_NO;
8189
8190         $binary_ws_rules{'R'}{'++'} = WS_NO;
8191         $binary_ws_rules{'R'}{'--'} = WS_NO;
8192
8193         ########################################################
8194         # should no longer be necessary (see niek.pl)
8195         ##$binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
8196         ##$binary_ws_rules{'w'}{':'} = WS_NO;
8197         ########################################################
8198         $binary_ws_rules{'i'}{'Q'} = WS_YES;
8199         $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
8200
8201         # FIXME: we need to split 'i' into variables and functions
8202         # and have no space for functions but space for variables.  For now,
8203         # I have a special patch in the special rules below
8204         $binary_ws_rules{'i'}{'('} = WS_NO;
8205
8206         $binary_ws_rules{'w'}{'('} = WS_NO;
8207         $binary_ws_rules{'w'}{'{'} = WS_YES;
8208     }
8209     my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
8210     my ( $last_token, $last_type, $last_block_type, $token, $type,
8211         $block_type );
8212     my (@white_space_flag);
8213     my $j_tight_closing_paren = -1;
8214
8215     if ( $max_index_to_go >= 0 ) {
8216         $token      = $tokens_to_go[$max_index_to_go];
8217         $type       = $types_to_go[$max_index_to_go];
8218         $block_type = $block_type_to_go[$max_index_to_go];
8219     }
8220     else {
8221         $token      = ' ';
8222         $type       = 'b';
8223         $block_type = '';
8224     }
8225
8226     # loop over all tokens
8227     my ( $j, $ws );
8228
8229     for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8230
8231         if ( $$rtoken_type[$j] eq 'b' ) {
8232             $white_space_flag[$j] = WS_OPTIONAL;
8233             next;
8234         }
8235
8236         # set a default value, to be changed as needed
8237         $ws              = undef;
8238         $last_token      = $token;
8239         $last_type       = $type;
8240         $last_block_type = $block_type;
8241         $token           = $$rtokens[$j];
8242         $type            = $$rtoken_type[$j];
8243         $block_type      = $$rblock_type[$j];
8244
8245         #---------------------------------------------------------------
8246         # section 1:
8247         # handle space on the inside of opening braces
8248         #---------------------------------------------------------------
8249
8250         #    /^[L\{\(\[]$/
8251         if ( $is_opening_type{$last_type} ) {
8252
8253             $j_tight_closing_paren = -1;
8254
8255             # let's keep empty matched braces together: () {} []
8256             # except for BLOCKS
8257             if ( $token eq $matching_token{$last_token} ) {
8258                 if ($block_type) {
8259                     $ws = WS_YES;
8260                 }
8261                 else {
8262                     $ws = WS_NO;
8263                 }
8264             }
8265             else {
8266
8267                 # we're considering the right of an opening brace
8268                 # tightness = 0 means always pad inside with space
8269                 # tightness = 1 means pad inside if "complex"
8270                 # tightness = 2 means never pad inside with space
8271
8272                 my $tightness;
8273                 if (   $last_type eq '{'
8274                     && $last_token eq '{'
8275                     && $last_block_type )
8276                 {
8277                     $tightness = $rOpts_block_brace_tightness;
8278                 }
8279                 else { $tightness = $tightness{$last_token} }
8280
8281     #=================================================================
8282     # Patch for fabrice_bug.pl
8283     # We must always avoid spaces around a bare word beginning with ^ as in:
8284     #    my $before = ${^PREMATCH};
8285     # Because all of the following cause an error in perl:
8286     #    my $before = ${ ^PREMATCH };
8287     #    my $before = ${ ^PREMATCH};
8288     #    my $before = ${^PREMATCH };
8289     # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1.
8290     # Note that here we must set tightness=1 and not 2 so that the closing space
8291     # is also avoided (via the $j_tight_closing_paren flag in coding)
8292                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8293
8294               #=================================================================
8295
8296                 if ( $tightness <= 0 ) {
8297                     $ws = WS_YES;
8298                 }
8299                 elsif ( $tightness > 1 ) {
8300                     $ws = WS_NO;
8301                 }
8302                 else {
8303
8304                     # Patch to count '-foo' as single token so that
8305                     # each of  $a{-foo} and $a{foo} and $a{'foo'} do
8306                     # not get spaces with default formatting.
8307                     my $j_here = $j;
8308                     ++$j_here
8309                       if ( $token eq '-'
8310                         && $last_token eq '{'
8311                         && $$rtoken_type[ $j + 1 ] eq 'w' );
8312
8313                     # $j_next is where a closing token should be if
8314                     # the container has a single token
8315                     my $j_next =
8316                       ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8317                       ? $j_here + 2
8318                       : $j_here + 1;
8319                     my $tok_next  = $$rtokens[$j_next];
8320                     my $type_next = $$rtoken_type[$j_next];
8321
8322                     # for tightness = 1, if there is just one token
8323                     # within the matching pair, we will keep it tight
8324                     if (
8325                         $tok_next eq $matching_token{$last_token}
8326
8327                         # but watch out for this: [ [ ]    (misc.t)
8328                         && $last_token ne $token
8329                       )
8330                     {
8331
8332                         # remember where to put the space for the closing paren
8333                         $j_tight_closing_paren = $j_next;
8334                         $ws                    = WS_NO;
8335                     }
8336                     else {
8337                         $ws = WS_YES;
8338                     }
8339                 }
8340             }
8341         }    # done with opening braces and brackets
8342         my $ws_1;    # debug snapshot after section 1; no 'my ... if COND'
8343         $ws_1 = $ws if FORMATTER_DEBUG_FLAG_WHITE;
8344
8345         #---------------------------------------------------------------
8346         # section 2:
8347         # handle space on inside of closing brace pairs
8348         #---------------------------------------------------------------
8349
8350         #   /[\}\)\]R]/
8351         if ( $is_closing_type{$type} ) {
8352
8353             if ( $j == $j_tight_closing_paren ) {
8354
8355                 $j_tight_closing_paren = -1;
8356                 $ws                    = WS_NO;
8357             }
8358             else {
8359
8360                 if ( !defined($ws) ) {
8361
8362                     my $tightness;
8363                     if ( $type eq '}' && $token eq '}' && $block_type ) {
8364                         $tightness = $rOpts_block_brace_tightness;
8365                     }
8366                     else { $tightness = $tightness{$token} }
8367
8368                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8369                 }
8370             }
8371         }
8372
8373         my $ws_2;    # debug snapshot after section 2
8374         $ws_2 = $ws if FORMATTER_DEBUG_FLAG_WHITE;
8375
8376         #---------------------------------------------------------------
8377         # section 3:
8378         # use the binary table
8379         #---------------------------------------------------------------
8380         if ( !defined($ws) ) {
8381             $ws = $binary_ws_rules{$last_type}{$type};
8382         }
8383         my $ws_3;    # debug snapshot after section 3
8384         $ws_3 = $ws if FORMATTER_DEBUG_FLAG_WHITE;
8385
8386         #---------------------------------------------------------------
8387         # section 4:
8388         # some special cases
8389         #---------------------------------------------------------------
8390         if ( $token eq '(' ) {
8391
8392             # This will have to be tweaked as tokenization changes.
8393             # We usually want a space at '} (', for example:
8394             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8395             #
8396             # But not others:
8397             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8398             # At present, the above & block is marked as type L/R so this case
8399             # won't go through here.
8400             if ( $last_type eq '}' ) { $ws = WS_YES }
8401
8402             # NOTE: some older versions of Perl had occasional problems if
8403             # spaces are introduced between keywords or functions and opening
8404             # parens.  So the default is not to do this except in certain
8405             # cases.  The current Perl seems to tolerate spaces.
8406
8407             # Space between keyword and '('
8408             elsif ( $last_type eq 'k' ) {
8409                 $ws = WS_NO
8410                   unless ( $rOpts_space_keyword_paren
8411                     || $space_after_keyword{$last_token} );
8412             }
8413
8414             # Space between function and '('
8415             # -----------------------------------------------------
8416             # 'w' and 'i' checks for something like:
8417             #   myfun(    &myfun(   ->myfun(
8418             # -----------------------------------------------------
8419             elsif (( $last_type =~ /^[wUG]$/ )
8420                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8421             {
8422                 $ws = WS_NO unless ($rOpts_space_function_paren);
8423             }
8424
8425             # space between something like $i and ( in
8426             # for $i ( 0 .. 20 ) {
8427             # FIXME: eventually, type 'i' needs to be split into multiple
8428             # token types so this can be a hardwired rule.
8429             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8430                 $ws = WS_YES;
8431             }
8432
8433             # allow constant function followed by '()' to retain no space
8434             elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8435                 $ws = WS_NO;
8436             }
8437         }
8438
8439         # patch for SWITCH/CASE: make space at ']{' optional
8440         # since the '{' might begin a case or when block
8441         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8442             $ws = WS_OPTIONAL;
8443         }
8444
8445         # keep space between 'sub' and '{' for anonymous sub definition
8446         if ( $type eq '{' ) {
8447             if ( $last_token eq 'sub' ) {
8448                 $ws = WS_YES;
8449             }
8450
8451             # this is needed to avoid no space in '){'
8452             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8453
8454             # avoid any space before the brace or bracket in something like
8455             #  @opts{'a','b',...}
8456             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8457                 $ws = WS_NO;
8458             }
8459         }
8460
8461         elsif ( $type eq 'i' ) {
8462
8463             # never a space before ->
8464             if ( $token =~ /^\-\>/ ) {
8465                 $ws = WS_NO;
8466             }
8467         }
8468
8469         # retain any space between '-' and a following bare word
8470         elsif ( $type eq 'w' || $type eq 'C' ) {
8471             $ws = WS_OPTIONAL if $last_type eq '-';
8472
8473             # never a space before ->
8474             if ( $token =~ /^\-\>/ ) {
8475                 $ws = WS_NO;
8476             }
8477         }
8478
8479         # retain any space between a bare word and a following '-'
8480         # example: avoid space between 'USER' and '-' here:
8481         #   $myhash{USER-NAME}='steve';
8482         elsif ( $type eq 'm' || $type eq '-' ) {
8483             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8484         }
8485
8486         # always space before side comment
8487         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8488
8489         # always preserve whatever space was used after a possible
8490         # filehandle (except _) or here doc operator
8491         if (
8492             $type ne '#'
8493             && ( ( $last_type eq 'Z' && $last_token ne '_' )
8494                 || $last_type eq 'h' )
8495           )
8496         {
8497             $ws = WS_OPTIONAL;
8498         }
8499
8500         my $ws_4;    # debug snapshot after section 4
8501         $ws_4 = $ws if FORMATTER_DEBUG_FLAG_WHITE;
8502
8503         #---------------------------------------------------------------
8504         # section 5:
8505         # default rules not covered above
8506         #---------------------------------------------------------------
8507         # if we fall through to here,
8508         # look at the pre-defined hash tables for the two tokens, and
8509         # if (they are equal) use the common value
8510         # if (either is zero or undef) use the other
8511         # if (either is -1) use it
8512         # That is,
8513         # left  vs right
8514         #  1    vs    1     -->  1
8515         #  0    vs    0     -->  0
8516         # -1    vs   -1     --> -1
8517         #
8518         #  0    vs   -1     --> -1
8519         #  0    vs    1     -->  1
8520         #  1    vs    0     -->  1
8521         # -1    vs    0     --> -1
8522         #
8523         # -1    vs    1     --> -1
8524         #  1    vs   -1     --> -1
8525         if ( !defined($ws) ) {
8526             my $wl = $want_left_space{$type};
8527             my $wr = $want_right_space{$last_type};
8528             if ( !defined($wl) ) { $wl = 0 }
8529             if ( !defined($wr) ) { $wr = 0 }
8530             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8531         }
8532         # defensive check only: section 5 above always leaves $ws defined
8533         if ( !defined($ws) ) {
8534             $ws = 0;
8535             write_diagnostics(
8536                 "WS flag is undefined for tokens $last_token $token\n");
8537         }
8538
8539         # Treat newline as a whitespace. Otherwise, we might combine
8540         # 'Send' and '-recipients' here according to the above rules:
8541         #    my $msg = new Fax::Send
8542         #      -recipients => $to,
8543         #      -data => $data;
8544         if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8545
8546         if (   ( $ws == 0 )
8547             && $j > 0
8548             && $j < $jmax
8549             && ( $last_type !~ /^[Zh]$/ ) )
8550         {
8551
8552             # If this happens, we have a non-fatal but undesirable
8553             # hole in the above rules which should be patched.
8554             write_diagnostics(
8555                 "WS flag is zero for tokens $last_token $token\n");
8556         }
8557         $white_space_flag[$j] = $ws;
8558
8559         FORMATTER_DEBUG_FLAG_WHITE && do {
8560             my $str = substr( $last_token, 0, 15 );
8561             $str .= ' ' x ( 16 - length($str) );
8562             if ( !defined($ws_1) ) { $ws_1 = "*" }
8563             if ( !defined($ws_2) ) { $ws_2 = "*" }
8564             if ( !defined($ws_3) ) { $ws_3 = "*" }
8565             if ( !defined($ws_4) ) { $ws_4 = "*" }
8566             print
8567 "WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8568         };
8569     }
8570     return \@white_space_flag;
8571 }
8572
8573 {    # begin print_line_of_tokens
8574     # Most $r... variables here are array refs which extract_token indexes by $j.
8575     my $rtoken_type;
8576     my $rtokens;
8577     my $rlevels;
8578     my $rslevels;
8579     my $rblock_type;
8580     my $rcontainer_type;
8581     my $rcontainer_environment;
8582     my $rtype_sequence;
8583     my $input_line;
8584     my $rnesting_tokens;
8585     my $rci_levels;
8586     my $rnesting_blocks;
8587
8588     my $in_quote;
8589     my $python_indentation_level;
8590
8591     # These local token variables are stored by store_token_to_go:
8592     my $block_type;
8593     my $ci_level;
8594     my $container_environment;
8595     my $container_type;
8596     my $in_continued_quote;
8597     my $level;
8598     my $nesting_blocks;
8599     my $no_internal_newlines;
8600     my $slevel;
8601     my $token;
8602     my $type;
8603     my $type_sequence;
8604
8605     # Copy the fields of token number $j into the current-token variables.
8606     sub extract_token {
8607         my ($j) = @_;
8608         $token                 = $rtokens->[$j];
8609         $type                  = $rtoken_type->[$j];
8610         $block_type            = $rblock_type->[$j];
8611         $container_type        = $rcontainer_type->[$j];
8612         $container_environment = $rcontainer_environment->[$j];
8613         $type_sequence         = $rtype_sequence->[$j];
8614         $level                 = $rlevels->[$j];
8615         $slevel                = $rslevels->[$j];
8616         $nesting_blocks        = $rnesting_blocks->[$j];
8617         $ci_level              = $rci_levels->[$j];
8618     }
8619
8620     {
8621         # snapshot storage visible only to the two subs in this block
8622         my @token_snapshot;
8623
8624         # Remember the values of the twelve current-token variables.
8625         sub save_current_token {
8626             @token_snapshot = (
8627                 $block_type,     $ci_level,             $container_environment,
8628                 $container_type, $in_continued_quote,   $level,
8629                 $nesting_blocks, $no_internal_newlines, $slevel,
8630                 $token,          $type,                 $type_sequence,
8631             );
8632         }
8633
8634         # Put back the values remembered by the last save_current_token call;
8635         # the variable order here must match the order used when saving.
8636         sub restore_current_token {
8637             (
8638                 $block_type,     $ci_level,             $container_environment,
8639                 $container_type, $in_continued_quote,   $level,
8640                 $nesting_blocks, $no_internal_newlines, $slevel,
8641                 $token,          $type,                 $type_sequence,
8642             ) = @token_snapshot;
8643         }
8644     }
8645     # end of save/restore block
8646
8647     # Append the current token ($token, $type, $level, ...) to the batch of
8648     # output tokens.  Called once per output token.  A true first argument
8649     # forces the no-break flag on for this token.
8650     sub store_token_to_go {
8651
8652         my ($force_nobreak) = @_;
8653         my $nobreak = $force_nobreak ? 1 : $no_internal_newlines;
8654         my $i       = ++$max_index_to_go;
8655
8656         $tokens_to_go[$i]                = $token;
8657         $types_to_go[$i]                 = $type;
8658         $nobreak_to_go[$i]               = $nobreak;
8659         $old_breakpoint_to_go[$i]        = 0;
8660         $forced_breakpoint_to_go[$i]     = 0;
8661         $block_type_to_go[$i]            = $block_type;
8662         $type_sequence_to_go[$i]         = $type_sequence;
8663         $container_environment_to_go[$i] = $container_environment;
8664         $nesting_blocks_to_go[$i]        = $nesting_blocks;
8665         $ci_levels_to_go[$i]             = $ci_level;
8666         $mate_index_to_go[$i]            = -1;
8667         $matching_token_to_go[$i]        = '';
8668         $bond_strength_to_go[$i]         = 0;
8669
8670         # Note: negative levels are currently retained as a diagnostic so that
8671         # the 'final indentation level' is correctly reported for bad scripts.
8672         # But this means that every use of $level as an index must be checked.
8673         # If this becomes too much of a problem, we might give up and just clip
8674         # them at zero.
8675         ## $levels_to_go[$i] = ( $level > 0 ) ? $level : 0;
8676         $levels_to_go[$i]        = $level;
8677         $nesting_depth_to_go[$i] = ( $slevel >= 0 ) ? $slevel : 0;
8678         $lengths_to_go[ $i + 1 ] = $lengths_to_go[$i] + length($token);
8679
8680         # Define the indentation that this token would have if it started
8681         # a new line.  We have to do this now because we need to know this
8682         # when considering one-line blocks.
8683         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8684
8685         # keep track of the two most recent nonblank tokens in the batch
8686         if ( $type ne 'b' ) {
8687             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8688             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
8689             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8690             $last_nonblank_index_to_go      = $max_index_to_go;
8691             $last_nonblank_type_to_go       = $type;
8692             $last_nonblank_token_to_go      = $token;
8693             $comma_count_in_batch++ if ( $type eq ',' );
8694         }
8695
8696         FORMATTER_DEBUG_FLAG_STORE && do {
8697             my ( $pkg, undef, $line ) = caller();
8698             print
8699 "STORE: from $pkg $line: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8700         };
8701     }
8702
8703     # Insert a new token into the output stream.  It gets the same level,
8704     # nesting blocks, ci level and container environment as the token now at
8705     # $max_index_to_go, so the batch is assumed to be non-empty.
8706     # Arguments: ( $token, $type, $slevel, $no_internal_newlines ).
8707     sub insert_new_token_to_go {
8708         # borrow the current-token variables; they are put back before return
8709         save_current_token();
8710         ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8711
8712         warning("code bug: bad call to insert_new_token_to_go\n")
8713           if ( $max_index_to_go == UNDEFINED_INDEX );
8714
8715         # FIXME: it seems to be necessary to use the next, rather than
8716         # previous, value of this variable when creating a new blank (align.t)
8717         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
8718         my $i_last = $max_index_to_go;
8719         $level                 = $levels_to_go[$i_last];
8720         $nesting_blocks        = $nesting_blocks_to_go[$i_last];
8721         $ci_level              = $ci_levels_to_go[$i_last];
8722         $container_environment = $container_environment_to_go[$i_last];
8723         ( $in_continued_quote, $block_type, $type_sequence ) = ( 0, "", "" );
8724         store_token_to_go();
8725         restore_current_token();
8726         return;
8727     }
8728
8729     sub print_line_of_tokens {
8730
8731         my $line_of_tokens = shift;
8732
8733         # This routine is called once per input line to process all of
8734         # the tokens on that line.  This is the first stage of
8735         # beautification.
8736         #
8737         # Full-line comments and blank lines may be processed immediately.
8738         #
8739         # For normal lines of code, the tokens are stored one-by-one,
8740         # via calls to 'sub store_token_to_go', until a known line break
8741         # point is reached.  Then, the batch of collected tokens is
8742         # passed along to 'sub output_line_to_go' for further
8743         # processing.  This routine decides if there should be
8744         # whitespace between each pair of non-white tokens, so later
8745         # routines only need to decide on any additional line breaks.
8746         # Any whitespace is initially a single space character.  Later,
8747         # the vertical aligner may expand that to be multiple space
8748         # characters if necessary for alignment.
8749
8750         # extract input line number for error messages
8751         $input_line_number = $line_of_tokens->{_line_number};
8752
8753         $rtoken_type            = $line_of_tokens->{_rtoken_type};
8754         $rtokens                = $line_of_tokens->{_rtokens};
8755         $rlevels                = $line_of_tokens->{_rlevels};
8756         $rslevels               = $line_of_tokens->{_rslevels};
8757         $rblock_type            = $line_of_tokens->{_rblock_type};
8758         $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
8759         $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8760         $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
8761         $input_line             = $line_of_tokens->{_line_text};
8762         $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
8763         $rci_levels             = $line_of_tokens->{_rci_levels};
8764         $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
8765
8766         $in_continued_quote = $starting_in_quote =
8767           $line_of_tokens->{_starting_in_quote};
8768         $in_quote        = $line_of_tokens->{_ending_in_quote};
8769         $ending_in_quote = $in_quote;
8770         $python_indentation_level =
8771           $line_of_tokens->{_python_indentation_level};
8772
8773         my $j;
8774         my $j_next;
8775         my $jmax;
8776         my $next_nonblank_token;
8777         my $next_nonblank_token_type;
8778         my $rwhite_space_flag;
8779
8780         $jmax                    = @$rtokens - 1;
8781         $block_type              = "";
8782         $container_type          = "";
8783         $container_environment   = "";
8784         $type_sequence           = "";
8785         $no_internal_newlines    = 1 - $rOpts_add_newlines;
8786         $is_static_block_comment = 0;
8787
8788         # Handle a continued quote..
8789         if ($in_continued_quote) {
8790
8791             # A line which is entirely a quote or pattern must go out
8792             # verbatim.  Note: the \n is contained in $input_line.
8793             if ( $jmax <= 0 ) {
8794                 if ( ( $input_line =~ "\t" ) ) {
8795                     note_embedded_tab();
8796                 }
8797                 write_unindented_line("$input_line");
8798                 $last_line_had_side_comment = 0;
8799                 return;
8800             }
8801
8802             # prior to version 20010406, perltidy had a bug which placed
8803             # continuation indentation before the last line of some multiline
8804             # quotes and patterns -- exactly the lines passing this way.
8805             # To help find affected lines in scripts run with these
8806             # versions, run with '-chk', and it will warn of any quotes or
8807             # patterns which might have been modified by these early
8808             # versions.
8809             if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8810                 warning(
8811 "-chk: please check this line for extra leading whitespace\n"
8812                 );
8813             }
8814         }
8815
8816         # Write line verbatim if we are in a formatting skip section
8817         if ($in_format_skipping_section) {
8818             write_unindented_line("$input_line");
8819             $last_line_had_side_comment = 0;
8820
8821             # Note: extra space appended to comment simplifies pattern matching
8822             if (   $jmax == 0
8823                 && $$rtoken_type[0] eq '#'
8824                 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8825             {
8826                 $in_format_skipping_section = 0;
8827                 write_logfile_entry("Exiting formatting skip section\n");
8828                 $file_writer_object->reset_consecutive_blank_lines();
8829             }
8830             return;
8831         }
8832
8833         # See if we are entering a formatting skip section
8834         if (   $rOpts_format_skipping
8835             && $jmax == 0
8836             && $$rtoken_type[0] eq '#'
8837             && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8838         {
8839             flush();
8840             $in_format_skipping_section = 1;
8841             write_logfile_entry("Entering formatting skip section\n");
8842             write_unindented_line("$input_line");
8843             $last_line_had_side_comment = 0;
8844             return;
8845         }
8846
8847         # delete trailing blank tokens
8848         if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8849
8850         # Handle a blank line..
8851         if ( $jmax < 0 ) {
8852
8853             # If keep-old-blank-lines is zero, we delete all
8854             # old blank lines and let the blank line rules generate any
8855             # needed blanks.
8856             if ($rOpts_keep_old_blank_lines) {
8857                 flush();
8858                 $file_writer_object->write_blank_code_line(
8859                     $rOpts_keep_old_blank_lines == 2 );
8860                 $last_line_leading_type = 'b';
8861             }
8862             $last_line_had_side_comment = 0;
8863             return;
8864         }
8865
8866         # see if this is a static block comment (starts with ## by default)
8867         my $is_static_block_comment_without_leading_space = 0;
8868         if (   $jmax == 0
8869             && $$rtoken_type[0] eq '#'
8870             && $rOpts->{'static-block-comments'}
8871             && $input_line =~ /$static_block_comment_pattern/o )
8872         {
8873             $is_static_block_comment = 1;
8874             $is_static_block_comment_without_leading_space =
8875               substr( $input_line, 0, 1 ) eq '#';
8876         }
8877
8878         # Check for comments which are line directives
8879         # Treat exactly as static block comments without leading space
8880         # reference: perlsyn, near end, section Plain Old Comments (Not!)
8881         # example: '# line 42 "new_filename.plx"'
8882         if (
8883                $jmax == 0
8884             && $$rtoken_type[0] eq '#'
8885             && $input_line =~ /^\#   \s*
8886                                line \s+ (\d+)   \s*
8887                                (?:\s("?)([^"]+)\2)? \s*
8888                                $/x
8889           )
8890         {
8891             $is_static_block_comment                       = 1;
8892             $is_static_block_comment_without_leading_space = 1;
8893         }
8894
8895         # create a hanging side comment if appropriate
8896         if (
8897                $jmax == 0
8898             && $$rtoken_type[0] eq '#'    # only token is a comment
8899             && $last_line_had_side_comment    # last line had side comment
8900             && $input_line =~ /^\s/           # there is some leading space
8901             && !$is_static_block_comment    # do not make static comment hanging
8902             && $rOpts->{'hanging-side-comments'}    # user is allowing
8903                                                     # hanging side comments
8904                                                     # like this
8905           )
8906         {
8907
8908             # We will insert an empty qw string at the start of the token list
8909             # to force this comment to be a side comment. The vertical aligner
8910             # should then line it up with the previous side comment.
8911             unshift @$rtoken_type,            'q';
8912             unshift @$rtokens,                '';
8913             unshift @$rlevels,                $$rlevels[0];
8914             unshift @$rslevels,               $$rslevels[0];
8915             unshift @$rblock_type,            '';
8916             unshift @$rcontainer_type,        '';
8917             unshift @$rcontainer_environment, '';
8918             unshift @$rtype_sequence,         '';
8919             unshift @$rnesting_tokens,        $$rnesting_tokens[0];
8920             unshift @$rci_levels,             $$rci_levels[0];
8921             unshift @$rnesting_blocks,        $$rnesting_blocks[0];
8922             $jmax = 1;
8923         }
8924
8925         # remember if this line has a side comment
8926         $last_line_had_side_comment =
8927           ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8928
8929         # Handle a block (full-line) comment..
8930         if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8931
8932             if ( $rOpts->{'delete-block-comments'} ) { return }
8933
8934             if ( $rOpts->{'tee-block-comments'} ) {
8935                 $file_writer_object->tee_on();
8936             }
8937
8938             destroy_one_line_block();
8939             output_line_to_go();
8940
8941             # output a blank line before block comments
8942             if (
8943                 # unless we follow a blank or comment line
8944                 $last_line_leading_type !~ /^[#b]$/
8945
8946                 # only if allowed
8947                 && $rOpts->{'blanks-before-comments'}
8948
8949                 # not if this is an empty comment line
8950                 && $$rtokens[0] ne '#'
8951
8952                 # not after a short line ending in an opening token
8953                 # because we already have space above this comment.
8954                 # Note that the first comment in this if block, after
8955                 # the 'if (', does not get a blank line because of this.
8956                 && !$last_output_short_opening_token
8957
8958                 # never before static block comments
8959                 && !$is_static_block_comment
8960               )
8961             {
8962                 flush();    # switching to new output stream
8963                 $file_writer_object->write_blank_code_line();
8964                 $last_line_leading_type = 'b';
8965             }
8966
8967             # TRIM COMMENTS -- This could be turned off as a option
8968             $$rtokens[0] =~ s/\s*$//;    # trim right end
8969
8970             if (
8971                 $rOpts->{'indent-block-comments'}
8972                 && (  !$rOpts->{'indent-spaced-block-comments'}
8973                     || $input_line =~ /^\s+/ )
8974                 && !$is_static_block_comment_without_leading_space
8975               )
8976             {
8977                 extract_token(0);
8978                 store_token_to_go();
8979                 output_line_to_go();
8980             }
8981             else {
8982                 flush();    # switching to new output stream
8983                 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8984                 $last_line_leading_type = '#';
8985             }
8986             if ( $rOpts->{'tee-block-comments'} ) {
8987                 $file_writer_object->tee_off();
8988             }
8989             return;
8990         }
8991
8992         # compare input/output indentation except for continuation lines
8993         # (because they have an unknown amount of initial blank space)
8994         # and lines which are quotes (because they may have been outdented)
8995         # Note: this test is placed here because we know the continuation flag
8996         # at this point, which allows us to avoid non-meaningful checks.
8997         my $structural_indentation_level = $$rlevels[0];
8998         compare_indentation_levels( $python_indentation_level,
8999             $structural_indentation_level )
9000           unless ( $python_indentation_level < 0
9001             || ( $$rci_levels[0] > 0 )
9002             || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
9003           );
9004
9005         #   Patch needed for MakeMaker.  Do not break a statement
9006         #   in which $VERSION may be calculated.  See MakeMaker.pm;
9007         #   this is based on the coding in it.
9008         #   The first line of a file that matches this will be eval'd:
9009         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9010         #   Examples:
9011         #     *VERSION = \'1.01';
9012         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
9013         #   We will pass such a line straight through without breaking
9014         #   it unless -npvl is used
9015
9016         my $is_VERSION_statement = 0;
9017
9018         if (
9019               !$saw_VERSION_in_this_file
9020             && $input_line =~ /VERSION/    # quick check to reject most lines
9021             && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9022           )
9023         {
9024             $saw_VERSION_in_this_file = 1;
9025             $is_VERSION_statement     = 1;
9026             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
9027             $no_internal_newlines = 1;
9028         }
9029
9030         # take care of indentation-only
9031         # NOTE: In previous versions we sent all qw lines out immediately here.
9032         # No longer doing this: also write a line which is entirely a 'qw' list
9033         # to allow stacking of opening and closing tokens.  Note that interior
9034         # qw lines will still go out at the end of this routine.
9035         if ( $rOpts->{'indent-only'} ) {
9036             flush();
9037             trim($input_line);
9038
9039             extract_token(0);
9040             $token                 = $input_line;
9041             $type                  = 'q';
9042             $block_type            = "";
9043             $container_type        = "";
9044             $container_environment = "";
9045             $type_sequence         = "";
9046             store_token_to_go();
9047             output_line_to_go();
9048             return;
9049         }
9050
9051         push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
9052         push( @$rtoken_type, 'b', 'b' );
9053         ($rwhite_space_flag) =
9054           set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
9055
9056         # find input tabbing to allow checks for tabbing disagreement
9057         ## not used for now
9058         ##$input_line_tabbing = "";
9059         ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
9060
9061         # if the buffer hasn't been flushed, add a leading space if
9062         # necessary to keep essential whitespace. This is really only
9063         # necessary if we are squeezing out all ws.
9064         if ( $max_index_to_go >= 0 ) {
9065
9066             $old_line_count_in_batch++;
9067
9068             if (
9069                 is_essential_whitespace(
9070                     $last_last_nonblank_token,
9071                     $last_last_nonblank_type,
9072                     $tokens_to_go[$max_index_to_go],
9073                     $types_to_go[$max_index_to_go],
9074                     $$rtokens[0],
9075                     $$rtoken_type[0]
9076                 )
9077               )
9078             {
9079                 my $slevel = $$rslevels[0];
9080                 insert_new_token_to_go( ' ', 'b', $slevel,
9081                     $no_internal_newlines );
9082             }
9083         }
9084
9085         # If we just saw the end of an elsif block, write nag message
9086         # if we do not see another elsif or an else.
9087         if ($looking_for_else) {
9088
9089             unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
9090                 write_logfile_entry("(No else block)\n");
9091             }
9092             $looking_for_else = 0;
9093         }
9094
9095         # This is a good place to kill incomplete one-line blocks
9096         if (   ( $semicolons_before_block_self_destruct == 0 )
9097             && ( $max_index_to_go >= 0 )
9098             && ( $types_to_go[$max_index_to_go] eq ';' )
9099             && ( $$rtokens[0] ne '}' ) )
9100         {
9101             destroy_one_line_block();
9102             output_line_to_go();
9103         }
9104
9105         # loop to process the tokens one-by-one
9106         $type  = 'b';
9107         $token = "";
9108
9109         foreach $j ( 0 .. $jmax ) {
9110
9111             # pull out the local values for this token
9112             extract_token($j);
9113
9114             if ( $type eq '#' ) {
9115
9116                 # trim trailing whitespace
9117                 # (there is no option at present to prevent this)
9118                 $token =~ s/\s*$//;
9119
9120                 if (
9121                     $rOpts->{'delete-side-comments'}
9122
9123                     # delete closing side comments if necessary
9124                     || (   $rOpts->{'delete-closing-side-comments'}
9125                         && $token =~ /$closing_side_comment_prefix_pattern/o
9126                         && $last_nonblank_block_type =~
9127                         /$closing_side_comment_list_pattern/o )
9128                   )
9129                 {
9130                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9131                         unstore_token_to_go();
9132                     }
9133                     last;
9134                 }
9135             }
9136
9137             # If we are continuing after seeing a right curly brace, flush
9138             # buffer unless we see what we are looking for, as in
9139             #   } else ...
9140             if ( $rbrace_follower && $type ne 'b' ) {
9141
9142                 unless ( $rbrace_follower->{$token} ) {
9143                     output_line_to_go();
9144                 }
9145                 $rbrace_follower = undef;
9146             }
9147
9148             $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
9149             $next_nonblank_token      = $$rtokens[$j_next];
9150             $next_nonblank_token_type = $$rtoken_type[$j_next];
9151
9152             #--------------------------------------------------------
9153             # Start of section to patch token text
9154             #--------------------------------------------------------
9155
9156             # Modify certain tokens here for whitespace
9157             # The following is not yet done, but could be:
9158             #   sub (x x x)
9159             if ( $type =~ /^[wit]$/ ) {
9160
9161                 # Examples:
9162                 # change '$  var'  to '$var' etc
9163                 #        '-> new'  to '->new'
9164                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
9165                     $token =~ s/\s*//g;
9166                 }
9167
9168                 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
9169
9170                 # trim identifiers of trailing blanks which can occur
9171                 # under some unusual circumstances, such as if the
9172                 # identifier 'witch' has trailing blanks on input here:
9173                 #
9174                 # sub
9175                 # witch
9176                 # ()   # prototype may be on new line ...
9177                 # ...
9178                 if ( $type eq 'i' ) { $token =~ s/\s+$//g }
9179             }
9180
9181             # change 'LABEL   :'   to 'LABEL:'
9182             elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
9183
9184             # patch to add space to something like "x10"
9185             # This avoids having to split this token in the pre-tokenizer
9186             elsif ( $type eq 'n' ) {
9187                 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
9188             }
9189
9190             elsif ( $type eq 'Q' ) {
9191                 note_embedded_tab() if ( $token =~ "\t" );
9192
9193                 # make note of something like '$var = s/xxx/yyy/;'
9194                 # in case it should have been '$var =~ s/xxx/yyy/;'
9195                 if (
9196                        $token =~ /^(s|tr|y|m|\/)/
9197                     && $last_nonblank_token =~ /^(=|==|!=)$/
9198
9199                     # preceded by simple scalar
9200                     && $last_last_nonblank_type eq 'i'
9201                     && $last_last_nonblank_token =~ /^\$/
9202
9203                     # followed by some kind of termination
9204                     # (but give complaint if we can't see far enough ahead)
9205                     && $next_nonblank_token =~ /^[; \)\}]$/
9206
9207                     # scalar is not declared
9208                     && !(
9209                            $types_to_go[0] eq 'k'
9210                         && $tokens_to_go[0] =~ /^(my|our|local)$/
9211                     )
9212                   )
9213                 {
9214                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
9215                     complain(
9216 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
9217                     );
9218                 }
9219             }
9220
9221            # trim blanks from right of qw quotes
9222            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
9223             elsif ( $type eq 'q' ) {
9224                 $token =~ s/\s*$//;
9225                 note_embedded_tab() if ( $token =~ "\t" );
9226             }
9227
9228             #--------------------------------------------------------
9229             # End of section to patch token text
9230             #--------------------------------------------------------
9231
9232             # insert any needed whitespace
9233             if (   ( $type ne 'b' )
9234                 && ( $max_index_to_go >= 0 )
9235                 && ( $types_to_go[$max_index_to_go] ne 'b' )
9236                 && $rOpts_add_whitespace )
9237             {
9238                 my $ws = $$rwhite_space_flag[$j];
9239
9240                 if ( $ws == 1 ) {
9241                     insert_new_token_to_go( ' ', 'b', $slevel,
9242                         $no_internal_newlines );
9243                 }
9244             }
9245
9246             # Do not allow breaks which would promote a side comment to a
9247             # block comment.  In order to allow a break before an opening
9248             # or closing BLOCK, followed by a side comment, those sections
9249             # of code will handle this flag separately.
9250             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
9251             my $is_opening_BLOCK =
9252               (      $type eq '{'
9253                   && $token eq '{'
9254                   && $block_type
9255                   && $block_type ne 't' );
9256             my $is_closing_BLOCK =
9257               (      $type eq '}'
9258                   && $token eq '}'
9259                   && $block_type
9260                   && $block_type ne 't' );
9261
9262             if (   $side_comment_follows
9263                 && !$is_opening_BLOCK
9264                 && !$is_closing_BLOCK )
9265             {
9266                 $no_internal_newlines = 1;
9267             }
9268
9269             # We're only going to handle breaking for code BLOCKS at this
9270             # (top) level.  Other indentation breaks will be handled by
9271             # sub scan_list, which is better suited to dealing with them.
9272             if ($is_opening_BLOCK) {
9273
9274                 # Tentatively output this token.  This is required before
9275                 # calling starting_one_line_block.  We may have to unstore
9276                 # it, though, if we have to break before it.
9277                 store_token_to_go($side_comment_follows);
9278
9279                 # Look ahead to see if we might form a one-line block
9280                 my $too_long =
9281                   starting_one_line_block( $j, $jmax, $level, $slevel,
9282                     $ci_level, $rtokens, $rtoken_type, $rblock_type );
9283                 clear_breakpoint_undo_stack();
9284
9285                 # to simplify the logic below, set a flag to indicate if
9286                 # this opening brace is far from the keyword which introduces it
9287                 my $keyword_on_same_line = 1;
9288                 if (   ( $max_index_to_go >= 0 )
9289                     && ( $last_nonblank_type eq ')' ) )
9290                 {
9291                     if (   $block_type =~ /^(if|else|elsif)$/
9292                         && ( $tokens_to_go[0] eq '}' )
9293                         && $rOpts_cuddled_else )
9294                     {
9295                         $keyword_on_same_line = 1;
9296                     }
9297                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
9298                     {
9299                         $keyword_on_same_line = 0;
9300                     }
9301                 }
9302
9303                 # decide if user requested break before '{'
9304                 my $want_break =
9305
9306                   # use -bl flag if not a sub block of any type
9307                   $block_type !~ /^sub/
9308                   ? $rOpts->{'opening-brace-on-new-line'}
9309
9310                   # use -sbl flag for a named sub block
9311                   : $block_type !~ /^sub\W*$/
9312                   ? $rOpts->{'opening-sub-brace-on-new-line'}
9313
9314                   # use -asbl flag for an anonymous sub block
9315                   : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9316
9317                 # Break before an opening '{' ...
9318                 if (
9319
9320                     # if requested
9321                     $want_break
9322
9323                     # and we were unable to start looking for a block,
9324                     && $index_start_one_line_block == UNDEFINED_INDEX
9325
9326                     # or if it will not be on same line as its keyword, so that
9327                     # it will be outdented (eval.t, overload.t), and the user
9328                     # has not insisted on keeping it on the right
9329                     || (   !$keyword_on_same_line
9330                         && !$rOpts->{'opening-brace-always-on-right'} )
9331
9332                   )
9333                 {
9334
9335                     # but only if allowed
9336                     unless ($no_internal_newlines) {
9337
9338                         # since we already stored this token, we must unstore it
9339                         unstore_token_to_go();
9340
9341                         # then output the line
9342                         output_line_to_go();
9343
9344                         # and now store this token at the start of a new line
9345                         store_token_to_go($side_comment_follows);
9346                     }
9347                 }
9348
9349                 # Now update for side comment
9350                 if ($side_comment_follows) { $no_internal_newlines = 1 }
9351
9352                 # now output this line
9353                 unless ($no_internal_newlines) {
9354                     output_line_to_go();
9355                 }
9356             }
9357
9358             elsif ($is_closing_BLOCK) {
9359
9360                 # If there is a pending one-line block ..
9361                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9362
9363                     # we have to terminate it if..
9364                     if (
9365
9366                     # it is too long (final length may be different from
9367                     # initial estimate). note: must allow 1 space for this token
9368                         excess_line_length( $index_start_one_line_block,
9369                             $max_index_to_go ) >= 0
9370
9371                         # or if it has too many semicolons
9372                         || (   $semicolons_before_block_self_destruct == 0
9373                             && $last_nonblank_type ne ';' )
9374                       )
9375                     {
9376                         destroy_one_line_block();
9377                     }
9378                 }
9379
9380                 # put a break before this closing curly brace if appropriate
9381                 unless ( $no_internal_newlines
9382                     || $index_start_one_line_block != UNDEFINED_INDEX )
9383                 {
9384
9385                     # add missing semicolon if ...
9386                     # there are some tokens
9387                     if (
9388                         ( $max_index_to_go > 0 )
9389
9390                         # and we don't have one
9391                         && ( $last_nonblank_type ne ';' )
9392
9393                         # patch until some block type issues are fixed:
9394                         # Do not add semi-colon for block types '{',
9395                         # '}', and ';' because we cannot be sure yet
9396                         # that this is a block and not an anonomyous
9397                         # hash (blktype.t, blktype1.t)
9398                         && ( $block_type !~ /^[\{\};]$/ )
9399
9400                         # patch: and do not add semi-colons for recently
9401                         # added block types (see tmp/semicolon.t)
9402                         && ( $block_type !~
9403                             /^(switch|case|given|when|default)$/ )
9404
9405                         # it seems best not to add semicolons in these
9406                         # special block types: sort|map|grep
9407                         && ( !$is_sort_map_grep{$block_type} )
9408
9409                         # and we are allowed to do so.
9410                         && $rOpts->{'add-semicolons'}
9411                       )
9412                     {
9413
9414                         save_current_token();
9415                         $token  = ';';
9416                         $type   = ';';
9417                         $level  = $levels_to_go[$max_index_to_go];
9418                         $slevel = $nesting_depth_to_go[$max_index_to_go];
9419                         $nesting_blocks =
9420                           $nesting_blocks_to_go[$max_index_to_go];
9421                         $ci_level       = $ci_levels_to_go[$max_index_to_go];
9422                         $block_type     = "";
9423                         $container_type = "";
9424                         $container_environment = "";
9425                         $type_sequence         = "";
9426
9427                         # Note - we remove any blank AFTER extracting its
9428                         # parameters such as level, etc, above
9429                         if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9430                             unstore_token_to_go();
9431                         }
9432                         store_token_to_go();
9433
9434                         note_added_semicolon();
9435                         restore_current_token();
9436                     }
9437
9438                     # then write out everything before this closing curly brace
9439                     output_line_to_go();
9440
9441                 }
9442
9443                 # Now update for side comment
9444                 if ($side_comment_follows) { $no_internal_newlines = 1 }
9445
9446                 # store the closing curly brace
9447                 store_token_to_go();
9448
9449                 # ok, we just stored a closing curly brace.  Often, but
9450                 # not always, we want to end the line immediately.
9451                 # So now we have to check for special cases.
9452
9453                 # if this '}' successfully ends a one-line block..
9454                 my $is_one_line_block = 0;
9455                 my $keep_going        = 0;
9456                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9457
9458                     # Remember the type of token just before the
9459                     # opening brace.  It would be more general to use
9460                     # a stack, but this will work for one-line blocks.
9461                     $is_one_line_block =
9462                       $types_to_go[$index_start_one_line_block];
9463
9464                     # we have to actually make it by removing tentative
9465                     # breaks that were set within it
9466                     undo_forced_breakpoint_stack(0);
9467                     set_nobreaks( $index_start_one_line_block,
9468                         $max_index_to_go - 1 );
9469
9470                     # then re-initialize for the next one-line block
9471                     destroy_one_line_block();
9472
9473                     # then decide if we want to break after the '}' ..
9474                     # We will keep going to allow certain brace followers as in:
9475                     #   do { $ifclosed = 1; last } unless $losing;
9476                     #
9477                     # But make a line break if the curly ends a
9478                     # significant block:
9479                     if (
9480                         $is_block_without_semicolon{$block_type}
9481
9482                         # if needless semicolon follows we handle it later
9483                         && $next_nonblank_token ne ';'
9484                       )
9485                     {
9486                         output_line_to_go() unless ($no_internal_newlines);
9487                     }
9488                 }
9489
9490                 # set string indicating what we need to look for brace follower
9491                 # tokens
9492                 if ( $block_type eq 'do' ) {
9493                     $rbrace_follower = \%is_do_follower;
9494                 }
9495                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9496                     $rbrace_follower = \%is_if_brace_follower;
9497                 }
9498                 elsif ( $block_type eq 'else' ) {
9499                     $rbrace_follower = \%is_else_brace_follower;
9500                 }
9501
9502                 # added eval for borris.t
9503                 elsif ($is_sort_map_grep_eval{$block_type}
9504                     || $is_one_line_block eq 'G' )
9505                 {
9506                     $rbrace_follower = undef;
9507                     $keep_going      = 1;
9508                 }
9509
9510                 # anonymous sub
9511                 elsif ( $block_type =~ /^sub\W*$/ ) {
9512
9513                     if ($is_one_line_block) {
9514                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
9515                     }
9516                     else {
9517                         $rbrace_follower = \%is_anon_sub_brace_follower;
9518                     }
9519                 }
9520
9521                 # None of the above: specify what can follow a closing
9522                 # brace of a block which is not an
9523                 # if/elsif/else/do/sort/map/grep/eval
9524                 # Testfiles:
9525                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9526                 else {
9527                     $rbrace_follower = \%is_other_brace_follower;
9528                 }
9529
9530                 # See if an elsif block is followed by another elsif or else;
9531                 # complain if not.
9532                 if ( $block_type eq 'elsif' ) {
9533
9534                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
9535                         $looking_for_else = 1;    # ok, check on next line
9536                     }
9537                     else {
9538
9539                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9540                             write_logfile_entry("No else block :(\n");
9541                         }
9542                     }
9543                 }
9544
9545                 # keep going after certain block types (map,sort,grep,eval)
9546                 # added eval for borris.t
9547                 if ($keep_going) {
9548
9549                     # keep going
9550                 }
9551
9552                 # if no more tokens, postpone decision until re-entring
9553                 elsif ( ( $next_nonblank_token_type eq 'b' )
9554                     && $rOpts_add_newlines )
9555                 {
9556                     unless ($rbrace_follower) {
9557                         output_line_to_go() unless ($no_internal_newlines);
9558                     }
9559                 }
9560
9561                 elsif ($rbrace_follower) {
9562
9563                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
9564                         output_line_to_go() unless ($no_internal_newlines);
9565                     }
9566                     $rbrace_follower = undef;
9567                 }
9568
9569                 else {
9570                     output_line_to_go() unless ($no_internal_newlines);
9571                 }
9572
9573             }    # end treatment of closing block token
9574
9575             # handle semicolon
9576             elsif ( $type eq ';' ) {
9577
9578                 # kill one-line blocks with too many semicolons
9579                 $semicolons_before_block_self_destruct--;
9580                 if (
9581                     ( $semicolons_before_block_self_destruct < 0 )
9582                     || (   $semicolons_before_block_self_destruct == 0
9583                         && $next_nonblank_token_type !~ /^[b\}]$/ )
9584                   )
9585                 {
9586                     destroy_one_line_block();
9587                 }
9588
9589                 # Remove unnecessary semicolons, but not after bare
9590                 # blocks, where it could be unsafe if the brace is
9591                 # mistokenized.
9592                 if (
9593                     (
9594                         $last_nonblank_token eq '}'
9595                         && (
9596                             $is_block_without_semicolon{
9597                                 $last_nonblank_block_type}
9598                             || $last_nonblank_block_type =~ /^sub\s+\w/
9599                             || $last_nonblank_block_type =~ /^\w+:$/ )
9600                     )
9601                     || $last_nonblank_type eq ';'
9602                   )
9603                 {
9604
9605                     if (
9606                         $rOpts->{'delete-semicolons'}
9607
9608                         # don't delete ; before a # because it would promote it
9609                         # to a block comment
9610                         && ( $next_nonblank_token_type ne '#' )
9611                       )
9612                     {
9613                         note_deleted_semicolon();
9614                         output_line_to_go()
9615                           unless ( $no_internal_newlines
9616                             || $index_start_one_line_block != UNDEFINED_INDEX );
9617                         next;
9618                     }
9619                     else {
9620                         write_logfile_entry("Extra ';'\n");
9621                     }
9622                 }
9623                 store_token_to_go();
9624
9625                 output_line_to_go()
9626                   unless ( $no_internal_newlines
9627                     || ( $rOpts_keep_interior_semicolons && $j < $jmax )
9628                     || ( $next_nonblank_token eq '}' ) );
9629
9630             }
9631
9632             # handle here_doc target string
9633             elsif ( $type eq 'h' ) {
9634                 $no_internal_newlines =
9635                   1;    # no newlines after seeing here-target
9636                 destroy_one_line_block();
9637                 store_token_to_go();
9638             }
9639
9640             # handle all other token types
9641             else {
9642
9643                 # if this is a blank...
9644                 if ( $type eq 'b' ) {
9645
9646                     # make it just one character
9647                     $token = ' ' if $rOpts_add_whitespace;
9648
9649                     # delete it if unwanted by whitespace rules
9650                     # or we are deleting all whitespace
9651                     my $ws = $$rwhite_space_flag[ $j + 1 ];
9652                     if ( ( defined($ws) && $ws == -1 )
9653                         || $rOpts_delete_old_whitespace )
9654                     {
9655
9656                         # unless it might make a syntax error
9657                         next
9658                           unless is_essential_whitespace(
9659                             $last_last_nonblank_token,
9660                             $last_last_nonblank_type,
9661                             $tokens_to_go[$max_index_to_go],
9662                             $types_to_go[$max_index_to_go],
9663                             $$rtokens[ $j + 1 ],
9664                             $$rtoken_type[ $j + 1 ]
9665                           );
9666                     }
9667                 }
9668                 store_token_to_go();
9669             }
9670
9671             # remember two previous nonblank OUTPUT tokens
9672             if ( $type ne '#' && $type ne 'b' ) {
9673                 $last_last_nonblank_token = $last_nonblank_token;
9674                 $last_last_nonblank_type  = $last_nonblank_type;
9675                 $last_nonblank_token      = $token;
9676                 $last_nonblank_type       = $type;
9677                 $last_nonblank_block_type = $block_type;
9678             }
9679
9680             # unset the continued-quote flag since it only applies to the
9681             # first token, and we want to resume normal formatting if
9682             # there are additional tokens on the line
9683             $in_continued_quote = 0;
9684
9685         }    # end of loop over all tokens in this 'line_of_tokens'
9686
9687         # we have to flush ..
9688         if (
9689
9690             # if there is a side comment
9691             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9692
9693             # if this line ends in a quote
9694             # NOTE: This is critically important for insuring that quoted lines
9695             # do not get processed by things like -sot and -sct
9696             || $in_quote
9697
9698             # if this is a VERSION statement
9699             || $is_VERSION_statement
9700
9701             # to keep a label on one line if that is how it is now
9702             || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9703
9704             # if we are instructed to keep all old line breaks
9705             || !$rOpts->{'delete-old-newlines'}
9706           )
9707         {
9708             destroy_one_line_block();
9709             output_line_to_go();
9710         }
9711
9712         # mark old line breakpoints in current output stream
9713         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9714             $old_breakpoint_to_go[$max_index_to_go] = 1;
9715         }
9716     }    # end sub print_line_of_tokens
9717 }    # end print_line_of_tokens
9718
9719 # sub output_line_to_go sends one logical line of tokens (the batch held in
9720 # the "to_go" arrays, indexes 0 .. $max_index_to_go) on down the pipeline to
9721 # the VerticalAligner package, breaking it into continuation lines as needed.
9722 # It takes no arguments.  If a one-line block is pending it just sets a break.
9723 sub output_line_to_go {
9724
9725     # debug stuff: this routine is called from many points, so log the caller
9726     FORMATTER_DEBUG_FLAG_OUTPUT && do {
9727         my ( $a, $b, $c ) = caller;
9728         write_diagnostics(
9729 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9730         );
9731         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9732         write_diagnostics("$output_str\n");
9733     };
9734
9735     # if we might be in a one-line block, just set a tentative breakpoint and quit
9736     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9737         set_forced_breakpoint($max_index_to_go);
9738         return;
9739     }
9740
9741     my $cscw_block_comment;
9742     $cscw_block_comment = add_closing_side_comment()
9743       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9744
9745     match_opening_and_closing_tokens();
9746
9747     # tell the -lp option we are outputting a batch so it can close
9748     # any unfinished items in its stack
9749     finish_lp_batch();
9750
9751     # If this line ends in a code block brace, set breaks at any
9752     # previous closing code block braces to breakup a chain of code
9753     # blocks on one line.  This is very rare but can happen for
9754     # user-defined subs.  For example we might be looking at this:
9755     #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9756     my $saw_good_break = 0;    # flag to force breaks even if short line
9757     if (
9758
9759         # looking for opening or closing block brace as the last token
9760         $block_type_to_go[$max_index_to_go]
9761
9762         # but not one of these which are never duplicated on a line:
9763         # until|while|for|if|elsif|else
9764         && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9765       )
9766     {
9767         my $lev = $nesting_depth_to_go[$max_index_to_go];
9768
9769         # Walk backwards from the end and set a break at any closing block
9770         # brace at the same level; quit if we are not in a chain of blocks.
9771         # NOTE(review): $lev is a nesting depth but is compared to levels.
9772         for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9773             last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
9774             next if ( $levels_to_go[$i] > $lev );    # skip past higher level
9775
9776             if ( $block_type_to_go[$i] ) {
9777                 if ( $tokens_to_go[$i] eq '}' ) {
9778                     set_forced_breakpoint($i);
9779                     $saw_good_break = 1;
9780                 }
9781             }
9782
9783             # quit if we see anything besides words, function, blanks
9784             # (and parens) at this level
9785             elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
9786         }
9787     }
9788
9789     my $imin = 0;
9790     my $imax = $max_index_to_go;
9791
9792     # trim one leading and one trailing blank token, if present
9793     if ( $max_index_to_go >= 0 ) {
9794         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9795         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9796     }
9797
9798     # anything left to write?
9799     if ( $imin <= $imax ) {
9800
9801         # add a blank line before certain key types but not after a comment
9802         ##if ( $last_line_leading_type !~ /^[#b]/ ) {
9803         if ( $last_line_leading_type !~ /^[#]/ ) {
9804             my $want_blank    = 0;
9805             my $leading_token = $tokens_to_go[$imin];
9806             my $leading_type  = $types_to_go[$imin];
9807
9808             # blank lines before subs except declarations and one-liners
9809             # MCONVERSION LOCATION - for sub tokenization change
9810             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9811                 $want_blank = $rOpts->{'blank-lines-before-subs'}
9812                   if (
9813                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9814                         $imax ) !~ /^[\;\}]$/
9815                   );
9816             }
9817
9818             # break before all package declarations
9819             # MCONVERSION LOCATION - for tokenization change
9820             elsif ($leading_token =~ /^(package\s)/
9821                 && $leading_type eq 'i' )
9822             {
9823                 $want_blank = $rOpts->{'blank-lines-before-packages'};
9824             }
9825
9826             # break before certain key blocks (BEGIN, END) except one-liners
9827             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9828                 $want_blank = $rOpts->{'blank-lines-before-subs'}
9829                   if (
9830                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9831                         $imax ) ne '}'
9832                   );
9833             }
9834
9835             # Break before certain block types if we haven't had a
9836             # break at this level for a while.  This is the
9837             # difficult decision..
9838             elsif ($leading_type eq 'k'
9839                 && $last_line_leading_type ne 'b'
9840                 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
9841             {
9842                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9843                 if ( !defined($lc) ) { $lc = 0 }
9844
9845                 $want_blank =
9846                      $rOpts->{'blanks-before-blocks'}
9847                   && $lc >= $rOpts->{'long-block-line-count'}
9848                   && $file_writer_object->get_consecutive_nonblank_lines() >=
9849                   $rOpts->{'long-block-line-count'}
9850                   && (
9851                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9852                         $imax ) ne '}'
9853                   );
9854             }
9855
9856             if ($want_blank) {
9857
9858                 # future: send blank line down normal path to VerticalAligner
9859                 Perl::Tidy::VerticalAligner::flush();
9860                 $file_writer_object->require_blank_code_lines($want_blank);
9861             }
9862         }
9863
9864         # update blank line variables and count number of consecutive
9865         # non-blank, non-comment lines at this level
9866         $last_last_line_leading_level = $last_line_leading_level;
9867         $last_line_leading_level      = $levels_to_go[$imin];
9868         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9869         $last_line_leading_type = $types_to_go[$imin];
9870         if (   $last_line_leading_level == $last_last_line_leading_level
9871             && $last_line_leading_type ne 'b'
9872             && $last_line_leading_type ne '#'
9873             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9874         {
9875             $nonblank_lines_at_depth[$last_line_leading_level]++;
9876         }
9877         else {
9878             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9879         }
9880
9881         FORMATTER_DEBUG_FLAG_FLUSH && do {
9882             my ( $package, $file, $line ) = caller;
9883             print
9884 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9885         };
9886
9887         # add a couple of extra terminal blank tokens
9888         pad_array_to_go();
9889
9890         # check the length, then set all forced breakpoints for good list formatting
9891         my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9892
9893         if (
9894             $max_index_to_go > 0
9895             && (
9896                    $is_long_line
9897                 || $old_line_count_in_batch > 1
9898                 || is_unbalanced_batch()
9899                 || (
9900                     $comma_count_in_batch
9901                     && (   $rOpts_maximum_fields_per_table > 0
9902                         || $rOpts_comma_arrow_breakpoints == 0 )
9903                 )
9904             )
9905           )
9906         {
9907             $saw_good_break ||= scan_list();
9908         }
9909
9910         # let $ri_first and $ri_last be references to lists of the indexes of
9911         # the first and last tokens of line fragments to output..
9912         my ( $ri_first, $ri_last );
9913
9914         # write a single line if..
9915         if (
9916
9917             # we aren't allowed to add any newlines
9918             !$rOpts_add_newlines
9919
9920             # or, we don't already have an interior breakpoint
9921             # and we didn't see a good breakpoint
9922             || (
9923                    !$forced_breakpoint_count
9924                 && !$saw_good_break
9925
9926                 # and this line is 'short'
9927                 && !$is_long_line
9928             )
9929           )
9930         {
9931             @$ri_first = ($imin);
9932             @$ri_last  = ($imax);
9933         }
9934
9935         # otherwise use multiple lines
9936         else {
9937
9938             ( $ri_first, $ri_last, my $colon_count ) =
9939               set_continuation_breaks($saw_good_break);
9940
9941             break_all_chain_tokens( $ri_first, $ri_last );
9942
9943             break_equals( $ri_first, $ri_last );
9944
9945             # now we do a correction step to clean this up a bit
9946             # (The only time we would not do this is for debugging)
9947             if ( $rOpts->{'recombine'} ) {
9948                 ( $ri_first, $ri_last ) =
9949                   recombine_breakpoints( $ri_first, $ri_last );
9950             }
9951
9952             insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9953         }
9954
9955         # do corrector step if -lp option is used
9956         my $do_not_pad = 0;
9957         if ($rOpts_line_up_parentheses) {
9958             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9959         }
9960         send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9961     }
9962     prepare_for_new_input_lines();
9963
9964     # output any new -cscw block comment made by add_closing_side_comment
9965     if ($cscw_block_comment) {
9966         flush();
9967         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9968     }
9969 }
9970
9971 sub note_added_semicolon {
9972     my $here = $input_line_number;    # input line receiving the new ';'
9973     $first_added_semicolon_at = $here
9974       if ( $added_semicolon_count == 0 );    # only for the very first one
9975     $last_added_semicolon_at = $here;
9976     ++$added_semicolon_count;
9977     write_logfile_entry("Added ';' here\n");
9978 }
9979
9980 sub note_deleted_semicolon {
9981     my $here = $input_line_number;    # input line losing its ';'
9982     $first_deleted_semicolon_at = $here
9983       if ( $deleted_semicolon_count == 0 );    # only for the very first one
9984     $last_deleted_semicolon_at = $here;
9985     ++$deleted_semicolon_count;
9986     write_logfile_entry("Deleted unnecessary ';'\n");
9987 }
9988
9989 sub note_embedded_tab {
9990     ++$embedded_tab_count;
9991     $last_embedded_tab_at = $input_line_number;
9992
9993     # keep the first sighting; it is only filled in while still unset
9994     $first_embedded_tab_at ||= $last_embedded_tab_at;
9995
9996     # the log file is only nagged a limited number of times
9997     write_logfile_entry("Embedded tabs in quote or pattern\n")
9998       if ( $embedded_tab_count <= MAX_NAG_MESSAGES );
9999 }
10000
10001 sub starting_one_line_block {
10002
10003     # after seeing an opening curly brace, look for the closing brace
10004     # and see if the entire block will fit on a line.  This routine is
10005     # not always right because it uses the old whitespace, so a check
10006     # is made later (at the closing brace) to make sure we really
10007     # have a one-line block.  We have to do this preliminary check,
10008     # though, because otherwise we would always break at a semicolon
10009     # within a one-line block if the block contains multiple statements.
10010
10011     my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
10012         $rblock_type )
10013       = @_;
10014
10015     # kill any current block - we can only go 1 deep
10016     destroy_one_line_block();
10017
10018     # return value:
10019     #  1=distance from start of block to opening brace exceeds line length
10020     #  0=otherwise
10021
10022     my $i_start = 0;
10023
10024     # shouldn't happen: there must have been a prior call to
10025     # store_token_to_go to put the opening brace in the output stream
10026     if ( $max_index_to_go < 0 ) {
10027         warning("program bug: store_token_to_go called incorrectly\n");
10028         report_definite_bug();
10029     }
10030     else {
10031
10032         # cannot use one-line blocks with cuddled else else/elsif lines
10033         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
10034             return 0;
10035         }
10036     }
10037
10038     my $block_type = $$rblock_type[$j];
10039
10040     # find the starting keyword for this block (such as 'if', 'else', ...)
10041
10042     if ( $block_type =~ /^[\{\}\;\:]$/ ) {
10043         $i_start = $max_index_to_go;
10044     }
10045
10046     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
10047
10048         # For something like "if (xxx) {", the keyword "if" will be
10049         # just after the most recent break. This will be 0 unless
10050         # we have just killed a one-line block and are starting another.
10051         # (doif.t)
10052         $i_start = $index_max_forced_break + 1;
10053         if ( $types_to_go[$i_start] eq 'b' ) {
10054             $i_start++;
10055         }
10056
10057         unless ( $tokens_to_go[$i_start] eq $block_type ) {
10058             return 0;
10059         }
10060     }
10061
10062     # the previous nonblank token should start these block types
10063     elsif (
10064         ( $last_last_nonblank_token_to_go eq $block_type )
10065         || (   $block_type =~ /^sub/
10066             && $last_last_nonblank_token_to_go =~ /^sub/ )
10067       )
10068     {
10069         $i_start = $last_last_nonblank_index_to_go;
10070     }
10071
10072     # patch for SWITCH/CASE to retain one-line case/when blocks
10073     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
10074         $i_start = $index_max_forced_break + 1;
10075         if ( $types_to_go[$i_start] eq 'b' ) {
10076             $i_start++;
10077         }
10078         unless ( $tokens_to_go[$i_start] eq $block_type ) {
10079             return 0;
10080         }
10081     }
10082
10083     else {
10084         return 1;
10085     }
10086
10087     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
10088
10089     my $i;
10090
10091     # see if length is too long to even start
10092     if ( $pos > $rOpts_maximum_line_length ) {
10093         return 1;
10094     }
10095
10096     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
10097
10098         # old whitespace could be arbitrarily large, so don't use it
10099         if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
10100         else                              { $pos += length( $$rtokens[$i] ) }
10101
10102         # Return false result if we exceed the maximum line length,
10103         if ( $pos > $rOpts_maximum_line_length ) {
10104             return 0;
10105         }
10106
10107         # or encounter another opening brace before finding the closing brace.
10108         elsif ($$rtokens[$i] eq '{'
10109             && $$rtoken_type[$i] eq '{'
10110             && $$rblock_type[$i] )
10111         {
10112             return 0;
10113         }
10114
10115         # if we find our closing brace..
10116         elsif ($$rtokens[$i] eq '}'
10117             && $$rtoken_type[$i] eq '}'
10118             && $$rblock_type[$i] )
10119         {
10120
10121             # be sure any trailing comment also fits on the line
10122             my $i_nonblank =
10123               ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
10124
10125             # Patch for one-line sort/map/grep/eval blocks with side comments:
10126             # We will ignore the side comment length for sort/map/grep/eval
10127             # because this can lead to statements which change every time
10128             # perltidy is run.  Here is an example from Denis Moskowitz which
10129             # oscillates between these two states without this patch:
10130
10131 ## --------
10132 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10133 ##  @baz;
10134 ##
10135 ## grep {
10136 ##     $_->foo ne 'bar'
10137 ##   }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10138 ##   @baz;
10139 ## --------
10140
10141             # When the first line is input it gets broken apart by the main
10142             # line break logic in sub print_line_of_tokens.
10143             # When the second line is input it gets recombined by
10144             # print_line_of_tokens and passed to the output routines.  The
10145             # output routines (set_continuation_breaks) do not break it apart
10146             # because the bond strengths are set to the highest possible value
10147             # for grep/map/eval/sort blocks, so the first version gets output.
10148             # It would be possible to fix this by changing bond strengths,
10149             # but they are high to prevent errors in older versions of perl.
10150
10151             if ( $$rtoken_type[$i_nonblank] eq '#'
10152                 && !$is_sort_map_grep{$block_type} )
10153             {
10154
10155                 ## POSSIBLE FUTURE PATCH FOR IGNORING SIDE COMMENT LENGTHS
10156                 ## WHEN CHECKING FOR ONE-LINE BLOCKS:
10157                 ##  if (flag set) then (just add 1 to pos)
10158                 $pos += length( $$rtokens[$i_nonblank] );
10159
10160                 if ( $i_nonblank > $i + 1 ) {
10161
10162                     # source whitespace could be anything, assume
10163                     # at least one space before the hash on output
10164                     if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
10165                     else { $pos += length( $$rtokens[ $i + 1 ] ) }
10166                 }
10167
10168                 if ( $pos >= $rOpts_maximum_line_length ) {
10169                     return 0;
10170                 }
10171             }
10172
10173             # ok, it's a one-line block
10174             create_one_line_block( $i_start, 20 );
10175             return 0;
10176         }
10177
10178         # just keep going for other characters
10179         else {
10180         }
10181     }
10182
10183     # Allow certain types of new one-line blocks to form by joining
10184     # input lines.  These can be safely done, but for other block types,
10185     # we keep old one-line blocks but do not form new ones. It is not
10186     # always a good idea to make as many one-line blocks as possible,
10187     # so other types are not done.  The user can always use -mangle.
10188     if ( $is_sort_map_grep_eval{$block_type} ) {
10189         create_one_line_block( $i_start, 1 );
10190     }
10191
10192     return 0;
10193 }
10194
10195 sub unstore_token_to_go {
10196
10197     # Remove the most recent token from the output stream by stepping
10198     # the top index down by one.  An index which is not greater than
10199     # zero is reset to UNDEFINED_INDEX instead of being decremented.
10200     if ( $max_index_to_go <= 0 ) {
10201         $max_index_to_go = UNDEFINED_INDEX;
10202     }
10203     else { $max_index_to_go-- }
10204
10205 }
10206
10207 sub want_blank_line {    # takes no arguments
10208     flush();    # first call flush()
10209     $file_writer_object->want_blank_line();    # then forward the request
10210 }
10211
10212 sub write_unindented_line {    # $_[0] = the line to be written
10213     flush();    # first call flush()
10214     $file_writer_object->write_line( $_[0] );    # passed on as received
10215 }
10216
10217 sub undo_ci {
10218
10219     # Undo continuation indentation in certain sequences
10220     # For example, we can undo continuation indentation in sort/map/grep chains
10221     #    my $dat1 = pack( "n*",
10222     #        map { $_, $lookup->{$_} }
10223     #          sort { $a <=> $b }
10224     #          grep { $lookup->{$_} ne $default } keys %$lookup );
10225     # To align the map/sort/grep keywords like this:
10226     #    my $dat1 = pack( "n*",
10227     #        map { $_, $lookup->{$_} }
10228     #        sort { $a <=> $b }
10229     #        grep { $lookup->{$_} ne $default } keys %$lookup );
10230     # $ri_first, $ri_last = refs to arrays giving, for each line of this
10231     # batch, the index of its first and last token in the '_to_go' arrays.
10232     # Only @ci_levels_to_go and @leading_spaces_to_go are modified.
10233     my ( $ri_first, $ri_last ) = @_;
10234     my ( $line_1, $line_2, $lev_last );
10235     my $this_line_is_semicolon_terminated;
10236     my $max_line = @$ri_first - 1;
10237
10238     # looking at each line of this batch..
10239     # We are looking at leading tokens and looking for a sequence
10240     # all at the same level and higher level than enclosing lines.
10241     foreach my $line ( 0 .. $max_line ) {
10242
10243         my $ibeg = $$ri_first[$line];
10244         my $lev  = $levels_to_go[$ibeg];
10245         if ( $line > 0 ) {
10246
10247             # if we have started a chain..
10248             if ($line_1) {
10249
10250                 # see if it continues..
10251                 if ( $lev == $lev_last ) {
10252                     if (   $types_to_go[$ibeg] eq 'k'
10253                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10254                     {
10255
10256                         # chain continues...
10257                         # check for chain ending at end of a statement
10258                         if ( $line == $max_line ) {
10259
10260                             # see if this line ends a statement
10261                             my $iend = $$ri_last[$line];
10262                             $this_line_is_semicolon_terminated =
10263                               $types_to_go[$iend] eq ';'
10264
10265                               # with possible side comment
10266                               || ( $types_to_go[$iend] eq '#'
10267                                 && $iend - $ibeg >= 2
10268                                 && $types_to_go[ $iend - 2 ] eq ';'
10269                                 && $types_to_go[ $iend - 1 ] eq 'b' );
10270                         }
10271                         $line_2 = $line if ($this_line_is_semicolon_terminated);
10272                     }
10273                     else {
10274                         $line_1 = undef;    # kill chain
10275                     }
10276                 }
10277                 elsif ( $lev < $lev_last ) {
10278                     $line_2 = $line - 1;    # chain ends with previous line
10279                 }
10280                 elsif ( $lev > $lev_last ) {
10281                     $line_1 = undef;        # kill chain
10282                 }
10283
10284                 # undo the continuation indentation if a chain ends
10285                 if ( defined($line_2) && defined($line_1) ) {
10286
10287                     # A stale $line_2 left from an earlier chain may be less
10288                     # than $line_1; size the zero list from the slice itself
10289                     # so that the repeat count can never be negative.
10290                     my @ibeg_list = @$ri_first[ $line_1 .. $line_2 ];
10291                     @ci_levels_to_go[@ibeg_list] = (0) x @ibeg_list;
10292                     @leading_spaces_to_go[@ibeg_list] =
10293                       @reduced_spaces_to_go[@ibeg_list];
10294                     $line_1 = undef;
10295                 }
10296             }
10297
10298             # not in a chain yet..
10299             else {
10300
10301                 # look for start of a new sort/map/grep chain
10302                 if ( $lev > $lev_last ) {
10303                     if (   $types_to_go[$ibeg] eq 'k'
10304                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10305                     {
10306                         $line_1 = $line;
10307                     }
10308                 }
10309             }
10310         }
10311         $lev_last = $lev;
10312     }
10313 }
10314
10315 sub undo_lp_ci {
10316
10317     # If there is a single, long parameter within parens, like this:
10318     #
10319     #  $self->command( "/msg "
10320     #        . $infoline->chan
10321     #        . " You said $1, but did you know that it's square was "
10322     #        . $1 * $1 . " ?" );
10323     #
10324     # we can remove the continuation indentation of the 2nd and higher lines
10325     # to achieve this effect, which is more pleasing:
10326     #
10327     #  $self->command("/msg "
10328     #                 . $infoline->chan
10329     #                 . " You said $1, but did you know that it's square was "
10330     #                 . $1 * $1 . " ?");
10331     #
10332     # $line_open     = batch line number on which the container opens
10333     # $i_start       = token index supplying the reference level and ci level
10334     # $closing_index = token index at which the container closes
10335     # $ri_first, $ri_last = refs to per-line first/last token index arrays
10336     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
10337     my $max_line = @$ri_first - 1;
10338     return unless $max_line > $line_open;    # must be multiple lines
10339     my $lev_start     = $levels_to_go[$i_start];
10340     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
10341
10342     # see if every later line of the container has continuation indentation
10343     my $line_1 = 1 + $line_open;
10344     my $n;
10345     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
10346         my $ibeg = $$ri_first[$n];
10347         my $iend = $$ri_last[$n];
10348         if ( $ibeg eq $closing_index ) { $n--; last }
10349         return if ( $lev_start != $levels_to_go[$ibeg] );
10350         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10351         last   if ( $closing_index <= $iend );
10352     }
10353     $n = $max_line if ( $n > $max_line );    # never index past the last line
10354
10355     # we can reduce the indentation of all continuation lines
10356     my @ibeg_list = @$ri_first[ $line_1 .. $n ];
10357     @ci_levels_to_go[@ibeg_list] = (0) x @ibeg_list;
10358     @leading_spaces_to_go[@ibeg_list] =
10359       @reduced_spaces_to_go[@ibeg_list];
10360 }
10361
10362 sub set_logical_padding {
10363
10364     # Look at a batch of lines and see if extra padding can improve the
10365     # alignment when there are certain leading operators. In this example
10366     # some extra space precedes '( $Year' to line it up with later lines:
10367     #
10368     #       if (   ( $Year < 1601 )
10369     #           || ( $Year > 2899 )
10370     #           || ( $EndYear < 1601 )
10371     #           || ( $EndYear > 2899 ) )
10372     #       {
10373     #           &Error_OutOfRange;
10374     #       }
10375     # $ri_first, $ri_last = refs to arrays of first/last token index per line.
10376     # Padding is made by editing @tokens_to_go in place; nothing is returned.
10377     my ( $ri_first, $ri_last ) = @_;
10378     my $max_line = @$ri_first - 1;
10379
10380     my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
10381         $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10382
10383     # looking at each line of this batch..
10384     foreach $line ( 0 .. $max_line - 1 ) {
10385
10386         # see if the next line begins with a logical operator
10387         $ibeg      = $$ri_first[$line];
10388         $iend      = $$ri_last[$line];
10389         $ibeg_next = $$ri_first[ $line + 1 ];
10390         $tok_next  = $tokens_to_go[$ibeg_next];
10391         $type_next = $types_to_go[$ibeg_next];
10392
10393         $has_leading_op_next = ( $tok_next =~ /^\w/ )
10394           ? $is_chain_operator{$tok_next}      # and, or
10395           : $is_chain_operator{$type_next};    # + - * / : ? && ||
10396
10397         next unless ($has_leading_op_next);
10398
10399         # next line must not be at lesser depth
10400         next
10401           if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
10402
10403         # identify the token in this line to be padded on the left
10404         $ipad = undef;
10405
10406         # handle lines at same depth...
10407         if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
10408
10409             # if this is not first line of the batch ...
10410             if ( $line > 0 ) {
10411
10412                 # and we have leading operator..
10413                 next if $has_leading_op;
10414
10415                 # Introduce padding if..
10416                 # 1. the previous line is at lesser depth, or
10417                 # 2. the previous line ends in an assignment
10418                 # 3. the previous line ends in a 'return'
10419                 # 4. the previous line ends in a comma
10420                 # Example 1: previous line at lesser depth
10421                 #       if (   ( $Year < 1601 )      # <- we are here but
10422                 #           || ( $Year > 2899 )      #  list has not yet
10423                 #           || ( $EndYear < 1601 )   # collapsed vertically
10424                 #           || ( $EndYear > 2899 ) )
10425                 #       {
10426                 #
10427                 # Example 2: previous line ending in assignment:
10428                 #    $leapyear =
10429                 #        $year % 4   ? 0     # <- We are here
10430                 #      : $year % 100 ? 1
10431                 #      : $year % 400 ? 0
10432                 #      : 1;
10433                 #
10434                 # Example 3: previous line ending in comma:
10435                 #    push @expr,
10436                 #        /test/   ? undef
10437                 #      : eval($_) ? 1
10438                 #      : eval($_) ? 1
10439                 #      :            0;
10440
10441                 # be sure levels agree (do not indent after an indented 'if')
10442                 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10443
10444                 # allow padding on first line after a comma but only if:
10445                 # (1) this is line 2 and
10446                 # (2) there are more than three lines and
10447                 # (3) lines 3 and 4 have the same leading operator
10448                 # These rules try to prevent padding within a long
10449                 # comma-separated list.
10450                 my $ok_comma;
10451                 if (   $types_to_go[$iendm] eq ','
10452                     && $line == 1
10453                     && $max_line > 2 )
10454                 {
10455                     my $ibeg_next_next = $$ri_first[ $line + 2 ];
10456                     my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
10457                     $ok_comma = $tok_next_next eq $tok_next;
10458                 }
10459
10460                 next
10461                   unless (
10462                        $is_assignment{ $types_to_go[$iendm] }
10463                     || $ok_comma
10464                     || ( $nesting_depth_to_go[$ibegm] <
10465                         $nesting_depth_to_go[$ibeg] )
10466                     || (   $types_to_go[$iendm] eq 'k'
10467                         && $tokens_to_go[$iendm] eq 'return' )
10468                   );
10469
10470                 # we will add padding before the first token
10471                 $ipad = $ibeg;
10472             }
10473
10474             # for first line of the batch..
10475             else {
10476
10477                 # WARNING: Never indent if first line is starting in a
10478                 # continued quote, which would change the quote.
10479                 next if $starting_in_quote;
10480
10481                 # if this is text after closing '}'
10482                 # then look for an interior token to pad
10483                 if ( $types_to_go[$ibeg] eq '}' ) {
10484
10485                 }
10486
10487                 # otherwise, we might pad if it looks really good
10488                 else {
10489
10490                     # we might pad token $ibeg, so be sure that it
10491                     # is at the same depth as the next line.
10492                     next
10493                       if ( $nesting_depth_to_go[$ibeg] !=
10494                         $nesting_depth_to_go[$ibeg_next] );
10495
10496                     # We can pad on line 1 of a statement if at least 3
10497                     # lines will be aligned. Otherwise, it
10498                     # can look very confusing.
10499
10500                  # We have to be careful not to pad if there are too few
10501                  # lines.  The current rule is:
10502                  # (1) in general we require at least 3 consecutive lines
10503                  # with the same leading chain operator token,
10504                  # (2) but an exception is that we only require two lines
10505                  # with leading colons if there are no more lines.  For example,
10506                  # the first $i in the following snippet would get padding
10507                  # by the second rule:
10508                  #
10509                  #   $i == 1 ? ( "First", "Color" )
10510                  # : $i == 2 ? ( "Then",  "Rarity" )
10511                  # :           ( "Then",  "Name" );
10512
10513                     if ( $max_line > 1 ) {
10514                         my $leading_token = $tokens_to_go[$ibeg_next];
10515                         my $tokens_differ;
10516
10517                         # never indent line 1 of a '.' series because
10518                         # previous line is most likely at same level.
10519                         # TODO: we should also look at the leading_spaces
10520                         # of the last output line and skip if it is same
10521                         # as this line.
10522                         next if ( $leading_token eq '.' );
10523
10524                         my $count = 1;
10525                         foreach my $l ( 2 .. 3 ) {
10526                             last if ( $line + $l > $max_line );
10527                             my $ibeg_next_next = $$ri_first[ $line + $l ];
10528                             if ( $tokens_to_go[$ibeg_next_next] ne
10529                                 $leading_token )
10530                             {
10531                                 $tokens_differ = 1;
10532                                 last;
10533                             }
10534                             $count++;
10535                         }
10536                         next if ($tokens_differ);
10537                         next if ( $count < 3 && $leading_token ne ':' );
10538                         $ipad = $ibeg;
10539                     }
10540                     else {
10541                         next;
10542                     }
10543                 }
10544             }
10545         }
10546
10547         # find interior token to pad if necessary
10548         if ( !defined($ipad) ) {
10549
10550             for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
10551
10552                 # find any unclosed container
10553                 next
10554                   unless ( $type_sequence_to_go[$i]
10555                     && $mate_index_to_go[$i] > $iend );
10556
10557                 # find next nonblank token to pad
10558                 $ipad = $i + 1;
10559                 if ( $types_to_go[$ipad] eq 'b' ) {
10560                     $ipad++;
10561                     last if ( $ipad > $iend );
10562                 }
10563             }
10564             last unless $ipad;
10565         }
10566
10567         # We cannot pad a leading token at the lowest level because
10568         # it could cause a bug in which the starting indentation
10569         # level is guessed incorrectly each time the code is run
10570         # through perltidy, thus causing the code to march off to
10571         # the right.  For example, the following snippet would have
10572         # this problem:
10573
10574 ##     ov_method mycan( $package, '(""' ),       $package
10575 ##  or ov_method mycan( $package, '(0+' ),       $package
10576 ##  or ov_method mycan( $package, '(bool' ),     $package
10577 ##  or ov_method mycan( $package, '(nomethod' ), $package;
10578
10579         # If this snippet is within a block this won't happen
10580         # unless the user just processes the snippet alone within
10581         # an editor.  In that case either the user will see and
10582         # fix the problem or it will be corrected next time the
10583         # entire file is processed with perltidy.
10584         next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
10585
10586         # next line must not be at greater depth
10587         my $iend_next = $$ri_last[ $line + 1 ];
10588         next
10589           if ( $nesting_depth_to_go[ $iend_next + 1 ] >
10590             $nesting_depth_to_go[$ipad] );
10591
10592         # lines must be somewhat similar to be padded..
10593         my $inext_next = $ibeg_next + 1;
10594         if ( $types_to_go[$inext_next] eq 'b' ) {
10595             $inext_next++;
10596         }
10597         my $type           = $types_to_go[$ipad];
10598         my $type_after_pad = $types_to_go[ $ipad + 1 ];
10599
10600         # see if there are multiple continuation lines
10601         my $logical_continuation_lines = 1;
10602         if ( $line + 2 <= $max_line ) {
10603             my $leading_token  = $tokens_to_go[$ibeg_next];
10604             my $ibeg_next_next = $$ri_first[ $line + 2 ];
10605             if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
10606                 && $nesting_depth_to_go[$ibeg_next] eq
10607                 $nesting_depth_to_go[$ibeg_next_next] )
10608             {
10609                 $logical_continuation_lines++;
10610             }
10611         }
10612
10613         # see if leading types match
10614         my $types_match = $types_to_go[$inext_next] eq $type;
10615         my $matches_without_bang;
10616
10617         # if first line has leading ! then compare the following token
10618         if ( !$types_match && $type eq '!' ) {
10619             $types_match = $matches_without_bang =
10620               $types_to_go[$inext_next] eq $type_after_pad;
10621         }
10622
10623         if (
10624
10625             # either we have multiple continuation lines to follow
10626             # and we are not padding the first token
10627             ( $logical_continuation_lines > 1 && $ipad > 0 )
10628
10629             # or..
10630             || (
10631
10632                 # types must match
10633                 $types_match
10634
10635                 # and keywords must match if keyword
10636                 && !(
10637                        $type eq 'k'
10638                     && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
10639                 )
10640             )
10641           )
10642         {
10643
10644             #----------------------begin special checks--------------
10645             #
10646             # SPECIAL CHECK 1:
10647             # A check is needed before we can make the pad.
10648             # If we are in a list with some long items, we want each
10649             # item to stand out.  So in the following example, the
10650             # first line beginning with '$casefold->' would look good
10651             # padded to align with the next line, but then it
10652             # would be indented more than the last line, so we
10653             # won't do it.
10654             #
10655             #  ok(
10656             #      $casefold->{code}         eq '0041'
10657             #        && $casefold->{status}  eq 'C'
10658             #        && $casefold->{mapping} eq '0061',
10659             #      'casefold 0x41'
10660             #  );
10661             #
10662             # Note:
10663             # It would be faster, and almost as good, to use a comma
10664             # count, and not pad if comma_count > 1 and the previous
10665             # line did not end with a comma.
10666             #
10667             my $ok_to_pad = 1;
10668
10669             my $ibg   = $$ri_first[ $line + 1 ];
10670             my $depth = $nesting_depth_to_go[ $ibg + 1 ];
10671
10672             # use a simplified formula for leading spaces to avoid sub calls
10673             my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
10674
10675             # look at each line beyond the next ..
10676             my $l = $line + 1;
10677             foreach my $ltest ( $line + 2 .. $max_line ) {
10678                 $l = $ltest;    # a foreach variable is restored on loop exit
10679                 my $ibg = $$ri_first[$l];
10680
10681                 # quit looking at the end of this container
10682                 last
10683                   if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
10684                   || ( $nesting_depth_to_go[$ibg] < $depth );
10685
10686                 # cannot do the pad if a later line would be
10687                 # outdented more
10688                 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
10689                     $ok_to_pad = 0;
10690                     last;
10691                 }
10692             }
10693
10694             # don't pad if we end in a broken list
10695             if ( $l == $max_line ) {
10696                 my $i2 = $$ri_last[$l];
10697                 if ( $types_to_go[$i2] eq '#' ) {
10698                     my $i1 = $$ri_first[$l];
10699                     next
10700                       if (
10701                         terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10702                             $i2 ) eq ','
10703                       );
10704                 }
10705             }
10706
10707             # SPECIAL CHECK 2:
10708             # a minus may introduce a quoted variable, and we will
10709             # add the pad only if this line begins with a bare word,
10710             # such as for the word 'Button' here:
10711             #    [
10712             #         Button      => "Print letter \"~$_\"",
10713             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10714             #        -accelerator => "Meta+$_"
10715             #    ];
10716             #
10717             #  On the other hand, if 'Button' is quoted, it looks best
10718             #  not to pad:
10719             #    [
10720             #        'Button'     => "Print letter \"~$_\"",
10721             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10722             #        -accelerator => "Meta+$_"
10723             #    ];
10724             if ( $types_to_go[$ibeg_next] eq 'm' ) {
10725                 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10726             }
10727
10728             next unless $ok_to_pad;
10729
10730             #----------------------end special check---------------
10731
10732             my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
10733             my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10734             $pad_spaces = $length_2 - $length_1;
10735
10736             # If the first line has a leading ! and the second does
10737             # not, then remove one space to try to align the next
10738             # leading characters, which are often the same.  For example:
10739             #  if (  !$ts
10740             #      || $ts == $self->Holder
10741             #      || $self->Holder->Type eq "Arena" )
10742             #
10743             # This usually helps readability, but if there are subsequent
10744             # ! operators things will still get messed up.  For example:
10745             #
10746             #  if (  !exists $Net::DNS::typesbyname{$qtype}
10747             #      && exists $Net::DNS::classesbyname{$qtype}
10748             #      && !exists $Net::DNS::classesbyname{$qclass}
10749             #      && exists $Net::DNS::typesbyname{$qclass} )
10750             # We can't fix that.
10751             if ($matches_without_bang) { $pad_spaces-- }
10752
10753             # make sure this won't change if -lp is used
10754             my $indentation_1 = $leading_spaces_to_go[$ibeg];
10755             if ( ref($indentation_1) ) {
10756                 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10757                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10758                     unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10759                         $pad_spaces = 0;
10760                     }
10761                 }
10762             }
10763
10764             # we might be able to handle a pad of -1 by removing a blank
10765             # token
10766             if ( $pad_spaces < 0 ) {
10767
10768                 if ( $pad_spaces == -1 ) {
10769                     if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10770                         $tokens_to_go[ $ipad - 1 ] = '';
10771                     }
10772                 }
10773                 $pad_spaces = 0;
10774             }
10775
10776             # now apply any padding for alignment
10777             if ( $ipad >= 0 && $pad_spaces ) {
10778
10779                 my $length_t = total_line_length( $ibeg, $iend );
10780                 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10781                     $tokens_to_go[$ipad] =
10782                       ' ' x $pad_spaces . $tokens_to_go[$ipad];
10783                 }
10784             }
10785         }
10786     }
10787     continue {
10788         $iendm          = $iend;
10789         $ibegm          = $ibeg;
10790         $has_leading_op = $has_leading_op_next;
10791     }    # end of loop over lines
10792     return;
10793 }
10794
10795 sub correct_lp_indentation {
10796
10797     # When the -lp option is used, we need to make a last pass through
10798     # each line to correct the indentation positions in case they differ
10799     # from the predictions.  This is necessary because perltidy uses a
10800     # predictor/corrector method for aligning with opening parens.  The
10801     # predictor is usually good, but sometimes stumbles.  The corrector
10802     # tries to patch things up once the actual opening paren locations
10803     # are known.
10804     my ( $ri_first, $ri_last ) = @_;
10805     my $do_not_pad = 0;
10806
10807     #  Note on flag '$do_not_pad':
10808     #  We want to avoid a situation like this, where the aligner inserts
10809     #  whitespace before the '=' to align it with a previous '=', because
10810     #  otherwise the parens might become mis-aligned in a situation like
10811     #  this, where the '=' has become aligned with the previous line,
10812     #  pushing the opening '(' forward beyond where we want it.
10813     #
10814     #  $mkFloor::currentRoom = '';
10815     #  $mkFloor::c_entry     = $c->Entry(
10816     #                                 -width        => '10',
10817     #                                 -relief       => 'sunken',
10818     #                                 ...
10819     #                                 );
10820     #
10821     #  We leave it to the aligner to decide how to do this.
10822
10823     # first remove continuation indentation if appropriate
10824     my $max_line = @$ri_first - 1;
10825
10826     # looking at each line of this batch..
10827     my ( $ibeg, $iend );
10828     my $line;
10829     foreach $line ( 0 .. $max_line ) {
10830         $ibeg = $$ri_first[$line];
10831         $iend = $$ri_last[$line];
10832
10833         # looking at each token in this output line..
10834         my $i;
10835         foreach $i ( $ibeg .. $iend ) {
10836
10837             # How many space characters to place before this token
10838             # for special alignment.  Actual padding is done in the
10839             # continue block.
10840
10841             # looking for next unvisited indentation item
10842             my $indentation = $leading_spaces_to_go[$i];
10843             if ( !$indentation->get_MARKED() ) {
10844                 $indentation->set_MARKED(1);
10845
10846                 # looking for indentation item for which we are aligning
10847                 # with parens, braces, and brackets
10848                 next unless ( $indentation->get_ALIGN_PAREN() );
10849
10850                 # skip closed container on this line
10851                 if ( $i > $ibeg ) {
10852                     my $im = $i - 1;
10853                     if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
10854                     if (   $type_sequence_to_go[$im]
10855                         && $mate_index_to_go[$im] <= $iend )
10856                     {
10857                         next;
10858                     }
10859                 }
10860
10861                 if ( $line == 1 && $i == $ibeg ) {
10862                     $do_not_pad = 1;
10863                 }
10864
10865                 # Ok, let's see what the error is and try to fix it
10866                 my $actual_pos;
10867                 my $predicted_pos = $indentation->get_SPACES();
10868                 if ( $i > $ibeg ) {
10869
10870                     # token is mid-line - use length to previous token
10871                     $actual_pos = total_line_length( $ibeg, $i - 1 );
10872
10873                     # for mid-line token, we must check to see if all
10874                     # additional lines have continuation indentation,
10875                     # and remove it if so.  Otherwise, we do not get
10876                     # good alignment.
10877                     my $closing_index = $indentation->get_CLOSED();
10878                     if ( $closing_index > $iend ) {
10879                         my $ibeg_next = $$ri_first[ $line + 1 ];
10880                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
10881                             undo_lp_ci( $line, $i, $closing_index, $ri_first,
10882                                 $ri_last );
10883                         }
10884                     }
10885                 }
10886                 elsif ( $line > 0 ) {
10887
10888                     # handle case where token starts a new line;
10889                     # use length of previous line
10890                     my $ibegm = $$ri_first[ $line - 1 ];
10891                     my $iendm = $$ri_last[ $line - 1 ];
10892                     $actual_pos = total_line_length( $ibegm, $iendm );
10893
10894                     # follow -pt style
10895                     ++$actual_pos
10896                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
10897                 }
10898                 else {
10899
10900                     # token is first character of first line of batch
10901                     $actual_pos = $predicted_pos;
10902                 }
10903
10904                 my $move_right = $actual_pos - $predicted_pos;
10905
10906                 # done if no error to correct (gnu2.t)
10907                 if ( $move_right == 0 ) {
10908                     $indentation->set_RECOVERABLE_SPACES($move_right);
10909                     next;
10910                 }
10911
10912                 # if we have not seen closure for this indentation in
10913                 # this batch, we can only pass on a request to the
10914                 # vertical aligner
10915                 my $closing_index = $indentation->get_CLOSED();
10916
10917                 if ( $closing_index < 0 ) {
10918                     $indentation->set_RECOVERABLE_SPACES($move_right);
10919                     next;
10920                 }
10921
10922                 # If necessary, look ahead to see if there is really any
10923                 # leading whitespace dependent on this whitespace, and
10924                 # also find the longest line using this whitespace.
10925                 # Since it is always safe to move left if there are no
10926                 # dependents, we only need to do this if we may have
10927                 # dependent nodes or need to move right.
10928
10929                 my $right_margin = 0;
10930                 my $have_child   = $indentation->get_HAVE_CHILD();
10931
10932                 my %saw_indentation;
10933                 my $line_count = 1;
10934                 $saw_indentation{$indentation} = $indentation;
10935
10936                 if ( $have_child || $move_right > 0 ) {
10937                     $have_child = 0;
10938                     my $max_length = 0;
10939                     if ( $i == $ibeg ) {
10940                         $max_length = total_line_length( $ibeg, $iend );
10941                     }
10942
10943                     # look ahead at the rest of the lines of this batch..
10944                     my $line_t;
10945                     foreach $line_t ( $line + 1 .. $max_line ) {
10946                         my $ibeg_t = $$ri_first[$line_t];
10947                         my $iend_t = $$ri_last[$line_t];
10948                         last if ( $closing_index <= $ibeg_t );
10949
10950                         # remember all different indentation objects
10951                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
10952                         $saw_indentation{$indentation_t} = $indentation_t;
10953                         $line_count++;
10954
10955                         # remember longest line in the group
10956                         my $length_t = total_line_length( $ibeg_t, $iend_t );
10957                         if ( $length_t > $max_length ) {
10958                             $max_length = $length_t;
10959                         }
10960                     }
10961                     $right_margin = $rOpts_maximum_line_length - $max_length;
10962                     if ( $right_margin < 0 ) { $right_margin = 0 }
10963                 }
10964
10965                 my $first_line_comma_count =
10966                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
10967                 my $comma_count = $indentation->get_COMMA_COUNT();
10968                 my $arrow_count = $indentation->get_ARROW_COUNT();
10969
10970                 # This is a simple approximate test for vertical alignment:
10971                 # if we broke just after an opening paren, brace, bracket,
10972                 # and there are 2 or more commas in the first line,
10973                 # and there are no '=>'s,
10974                 # then we are probably vertically aligned.  We could set
10975                 # an exact flag in sub scan_list, but this is good
10976                 # enough.
10977                 my $indentation_count = keys %saw_indentation;
10978                 my $is_vertically_aligned =
10979                   (      $i == $ibeg
10980                       && $first_line_comma_count > 1
10981                       && $indentation_count == 1
10982                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
10983
10984                 # Make the move if possible ..
10985                 if (
10986
10987                     # we can always move left
10988                     $move_right < 0
10989
10990                     # but we should only move right if we are sure it will
10991                     # not spoil vertical alignment
10992                     || ( $comma_count == 0 )
10993                     || ( $comma_count > 0 && !$is_vertically_aligned )
10994                   )
10995                 {
10996                     my $move =
10997                       ( $move_right <= $right_margin )
10998                       ? $move_right
10999                       : $right_margin;
11000
11001                     foreach ( keys %saw_indentation ) {
11002                         $saw_indentation{$_}
11003                           ->permanently_decrease_AVAILABLE_SPACES( -$move );
11004                     }
11005                 }
11006
11007                 # Otherwise, record what we want and the vertical aligner
11008                 # will try to recover it.
11009                 else {
11010                     $indentation->set_RECOVERABLE_SPACES($move_right);
11011                 }
11012             }
11013         }
11014     }
11015     return $do_not_pad;
11016 }
11017
# flush is called to push any tokens still held in the pipeline through
# to the output, so that lines coming from an alternate source can then
# be written in the correct order.

sub flush {

    # first settle this formatter's own pending state ...
    destroy_one_line_block();
    output_line_to_go();

    # ... then flush the vertical aligner.  The aligner's result is the
    # value of this sub.
    return Perl::Tidy::VerticalAligner::flush();
}
11026
sub reset_block_text_accumulator {

    # Return the -csc text accumulator to its idle state.
    #
    # If text was being collected for an 'if' or 'elsif', it is first
    # appended to the if/elsif history list so that it can be added after
    # a later 'else'.
    push @{$rleading_block_if_elsif_text}, $leading_block_text
      if ( $accumulating_text_for_block
        && $accumulating_text_for_block =~ /^(if|elsif)$/ );

    # no keyword is being tracked and no text has been collected
    $accumulating_text_for_block = $leading_block_text = "";

    # clear the bookkeeping which goes with the collected text
    $leading_block_text_line_number     = 0;
    $leading_block_text_line_length     = 0;
    $leading_block_text_length_exceeded = 0;
    $leading_block_text_level           = 0;
}
11043
sub set_block_text_accumulator {

    # Start accumulating -csc leading text for the block keyword found at
    # index $i of the current batch of tokens.
    my ($i) = @_;

    my $keyword = $tokens_to_go[$i];
    $accumulating_text_for_block = $keyword;

    # a keyword which does not begin with 'els' starts a new, empty
    # if/elsif history
    unless ( $keyword =~ /^els/ ) {
        $rleading_block_if_elsif_text = [];
    }

    $leading_block_text                 = "";
    $leading_block_text_length_exceeded = 0;
    $leading_block_text_level           = $levels_to_go[$i];
    $leading_block_text_line_number =
      $vertical_aligner_object->get_output_line_number();

    # This will contain the column number of the last character of the
    # closing side comment.  The starting value is the sum of the lengths
    # of the label, the keyword and the -csc prefix, plus the indentation
    # at this level, plus 3.
    my $label_length   = length($csc_last_label);
    my $keyword_length = length($keyword);
    my $prefix_length  = length( $rOpts->{'closing-side-comment-prefix'} );
    my $indent_length  = $leading_block_text_level * $rOpts_indent_columns;

    $leading_block_text_line_length =
      $label_length + $keyword_length + $prefix_length + $indent_length + 3;
}
11065
sub accumulate_block_text {

    # Append token number $i of the current batch to the leading text
    # being collected for a closing side comment (-csc), as long as the
    # length limits allow it.  When a token no longer fits, the text is
    # marked as truncated with a trailing '...'.
    my ($i) = @_;

    # Nothing to do unless text is being collected and has not already
    # been truncated.  Side comments are never part of the text.
    return
      unless ( $accumulating_text_for_block
        && !$leading_block_text_length_exceeded
        && $types_to_go[$i] ne '#' );

    my $token          = $tokens_to_go[$i];
    my $is_first_token = ( $i == 0 );

    # the first token of a batch is preceded by one extra space
    my $added_length    = length($token) + ( $is_first_token ? 1 : 0 );
    my $new_line_length = $leading_block_text_line_length + $added_length;

    my $text_length = length($leading_block_text);
    my $text_limit  = $rOpts_closing_side_comment_maximum_text;

    # The token fits if the text length limit has not already been
    # reached, and either the new total line length is below the line
    # length limit or the new text length is below the text length limit
    # (ie, one token may be allowed to exceed the text length limit).
    my $fits_within_limits =
      $text_length < $text_limit
      && ( $new_line_length < $rOpts_maximum_line_length
        || $text_length + $added_length < $text_limit );

    # Exception: a closing paren which comes just before the brace we seek
    # is always added.  This is an attempt to avoid situations where the
    # '...' to be added are longer than the omitted right paren, as in:
    #
    #   foreach my $item (@a_rather_long_variable_name_here) {
    #      &whatever;
    #   } ## end foreach my $item (@a_rather_long_variable_name_here...
    my $is_paren_before_brace = $token eq ')'
      && (
        (
               $i + 1 <= $max_index_to_go
            && $block_type_to_go[ $i + 1 ] eq $accumulating_text_for_block
        )
        || (   $i + 2 <= $max_index_to_go
            && $block_type_to_go[ $i + 2 ] eq $accumulating_text_for_block )
      );

    if ( $fits_within_limits || $is_paren_before_brace ) {

        # add an extra space at each newline, then the token text
        $leading_block_text .= ' ' if $is_first_token;
        $leading_block_text .= $token;
        $leading_block_text_line_length = $new_line_length;
    }

    # show that text was truncated if necessary; a blank alone never
    # triggers truncation
    elsif ( $types_to_go[$i] ne 'b' ) {
        $leading_block_text_length_exceeded = 1;
        $leading_block_text .= '...';
    }
    return;
}
11133
{

    # Keywords of the block types for which -csc leading text may be
    # accumulated; private to sub accumulate_csc_text.
    my %is_if_elsif_else_unless_while_until_for_foreach;

    BEGIN {

        # These block types may have text between the keyword and opening
        # curly.  Note: 'else' does not, but must be included to allow trailing
        # if/elsif text to be appended.
        # patch for SWITCH/CASE: added 'case' and 'when'
        my @q = qw(if elsif else unless while until for foreach case when);
        @is_if_elsif_else_unless_while_until_for_foreach{@q} =
          (1) x scalar(@q);
    }

    sub accumulate_csc_text {

        # Called once per output buffer when -csc is used.  Accumulates
        # the text placed after certain closing block braces.
        #
        # Takes no arguments; it works on the current batch of tokens
        # (indexes 0 .. $max_index_to_go of the '_to_go' arrays).
        #
        # Returns the list:
        #   $terminal_type        = type of the last token which is neither
        #                           a blank nor a side comment
        #   $i_terminal           = index of that token
        #   $i_block_leading_text = index of the '}' which owns
        #                           $block_leading_text, or -1 if none
        #   $block_leading_text   = leading text restored for the last '}'
        #                           which had saved text
        #   $block_line_count     = how many lines the block spans
        #                           (stays at 100 if it cannot be computed)
        #   $block_label          = statement label for the block, or ""

        my $block_leading_text = "";    # the leading text of the last '}'
        my $rblock_leading_if_elsif_text;
        my $i_block_leading_text =
          -1;    # index of token owning block_leading_text
        my $block_line_count    = 100;    # how many lines the block spans
        my $terminal_type       = 'b';    # type of last nonblank token
        my $i_terminal          = 0;      # index of last nonblank token
        my $terminal_block_type = "";

        # update most recent statement label
        $csc_last_label = "" unless ($csc_last_label);
        if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
        my $block_label = $csc_last_label;

        # Loop over all tokens of this batch
        for my $i ( 0 .. $max_index_to_go ) {
            my $type       = $types_to_go[$i];
            my $block_type = $block_type_to_go[$i];
            my $token      = $tokens_to_go[$i];

            # remember last nonblank token type
            if ( $type ne '#' && $type ne 'b' ) {
                $terminal_type       = $type;
                $terminal_block_type = $block_type;
                $i_terminal          = $i;
            }

            my $type_sequence = $type_sequence_to_go[$i];
            if ( $block_type && $type_sequence ) {

                if ( $token eq '}' ) {

                    # restore any leading text saved when we entered this block
                    if ( defined( $block_leading_text{$type_sequence} ) ) {
                        ( $block_leading_text, $rblock_leading_if_elsif_text ) =
                          @{ $block_leading_text{$type_sequence} };
                        $i_block_leading_text = $i;
                        delete $block_leading_text{$type_sequence};
                        $rleading_block_if_elsif_text =
                          $rblock_leading_if_elsif_text;
                    }

                    # restore the label saved when we entered this block
                    if ( defined( $csc_block_label{$type_sequence} ) ) {
                        $block_label = $csc_block_label{$type_sequence};
                        delete $csc_block_label{$type_sequence};
                    }

                    # if we run into a '}' then we probably started accumulating
                    # at something like a trailing 'if' clause..no harm done.
                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] <= $leading_block_text_level )
                    {
                        reset_block_text_accumulator();
                    }

                    # the block length is measured from the line number
                    # recorded at the opening brace to the current output
                    # line number
                    if ( defined( $block_opening_line_number{$type_sequence} ) )
                    {
                        my $output_line_number =
                          $vertical_aligner_object->get_output_line_number();
                        $block_line_count =
                          $output_line_number -
                          $block_opening_line_number{$type_sequence} + 1;
                        delete $block_opening_line_number{$type_sequence};
                    }
                    else {

                        # Error: block opening line undefined for this line..
                        # This shouldn't be possible, but it is not a
                        # significant problem.
                    }
                }

                elsif ( $token eq '{' ) {

                    my $line_number =
                      $vertical_aligner_object->get_output_line_number();
                    $block_opening_line_number{$type_sequence} = $line_number;

                    # set a label for this block, except for
                    # a bare block which already has the label
                    # A label can only be used on the next {
                    if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
                    $csc_block_label{$type_sequence} = $csc_last_label;
                    $csc_last_label = "";

                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] == $leading_block_text_level )
                    {

                        if ( $accumulating_text_for_block eq $block_type ) {

                            # save any leading text before we enter this block
                            $block_leading_text{$type_sequence} = [
                                $leading_block_text,
                                $rleading_block_if_elsif_text
                            ];

                            # the block is counted from the line on which
                            # its keyword was seen, not from the brace
                            $block_opening_line_number{$type_sequence} =
                              $leading_block_text_line_number;
                            reset_block_text_accumulator();
                        }
                        else {

                            # shouldn't happen, but not a serious error.
                            # We were accumulating -csc text for block type
                            # $accumulating_text_for_block and unexpectedly
                            # encountered a '{' for block type $block_type.
                        }
                    }
                }
            }

            # Start accumulating at a keyword of interest which begins a
            # statement.  Note: no /o modifier on this match.  With /o the
            # regex would be compiled from the first value ever seen and any
            # later change to $closing_side_comment_list_pattern within the
            # same process would be silently ignored.
            # NOTE(review): the pattern is presumably rebuilt from the
            # options on each run; confirm where it is assigned.
            if (   $type eq 'k'
                && $csc_new_statement_ok
                && $is_if_elsif_else_unless_while_until_for_foreach{$token}
                && $token =~ /$closing_side_comment_list_pattern/ )
            {
                set_block_text_accumulator($i);
            }
            else {

                # note: ignoring type 'q' because of tricks being played
                # with 'q' for hanging side comments
                if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
                    $csc_new_statement_ok =
                      ( $block_type || $type eq 'J' || $type eq ';' );
                }

                # a ';' at the level of the keyword ends the statement
                # without finding a block, so give up on this text
                if (   $type eq ';'
                    && $accumulating_text_for_block
                    && $levels_to_go[$i] == $leading_block_text_level )
                {
                    reset_block_text_accumulator();
                }
                else {
                    accumulate_block_text($i);
                }
            }
        }

        # Treat an 'else' block specially by adding preceding 'if' and
        # 'elsif' text.  Otherwise, the 'end else' is not helpful,
        # especially for cuddled-else formatting.
        if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
            $block_leading_text =
              make_else_csc_text( $i_terminal, $terminal_block_type,
                $block_leading_text, $rblock_leading_if_elsif_text );
        }

        # if this line ends in a label then remember it for the next pass
        $csc_last_label = "";
        if ( $terminal_type eq 'J' ) {
            $csc_last_label = $tokens_to_go[$i_terminal];
        }

        return ( $terminal_type, $i_terminal, $i_block_leading_text,
            $block_leading_text, $block_line_count, $block_label );
    }
}
11311
sub make_else_csc_text {

    # Build the extra -csc text for the closing brace of an 'else' block,
    # and optionally of an 'elsif' block.  What gets added is controlled by
    # $rOpts_closing_side_comment_else_flag:
    #
    #  = 0 add 'if' text to trailing else
    #  = 1 same as 0 plus:
    #      add 'if' to 'elsif's if can fit in line length
    #      add last 'elsif' to trailing else if can fit in one line
    #  = 2 same as 1 but do not check if exceed line length
    #
    # Arguments:
    #   $i_terminal         = index of the terminal token of the batch
    #   $block_type         = type of the block being closed
    #   $block_leading_text = the leading text already found for the block
    #   $rif_elsif_text     = a reference to a list of all previous closing
    #                         side comments created for this if block
    #
    # Returns the text to use for the closing side comment.
    my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;

    my $else_flag = $rOpts_closing_side_comment_else_flag;
    my $is_else   = ( $block_type eq 'else' );
    my $base_text = $block_leading_text;

    # an 'elsif' gets nothing extra at the lowest setting
    return $base_text if ( $block_type eq 'elsif' && $else_flag == 0 );

    # nothing can be added without some previous if/elsif text
    my @previous_text = @{$rif_elsif_text};
    return $base_text unless (@previous_text);

    # always show the leading 'if' text on 'else'
    my $if_text = '[ if' . $previous_text[0];
    $base_text .= $if_text if ($is_else);

    # see if that's all
    return $base_text if ( $else_flag == 0 );

    # text of the most recent 'elsif', with a marker showing that
    # intermediate ones have been left out
    my $last_elsif_text = "";
    if ( @previous_text > 1 ) {
        $last_elsif_text = ' [elsif' . $previous_text[-1];
        $last_elsif_text = ' [...' . $last_elsif_text
          if ( @previous_text > 2 );
    }

    # tentatively append one more item: the last 'elsif' for an 'else',
    # or the leading 'if' for anything else
    my $extended_text =
      $base_text . ( $is_else ? $last_elsif_text : ' ' . $if_text );

    # all done if no length checks requested
    return $extended_text if ( $else_flag == 2 );

    # undo it if line length exceeded
    my $length =
      length($extended_text) +
      length($block_type) +
      length( $rOpts->{'closing-side-comment-prefix'} ) +
      $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;

    return ( $length > $rOpts_maximum_line_length )
      ? $base_text
      : $extended_text;
}
11381
{    # sub balance_csc_text

    # maps each structural character to its mate
    my %matching_char;

    BEGIN {
        %matching_char = (
            '{' => '}',
            '(' => ')',
            '[' => ']',
            '}' => '{',
            ')' => '(',
            ']' => '[',
        );
    }

    sub balance_csc_text {

        # Append characters to balance a closing side comment so that editors
        # such as vim can correctly jump through code.
        # Simple Example:
        #  input  = ## end foreach my $foo ( sort { $b  ...
        #  output = ## end foreach my $foo ( sort { $b  ...})
        #
        # Given:   $csc = the text of a closing side comment
        # Returns: $csc followed by whatever characters are needed to
        #          balance it; the input text itself is never altered.

        # NOTE: This routine does not currently filter out structures within
        # quoted text because the bounce algorithms in text editors do not
        # necessarily do this either (a version of vim was checked and
        # did not do this).

        # Some complex examples which will cause trouble for some editors:
        #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
        #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
        #  if ( $1 eq '{' ) {
        # test file test1/braces.pl has many such examples.

        my ($csc) = @_;
        return $csc unless ( defined($csc) && length($csc) );

        # Stack of balancing characters, in the order in which they will be
        # appended.  It is kept apart from the text so that popping it can
        # never remove a character of the text itself, which matters when
        # the text ends in a structural character, as in 'if (foo())'.
        my @balance;

        # loop to examine characters one-by-one, RIGHT to LEFT and
        # build a balancing ending, LEFT to RIGHT.
        for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {

            my $char = substr( $csc, $pos, 1 );

            # ignore everything except structural characters
            my $mate = $matching_char{$char};
            next unless ($mate);

            # If the newest balancing character is this character then the
            # two cancel each other: this happens when a closing character
            # seen earlier finds its opening mate within the text.
            if ( @balance && $balance[-1] eq $char ) {
                pop @balance;
            }

            # otherwise this character needs its mate
            else {
                push @balance, $mate;
            }
        }

        # return the balanced string
        return $csc . join( '', @balance );
    }
}
11439
11440 sub add_closing_side_comment {
11441
11442     # add closing side comments after closing block braces if -csc used
11443     my $cscw_block_comment;
11444
11445     #---------------------------------------------------------------
11446     # Step 1: loop through all tokens of this line to accumulate
11447     # the text needed to create the closing side comments. Also see
11448     # how the line ends.
11449     #---------------------------------------------------------------
11450
11451     my ( $terminal_type, $i_terminal, $i_block_leading_text,
11452         $block_leading_text, $block_line_count, $block_label )
11453       = accumulate_csc_text();
11454
11455     #---------------------------------------------------------------
11456     # Step 2: make the closing side comment if this ends a block
11457     #---------------------------------------------------------------
11458     my $have_side_comment = $i_terminal != $max_index_to_go;
11459
11460     # if this line might end in a block closure..
11461     if (
11462         $terminal_type eq '}'
11463
11464         # ..and either
11465         && (
11466
11467             # the block is long enough
11468             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
11469
11470             # or there is an existing comment to check
11471             || (   $have_side_comment
11472                 && $rOpts->{'closing-side-comment-warnings'} )
11473         )
11474
11475         # .. and if this is one of the types of interest
11476         && $block_type_to_go[$i_terminal] =~
11477         /$closing_side_comment_list_pattern/o
11478
11479         # .. but not an anonymous sub
11480         # These are not normally of interest, and their closing braces are
11481         # often followed by commas or semicolons anyway.  This also avoids
11482         # possible erratic output due to line numbering inconsistencies
11483         # in the cases where their closing braces terminate a line.
11484         && $block_type_to_go[$i_terminal] ne 'sub'
11485
11486         # ..and the corresponding opening brace must not be in this batch
11487         # (because we do not need to tag one-line blocks, although this
11488         # should also be caught with a positive -csci value)
11489         && $mate_index_to_go[$i_terminal] < 0
11490
11491         # ..and either
11492         && (
11493
11494             # this is the last token (line doesn't have a side comment)
11495             !$have_side_comment
11496
11497             # or the old side comment is a closing side comment
11498             || $tokens_to_go[$max_index_to_go] =~
11499             /$closing_side_comment_prefix_pattern/o
11500         )
11501       )
11502     {
11503
11504         # then make the closing side comment text
11505         if ($block_label) { $block_label .= " " }
11506         my $token =
11507 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
11508
11509         # append any extra descriptive text collected above
11510         if ( $i_block_leading_text == $i_terminal ) {
11511             $token .= $block_leading_text;
11512         }
11513
11514         $token = balance_csc_text($token)
11515           if $rOpts->{'closing-side-comments-balanced'};
11516
11517         $token =~ s/\s*$//;    # trim any trailing whitespace
11518
11519         # handle case of existing closing side comment
11520         if ($have_side_comment) {
11521
11522             # warn if requested and tokens differ significantly
11523             if ( $rOpts->{'closing-side-comment-warnings'} ) {
11524                 my $old_csc = $tokens_to_go[$max_index_to_go];
11525                 my $new_csc = $token;
11526                 $new_csc =~ s/\s+//g;            # trim all whitespace
11527                 $old_csc =~ s/\s+//g;            # trim all whitespace
11528                 $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11529                 $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11530                 $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
11531                 my $new_trailing_dots = $1;
11532                 $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
11533
11534                 # Patch to handle multiple closing side comments at
11535                 # else and elsif's.  These have become too complicated
11536                 # to check, so if we see an indication of
11537                 # '[ if' or '[ # elsif', then assume they were made
11538                 # by perltidy.
11539                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
11540                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
11541                 }
11542                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
11543                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
11544                 }
11545
11546                 # if old comment is contained in new comment,
11547                 # only compare the common part.
11548                 if ( length($new_csc) > length($old_csc) ) {
11549                     $new_csc = substr( $new_csc, 0, length($old_csc) );
11550                 }
11551
11552                 # if the new comment is shorter and has been limited,
11553                 # only compare the common part.
11554                 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
11555                 {
11556                     $old_csc = substr( $old_csc, 0, length($new_csc) );
11557                 }
11558
11559                 # any remaining difference?
11560                 if ( $new_csc ne $old_csc ) {
11561
11562                     # just leave the old comment if we are below the threshold
11563                     # for creating side comments
11564                     if ( $block_line_count <
11565                         $rOpts->{'closing-side-comment-interval'} )
11566                     {
11567                         $token = undef;
11568                     }
11569
11570                     # otherwise we'll make a note of it
11571                     else {
11572
11573                         warning(
11574 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
11575                         );
11576
11577                      # save the old side comment in a new trailing block comment
11578                         my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
11579                         $year  += 1900;
11580                         $month += 1;
11581                         $cscw_block_comment =
11582 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
11583                     }
11584                 }
11585                 else {
11586
11587                     # No differences.. we can safely delete old comment if we
11588                     # are below the threshold
11589                     if ( $block_line_count <
11590                         $rOpts->{'closing-side-comment-interval'} )
11591                     {
11592                         $token = undef;
11593                         unstore_token_to_go()
11594                           if ( $types_to_go[$max_index_to_go] eq '#' );
11595                         unstore_token_to_go()
11596                           if ( $types_to_go[$max_index_to_go] eq 'b' );
11597                     }
11598                 }
11599             }
11600
11601             # switch to the new csc (unless we deleted it!)
11602             $tokens_to_go[$max_index_to_go] = $token if $token;
11603         }
11604
11605         # handle case of NO existing closing side comment
11606         else {
11607
11608             # insert the new side comment into the output token stream
11609             my $type          = '#';
11610             my $block_type    = '';
11611             my $type_sequence = '';
11612             my $container_environment =
11613               $container_environment_to_go[$max_index_to_go];
11614             my $level                = $levels_to_go[$max_index_to_go];
11615             my $slevel               = $nesting_depth_to_go[$max_index_to_go];
11616             my $no_internal_newlines = 0;
11617
11618             my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
11619             my $ci_level           = $ci_levels_to_go[$max_index_to_go];
11620             my $in_continued_quote = 0;
11621
11622             # first insert a blank token
11623             insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
11624
11625             # then the side comment
11626             insert_new_token_to_go( $token, $type, $slevel,
11627                 $no_internal_newlines );
11628         }
11629     }
11630     return $cscw_block_comment;
11631 }
11632
11633 sub previous_nonblank_token {
11634
11635     # Return the text of the nearest token before index $i, skipping at
11636     # most one blank; return "" if there is none.  If that token is an
11637     # isolated '->', prepend any non-blank token just before it to avoid
11638     # unwanted alignments [test case is test8/penco.pl].
11639     my ($i) = @_;
11640     my $i_prev = $i - 1;
11641     $i_prev-- if ( $i_prev >= 0 && $types_to_go[$i_prev] eq 'b' );
11642     return "" if ( $i_prev < 0 );
11643
11644     my $text = $tokens_to_go[$i_prev];
11645     return $text unless ( $text eq '->' && $i_prev > 0 );
11646
11647     # here $text is '->' and there is a token in front of it
11648     my $i_before = $i_prev - 1;
11649     return $text if ( $types_to_go[$i_before] eq 'b' );
11650     return $tokens_to_go[$i_before] . $text;
11651 }
11652
11653 sub send_lines_to_vertical_aligner {
          # Send one batch of output lines to the vertical aligner.  After
          # some steps applied to the whole batch (alignment markers, undo_ci,
          # logical padding), each line is described by its alignment tokens,
          # fields and patterns plus an adjusted indentation, and is handed to
          # Perl::Tidy::VerticalAligner::append_line.
          #
          # given:
          #   $ri_first - reference to list of the first index $i for each output
          #               line in this batch
          #   $ri_last - reference to list of the last index $i for each output
          #              line in this batch
          #   $do_not_pad - flag passed through to append_line; it is reset to 0
          #              after the first line has been sent, so it can only
          #              apply to the first line of the batch
          #
          # There is no explicit return statement.
11654
11655     my ( $ri_first, $ri_last, $do_not_pad ) = @_;
11656
          # Slot 0 starts out as 0; sub lookup_opening_indentation uses that
          # slot to remember the line number of its previous lookup.
11657     my $rindentation_list = [0];    # ref to indentations for each line
11658
11659     # define the array @matching_token_to_go for the output tokens
11660     # which will be non-blank for each special token (such as =>)
11661     # for which alignment is required.
11662     set_vertical_alignment_markers( $ri_first, $ri_last );
11663
11664     # flush if necessary to avoid unwanted alignment
11665     my $must_flush = 0;
11666     if ( @$ri_first > 1 ) {    # only for a batch of more than one line
11667
11668         # flush before a long if statement
11669         if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
11670             $must_flush = 1;
11671         }
11672     }
11673     if ($must_flush) {
11674         Perl::Tidy::VerticalAligner::flush();
11675     }
11676
11677     undo_ci( $ri_first, $ri_last );
11678
11679     set_logical_padding( $ri_first, $ri_last );
11680
11681     # loop to prepare each line for shipment
11682     my $n_last_line = @$ri_first - 1;
          # $in_comma_list is true if the line sent just before the current
          # one ended in a comma at a forced breakpoint
11683     my $in_comma_list;
11684     for my $n ( 0 .. $n_last_line ) {
11685         my $ibeg = $$ri_first[$n];
11686         my $iend = $$ri_last[$n];
11687
11688         my ( $rtokens, $rfields, $rpatterns ) =
11689           make_alignment_patterns( $ibeg, $iend );
11690
11691         # Set flag to show how much level changes between this line
11692         # and the next line, if we have it.
11693         my $ljump = 0;
11694         if ( $n < $n_last_line ) {
11695             my $ibegp = $$ri_first[ $n + 1 ];
11696             $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
11697         }
11698
11699         my ( $indentation, $lev, $level_end, $terminal_type,
11700             $is_semicolon_terminated, $is_outdented_line )
11701           = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
11702             $ri_first, $ri_last, $rindentation_list, $ljump );
11703
11704         # we will allow outdenting of long lines..
11705         my $outdent_long_lines = (
11706
11707             # which are long quotes, if allowed
11708             ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
11709
11710             # which are long block comments, if allowed
11711               || (
11712                    $types_to_go[$ibeg] eq '#'
11713                 && $rOpts->{'outdent-long-comments'}
11714
11715                 # but not if this is a static block comment
11716                 && !$is_static_block_comment
11717               )
11718         );
11719
              # change in nesting depth from the first token of this line to
              # the token which follows its last token
              # NOTE(review): this reads index $iend + 1, which for the last
              # line lies beyond the tokens of the batch - presumably that
              # slot always holds a usable value; confirm.
11720         my $level_jump =
11721           $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
11722
11723         my $rvertical_tightness_flags =
11724           set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
11725             $ri_first, $ri_last );
11726
11727         # flush an outdented line to avoid any unwanted vertical alignment
11728         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11729
              # set the terminal ternary flag for a line which begins with ':'
              # (or follows a line ending in ':') and whose ending level
              # satisfies the tests below
11730         my $is_terminal_ternary = 0;
11731         if (   $tokens_to_go[$ibeg] eq ':'
11732             || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
11733         {
11734             if (   ( $terminal_type eq ';' && $level_end <= $lev )
11735                 || ( $level_end < $lev ) )
11736             {
11737                 $is_terminal_ternary = 1;
11738             }
11739         }
11740
11741         # send this new line down the pipe
              # NOTE(review): $forced_breakpoint is not read again in this sub;
              # the call below indexes @forced_breakpoint_to_go directly.
11742         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
11743         Perl::Tidy::VerticalAligner::append_line(
11744             $lev,
11745             $level_end,
11746             $indentation,
11747             $rfields,
11748             $rtokens,
11749             $rpatterns,
11750             $forced_breakpoint_to_go[$iend] || $in_comma_list,
11751             $outdent_long_lines,
11752             $is_terminal_ternary,
11753             $is_semicolon_terminated,
11754             $do_not_pad,
11755             $rvertical_tightness_flags,
11756             $level_jump,
11757         );
              # remember, for the next line, whether this line ended in a comma
              # at a forced breakpoint
11758         $in_comma_list =
11759           $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
11760
11761         # flush an outdented line to avoid any unwanted vertical alignment
11762         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11763
              # the caller's flag only applies to the first line of the batch
11764         $do_not_pad = 0;
11765
11766         # Set flag indicating if this line ends in an opening
11767         # token and is very short, so that a blank line is not
11768         # needed if the subsequent line is a comment.
11769         # Examples of what we are looking for:
11770         #   {
11771         #   && (
11772         #   BEGIN {
11773         #   default {
11774         #   sub {
11775         $last_output_short_opening_token
11776
11777           # line ends in opening token
11778           = $types_to_go[$iend] =~ /^[\{\(\[L]$/
11779
11780           # and either
11781           && (
11782             # line has either single opening token
11783             $iend == $ibeg
11784
11785             # or is a single token followed by opening token.
11786             # Note that sub identifiers have blanks like 'sub doit'
11787             || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
11788           )
11789
11790           # and limit total to 10 character widths
11791           && token_sequence_length( $ibeg, $iend ) <= 10;
11792
11793 ##        $last_output_short_opening_token =
11794 ##             $types_to_go[$iend] =~ /^[\{\(\[L]$/
11795 ##          && $iend - $ibeg <= 2
11796 ##          && $tokens_to_go[$ibeg] !~ /^sub/
11797 ##          && token_sequence_length( $ibeg, $iend ) <= 10;
11798
11799     }    # end of loop to output each line
11800
11801     # remember indentation of lines containing opening containers for
11802     # later use by sub set_adjusted_indentation
11803     save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
11804 }
11805
11806 {        # begin make_alignment_patterns
          # Closure holding sub make_alignment_patterns together with the two
          # lookup tables which it uses.  The tables are filled in once, in
          # the BEGIN block below.
11807
11808     my %block_type_map;
11809     my %keyword_map;
11810
11811     BEGIN {
11812
11813         # map related block names into a common name to
11814         # allow alignment
11815         %block_type_map = (
11816             'unless'  => 'if',
11817             'else'    => 'if',
11818             'elsif'   => 'if',
11819             'when'    => 'if',
11820             'default' => 'if',
11821             'case'    => 'if',
11822             'sort'    => 'map',
11823             'grep'    => 'map',
11824         );
11825
11826         # map certain keywords to the same 'if' class to align
11827         # long if/elsif sequences. [elsif.pl]
11828         %keyword_map = (
11829             'unless'  => 'if',
11830             'else'    => 'if',
11831             'elsif'   => 'if',
11832             'when'    => 'given',
11833             'default' => 'given',
11834             'case'    => 'switch',
11835
11836             # treat an 'undef' similar to numbers and quotes
11837             'undef' => 'Q',
11838         );
11839     }
11840
11841     sub make_alignment_patterns {
11842
11843         # Here we do some important preliminary work for the
11844         # vertical aligner.  We create three arrays for one
11845         # output line. These arrays contain strings that can
11846         # be tested by the vertical aligner to see if
11847         # consecutive lines can be aligned vertically.
11848         #
11849         # The three arrays are indexed on the vertical
11850         # alignment fields and are:
11851         # @tokens - a list of any vertical alignment tokens for this line.
11852         #   These are tokens, such as '=' '&&' '#' etc which
11853         #   we might want to align vertically.  These are
11854         #   decorated with various information such as
11855         #   nesting depth to prevent unwanted vertical
11856         #   alignment matches.
11857         # @fields - the actual text of the line between the vertical alignment
11858         #   tokens.
11859         # @patterns - a modified list of token types, one for each alignment
11860         #   field.  These should normally each match before alignment is
11861         #   allowed, even when the alignment tokens match.
              #
              # given:
              #   $ibeg, $iend - indexes of the first and last tokens of the
              #                  output line
              #
              # return:
              #   references to @tokens, @fields and @patterns, in that order.
              #   @fields and @patterns each end up with one more element than
              #   @tokens.
11862         my ( $ibeg, $iend ) = @_;
11863         my @tokens   = ();
11864         my @fields   = ();
11865         my @patterns = ();
              # $i_start is the index of the first token of the field which is
              # currently being accumulated
11866         my $i_start  = $ibeg;
11867         my $i;
11868
              # $depth counts the parens balanced on this line which are
              # currently open, and @container_name holds the name given to
              # the container at each such depth.
              # NOTE(review): @multiple_comma_arrows is assigned below but is
              # never read within this sub.
11869         my $depth                 = 0;
11870         my @container_name        = ("");
11871         my @multiple_comma_arrows = (undef);
11872
11873         my $j = 0;    # field index
11874
11875         $patterns[0] = "";
11876         for $i ( $ibeg .. $iend ) {
11877
11878             # Keep track of containers balanced on this line only.
11879             # These are used below to prevent unwanted cross-line alignments.
11880             # Unbalanced containers already avoid aligning across
11881             # container boundaries.
11882             if ( $tokens_to_go[$i] eq '(' ) {
11883
11884                 # if container is balanced on this line...
11885                 my $i_mate = $mate_index_to_go[$i];
11886                 if ( $i_mate > $i && $i_mate <= $iend ) {
11887                     $depth++;
11888                     my $seqno = $type_sequence_to_go[$i];
11889                     my $count = comma_arrow_count($seqno);
11890                     $multiple_comma_arrows[$depth] = $count && $count > 1;
11891
11892                     # Append the previous token name to make the container name
11893                     # more unique.  This name will also be given to any commas
11894                     # within this container, and it helps avoid undesirable
11895                     # alignments of different types of containers.
11896                     my $name = previous_nonblank_token($i);
11897                     $name =~ s/^->//;
11898                     $container_name[$depth] = "+" . $name;
11899
11900                     # Make the container name even more unique if necessary.
11901                     # If we are not vertically aligning this opening paren,
11902                     # append a character count to avoid bad alignment because
11903                     # it usually looks bad to align commas within containers
11904                     # for which the opening parens do not align.  Here
11905                     # is an example of very BAD alignment of commas (because
11906                     # the atan2 functions are not all aligned):
11907                     #    $XY =
11908                     #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
11909                     #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
11910                     #      $X * atan2( $X,            1 ) -
11911                     #      $Y * atan2( $Y,            1 );
11912                     #
11913                     # On the other hand, it is usually okay to align commas if
11914                     # opening parens align, such as:
11915                     #    glVertex3d( $cx + $s * $xs, $cy,            $z );
11916                     #    glVertex3d( $cx,            $cy + $s * $ys, $z );
11917                     #    glVertex3d( $cx - $s * $xs, $cy,            $z );
11918                     #    glVertex3d( $cx,            $cy - $s * $ys, $z );
11919                     #
11920                     # To distinguish between these situations, we will
11921                     # append the length of the line from the previous matching
11922                     # token, or beginning of line, to the function name.  This
11923                     # will allow the vertical aligner to reject undesirable
11924                     # matches.
11925
11926                     # if we are not aligning on this paren...
11927                     if ( $matching_token_to_go[$i] eq '' ) {
11928
11929                         # Sum length from previous alignment, or start of line.
11930                         # Note that we have to sum token lengths here because
11931                         # padding has been done and so array $lengths_to_go
11932                         # is now wrong.
11933                         my $len =
11934                           length(
11935                             join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11936                         $len += leading_spaces_to_go($i_start)
11937                           if ( $i_start == $ibeg );
11938
11939                         # tack length onto the container name to make unique
11940                         $container_name[$depth] .= "-" . $len;
11941                     }
11942                 }
11943             }
11944             elsif ( $tokens_to_go[$i] eq ')' ) {
11945                 $depth-- if $depth > 0;
11946             }
11947
11948             # if we find a new synchronization token, we are done with
11949             # a field
11950             if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
11951
11952                 my $tok = my $raw_tok = $matching_token_to_go[$i];
11953
11954                 # make separators in different nesting depths unique
11955                 # by appending the nesting depth digit.
11956                 if ( $raw_tok ne '#' ) {
11957                     $tok .= "$nesting_depth_to_go[$i]";
11958                 }
11959
11960                 # also decorate commas with any container name to avoid
11961                 # unwanted cross-line alignments.
11962                 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
11963                     if ( $container_name[$depth] ) {
11964                         $tok .= $container_name[$depth];
11965                     }
11966                 }
11967
11968                 # Patch to avoid aligning leading and trailing if, unless.
11969                 # Mark trailing if, unless statements with container names.
11970                 # This makes them different from leading if, unless which
11971                 # are not so marked at present.  If we ever need to name
11972                 # them too, we could use ci to distinguish them.
11973                 # Example problem to avoid:
11974                 #    return ( 2, "DBERROR" )
11975                 #      if ( $retval == 2 );
11976                 #    if   ( scalar @_ ) {
11977                 #        my ( $a, $b, $c, $d, $e, $f ) = @_;
11978                 #    }
11979                 if ( $raw_tok eq '(' ) {
11980                     my $ci = $ci_levels_to_go[$ibeg];
11981                     if (   $container_name[$depth] =~ /^\+(if|unless)/
11982                         && $ci )
11983                     {
11984                         $tok .= $container_name[$depth];
11985                     }
11986                 }
11987
11988                 # Decorate block braces with block types to avoid
11989                 # unwanted alignments such as the following:
11990                 # foreach ( @{$routput_array} ) { $fh->print($_) }
11991                 # eval                          { $fh->close() };
11992                 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
11993                     my $block_type = $block_type_to_go[$i];
11994
11995                     # map certain related block types to allow
11996                     # else blocks to align
11997                     $block_type = $block_type_map{$block_type}
11998                       if ( defined( $block_type_map{$block_type} ) );
11999
12000                     # remove sub names to allow one-line sub braces to align
12001                     # regardless of name
12002                     if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
12003
12004                     # allow all control-type blocks to align
12005                     if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
12006
12007                     $tok .= $block_type;
12008                 }
12009
12010                 # concatenate the text of the consecutive tokens to form
12011                 # the field
12012                 push( @fields,
12013                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
12014
12015                 # store the alignment token for this field
12016                 push( @tokens, $tok );
12017
12018                 # get ready for the next batch
12019                 $i_start = $i;
12020                 $j++;
12021                 $patterns[$j] = "";
12022             }
12023
12024             # continue accumulating tokens
12025             # handle non-keywords..
12026             if ( $types_to_go[$i] ne 'k' ) {
12027                 my $type = $types_to_go[$i];
12028
12029                 # Mark most things before arrows as a quote to
12030                 # get them to line up. Testfile: mixed.pl.
12031                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
12032                     my $next_type = $types_to_go[ $i + 1 ];
12033                     my $i_next_nonblank =
12034                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12035
12036                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
12037                         $type = 'Q';
12038
12039                         # Patch to ignore leading minus before words,
12040                         # by changing pattern 'mQ' into just 'Q',
12041                         # so that we can align things like this:
12042                         #  Button   => "Print letter \"~$_\"",
12043                         #  -command => [ sub { print "$_[0]\n" }, $_ ],
12044                         if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
12045                     }
12046                 }
12047
12048                 # patch to make numbers and quotes align
12049                 if ( $type eq 'n' ) { $type = 'Q' }
12050
12051                 # patch to ignore any ! in patterns
12052                 if ( $type eq '!' ) { $type = '' }
12053
12054                 $patterns[$j] .= $type;
12055             }
12056
12057             # for keywords we have to use the actual text
12058             else {
12059
12060                 my $tok = $tokens_to_go[$i];
12061
12062                 # but map certain keywords to a common string to allow
12063                 # alignment.
12064                 $tok = $keyword_map{$tok}
12065                   if ( defined( $keyword_map{$tok} ) );
12066                 $patterns[$j] .= $tok;
12067             }
12068         }
12069
12070         # done with this line .. join text of tokens to make the last field
12071         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
12072         return ( \@tokens, \@fields, \@patterns );
12073     }
12074
12075 }    # end make_alignment_patterns
12076
12077 {    # begin unmatched_indexes
12078
12079     # Closure which keeps track of the containers left unbalanced in the
12080     # current batch.  Variables shared by the routines in this block:
12081     my @unmatched_opening_indexes_in_this_batch;
12082     my @unmatched_closing_indexes_in_this_batch;
12083     my %comma_arrow_count;
12084
12085     sub is_unbalanced_batch {
12086
12087         # Return the combined count of unmatched opening and closing
12088         # tokens in this batch; nonzero means the batch is unbalanced.
12089         return scalar(@unmatched_opening_indexes_in_this_batch) +
12090           scalar(@unmatched_closing_indexes_in_this_batch);
12091     }
12092
12093     sub comma_arrow_count {
12094
12095         # Return the '=>' count recorded in this batch for the container
12096         # with the given sequence number (undef if nothing was recorded).
12097         my ($seqno) = @_;
12098         return $comma_arrow_count{$seqno};
12099     }
12100
12101     sub match_opening_and_closing_tokens {
12102
12103         # Pair up the indexes of opening and closing braces, etc, in this
12104         # batch.  This has to be done after all tokens are stored because
12105         # unstoring of tokens would otherwise cause trouble.
12106         @unmatched_opening_indexes_in_this_batch = ();
12107         @unmatched_closing_indexes_in_this_batch = ();
12108         %comma_arrow_count                       = ();
12109
12110         for my $i ( 0 .. $max_index_to_go ) {
12111
12112             # A token without a sequence number matters only if it is a
12113             # '=>', which is counted for the innermost open container.
12114             if ( !$type_sequence_to_go[$i] ) {
12115                 if (   $tokens_to_go[$i] eq '=>'
12116                     && @unmatched_opening_indexes_in_this_batch )
12117                 {
12118                     my $i_open = $unmatched_opening_indexes_in_this_batch[-1];
12119                     $comma_arrow_count{ $type_sequence_to_go[$i_open] }++;
12120                 }
12121                 next;
12122             }
12123
12124             my $token = $tokens_to_go[$i];
12125             if ( $token =~ /^[\(\[\{\?]$/ ) {
12126                 push @unmatched_opening_indexes_in_this_batch, $i;
12127                 next;
12128             }
12129             next unless ( $token =~ /^[\)\]\}\:]$/ );
12130
12131             # closing token: try to pair it with the latest opening token
12132             my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12133             if ( !defined($i_mate) || $i_mate < 0 ) {
12134                 push @unmatched_closing_indexes_in_this_batch, $i;
12135             }
12136             elsif (
12137                 $type_sequence_to_go[$i_mate] == $type_sequence_to_go[$i] )
12138             {
12139                 $mate_index_to_go[$i]      = $i_mate;
12140                 $mate_index_to_go[$i_mate] = $i;
12141             }
12142             else {
12143                 # sequence numbers differ: put the opening index back and
12144                 # record this closing token as unmatched
12145                 push @unmatched_opening_indexes_in_this_batch, $i_mate;
12146                 push @unmatched_closing_indexes_in_this_batch, $i;
12147             }
12148         }
12149     }
12150
12151     sub save_opening_indentation {
12152
12153         # This should be called after each batch of tokens is output.  It
12154         # saves the line indentation of every unmatched opening token for
12155         # later use by sub get_opening_indentation.
12156         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
12157
12158         # Saved entries belonging to unmatched closing tokens of this batch
12159         # will never be needed again; delete them to keep the hash small.
12160         for my $i_close (@unmatched_closing_indexes_in_this_batch) {
12161             delete $saved_opening_indentation{ $type_sequence_to_go[$i_close] };
12162         }
12163
12164         # Unmatched opening tokens may be closed by a subsequent batch.
12165         for my $i_open (@unmatched_opening_indexes_in_this_batch) {
12166             my @found = lookup_opening_indentation( $i_open, $ri_first,
12167                 $ri_last, $rindentation_list );
12168             $saved_opening_indentation{ $type_sequence_to_go[$i_open] } = [@found];
12169         }
12170     }
12171 }    # end unmatched_indexes
12172
12173 sub get_opening_indentation {
12174
12175     # Report the indentation of the output line which holds the opening
12176     # token paired with a given closing token of the current output batch.
12177     #
12178     # given:
12179     # $i_closing - index in this line of a closing token ')' '}' or ']'
12180     #
12181     # $ri_first - reference to list of the first index $i for each output
12182     #               line in this batch
12183     # $ri_last - reference to list of the last index $i for each output line
12184     #              in this batch
12185     # $rindentation_list - reference to a list containing the indentation
12186     #            used for each line.
12187     #
12188     # return:
12189     #   -the indentation of the line which contained the opening token
12190     #    which matches the token at index $i_closing
12191     #   -its offset (number of columns) from the start of the line
12192     #   -a flag which is true if that opening token started its line
12193     #    (that is, it was the first token of the line)
12194     #   -a flag which is true if the opening token was located; when it
12195     #    is false the first three values are all returned as 0
12196     #
12197     my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
12198
12199     # start with the answer which is given when the opening token cannot
12200     # be found
12201     my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
12202
12203     # first, see if the opening token is in the current batch
12204     my $i_opening = $mate_index_to_go[$i_closing];
12205     if ( $i_opening >= 0 ) {
12206
12207         # it is..look up the indentation of its line
12208         ( $indent, $offset, $is_leading ) =
12209           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
12210             $rindentation_list );
12211         $exists = 1;
12212     }
12213     else {
12214
12215         # if not, it should have been stored in the hash by a previous
12216         # batch under the sequence number of the closing token
12217         my $seqno = $type_sequence_to_go[$i_closing];
12218         my $rsaved;
12219         if ($seqno) { $rsaved = $saved_opening_indentation{$seqno} }
12220
12221         if ($rsaved) {
12222             ( $indent, $offset, $is_leading ) = @{$rsaved};
12223             $exists = 1;
12224         }
12225
12226         # Otherwise the zeros set above are returned.  There are two ways
12227         # for this to happen:
12228         #  - there is a sequence number but nothing was saved for it, which
12229         #    is some kind of serious error (example is badfile.t)
12230         #  - there is no sequence number, so it must be an unbalanced
12231         #    container
12232     }
12233
12234     return ( $indent, $offset, $is_leading, $exists );
12235 }
12236
12237 sub lookup_opening_indentation {
12238
12239     # Locate the output line of the current batch that holds a selected
12240     # opening token and report the indentation used for that line.
12241     #
12242     # given:
12243     #   $i_opening - index of an opening token in the current output batch
12244     #   $ri_first - reference to list of the first index $i for each output
12245     #               line in this batch
12246     #   $ri_last - reference to list of the last index $i for each output
12247     #              line in this batch
12248     #   $rindentation_list - reference to a list in which slot 0 holds the
12249     #              line number returned by the previous lookup and slot
12250     #              n+1 holds the indentation used for line n
12251     #
12252     # return:
12253     #   -the indentation of the line which contained token $i_opening
12254     #   -its offset from the start of that line, taken as
12255     #    token_sequence_length( start of line, $i_opening ) - 1
12256     #   -a flag which is true if $i_opening is the first token of the line
12257
12258     my ( $i_opening, $ri_first, $ri_last, $rindentation_list ) = @_;
12259
12260     # resume at the line found last time, unless the token precedes it
12261     my $line = $rindentation_list->[0];
12262     if ( $i_opening < $ri_first->[$line] ) { $line = 0 }
12263
12264     if ( $i_opening > $ri_last->[-1] ) {
12265         # error - token index is out of bounds - shouldn't happen
12266         warning(
12267 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
12268         );
12269         report_definite_bug();
12270         $line = $#{$ri_last};
12271     }
12272     else {
12273         # advance to the first line whose last index is not before the token
12274         $line++ while ( $i_opening > $ri_last->[$line] );
12275     }
12276
12277     # remember this line number as the starting point for the next call
12278     $rindentation_list->[0] = $line;
12279
12280     my $i_line_start = $ri_first->[$line];
12281     my $n_columns    = token_sequence_length( $i_line_start, $i_opening ) - 1;
12282     my $starts_line  = ( $i_line_start == $i_opening );
12283
12284     return ( $rindentation_list->[ $line + 1 ], $n_columns, $starts_line );
12285 }
12286
12287 {
      # Closure: the hash declared here is private to the BEGIN block and to
      # sub set_adjusted_indentation which follow.
12288     my %is_if_elsif_else_unless_while_until_for_foreach;
12289
12290     BEGIN {
12291
12292         # These block types may have text between the keyword and opening
12293         # curly.  Note: 'else' does not, but must be included to allow trailing
12294         # if/elsif text to be appended.
12295         # patch for SWITCH/CASE: added 'case' and 'when'
              #
              # The hash slice assignment below maps every listed keyword to 1,
              # so the hash can be used as a simple membership test.
12296         @_ = qw(if elsif else unless while until for foreach case when);
12297         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
12298     }
12299
12300     sub set_adjusted_indentation {
12301
12302         # This routine has the final say regarding the actual indentation of
12303         # a line.  It starts with the basic indentation which has been
12304         # defined for the leading token, and then takes into account any
12305         # options that the user has set regarding special indenting and
12306         # outdenting.
              #
              # Parameters:
              #   $ibeg, $iend - indexes (into the *_to_go arrays) of the first
              #                  and last tokens of the line
              #   $rfields, $rpatterns - array refs; only element [0] of each is
              #                  examined here, to recognize a line which closes
              #                  a qw quote
              #   $ri_first, $ri_last, $rindentation_list - passed through to
              #                  get_opening_indentation; the indentation chosen
              #                  here is also pushed onto @$rindentation_list
              #   $level_jump  - a negative value can trigger outdenting of a
              #                  line with a leading closing token (Section 1)
              #
              # Returns the list:
              #   ( $indentation, $lev, $level_end, $terminal_type,
              #     $is_semicolon_terminated, $is_outdented_line )
              #
              # Side effects: updates $last_indentation_written,
              # $last_unadjusted_indentation and $last_leading_token, unless
              # $ibeg is 0 and $starting_in_quote is set.
12307
12308         my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
12309             $rindentation_list, $level_jump )
12310           = @_;
12311
12312         # we need to know the last token of this line
12313         my ( $terminal_type, $i_terminal ) =
12314           terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
12315
12316         my $is_outdented_line = 0;
12317
              # true if the line ends in ';' at a shallower nesting depth than
              # the depth at its first token
12318         my $is_semicolon_terminated = $terminal_type eq ';'
12319           && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
12320
12321         ##########################################################
12322         # Section 1: set a flag and a default indentation
12323         #
12324         # Most lines are indented according to the initial token.
12325         # But it is common to outdent to the level just after the
12326         # terminal token in certain cases...
12327         # adjust_indentation flag:
12328         #       0 - do not adjust
12329         #       1 - outdent
12330         #       2 - vertically align with opening token
12331         #       3 - indent
12332         ##########################################################
12333         my $adjust_indentation         = 0;
12334         my $default_adjust_indentation = $adjust_indentation;
12335
              # These four are filled in by get_opening_indentation, and only
              # for lines starting with a closing token or with a ':'; they are
              # left undefined for all other lines.
12336         my (
12337             $opening_indentation, $opening_offset,
12338             $is_leading,          $opening_exists
12339         );
12340
12341         # if we are at a closing token of some type..
12342         if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
12343
12344             # get the indentation of the line containing the corresponding
12345             # opening token
12346             (
12347                 $opening_indentation, $opening_offset,
12348                 $is_leading,          $opening_exists
12349               )
12350               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12351                 $rindentation_list );
12352
12353             # First set the default behavior:
12354             if (
12355
12356                 # default behavior is to outdent closing lines
12357                 # of the form:   ");  };  ];  )->xxx;"
12358                 $is_semicolon_terminated
12359
12360                 # and 'cuddled parens' of the form:   ")->pack("
12361                 || (
12362                        $terminal_type eq '('
12363                     && $types_to_go[$ibeg] eq ')'
12364                     && ( $nesting_depth_to_go[$iend] + 1 ==
12365                         $nesting_depth_to_go[$ibeg] )
12366                 )
12367
12368                 # and when the next line is at a lower indentation level
12369                 # PATCH: and only if the style allows undoing continuation
12370                 # for all closing token types. We should really wait until
12371                 # the indentation of the next line is known and then make
12372                 # a decision, but that would require another pass.
12373                 || ( $level_jump < 0 && !$some_closing_token_indentation )
12374               )
12375             {
12376                 $adjust_indentation = 1;
12377             }
12378
12379             # outdent something like '),'
12380             if (
12381                 $terminal_type eq ','
12382
12383                 # allow just one character before the comma
12384                 && $i_terminal == $ibeg + 1
12385
12386                 # require LIST environment; otherwise, we may outdent too much --
12387                 # this can happen in calls without parentheses (overload.t);
12388                 && $container_environment_to_go[$i_terminal] eq 'LIST'
12389               )
12390             {
12391                 $adjust_indentation = 1;
12392             }
12393
12394             # undo continuation indentation of a terminal closing token if
12395             # it is the last token before a level decrease.  This will allow
12396             # a closing token to line up with its opening counterpart, and
12397             # avoids an indentation jump larger than 1 level.
12398             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
12399                 && $i_terminal == $ibeg )
12400             {
                      # NOTE(review): $ci is assigned but never read in this block
12401                 my $ci        = $ci_levels_to_go[$ibeg];
12402                 my $lev       = $levels_to_go[$ibeg];
12403                 my $next_type = $types_to_go[ $ibeg + 1 ];
12404                 my $i_next_nonblank =
12405                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
12406                 if (   $i_next_nonblank <= $max_index_to_go
12407                     && $levels_to_go[$i_next_nonblank] < $lev )
12408                 {
12409                     $adjust_indentation = 1;
12410                 }
12411             }
12412
12413             # YVES patch 1 of 2:
12414             # Undo ci of line with leading closing eval brace,
12415             # but not beyond the indentation of the line with
12416             # the opening brace.
12417             if (   $block_type_to_go[$ibeg] eq 'eval'
12418                 && !$rOpts->{'line-up-parentheses'}
12419                 && !$rOpts->{'indent-closing-brace'} )
12420             {
                      # NOTE(review): this repeats, with the same arguments, the
                      # get_opening_indentation call made at the top of this
                      # branch.
12421                 (
12422                     $opening_indentation, $opening_offset,
12423                     $is_leading,          $opening_exists
12424                   )
12425                   = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12426                     $rindentation_list );
12427                 my $indentation = $leading_spaces_to_go[$ibeg];
12428                 if ( defined($opening_indentation)
12429                     && $indentation > $opening_indentation )
12430                 {
12431                     $adjust_indentation = 1;
12432                 }
12433             }
12434
                  # Everything above can only leave the flag at 0 or 1; keep that
                  # value so that Section 2 can fall back to it if aligning with
                  # the opening token (flag 2) does not work.
12435             $default_adjust_indentation = $adjust_indentation;
12436
12437             # Now modify default behavior according to user request:
12438             # handle option to indent non-blocks of the form );  };  ];
12439             # But don't do special indentation to something like ')->pack('
12440             if ( !$block_type_to_go[$ibeg] ) {
                      # $cti is the setting stored in %closing_token_indentation
                      # for this token; only the values 1, 2 and 3 change the
                      # flag, any other value leaves the default in place.
12441                 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
12442                 if ( $cti == 1 ) {
12443                     if (   $i_terminal <= $ibeg + 1
12444                         || $is_semicolon_terminated )
12445                     {
12446                         $adjust_indentation = 2;
12447                     }
12448                     else {
12449                         $adjust_indentation = 0;
12450                     }
12451                 }
12452                 elsif ( $cti == 2 ) {
12453                     if ($is_semicolon_terminated) {
12454                         $adjust_indentation = 3;
12455                     }
12456                     else {
12457                         $adjust_indentation = 0;
12458                     }
12459                 }
12460                 elsif ( $cti == 3 ) {
12461                     $adjust_indentation = 3;
12462                 }
12463             }
12464
12465             # handle option to indent blocks
12466             else {
12467                 if (
12468                     $rOpts->{'indent-closing-brace'}
12469                     && (
12470                         $i_terminal == $ibeg    #  isolated terminal '}'
12471                         || $is_semicolon_terminated
12472                     )
12473                   )                             #  } xxxx ;
12474                 {
12475                     $adjust_indentation = 3;
12476                 }
12477             }
12478         }
12479
12480         # if at ');', '};', '>;', and '];' of a terminal qw quote
              # (the closing character is captured in $1 and used to look up
              # the closing token indentation setting)
12481         elsif ($$rpatterns[0] =~ /^qb*;$/
12482             && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
12483         {
12484             if ( $closing_token_indentation{$1} == 0 ) {
12485                 $adjust_indentation = 1;
12486             }
12487             else {
12488                 $adjust_indentation = 3;
12489             }
12490         }
12491
12492         # if line begins with a ':', align it with any
12493         # previous line leading with corresponding ?
12494         elsif ( $types_to_go[$ibeg] eq ':' ) {
12495             (
12496                 $opening_indentation, $opening_offset,
12497                 $is_leading,          $opening_exists
12498               )
12499               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12500                 $rindentation_list );
12501             if ($is_leading) { $adjust_indentation = 2; }
12502         }
12503
12504         ##########################################################
12505         # Section 2: set indentation according to flag set above
12506         #
12507         # Select the indentation object to define leading
12508         # whitespace.  If we are outdenting something like '} } );'
12509         # then we want to use one level below the last token
12510         # ($i_terminal) in order to get it to fully outdent through
12511         # all levels.
12512         ##########################################################
12513         my $indentation;
12514         my $lev;
12515         my $level_end = $levels_to_go[$iend];
12516
12517         if ( $adjust_indentation == 0 ) {
12518             $indentation = $leading_spaces_to_go[$ibeg];
12519             $lev         = $levels_to_go[$ibeg];
12520         }
12521         elsif ( $adjust_indentation == 1 ) {
12522             $indentation = $reduced_spaces_to_go[$i_terminal];
12523             $lev         = $levels_to_go[$i_terminal];
12524         }
12525
12526         # handle indented closing token which aligns with opening token
12527         elsif ( $adjust_indentation == 2 ) {
12528
12529             # handle option to align closing token with opening token
12530             $lev = $levels_to_go[$ibeg];
12531
12532             # calculate spaces needed to align with opening token
                  # (both values were returned by get_opening_indentation in
                  # Section 1; flag 2 is only set after such a call)
12533             my $space_count =
12534               get_SPACES($opening_indentation) + $opening_offset;
12535
12536             # Indent less than the previous line.
12537             #
12538             # Problem: For -lp we don't exactly know what it was if there
12539             # were recoverable spaces sent to the aligner.  A good solution
12540             # would be to force a flush of the vertical alignment buffer, so
12541             # that we would know.  For now, this rule is used for -lp:
12542             #
12543             # When the last line did not start with a closing token we will
12544             # be optimistic that the aligner will recover everything wanted.
12545             #
12546             # This rule will prevent us from breaking a hierarchy of closing
12547             # tokens, and in a worst case will leave a closing paren too far
12548             # indented, but this is better than frequently leaving it not
12549             # indented enough.
12550             my $last_spaces = get_SPACES($last_indentation_written);
12551             if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
12552                 $last_spaces +=
12553                   get_RECOVERABLE_SPACES($last_indentation_written);
12554             }
12555
12556             # reset the indentation to the new space count if it works
12557             # only options are all or none: nothing in-between looks good
                  # NOTE(review): $lev was already set to this same value at the
                  # top of this branch.
12558             $lev = $levels_to_go[$ibeg];
12559             if ( $space_count < $last_spaces ) {
12560                 if ($rOpts_line_up_parentheses) {
                          # NOTE(review): this inner 'my $lev' shadows the outer
                          # $lev; both hold $levels_to_go[$ibeg] at this point.
12561                     my $lev = $levels_to_go[$ibeg];
12562                     $indentation =
12563                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12564                 }
12565                 else {
12566                     $indentation = $space_count;
12567                 }
12568             }
12569
12570             # revert to default if it doesn't work
12571             else {
                      # NOTE(review): $space_count is assigned here but is not
                      # read again before it goes out of scope.
12572                 $space_count = leading_spaces_to_go($ibeg);
12573                 if ( $default_adjust_indentation == 0 ) {
12574                     $indentation = $leading_spaces_to_go[$ibeg];
12575                 }
12576                 elsif ( $default_adjust_indentation == 1 ) {
12577                     $indentation = $reduced_spaces_to_go[$i_terminal];
12578                     $lev         = $levels_to_go[$i_terminal];
12579                 }
12580             }
12581         }
12582
12583         # Full indentation of closing tokens (-icb and -icp or -cti=2)
12584         else {
12585
12586             # handle -icb (indented closing code block braces)
12587             # Updated method for indented block braces: indent one full level if
12588             # there is no continuation indentation.  This will occur for major
12589             # structures such as sub, if, else, but not for things like map
12590             # blocks.
12591             #
12592             # Note: only code blocks without continuation indentation are
12593             # handled here (if, else, unless, ..). In the following snippet,
12594             # the terminal brace of the sort block will have continuation
12595             # indentation as shown so it will not be handled by the coding
12596             # here.  We would have to undo the continuation indentation to do
12597             # this, but it probably looks ok as is.  This is a possible future
12598             # update for semicolon terminated lines.
12599             #
12600             #     if ($sortby eq 'date' or $sortby eq 'size') {
12601             #         @files = sort {
12602             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
12603             #                 or $a cmp $b
12604             #                 } @files;
12605             #         }
12606             #
12607             if (   $block_type_to_go[$ibeg]
12608                 && $ci_levels_to_go[$i_terminal] == 0 )
12609             {
12610                 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
12611                 $indentation = $spaces + $rOpts_indent_columns;
12612
12613                 # NOTE: for -lp we could create a new indentation object, but
12614                 # there is probably no need to do it
12615             }
12616
12617             # handle -icp and any -icb block braces which fall through above
12618             # test such as the 'sort' block mentioned above.
12619             else {
12620
12621                 # There are currently two ways to handle -icp...
12622                 # One way is to use the indentation of the previous line:
12623                 # $indentation = $last_indentation_written;
12624
12625                 # The other way is to use the indentation that the previous line
12626                 # would have had if it hadn't been adjusted:
12627                 $indentation = $last_unadjusted_indentation;
12628
12629                 # Current method: use the minimum of the two. This avoids
12630                 # inconsistent indentation.
12631                 if ( get_SPACES($last_indentation_written) <
12632                     get_SPACES($indentation) )
12633                 {
12634                     $indentation = $last_indentation_written;
12635                 }
12636             }
12637
12638             # use previous indentation but use own level
12639             # to cause list to be flushed properly
12640             $lev = $levels_to_go[$ibeg];
12641         }
12642
12643         # remember indentation except for multi-line quotes, which get
12644         # no indentation
              # Note: the value is recorded before the two adjustments made
              # further below, so it can differ from the indentation which is
              # finally returned.
12645         unless ( $ibeg == 0 && $starting_in_quote ) {
12646             $last_indentation_written    = $indentation;
12647             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
12648             $last_leading_token          = $tokens_to_go[$ibeg];
12649         }
12650
12651         # be sure lines with leading closing tokens are not outdented more
12652         # than the line which contained the corresponding opening token.
12653
12654         #############################################################
12655         # updated per bug report in alex_bug.pl: we must not
12656         # mess with the indentation of closing logical braces so
12657         # we must treat something like '} else {' as if it were
12658         # an isolated brace my $is_isolated_block_brace = (
12659         # $iend == $ibeg ) && $block_type_to_go[$ibeg];
12660         #############################################################
12661         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
12662           && ( $iend == $ibeg
12663             || $is_if_elsif_else_unless_while_until_for_foreach{
12664                 $block_type_to_go[$ibeg]
12665             } );
12666
12667         # only do this for a ':' which is aligned with its leading '?'
12668         my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
12669         if (   defined($opening_indentation)
12670             && !$is_isolated_block_brace
12671             && !$is_unaligned_colon )
12672         {
12673             if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
12674                 $indentation = $opening_indentation;
12675             }
12676         }
12677
12678         # remember the indentation of each line of this batch
12679         push @{$rindentation_list}, $indentation;
12680
12681         # outdent lines with certain leading tokens...
12682         if (
12683
12684             # must be first word of this batch
12685             $ibeg == 0
12686
12687             # and ...
12688             && (
12689
12690                 # certain leading keywords if requested
12691                 (
12692                        $rOpts->{'outdent-keywords'}
12693                     && $types_to_go[$ibeg] eq 'k'
12694                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
12695                 )
12696
12697                 # or labels if requested
12698                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
12699
12700                 # or static block comments if requested
12701                 || (   $types_to_go[$ibeg] eq '#'
12702                     && $rOpts->{'outdent-static-block-comments'}
12703                     && $is_static_block_comment )
12704             )
12705           )
12706
12707         {
                  # move the line left by the continuation indentation, but
                  # never to a negative column
12708             my $space_count = leading_spaces_to_go($ibeg);
12709             if ( $space_count > 0 ) {
12710                 $space_count -= $rOpts_continuation_indentation;
12711                 $is_outdented_line = 1;
12712                 if ( $space_count < 0 ) { $space_count = 0 }
12713
12714                 # do not promote a spaced static block comment to non-spaced;
12715                 # this is not normally necessary but could be for some
12716                 # unusual user inputs (such as -ci = -i)
12717                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
12718                     $space_count = 1;
12719                 }
12720
12721                 if ($rOpts_line_up_parentheses) {
12722                     $indentation =
12723                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12724                 }
12725                 else {
12726                     $indentation = $space_count;
12727                 }
12728             }
12729         }
12730
12731         return ( $indentation, $lev, $level_end, $terminal_type,
12732             $is_semicolon_terminated, $is_outdented_line );
12733     }
12734 }
12735
12736 sub set_vertical_tightness_flags {
12737
12738     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
12739
12740     # Define vertical tightness controls for the nth line of a batch.
12741     # We create an array of parameters which tell the vertical aligner
12742     # if we should combine this line with the next line to achieve the
12743     # desired vertical tightness.  The array of parameters contains:
12744     #
12745     #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
12746     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
12747     #             if closing: spaces of padding to use
12748     #   [2] sequence number of container
12749     #   [3] valid flag: do not append if this flag is false. Will be
12750     #       true if appropriate -vt flag is set.  Otherwise, Will be
12751     #       made true only for 2 line container in parens with -lp
          #   [4] get_seqno() value for the first token of this line
          #   [5] get_seqno() value for the last token of this line
12752     #
12753     # These flags are used by sub set_leading_whitespace in
12754     # the vertical aligner
          #
          # Parameters:
          #   $n                  - index of this line within the batch
          #   $n_last_line        - index of the last line of the batch
          #   $ibeg, $iend        - indexes of the first and last tokens of
          #                         line $n in the *_to_go arrays
          #   $ri_first, $ri_last - refs to arrays holding, for each line of
          #                         the batch, its first and last token index
          #
          # Returns a reference to the six-element array described above.
          #
          # The checks below run in order, and each one which succeeds replaces
          # elements [0..3], so the last matching rule wins.
12755
12756     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
12757
12758     # For non-BLOCK tokens, we will need to examine the next line
12759     # too, so we won't consider the last line.
12760     if ( $n < $n_last_line ) {
12761
12762         # see if last token is an opening token...not a BLOCK...
12763         my $ibeg_next = $$ri_first[ $n + 1 ];
12764         my $token_end = $tokens_to_go[$iend];
12765         my $iend_next = $$ri_last[ $n + 1 ];
12766         if (
12767                $type_sequence_to_go[$iend]
12768             && !$block_type_to_go[$iend]
12769             && $is_opening_token{$token_end}
12770             && (
12771                 $opening_vertical_tightness{$token_end} > 0
12772
12773                 # allow 2-line method call to be closed up
12774                 || (   $rOpts_line_up_parentheses
12775                     && $token_end eq '('
12776                     && $iend > $ibeg
12777                     && $types_to_go[ $iend - 1 ] ne 'b' )
12778             )
12779           )
12780         {
12781
12782             # avoid multiple jumps in nesting depth in one line if
12783             # requested
12784             my $ovt = $opening_vertical_tightness{$token_end};
12786             unless (
12787                 $ovt < 2
12788                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
12789                     $nesting_depth_to_go[$ibeg_next] )
12790               )
12791             {
12792
12793                 # If -vt flag has not been set, mark this as invalid
12794                 # and aligner will validate it if it sees the closing paren
12795                 # within 2 lines.
12796                 my $valid_flag = $ovt;
12797                 @{$rvertical_tightness_flags} =
12798                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
12799             }
12800         }
12801
12802         # see if first token of next line is a closing token...
12803         # ..and be sure this line does not have a side comment
12804         my $token_next = $tokens_to_go[$ibeg_next];
12805         if (   $type_sequence_to_go[$ibeg_next]
12806             && !$block_type_to_go[$ibeg_next]
12807             && $is_closing_token{$token_next}
12808             && $types_to_go[$iend] ne '#' )    # for safety, shouldn't happen!
12809         {
12811             my $cvt = $closing_vertical_tightness{$token_next};
12812             if (
12813
12814                 # never append a trailing line like   )->pack(
12815                 # because it will throw off later alignment
12816                 (
12817                     $nesting_depth_to_go[$ibeg_next] ==
12818                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
12819                 )
12820                 && (
12821                     $cvt == 2
12822                     || (
12823                         $container_environment_to_go[$ibeg_next] ne 'LIST'
12824                         && (
12825                             $cvt == 1
12826
12827                             # allow closing up 2-line method calls
12828                             || (   $rOpts_line_up_parentheses
12829                                 && $token_next eq ')' )
12830                         )
12831                     )
12832                 )
12833               )
12834             {
12835
12836                 # decide which trailing closing tokens to append..
12837                 my $ok = 0;
12838                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
12839                 else {
12840                     my $str = join( '',
12841                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
12842
12843                     # append closing token if followed by comment or ';'
12844                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
12845                 }
12846
12847                 if ($ok) {
12848                     my $valid_flag = $cvt;
12849                     @{$rvertical_tightness_flags} = (
12850                         2,
12851                         $tightness{$token_next} == 2 ? 0 : 1,
12852                         $type_sequence_to_go[$ibeg_next], $valid_flag,
12853                     );
12854                 }
12855             }
12856         }
12857
12858         # Opening Token Right
12859         # If requested, move an isolated trailing opening token to the end of
12860         # the previous line which ended in a comma.  We could do this
12861         # in sub recombine_breakpoints but that would cause problems
12862         # with -lp formatting.  The problem is that indentation will
12863         # quickly move far to the right in nested expressions.  By
12864         # doing it after indentation has been set, we avoid changes
12865         # to the indentation.  Actual movement of the token takes place
12866         # in sub write_leader_and_string.
12867         if (
12868             $opening_token_right{ $tokens_to_go[$ibeg_next] }
12869
12870             # previous line is not opening
12871             # (use -sot to combine with it)
12872             && !$is_opening_token{$token_end}
12873
12874             # previous line ended in one of these
12875             # (add other cases if necessary; '=>' and '.' are not necessary
12876             ##&& ($is_opening_token{$token_end} || $token_end eq ',')
12877             && !$block_type_to_go[$ibeg_next]
12878
12879             # this is a line with just an opening token
                  # (optionally followed by a side comment)
12880             && (   $iend_next == $ibeg_next
12881                 || ( $iend_next == $ibeg_next + 2
12882                     && $types_to_go[$iend_next] eq '#' ) )
12883
12884             # looks bad if we align vertically with the wrong container
12885             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
12886           )
12887         {
12888             my $valid_flag = 1;
12889             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12890             @{$rvertical_tightness_flags} =
12891               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
12892         }
12893
12894         # Stacking of opening and closing tokens
12895         my $stackable;
12896         my $token_beg_next = $tokens_to_go[$ibeg_next];
12897
12898         # patch to make something like 'qw(' behave like an opening paren
12899         # (aran.t)
12900         if ( $types_to_go[$ibeg_next] eq 'q' ) {
12901             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
12902                 $token_beg_next = $1;
12903             }
12904         }
12905
12906         if (   $is_closing_token{$token_end}
12907             && $is_closing_token{$token_beg_next} )
12908         {
12909             $stackable = $stack_closing_token{$token_beg_next}
12910               unless ( $block_type_to_go[$ibeg_next] )
12911               ;    # shouldn't happen; just checking
12912         }
12913         elsif ($is_opening_token{$token_end}
12914             && $is_opening_token{$token_beg_next} )
12915         {
12916             $stackable = $stack_opening_token{$token_beg_next}
12917               unless ( $block_type_to_go[$ibeg_next] )
12918               ;    # shouldn't happen; just checking
12919         }
12920
12921         if ($stackable) {
12922
                  # only evaluated when the next line is the last line of the
                  # batch; otherwise this stays undefined (false)
12923             my $is_semicolon_terminated;
12924             if ( $n + 1 == $n_last_line ) {
12925                 my ( $terminal_type, $i_terminal ) = terminal_type(
12926                     \@types_to_go, \@block_type_to_go,
12927                     $ibeg_next,    $iend_next
12928                 );
12929                 $is_semicolon_terminated = $terminal_type eq ';'
12930                   && $nesting_depth_to_go[$iend_next] <
12931                   $nesting_depth_to_go[$ibeg_next];
12932             }
12933
12934             # this must be a line with just an opening token
12935             # or end in a semicolon
12936             if (
12937                 $is_semicolon_terminated
12938                 || (   $iend_next == $ibeg_next
12939                     || ( $iend_next == $ibeg_next + 2
12940                         && $types_to_go[$iend_next] eq '#' ) )
12941               )
12942             {
12943                 my $valid_flag = 1;
12944                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12945                 @{$rvertical_tightness_flags} =
12946                   ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
12947                   );
12948             }
12949         }
12950     }
12951
12952     # Check for a last line with isolated opening BLOCK curly
          # ($ibeg and $iend are token indexes, so compare them numerically)
12953     elsif ($rOpts_block_brace_vertical_tightness
12954         && $ibeg == $iend
12955         && $types_to_go[$iend] eq '{'
12956         && $block_type_to_go[$iend] =~
12957         /$block_brace_vertical_tightness_pattern/o )
12958     {
12959         @{$rvertical_tightness_flags} =
12960           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
12961     }
12962
12963     # pack in the sequence numbers of the ends of this line
12964     $rvertical_tightness_flags->[4] = get_seqno($ibeg);
12965     $rvertical_tightness_flags->[5] = get_seqno($iend);
12966     return $rvertical_tightness_flags;
12967 }
12968
12969 sub get_seqno {
12970
12971     # Return the sequence number to report to the vertical aligner for the
12972     # token at index $ii.  Normally this is just the stored entry of
12973     # @type_sequence_to_go.  A quote token (type 'q') is instead given the
12974     # pseudo sequence number -1 when it looks like a qw delimiter, so that
12975     # qw opening and closing tokens can be stacked somewhat like ordinary
12976     # opening and closing tokens:
12977     #   - index > 0: the token text starts with 'qw' and an opening bracket
12978     #   - otherwise: the token text ends with a closing bracket and
12979     #                $ending_in_quote is false
12980     my ($ii) = @_;
12981     my $QW_SEQNO = -1;
12982     return ( $type_sequence_to_go[$ii] ) unless ( $types_to_go[$ii] eq 'q' );
12983
12984     my $is_qw_delimiter =
12985       $ii > 0
12986       ? $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/
12987       : !$ending_in_quote && $tokens_to_go[$ii] =~ /[\)\}\]]$/;
12988     return ( $is_qw_delimiter ? $QW_SEQNO : $type_sequence_to_go[$ii] );
12989 }
12990
12991 {
12992     my %is_vertical_alignment_type;
12993     my %is_vertical_alignment_keyword;
12994
12995     BEGIN {
12996
              # Fill the two lookup hashes declared above at compile time; each
              # listed item becomes a key with the value 1.
              #
              # Token types stored in %is_vertical_alignment_type.  Note that
              # the '#' characters below are the qw delimiters, not comments.
12997         @_ = qw#
12998           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
12999           { ? : => =~ && || // ~~ !~~
13000           #;
13001         @is_vertical_alignment_type{@_} = (1) x scalar(@_);
13002
              # Keywords stored in %is_vertical_alignment_keyword.
13003         @_ = qw(if unless and or err eq ne for foreach while until);
13004         @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
13005     }
13006
13007     sub set_vertical_alignment_markers {
13008
13009         # This routine takes the first step toward vertical alignment of the
13010         # lines of output text.  It looks for certain tokens which can serve as
13011         # vertical alignment markers (such as an '=').
13012         #
13013         # Method: We look at each token $i in this output batch and set
13014         # $matching_token_to_go[$i] equal to those tokens at which we would
13015         # accept vertical alignment.
13016
13017         # nothing to do if we aren't allowed to change whitespace
13018         if ( !$rOpts_add_whitespace ) {
13019             for my $i ( 0 .. $max_index_to_go ) {
13020                 $matching_token_to_go[$i] = '';
13021             }
13022             return;
13023         }
13024
13025         my ( $ri_first, $ri_last ) = @_;
13026
13027         # remember the index of last nonblank token before any sidecomment
13028         my $i_terminal = $max_index_to_go;
13029         if ( $types_to_go[$i_terminal] eq '#' ) {
13030             if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
13031                 if ( $i_terminal > 0 ) { --$i_terminal }
13032             }
13033         }
13034
13035         # look at each line of this batch..
13036         my $last_vertical_alignment_before_index;
13037         my $vert_last_nonblank_type;
13038         my $vert_last_nonblank_token;
13039         my $vert_last_nonblank_block_type;
13040         my $max_line = @$ri_first - 1;
13041         my ( $i, $type, $token, $block_type, $alignment_type );
13042         my ( $ibeg, $iend, $line );
13043
13044         foreach $line ( 0 .. $max_line ) {
13045             $ibeg                                 = $$ri_first[$line];
13046             $iend                                 = $$ri_last[$line];
13047             $last_vertical_alignment_before_index = -1;
13048             $vert_last_nonblank_type              = '';
13049             $vert_last_nonblank_token             = '';
13050             $vert_last_nonblank_block_type        = '';
13051
13052             # look at each token in this output line..
13053             foreach $i ( $ibeg .. $iend ) {
13054                 $alignment_type = '';
13055                 $type           = $types_to_go[$i];
13056                 $block_type     = $block_type_to_go[$i];
13057                 $token          = $tokens_to_go[$i];
13058
13059                 # check for flag indicating that we should not align
13060                 # this token
13061                 if ( $matching_token_to_go[$i] ) {
13062                     $matching_token_to_go[$i] = '';
13063                     next;
13064                 }
13065
13066                 #--------------------------------------------------------
13067                 # First see if we want to align BEFORE this token
13068                 #--------------------------------------------------------
13069
13070                 # The first possible token that we can align before
13071                 # is index 2 because: 1) it doesn't normally make sense to
13072                 # align before the first token and 2) the second
13073                 # token must be a blank if we are to align before
13074                 # the third
13075                 if ( $i < $ibeg + 2 ) { }
13076
13077                 # must follow a blank token
13078                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
13079
13080                 # align a side comment --
13081                 elsif ( $type eq '#' ) {
13082
13083                     unless (
13084
13085                         # it is a static side comment
13086                         (
13087                                $rOpts->{'static-side-comments'}
13088                             && $token =~ /$static_side_comment_pattern/o
13089                         )
13090
13091                         # or a closing side comment
13092                         || (   $vert_last_nonblank_block_type
13093                             && $token =~
13094                             /$closing_side_comment_prefix_pattern/o )
13095                       )
13096                     {
13097                         $alignment_type = $type;
13098                     }    ## Example of a static side comment
13099                 }
13100
13101                 # otherwise, do not align two in a row to create a
13102                 # blank field
13103                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
13104
13105                 # align before one of these keywords
13106                 # (within a line, since $i>1)
13107                 elsif ( $type eq 'k' ) {
13108
13109                     #  /^(if|unless|and|or|eq|ne)$/
13110                     if ( $is_vertical_alignment_keyword{$token} ) {
13111                         $alignment_type = $token;
13112                     }
13113                 }
13114
13115                 # align before one of these types..
13116                 # Note: add '.' after new vertical aligner is operational
13117                 elsif ( $is_vertical_alignment_type{$type} ) {
13118                     $alignment_type = $token;
13119
13120                     # Do not align a terminal token.  Although it might
13121                     # occasionally look ok to do this, it has been found to be
13122                     # a good general rule.  The main problems are:
13123                     # (1) that the terminal token (such as an = or :) might get
13124                     # moved far to the right where it is hard to see because
13125                     # nothing follows it, and
13126                     # (2) doing so may prevent other good alignments.
13127                     if ( $i == $iend || $i >= $i_terminal ) {
13128                         $alignment_type = "";
13129                     }
13130
13131                     # Do not align leading ': (' or '. ('.  This would prevent
13132                     # alignment in something like the following:
13133                     #   $extra_space .=
13134                     #       ( $input_line_number < 10 )  ? "  "
13135                     #     : ( $input_line_number < 100 ) ? " "
13136                     #     :                                "";
13137                     # or
13138                     #  $code =
13139                     #      ( $case_matters ? $accessor : " lc($accessor) " )
13140                     #    . ( $yesno        ? " eq "       : " ne " )
13141                     if (   $i == $ibeg + 2
13142                         && $types_to_go[$ibeg] =~ /^[\.\:]$/
13143                         && $types_to_go[ $i - 1 ] eq 'b' )
13144                     {
13145                         $alignment_type = "";
13146                     }
13147
13148                     # For a paren after keyword, only align something like this:
13149                     #    if    ( $a ) { &a }
13150                     #    elsif ( $b ) { &b }
13151                     if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
13152                         $alignment_type = ""
13153                           unless $vert_last_nonblank_token =~
13154                           /^(if|unless|elsif)$/;
13155                     }
13156
13157                     # be sure the alignment tokens are unique
13158                     # This didn't work well: reason not determined
13159                     # if ($token ne $type) {$alignment_type .= $type}
13160                 }
13161
13162                 # NOTE: This is deactivated because it causes the previous
13163                 # if/elsif alignment to fail
13164                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
13165                 #{ $alignment_type = $type; }
13166
13167                 if ($alignment_type) {
13168                     $last_vertical_alignment_before_index = $i;
13169                 }
13170
13171                 #--------------------------------------------------------
13172                 # Next see if we want to align AFTER the previous nonblank
13173                 #--------------------------------------------------------
13174
13175                 # We want to line up ',' and interior ';' tokens, with the added
13176                 # space AFTER these tokens.  (Note: interior ';' is included
13177                 # because it may occur in short blocks).
13178                 if (
13179
13180                     # we haven't already set it
13181                     !$alignment_type
13182
13183                     # and its not the first token of the line
13184                     && ( $i > $ibeg )
13185
13186                     # and it follows a blank
13187                     && $types_to_go[ $i - 1 ] eq 'b'
13188
13189                     # and previous token IS one of these:
13190                     && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
13191
13192                     # and it's NOT one of these
13193                     && ( $type !~ /^[b\#\)\]\}]$/ )
13194
13195                     # then go ahead and align
13196                   )
13197
13198                 {
13199                     $alignment_type = $vert_last_nonblank_type;
13200                 }
13201
13202                 #--------------------------------------------------------
13203                 # then store the value
13204                 #--------------------------------------------------------
13205                 $matching_token_to_go[$i] = $alignment_type;
13206                 if ( $type ne 'b' ) {
13207                     $vert_last_nonblank_type       = $type;
13208                     $vert_last_nonblank_token      = $token;
13209                     $vert_last_nonblank_block_type = $block_type;
13210                 }
13211             }
13212         }
13213     }
13214 }
13215
13216 sub terminal_type {
13217
13218     # Find the terminal token of the line spanning indexes $ibeg..$iend.
13219     #   $rtype       = reference to the array of token types
13220     #   $rblock_type = reference to the array of block types
13221     # Returns the terminal type in scalar context, or the list
13222     # ( type, index ) in list context, where the type is:
13223     #   '#' for a full-line comment (index is $ibeg)
13224     #   ' ' when only blanks and comments are found (index is $ibeg)
13225     #   'b' for a terminal '}' which either has no block type or has a
13226     #       block type listed in %is_sort_map_grep_eval_do; such a brace
13227     #       is not necessarily the end of the line.  (terminal.t)
13228     #   otherwise the type of the last token which is not a blank or a
13229     #       side comment
13230     my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
13231
13232     # start with the answer for an empty line
13233     my ( $found_type, $found_index ) = ( ' ', $ibeg );
13234
13235     # a leading '#' means the whole line is a comment
13236     if ( $rtype->[$ibeg] eq '#' ) {
13237         $found_type = $rtype->[$ibeg];
13238     }
13239     else {
13240         # scan from the end of the line back toward the beginning
13241         foreach my $i ( reverse( $ibeg .. $iend ) ) {
13242             my $this_type = $rtype->[$i];
13243             next if ( $this_type eq 'b' || $this_type eq '#' );
13244             # this is the terminal token; hide a non-terminating brace
13245             ( $found_type, $found_index ) = ( $this_type, $i );
13246             if ( $this_type eq '}' ) {
13247                 my $brace_block = $rblock_type->[$i];
13248                 $found_type = 'b'
13249                   if ( !$brace_block
13250                     || $is_sort_map_grep_eval_do{$brace_block} );
13251             }
13252             last;
13253         }
13254     }
13255     return wantarray ? ( $found_type, $found_index ) : $found_type;
13256 }
13257
13258 {
13259     my %is_good_keyword_breakpoint;
13260     my %is_lt_gt_le_ge;
13261
13262     sub set_bond_strengths {
13263
13264         BEGIN {
13265
13266             @_ = qw(if unless while until for foreach);
13267             @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
13268
13269             @_ = qw(lt gt le ge);
13270             @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
13271
13272             ###############################################################
13273             # NOTE: NO_BREAK's set here are HINTS which may not be honored;
13274             # essential NO_BREAKS's must be enforced in section 2, below.
13275             ###############################################################
13276
13277             # adding NEW_TOKENS: add a left and right bond strength by
13278             # mimicking what is done for an existing token type.  You
13279             # can skip this step at first and take the default, then
13280             # tweak later to get desired results.
13281
13282             # The bond strengths should roughly follow precedence order where
13283             # possible.  If you make changes, please check the results very
13284             # carefully on a variety of scripts.
13285
13286             # no break around possible filehandle
13287             $left_bond_strength{'Z'}  = NO_BREAK;
13288             $right_bond_strength{'Z'} = NO_BREAK;
13289
13290             # never put a bare word on a new line:
13291             # example print (STDERR, "bla"); will fail with break after (
13292             $left_bond_strength{'w'} = NO_BREAK;
13293
13294         # blanks always have infinite strength to force breaks after real tokens
13295             $right_bond_strength{'b'} = NO_BREAK;
13296
13297             # try not to break on exponentiation
13298             @_                       = qw" ** .. ... <=> ";
13299             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
13300             @right_bond_strength{@_} = (STRONG) x scalar(@_);
13301
13302             # The comma-arrow has very low precedence but not a good break point
13303             $left_bond_strength{'=>'}  = NO_BREAK;
13304             $right_bond_strength{'=>'} = NOMINAL;
13305
13306             # ok to break after label
13307             $left_bond_strength{'J'}  = NO_BREAK;
13308             $right_bond_strength{'J'} = NOMINAL;
13309             $left_bond_strength{'j'}  = STRONG;
13310             $right_bond_strength{'j'} = STRONG;
13311             $left_bond_strength{'A'}  = STRONG;
13312             $right_bond_strength{'A'} = STRONG;
13313
13314             $left_bond_strength{'->'}  = STRONG;
13315             $right_bond_strength{'->'} = VERY_STRONG;
13316
13317             # breaking AFTER modulus operator is ok:
13318             @_ = qw" % ";
13319             @left_bond_strength{@_} = (STRONG) x scalar(@_);
13320             @right_bond_strength{@_} =
13321               ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
13322
13323             # Break AFTER math operators * and /
13324             @_                       = qw" * / x  ";
13325             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
13326             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13327
13328             # Break AFTER weakest math operators + and -
13329             # Make them weaker than * but a bit stronger than '.'
13330             @_ = qw" + - ";
13331             @left_bond_strength{@_} = (STRONG) x scalar(@_);
13332             @right_bond_strength{@_} =
13333               ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
13334
13335             # breaking BEFORE these is just ok:
13336             @_                       = qw" >> << ";
13337             @right_bond_strength{@_} = (STRONG) x scalar(@_);
13338             @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
13339
13340             # breaking before the string concatenation operator seems best
13341             # because it can be hard to see at the end of a line
13342             $right_bond_strength{'.'} = STRONG;
13343             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
13344
13345             @_                       = qw"} ] ) ";
13346             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
13347             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13348
13349             # make these a little weaker than nominal so that they get
13350             # favored for end-of-line characters
13351             @_ = qw"!= == =~ !~ ~~ !~~";
13352             @left_bond_strength{@_} = (STRONG) x scalar(@_);
13353             @right_bond_strength{@_} =
13354               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
13355
13356             # break AFTER these
13357             @_ = qw" < >  | & >= <=";
13358             @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
13359             @right_bond_strength{@_} =
13360               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
13361
13362             # breaking either before or after a quote is ok
13363             # but bias for breaking before a quote
13364             $left_bond_strength{'Q'}  = NOMINAL;
13365             $right_bond_strength{'Q'} = NOMINAL + 0.02;
13366             $left_bond_strength{'q'}  = NOMINAL;
13367             $right_bond_strength{'q'} = NOMINAL;
13368
13369             # starting a line with a keyword is usually ok
13370             $left_bond_strength{'k'} = NOMINAL;
13371
13372             # we usually want to bond a keyword strongly to what immediately
13373             # follows, rather than leaving it stranded at the end of a line
13374             $right_bond_strength{'k'} = STRONG;
13375
13376             $left_bond_strength{'G'}  = NOMINAL;
13377             $right_bond_strength{'G'} = STRONG;
13378
13379             # it is good to break AFTER various assignment operators
13380             @_ = qw(
13381               = **= += *= &= <<= &&=
13382               -= /= |= >>= ||= //=
13383               .= %= ^=
13384               x=
13385             );
13386             @left_bond_strength{@_} = (STRONG) x scalar(@_);
13387             @right_bond_strength{@_} =
13388               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
13389
13390             # break BEFORE '&&' and '||' and '//'
13391             # set strength of '||' to same as '=' so that chains like
13392             # $a = $b || $c || $d   will break before the first '||'
13393             $right_bond_strength{'||'} = NOMINAL;
13394             $left_bond_strength{'||'}  = $right_bond_strength{'='};
13395
13396             # same thing for '//'
13397             $right_bond_strength{'//'} = NOMINAL;
13398             $left_bond_strength{'//'}  = $right_bond_strength{'='};
13399
13400             # set strength of && a little higher than ||
13401             $right_bond_strength{'&&'} = NOMINAL;
13402             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
13403
13404             $left_bond_strength{';'}  = VERY_STRONG;
13405             $right_bond_strength{';'} = VERY_WEAK;
13406             $left_bond_strength{'f'}  = VERY_STRONG;
13407
13408             # make right strength of for ';' a little less than '='
13409             # to make for contents break after the ';' to avoid this:
13410             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
13411             #     $number_of_fields )
13412             # and make it weaker than ',' and 'and' too
13413             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
13414
13415             # The strengths of ?/: should be somewhere between
13416             # an '=' and a quote (NOMINAL),
13417             # make strength of ':' slightly less than '?' to help
13418             # break long chains of ? : after the colons
13419             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
13420             $right_bond_strength{':'} = NO_BREAK;
13421             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
13422             $right_bond_strength{'?'} = NO_BREAK;
13423
13424             $left_bond_strength{','}  = VERY_STRONG;
13425             $right_bond_strength{','} = VERY_WEAK;
13426
13427             # Set bond strengths of certain keywords
13428             # make 'or', 'err', 'and' slightly weaker than a ','
13429             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
13430             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
13431             $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
13432             $left_bond_strength{'xor'}  = NOMINAL;
13433             $right_bond_strength{'and'} = NOMINAL;
13434             $right_bond_strength{'or'}  = NOMINAL;
13435             $right_bond_strength{'err'} = NOMINAL;
13436             $right_bond_strength{'xor'} = STRONG;
13437         }
13438
13439         # patch-its always ok to break at end of line
13440         $nobreak_to_go[$max_index_to_go] = 0;
13441
13442         # adding a small 'bias' to strengths is a simple way to make a line
13443         # break at the first of a sequence of identical terms.  For example,
13444         # to force long string of conditional operators to break with
13445         # each line ending in a ':', we can add a small number to the bond
13446         # strength of each ':'
13447         my $colon_bias = 0;
13448         my $amp_bias   = 0;
13449         my $bar_bias   = 0;
13450         my $and_bias   = 0;
13451         my $or_bias    = 0;
13452         my $dot_bias   = 0;
13453         my $f_bias     = 0;
13454         my $code_bias  = -.01;
13455         my $type       = 'b';
13456         my $token      = ' ';
13457         my $last_type;
13458         my $last_nonblank_type  = $type;
13459         my $last_nonblank_token = $token;
13460         my $delta_bias          = 0.0001;
13461         my $list_str            = $left_bond_strength{'?'};
13462
13463         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
13464             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
13465         );
13466
13467         # preliminary loop to compute bond strengths
13468         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
13469             $last_type = $type;
13470             if ( $type ne 'b' ) {
13471                 $last_nonblank_type  = $type;
13472                 $last_nonblank_token = $token;
13473             }
13474             $type = $types_to_go[$i];
13475
13476             # strength on both sides of a blank is the same
13477             if ( $type eq 'b' && $last_type ne 'b' ) {
13478                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
13479                 next;
13480             }
13481
13482             $token               = $tokens_to_go[$i];
13483             $block_type          = $block_type_to_go[$i];
13484             $i_next              = $i + 1;
13485             $next_type           = $types_to_go[$i_next];
13486             $next_token          = $tokens_to_go[$i_next];
13487             $total_nesting_depth = $nesting_depth_to_go[$i_next];
13488             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13489             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
13490             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13491
13492             # Some token chemistry...  The decision about where to break a
13493             # line depends upon a "bond strength" between tokens.  The LOWER
13494             # the bond strength, the MORE likely a break.  The strength
13495             # values are based on trial-and-error, and need to be tweaked
13496             # occasionally to get desired results.  Things to keep in mind
13497             # are:
13498             #   1. relative strengths are important.  small differences
13499             #      in strengths can make big formatting differences.
13500             #   2. each indentation level adds one unit of bond strength
13501             #   3. a value of NO_BREAK makes an unbreakable bond
13502             #   4. a value of VERY_WEAK is the strength of a ','
13503             #   5. values below NOMINAL are considered ok break points
13504             #   6. values above NOMINAL are considered poor break points
13505             # We are computing the strength of the bond between the current
13506             # token and the NEXT token.
13507             my $bond_str = VERY_STRONG;    # a default, high strength
13508
13509             #---------------------------------------------------------------
13510             # section 1:
13511             # use minimum of left and right bond strengths if defined;
13512             # digraphs and trigraphs like to break on their left
13513             #---------------------------------------------------------------
13514             my $bsr = $right_bond_strength{$type};
13515
13516             if ( !defined($bsr) ) {
13517
13518                 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
13519                     $bsr = STRONG;
13520                 }
13521                 else {
13522                     $bsr = VERY_STRONG;
13523                 }
13524             }
13525
13526             # define right bond strengths of certain keywords
13527             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
13528                 $bsr = $right_bond_strength{$token};
13529             }
13530             elsif ( $token eq 'ne' or $token eq 'eq' ) {
13531                 $bsr = NOMINAL;
13532             }
13533             my $bsl = $left_bond_strength{$next_nonblank_type};
13534
13535             # set terminal bond strength to the nominal value
13536             # this will cause good preceding breaks to be retained
13537             if ( $i_next_nonblank > $max_index_to_go ) {
13538                 $bsl = NOMINAL;
13539             }
13540
13541             if ( !defined($bsl) ) {
13542
13543                 if (   $is_digraph{$next_nonblank_type}
13544                     || $is_trigraph{$next_nonblank_type} )
13545                 {
13546                     $bsl = WEAK;
13547                 }
13548                 else {
13549                     $bsl = VERY_STRONG;
13550                 }
13551             }
13552
13553             # define left bond strengths of certain keywords
13554             if ( $next_nonblank_type eq 'k'
13555                 && defined( $left_bond_strength{$next_nonblank_token} ) )
13556             {
13557                 $bsl = $left_bond_strength{$next_nonblank_token};
13558             }
13559             elsif ($next_nonblank_token eq 'ne'
13560                 or $next_nonblank_token eq 'eq' )
13561             {
13562                 $bsl = NOMINAL;
13563             }
13564             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
13565                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
13566             }
13567
13568             # Note: it might seem that we would want to keep a NO_BREAK if
13569             # either token has this value.  This didn't work, because in an
13570             # arrow list, it prevents the comma from separating from the
13571             # following bare word (which is probably quoted by its arrow).
13572             # So necessary NO_BREAK's have to be handled as special cases
13573             # in the final section.
13574             $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
13575             my $bond_str_1 = $bond_str;
13576
13577             #---------------------------------------------------------------
13578             # section 2:
13579             # special cases
13580             #---------------------------------------------------------------
13581
13582             # allow long lines before final { in an if statement, as in:
13583             #    if (..........
13584             #      ..........)
13585             #    {
13586             #
13587             # Otherwise, the line before the { tends to be too short.
13588             if ( $type eq ')' ) {
13589                 if ( $next_nonblank_type eq '{' ) {
13590                     $bond_str = VERY_WEAK + 0.03;
13591                 }
13592             }
13593
13594             elsif ( $type eq '(' ) {
13595                 if ( $next_nonblank_type eq '{' ) {
13596                     $bond_str = NOMINAL;
13597                 }
13598             }
13599
13600             # break on something like '} (', but keep this stronger than a ','
13601             # example is in 'howe.pl'
13602             elsif ( $type eq 'R' or $type eq '}' ) {
13603                 if ( $next_nonblank_type eq '(' ) {
13604                     $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
13605                 }
13606             }
13607
13608             #-----------------------------------------------------------------
13609             # adjust bond strength bias
13610             #-----------------------------------------------------------------
13611
13612             # add any bias set by sub scan_list at old comma break points.
13613             elsif ( $type eq ',' ) {
13614                 $bond_str += $bond_strength_to_go[$i];
13615             }
13616
13617             elsif ( $type eq 'f' ) {
13618                 $bond_str += $f_bias;
13619                 $f_bias   += $delta_bias;
13620             }
13621
13622           # in long ?: conditionals, bias toward just one set per line (colon.t)
13623             elsif ( $type eq ':' ) {
13624                 if ( !$want_break_before{$type} ) {
13625                     $bond_str   += $colon_bias;
13626                     $colon_bias += $delta_bias;
13627                 }
13628             }
13629
13630             if (   $next_nonblank_type eq ':'
13631                 && $want_break_before{$next_nonblank_type} )
13632             {
13633                 $bond_str   += $colon_bias;
13634                 $colon_bias += $delta_bias;
13635             }
13636
13637             # if leading '.' is used, align all but 'short' quotes;
13638             # the idea is to not place something like "\n" on a single line.
13639             elsif ( $next_nonblank_type eq '.' ) {
13640                 if ( $want_break_before{'.'} ) {
13641                     unless (
13642                         $last_nonblank_type eq '.'
13643                         && (
13644                             length($token) <=
13645                             $rOpts_short_concatenation_item_length )
13646                         && ( $token !~ /^[\)\]\}]$/ )
13647                       )
13648                     {
13649                         $dot_bias += $delta_bias;
13650                     }
13651                     $bond_str += $dot_bias;
13652                 }
13653             }
13654             elsif ($next_nonblank_type eq '&&'
13655                 && $want_break_before{$next_nonblank_type} )
13656             {
13657                 $bond_str += $amp_bias;
13658                 $amp_bias += $delta_bias;
13659             }
13660             elsif ($next_nonblank_type eq '||'
13661                 && $want_break_before{$next_nonblank_type} )
13662             {
13663                 $bond_str += $bar_bias;
13664                 $bar_bias += $delta_bias;
13665             }
13666             elsif ( $next_nonblank_type eq 'k' ) {
13667
13668                 if (   $next_nonblank_token eq 'and'
13669                     && $want_break_before{$next_nonblank_token} )
13670                 {
13671                     $bond_str += $and_bias;
13672                     $and_bias += $delta_bias;
13673                 }
13674                 elsif ($next_nonblank_token =~ /^(or|err)$/
13675                     && $want_break_before{$next_nonblank_token} )
13676                 {
13677                     $bond_str += $or_bias;
13678                     $or_bias  += $delta_bias;
13679                 }
13680
13681                 # FIXME: needs more testing
13682                 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
13683                     $bond_str = $list_str if ( $bond_str > $list_str );
13684                 }
13685                 elsif ( $token eq 'err'
13686                     && !$want_break_before{$token} )
13687                 {
13688                     $bond_str += $or_bias;
13689                     $or_bias  += $delta_bias;
13690                 }
13691             }
13692
13693             if ( $type eq ':'
13694                 && !$want_break_before{$type} )
13695             {
13696                 $bond_str   += $colon_bias;
13697                 $colon_bias += $delta_bias;
13698             }
13699             elsif ( $type eq '&&'
13700                 && !$want_break_before{$type} )
13701             {
13702                 $bond_str += $amp_bias;
13703                 $amp_bias += $delta_bias;
13704             }
13705             elsif ( $type eq '||'
13706                 && !$want_break_before{$type} )
13707             {
13708                 $bond_str += $bar_bias;
13709                 $bar_bias += $delta_bias;
13710             }
13711             elsif ( $type eq 'k' ) {
13712
13713                 if ( $token eq 'and'
13714                     && !$want_break_before{$token} )
13715                 {
13716                     $bond_str += $and_bias;
13717                     $and_bias += $delta_bias;
13718                 }
13719                 elsif ( $token eq 'or'
13720                     && !$want_break_before{$token} )
13721                 {
13722                     $bond_str += $or_bias;
13723                     $or_bias  += $delta_bias;
13724                 }
13725             }
13726
13727             # keep matrix and hash indices together
13728             # but make them a little below STRONG to allow breaking open
13729             # something like {'some-word'}{'some-very-long-word'} at the }{
13730             # (bracebrk.t)
13731             if (   ( $type eq ']' or $type eq 'R' )
13732                 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
13733               )
13734             {
13735                 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
13736             }
13737
13738             if ( $next_nonblank_token =~ /^->/ ) {
13739
13740                 # increase strength to the point where a break in the following
13741                 # will be after the opening paren rather than at the arrow:
13742                 #    $a->$b($c);
13743                 if ( $type eq 'i' ) {
13744                     $bond_str = 1.45 * STRONG;
13745                 }
13746
13747                 elsif ( $type =~ /^[\)\]\}R]$/ ) {
13748                     $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
13749                 }
13750
13751                 # otherwise make strength before an '->' a little over a '+'
13752                 else {
13753                     if ( $bond_str <= NOMINAL ) {
13754                         $bond_str = NOMINAL + 0.01;
13755                     }
13756                 }
13757             }
13758
13759             if ( $token eq ')' && $next_nonblank_token eq '[' ) {
13760                 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
13761             }
13762
13763             # map1.t -- correct for a quirk in perl
13764             if (   $token eq '('
13765                 && $next_nonblank_type eq 'i'
13766                 && $last_nonblank_type eq 'k'
13767                 && $is_sort_map_grep{$last_nonblank_token} )
13768
13769               #     /^(sort|map|grep)$/ )
13770             {
13771                 $bond_str = NO_BREAK;
13772             }
13773
13774             # extrude.t: do not break before paren at:
13775             #    -l pid_filename(
13776             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
13777                 $bond_str = NO_BREAK;
13778             }
13779
13780             # good to break after end of code blocks
13781             if ( $type eq '}' && $block_type ) {
13782
13783                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
13784                 $code_bias += $delta_bias;
13785             }
13786
13787             if ( $type eq 'k' ) {
13788
13789                 # allow certain control keywords to stand out
13790                 if (   $next_nonblank_type eq 'k'
13791                     && $is_last_next_redo_return{$token} )
13792                 {
13793                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
13794                 }
13795
13796 # Don't break after keyword my.  This is a quick fix for a
13797 # rare problem with perl. An example is this line from file
13798 # Container.pm:
13799 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
13800
13801                 if ( $token eq 'my' ) {
13802                     $bond_str = NO_BREAK;
13803                 }
13804
13805             }
13806
13807             # good to break before 'if', 'unless', etc
13808             if ( $is_if_brace_follower{$next_nonblank_token} ) {
13809                 $bond_str = VERY_WEAK;
13810             }
13811
13812             if ( $next_nonblank_type eq 'k' ) {
13813
13814                 # keywords like 'unless', 'if', etc, within statements
13815                 # make good breaks
13816                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
13817                     $bond_str = VERY_WEAK / 1.05;
13818                 }
13819             }
13820
13821             # try not to break before a comma-arrow
13822             elsif ( $next_nonblank_type eq '=>' ) {
13823                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
13824             }
13825
13826          #----------------------------------------------------------------------
13827          # only set NO_BREAK's from here on
13828          #----------------------------------------------------------------------
13829             if ( $type eq 'C' or $type eq 'U' ) {
13830
13831                 # use strict requires that bare word and => not be separated
13832                 if ( $next_nonblank_type eq '=>' ) {
13833                     $bond_str = NO_BREAK;
13834                 }
13835
13836                 # Never break between a bareword and a following paren because
13837                 # perl may give an error.  For example, if a break is placed
13838                 # between 'to_filehandle' and its '(' the following line will
13839                 # give a syntax error [Carp.pm]: my( $no) =fileno(
13840                 # to_filehandle( $in)) ;
13841                 if ( $next_nonblank_token eq '(' ) {
13842                     $bond_str = NO_BREAK;
13843                 }
13844             }
13845
13846            # use strict requires that bare word within braces not start new line
13847             elsif ( $type eq 'L' ) {
13848
13849                 if ( $next_nonblank_type eq 'w' ) {
13850                     $bond_str = NO_BREAK;
13851                 }
13852             }
13853
13854             # in older version of perl, use strict can cause problems with
13855             # breaks before bare words following opening parens.  For example,
13856             # this will fail under older versions if a break is made between
13857             # '(' and 'MAIL':
13858             #  use strict;
13859             #  open( MAIL, "a long filename or command");
13860             #  close MAIL;
13861             elsif ( $type eq '{' ) {
13862
13863                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
13864
13865                     # but it's fine to break if the word is followed by a '=>'
13866                     # or if it is obviously a sub call
13867                     my $i_next_next_nonblank = $i_next_nonblank + 1;
13868                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
13869                     if (   $next_next_type eq 'b'
13870                         && $i_next_nonblank < $max_index_to_go )
13871                     {
13872                         $i_next_next_nonblank++;
13873                         $next_next_type = $types_to_go[$i_next_next_nonblank];
13874                     }
13875
13876                     ##if ( $next_next_type ne '=>' ) {
13877                     # these are ok: '->xxx', '=>', '('
13878
13879                     # We'll check for an old breakpoint and keep a leading
13880                     # bareword if it was that way in the input file.
13881                     # Presumably it was ok that way.  For example, the
13882                     # following would remain unchanged:
13883                     #
13884                     # @months = (
13885                     #   January,   February, March,    April,
13886                     #   May,       June,     July,     August,
13887                     #   September, October,  November, December,
13888                     # );
13889                     #
13890                     # This should be sufficient:
13891                     if ( !$old_breakpoint_to_go[$i]
13892                         && ( $next_next_type eq ',' || $next_next_type eq '}' )
13893                       )
13894                     {
13895                         $bond_str = NO_BREAK;
13896                     }
13897                 }
13898             }
13899
13900             elsif ( $type eq 'w' ) {
13901
13902                 if ( $next_nonblank_type eq 'R' ) {
13903                     $bond_str = NO_BREAK;
13904                 }
13905
13906                 # use strict requires that bare word and => not be separated
13907                 if ( $next_nonblank_type eq '=>' ) {
13908                     $bond_str = NO_BREAK;
13909                 }
13910             }
13911
13912             # in fact, use strict hates bare words on any new line.  For
13913             # example, a break before the underscore here provokes the
13914             # wrath of use strict:
13915             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
13916             elsif ( $type eq 'F' ) {
13917                 $bond_str = NO_BREAK;
13918             }
13919
13920             # use strict does not allow separating type info from trailing { }
13921             # testfile is readmail.pl
13922             elsif ( $type eq 't' or $type eq 'i' ) {
13923
13924                 if ( $next_nonblank_type eq 'L' ) {
13925                     $bond_str = NO_BREAK;
13926                 }
13927             }
13928
13929             # Do not break between a possible filehandle and a ? or / and do
13930             # not introduce a break after it if there is no blank
13931             # (extrude.t)
13932             elsif ( $type eq 'Z' ) {
13933
13934                 # dont break..
13935                 if (
13936
13937                     # if there is no blank and we do not want one. Examples:
13938                     #    print $x++    # do not break after $x
13939                     #    print HTML"HELLO"   # break ok after HTML
13940                     (
13941                            $next_type ne 'b'
13942                         && defined( $want_left_space{$next_type} )
13943                         && $want_left_space{$next_type} == WS_NO
13944                     )
13945
13946                     # or we might be followed by the start of a quote
13947                     || $next_nonblank_type =~ /^[\/\?]$/
13948                   )
13949                 {
13950                     $bond_str = NO_BREAK;
13951                 }
13952             }
13953
13954             # Do not break before a possible file handle
13955             if ( $next_nonblank_type eq 'Z' ) {
13956                 $bond_str = NO_BREAK;
13957             }
13958
13959             # As a defensive measure, do not break between a '(' and a
13960             # filehandle.  In some cases, this can cause an error.  For
13961             # example, the following program works:
13962             #    my $msg="hi!\n";
13963             #    print
13964             #    ( STDOUT
13965             #    $msg
13966             #    );
13967             #
13968             # But this program fails:
13969             #    my $msg="hi!\n";
13970             #    print
13971             #    (
13972             #    STDOUT
13973             #    $msg
13974             #    );
13975             #
13976             # This is normally only a problem with the 'extrude' option
13977             if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
13978                 $bond_str = NO_BREAK;
13979             }
13980
13981             # Breaking before a ++ can cause perl to guess wrong. For
13982             # example the following line will cause a syntax error
13983             # with -extrude if we break between '$i' and '++' [fixstyle2]
13984             #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
13985             elsif ( $next_nonblank_type eq '++' ) {
13986                 $bond_str = NO_BREAK;
13987             }
13988
13989             # Breaking before a ? before a quote can cause trouble if
13990             # they are not separated by a blank.
13991             # Example: a syntax error occurs if you break before the ? here
13992             #  my$logic=join$all?' && ':' || ',@regexps;
13993             # From: Professional_Perl_Programming_Code/multifind.pl
13994             elsif ( $next_nonblank_type eq '?' ) {
13995                 $bond_str = NO_BREAK
13996                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
13997             }
13998
13999             # Breaking before a . followed by a number
14000             # can cause trouble if there is no intervening space
14001             # Example: a syntax error occurs if you break before the .2 here
14002             #  $str .= pack($endian.2, ensurrogate($ord));
14003             # From: perl58/Unicode.pm
14004             elsif ( $next_nonblank_type eq '.' ) {
14005                 $bond_str = NO_BREAK
14006                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
14007             }
14008
14009             # patch to put cuddled elses back together when on multiple
14010             # lines, as in: } \n else \n { \n
14011             if ($rOpts_cuddled_else) {
14012
14013                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
14014                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
14015                 {
14016                     $bond_str = NO_BREAK;
14017                 }
14018             }
14019
14020             # keep '}' together with ';'
14021             if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
14022                 $bond_str = NO_BREAK;
14023             }
14024
14025             # never break between sub name and opening paren
14026             if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
14027                 $bond_str = NO_BREAK;
14028             }
14029
14030             #---------------------------------------------------------------
14031             # section 3:
14032             # now take nesting depth into account
14033             #---------------------------------------------------------------
14034             # final strength incorporates the bond strength and nesting depth
14035             my $strength;
14036
14037             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
14038                 if ( $total_nesting_depth > 0 ) {
14039                     $strength = $bond_str + $total_nesting_depth;
14040                 }
14041                 else {
14042                     $strength = $bond_str;
14043                 }
14044             }
14045             else {
14046                 $strength = NO_BREAK;
14047             }
14048
14049             # always break after side comment
14050             if ( $type eq '#' ) { $strength = 0 }
14051
14052             $bond_strength_to_go[$i] = $strength;
14053
14054             FORMATTER_DEBUG_FLAG_BOND && do {
14055                 my $str = substr( $token, 0, 15 );
14056                 $str .= ' ' x ( 16 - length($str) );
14057                 print
14058 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
14059             };
14060         }
14061     }
14062
14063 }
14064
sub pad_array_to_go {

    # To simplify coding in scan_list and set_bond_strengths, append two
    # blank tokens (token '', type 'b') just past the last token of the
    # batch.  One extra nesting depth entry is also created: it starts as
    # a copy of the depth stored for the last token and is then moved
    # down by one if that token is a closing type, or up by one if it is
    # an opening type.
    my $i_last = $max_index_to_go;
    my $i_pad  = $i_last + 1;

    @tokens_to_go[ $i_pad, $i_pad + 1 ] = ( '',  '' );
    @types_to_go[ $i_pad, $i_pad + 1 ]  = ( 'b', 'b' );

    my $last_depth = $nesting_depth_to_go[$i_last];
    $nesting_depth_to_go[$i_pad] = $last_depth;

    my $last_type = $types_to_go[$i_last];

    # closing types:   /^[R\}\)\]]$/
    if ( $is_closing_type{$last_type} ) {
        if ( $last_depth > 0 ) {
            $nesting_depth_to_go[$i_pad] -= 1;
        }

        # A closing token at depth zero or less shouldn't happen; complain
        # unless a brace error has already been reported.
        elsif ( !get_saw_brace_error() ) {
            warning(
"Program bug in scan_list: hit nesting error which should have been caught\n"
            );
            report_definite_bug();
        }
    }

    # opening types:   /^[L\{\(\[]$/
    elsif ( $is_opening_type{$last_type} ) {
        $nesting_depth_to_go[$i_pad] += 1;
    }
}
14098
14099 {    # begin scan_list
14100
14101     my (
14102         $block_type,                $current_depth,
14103         $depth,                     $i,
14104         $i_last_nonblank_token,     $last_colon_sequence_number,
14105         $last_nonblank_token,       $last_nonblank_type,
14106         $last_old_breakpoint_count, $minimum_depth,
14107         $next_nonblank_block_type,  $next_nonblank_token,
14108         $next_nonblank_type,        $old_breakpoint_count,
14109         $starting_breakpoint_count, $starting_depth,
14110         $token,                     $type,
14111         $type_sequence,
14112     );
14113
14114     my (
14115         @breakpoint_stack,              @breakpoint_undo_stack,
14116         @comma_index,                   @container_type,
14117         @identifier_count_stack,        @index_before_arrow,
14118         @interrupted_list,              @item_count_stack,
14119         @last_comma_index,              @last_dot_index,
14120         @last_nonblank_type,            @old_breakpoint_count_stack,
14121         @opening_structure_index_stack, @rfor_semicolon_list,
14122         @has_old_logical_breakpoints,   @rand_or_list,
14123         @i_equals,
14124     );
14125
14126     # routine to define essential variables when we go 'up' to
14127     # a new depth
14128     sub check_for_new_minimum_depth {
14129         my $depth = shift;
14130         if ( $depth < $minimum_depth ) {
14131
14132             $minimum_depth = $depth;
14133
14134             # these arrays need not retain values between calls
14135             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
14136             $container_type[$depth]                = "";
14137             $identifier_count_stack[$depth]        = 0;
14138             $index_before_arrow[$depth]            = -1;
14139             $interrupted_list[$depth]              = 1;
14140             $item_count_stack[$depth]              = 0;
14141             $last_nonblank_type[$depth]            = "";
14142             $opening_structure_index_stack[$depth] = -1;
14143
14144             $breakpoint_undo_stack[$depth]       = undef;
14145             $comma_index[$depth]                 = undef;
14146             $last_comma_index[$depth]            = undef;
14147             $last_dot_index[$depth]              = undef;
14148             $old_breakpoint_count_stack[$depth]  = undef;
14149             $has_old_logical_breakpoints[$depth] = 0;
14150             $rand_or_list[$depth]                = [];
14151             $rfor_semicolon_list[$depth]         = [];
14152             $i_equals[$depth]                    = -1;
14153
14154             # these arrays must retain values between calls
14155             if ( !defined( $has_broken_sublist[$depth] ) ) {
14156                 $dont_align[$depth]         = 0;
14157                 $has_broken_sublist[$depth] = 0;
14158                 $want_comma_break[$depth]   = 0;
14159             }
14160         }
14161     }
14162
14163     # routine to decide which commas to break at within a container;
14164     # returns:
14165     #   $bp_count = number of comma breakpoints set
14166     #   $do_not_break_apart = a flag indicating if container need not
14167     #     be broken open
14168     sub set_comma_breakpoints {
14169
14170         my $dd                 = shift;
14171         my $bp_count           = 0;
14172         my $do_not_break_apart = 0;
14173
14174         # anything to do?
14175         if ( $item_count_stack[$dd] ) {
14176
14177             # handle commas not in containers...
14178             if ( $dont_align[$dd] ) {
14179                 do_uncontained_comma_breaks($dd);
14180             }
14181
14182             # handle commas within containers...
14183             else {
14184                 my $fbc = $forced_breakpoint_count;
14185
14186                 # always open comma lists not preceded by keywords,
14187                 # barewords, identifiers (that is, anything that doesn't
14188                 # look like a function call)
14189                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
14190
14191                 set_comma_breakpoints_do(
14192                     $dd,
14193                     $opening_structure_index_stack[$dd],
14194                     $i,
14195                     $item_count_stack[$dd],
14196                     $identifier_count_stack[$dd],
14197                     $comma_index[$dd],
14198                     $next_nonblank_type,
14199                     $container_type[$dd],
14200                     $interrupted_list[$dd],
14201                     \$do_not_break_apart,
14202                     $must_break_open,
14203                 );
14204                 $bp_count = $forced_breakpoint_count - $fbc;
14205                 $do_not_break_apart = 0 if $must_break_open;
14206             }
14207         }
14208         return ( $bp_count, $do_not_break_apart );
14209     }
14210
14211     sub do_uncontained_comma_breaks {
14212
14213         # Handle commas not in containers...
14214         # This is a catch-all routine for commas that we
14215         # don't know what to do with because the don't fall
14216         # within containers.  We will bias the bond strength
14217         # to break at commas which ended lines in the input
14218         # file.  This usually works better than just trying
14219         # to put as many items on a line as possible.  A
14220         # downside is that if the input file is garbage it
14221         # won't work very well. However, the user can always
14222         # prevent following the old breakpoints with the
14223         # -iob flag.
14224         my $dd                    = shift;
14225         my $bias                  = -.01;
14226         my $old_comma_break_count = 0;
14227         foreach my $ii ( @{ $comma_index[$dd] } ) {
14228             if ( $old_breakpoint_to_go[$ii] ) {
14229                 $old_comma_break_count++;
14230                 $bond_strength_to_go[$ii] = $bias;
14231
14232                 # reduce bias magnitude to force breaks in order
14233                 $bias *= 0.99;
14234             }
14235         }
14236
14237         # Also put a break before the first comma if
14238         # (1) there was a break there in the input, and
14239         # (2) that was exactly one previous break in the input
14240         # (3) there are multiple old comma breaks
14241         #
14242         # For example, we will follow the user and break after
14243         # 'print' in this snippet:
14244         #    print
14245         #      "conformability (Not the same dimension)\n",
14246         #      "\t", $have, " is ", text_unit($hu), "\n",
14247         #      "\t", $want, " is ", text_unit($wu), "\n",
14248         #      ;
14249         #  But we will not force a break after the first comma here
14250         #  (causes a blinker):
14251         #        $heap->{stream}->set_output_filter(
14252         #            poe::filter::reference->new('myotherfreezer') ),
14253         #          ;
14254         #
14255         my $i_first_comma = $comma_index[$dd]->[0];
14256         if ( $old_breakpoint_to_go[$i_first_comma] ) {
14257             my $level_comma = $levels_to_go[$i_first_comma];
14258             my $ibreak      = -1;
14259             my $obp_count   = 0;
14260             for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
14261                 if ( $old_breakpoint_to_go[$ii] ) {
14262                     $obp_count++;
14263                     last if ( $obp_count > 1 );
14264                     $ibreak = $ii
14265                       if ( $levels_to_go[$ii] == $level_comma );
14266                 }
14267             }
14268             if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 1 )
14269             {
14270                 set_forced_breakpoint($ibreak);
14271             }
14272         }
14273     }
14274
14275     my %is_logical_container;
14276
14277     BEGIN {
14278         @_ = qw# if elsif unless while and or err not && | || ? : ! #;
14279         @is_logical_container{@_} = (1) x scalar(@_);
14280     }
14281
14282     sub set_for_semicolon_breakpoints {
14283         my $dd = shift;
14284         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
14285             set_forced_breakpoint($_);
14286         }
14287     }
14288
14289     sub set_logical_breakpoints {
14290         my $dd = shift;
14291         if (
14292                $item_count_stack[$dd] == 0
14293             && $is_logical_container{ $container_type[$dd] }
14294
14295             || $has_old_logical_breakpoints[$dd]
14296           )
14297         {
14298
14299             # Look for breaks in this order:
14300             # 0   1    2   3
14301             # or  and  ||  &&
14302             foreach my $i ( 0 .. 3 ) {
14303                 if ( $rand_or_list[$dd][$i] ) {
14304                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
14305                         set_forced_breakpoint($_);
14306                     }
14307
14308                     # break at any 'if' and 'unless' too
14309                     foreach ( @{ $rand_or_list[$dd][4] } ) {
14310                         set_forced_breakpoint($_);
14311                     }
14312                     $rand_or_list[$dd] = [];
14313                     last;
14314                 }
14315             }
14316         }
14317     }
14318
14319     sub is_unbreakable_container {
14320
14321         # never break a container of one of these types
14322         # because bad things can happen (map1.t)
14323         my $dd = shift;
14324         $is_sort_map_grep{ $container_type[$dd] };
14325     }
14326
14327     sub scan_list {
14328
14329         # This routine is responsible for setting line breaks for all lists,
14330         # so that hierarchical structure can be displayed and so that list
14331         # items can be vertically aligned.  The output of this routine is
14332         # stored in the array @forced_breakpoint_to_go, which is used to set
14333         # final breakpoints.
14334
14335         $starting_depth = $nesting_depth_to_go[0];
14336
14337         $block_type                 = ' ';
14338         $current_depth              = $starting_depth;
14339         $i                          = -1;
14340         $last_colon_sequence_number = -1;
14341         $last_nonblank_token        = ';';
14342         $last_nonblank_type         = ';';
14343         $last_nonblank_block_type   = ' ';
14344         $last_old_breakpoint_count  = 0;
14345         $minimum_depth = $current_depth + 1;    # forces update in check below
14346         $old_breakpoint_count      = 0;
14347         $starting_breakpoint_count = $forced_breakpoint_count;
14348         $token                     = ';';
14349         $type                      = ';';
14350         $type_sequence             = '';
14351
14352         check_for_new_minimum_depth($current_depth);
14353
14354         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
14355         my $want_previous_breakpoint = -1;
14356
14357         my $saw_good_breakpoint;
14358         my $i_line_end   = -1;
14359         my $i_line_start = -1;
14360
14361         # loop over all tokens in this batch
14362         while ( ++$i <= $max_index_to_go ) {
14363             if ( $type ne 'b' ) {
14364                 $i_last_nonblank_token    = $i - 1;
14365                 $last_nonblank_type       = $type;
14366                 $last_nonblank_token      = $token;
14367                 $last_nonblank_block_type = $block_type;
14368             }
14369             $type          = $types_to_go[$i];
14370             $block_type    = $block_type_to_go[$i];
14371             $token         = $tokens_to_go[$i];
14372             $type_sequence = $type_sequence_to_go[$i];
14373             my $next_type       = $types_to_go[ $i + 1 ];
14374             my $next_token      = $tokens_to_go[ $i + 1 ];
14375             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14376             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
14377             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
14378             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
14379
14380             # set break if flag was set
14381             if ( $want_previous_breakpoint >= 0 ) {
14382                 set_forced_breakpoint($want_previous_breakpoint);
14383                 $want_previous_breakpoint = -1;
14384             }
14385
14386             $last_old_breakpoint_count = $old_breakpoint_count;
14387             if ( $old_breakpoint_to_go[$i] ) {
14388                 $i_line_end   = $i;
14389                 $i_line_start = $i_next_nonblank;
14390
14391                 $old_breakpoint_count++;
14392
14393                 # Break before certain keywords if user broke there and
14394                 # this is a 'safe' break point. The idea is to retain
14395                 # any preferred breaks for sequential list operations,
14396                 # like a schwartzian transform.
14397                 if ($rOpts_break_at_old_keyword_breakpoints) {
14398                     if (
14399                            $next_nonblank_type eq 'k'
14400                         && $is_keyword_returning_list{$next_nonblank_token}
14401                         && (   $type =~ /^[=\)\]\}Riw]$/
14402                             || $type eq 'k'
14403                             && $is_keyword_returning_list{$token} )
14404                       )
14405                     {
14406
14407                         # we actually have to set this break next time through
14408                         # the loop because if we are at a closing token (such
14409                         # as '}') which forms a one-line block, this break might
14410                         # get undone.
14411                         $want_previous_breakpoint = $i;
14412                     }
14413                 }
14414
14415                 # Break before attributes if user broke there
14416                 if ($rOpts_break_at_old_attribute_breakpoints) {
14417                     if ( $next_nonblank_type eq 'A' ) {
14418                         $want_previous_breakpoint = $i;
14419                     }
14420                 }
14421             }
14422             next if ( $type eq 'b' );
14423             $depth = $nesting_depth_to_go[ $i + 1 ];
14424
14425             # safety check - be sure we always break after a comment
14426             # Shouldn't happen .. an error here probably means that the
14427             # nobreak flag did not get turned off correctly during
14428             # formatting.
14429             if ( $type eq '#' ) {
14430                 if ( $i != $max_index_to_go ) {
14431                     warning(
14432 "Non-fatal program bug: backup logic needed to break after a comment\n"
14433                     );
14434                     report_definite_bug();
14435                     $nobreak_to_go[$i] = 0;
14436                     set_forced_breakpoint($i);
14437                 }
14438             }
14439
14440             # Force breakpoints at certain tokens in long lines.
14441             # Note that such breakpoints will be undone later if these tokens
14442             # are fully contained within parens on a line.
14443             if (
14444
14445                 # break before a keyword within a line
14446                 $type eq 'k'
14447                 && $i > 0
14448
14449                 # if one of these keywords:
14450                 && $token =~ /^(if|unless|while|until|for)$/
14451
14452                 # but do not break at something like '1 while'
14453                 && ( $last_nonblank_type ne 'n' || $i > 2 )
14454
14455                 # and let keywords follow a closing 'do' brace
14456                 && $last_nonblank_block_type ne 'do'
14457
14458                 && (
14459                     $is_long_line
14460
14461                     # or container is broken (by side-comment, etc)
14462                     || (   $next_nonblank_token eq '('
14463                         && $mate_index_to_go[$i_next_nonblank] < $i )
14464                 )
14465               )
14466             {
14467                 set_forced_breakpoint( $i - 1 );
14468             }
14469
14470             # remember locations of '||'  and '&&' for possible breaks if we
14471             # decide this is a long logical expression.
14472             if ( $type eq '||' ) {
14473                 push @{ $rand_or_list[$depth][2] }, $i;
14474                 ++$has_old_logical_breakpoints[$depth]
14475                   if ( ( $i == $i_line_start || $i == $i_line_end )
14476                     && $rOpts_break_at_old_logical_breakpoints );
14477             }
14478             elsif ( $type eq '&&' ) {
14479                 push @{ $rand_or_list[$depth][3] }, $i;
14480                 ++$has_old_logical_breakpoints[$depth]
14481                   if ( ( $i == $i_line_start || $i == $i_line_end )
14482                     && $rOpts_break_at_old_logical_breakpoints );
14483             }
14484             elsif ( $type eq 'f' ) {
14485                 push @{ $rfor_semicolon_list[$depth] }, $i;
14486             }
14487             elsif ( $type eq 'k' ) {
14488                 if ( $token eq 'and' ) {
14489                     push @{ $rand_or_list[$depth][1] }, $i;
14490                     ++$has_old_logical_breakpoints[$depth]
14491                       if ( ( $i == $i_line_start || $i == $i_line_end )
14492                         && $rOpts_break_at_old_logical_breakpoints );
14493                 }
14494
14495                 # break immediately at 'or's which are probably not in a logical
14496                 # block -- but we will break in logical breaks below so that
14497                 # they do not add to the forced_breakpoint_count
14498                 elsif ( $token eq 'or' ) {
14499                     push @{ $rand_or_list[$depth][0] }, $i;
14500                     ++$has_old_logical_breakpoints[$depth]
14501                       if ( ( $i == $i_line_start || $i == $i_line_end )
14502                         && $rOpts_break_at_old_logical_breakpoints );
14503                     if ( $is_logical_container{ $container_type[$depth] } ) {
14504                     }
14505                     else {
14506                         if ($is_long_line) { set_forced_breakpoint($i) }
14507                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
14508                             && $rOpts_break_at_old_logical_breakpoints )
14509                         {
14510                             $saw_good_breakpoint = 1;
14511                         }
14512                     }
14513                 }
14514                 elsif ( $token eq 'if' || $token eq 'unless' ) {
14515                     push @{ $rand_or_list[$depth][4] }, $i;
14516                     if ( ( $i == $i_line_start || $i == $i_line_end )
14517                         && $rOpts_break_at_old_logical_breakpoints )
14518                     {
14519                         set_forced_breakpoint($i);
14520                     }
14521                 }
14522             }
14523             elsif ( $is_assignment{$type} ) {
14524                 $i_equals[$depth] = $i;
14525             }
14526
14527             if ($type_sequence) {
14528
14529                 # handle any postponed closing breakpoints
14530                 if ( $token =~ /^[\)\]\}\:]$/ ) {
14531                     if ( $type eq ':' ) {
14532                         $last_colon_sequence_number = $type_sequence;
14533
14534                         # retain break at a ':' line break
14535                         if ( ( $i == $i_line_start || $i == $i_line_end )
14536                             && $rOpts_break_at_old_ternary_breakpoints )
14537                         {
14538
14539                             # TESTING:
14540                             set_forced_breakpoint($i);
14541
14542                             # break at previous '='
14543                             if ( $i_equals[$depth] > 0 ) {
14544                                 set_forced_breakpoint( $i_equals[$depth] );
14545                                 $i_equals[$depth] = -1;
14546                             }
14547                         }
14548                     }
14549                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
14550                         my $inc = ( $type eq ':' ) ? 0 : 1;
14551                         set_forced_breakpoint( $i - $inc );
14552                         delete $postponed_breakpoint{$type_sequence};
14553                     }
14554                 }
14555
14556                 # set breaks at ?/: if they will get separated (and are
14557                 # not a ?/: chain), or if the '?' is at the end of the
14558                 # line
14559                 elsif ( $token eq '?' ) {
14560                     my $i_colon = $mate_index_to_go[$i];
14561                     if (
14562                         $i_colon <= 0  # the ':' is not in this batch
14563                         || $i == 0     # this '?' is the first token of the line
14564                         || $i ==
14565                         $max_index_to_go    # or this '?' is the last token
14566                       )
14567                     {
14568
14569                         # don't break at a '?' if preceded by the ':' of a
14570                         # previous ?/: pair on this line.
14571                         # This is an attempt to preserve a chain of ?/:
14572                         # expressions (elsif2.t).  And don't break if
14573                         # this has a side comment.
14574                         set_forced_breakpoint($i)
14575                           unless (
14576                             $type_sequence == (
14577                                 $last_colon_sequence_number +
14578                                   TYPE_SEQUENCE_INCREMENT
14579                             )
14580                             || $tokens_to_go[$max_index_to_go] eq '#'
14581                           );
14582                         set_closing_breakpoint($i);
14583                     }
14584                 }
14585             }
14586
14587 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
14588
14589             #------------------------------------------------------------
14590             # Handle Increasing Depth..
14591             #
14592             # prepare for a new list when depth increases
14593             # token $i is a '(','{', or '['
14594             #------------------------------------------------------------
14595             if ( $depth > $current_depth ) {
14596
14597                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
14598                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
14599                 $has_broken_sublist[$depth]     = 0;
14600                 $identifier_count_stack[$depth] = 0;
14601                 $index_before_arrow[$depth]     = -1;
14602                 $interrupted_list[$depth]       = 0;
14603                 $item_count_stack[$depth]       = 0;
14604                 $last_comma_index[$depth]       = undef;
14605                 $last_dot_index[$depth]         = undef;
14606                 $last_nonblank_type[$depth]     = $last_nonblank_type;
14607                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
14608                 $opening_structure_index_stack[$depth] = $i;
14609                 $rand_or_list[$depth]                  = [];
14610                 $rfor_semicolon_list[$depth]           = [];
14611                 $i_equals[$depth]                      = -1;
14612                 $want_comma_break[$depth]              = 0;
14613                 $container_type[$depth] =
14614                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
14615                   ? $last_nonblank_token
14616                   : "";
14617                 $has_old_logical_breakpoints[$depth] = 0;
14618
14619                 # if line ends here then signal closing token to break
14620                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
14621                 {
14622                     set_closing_breakpoint($i);
14623                 }
14624
14625                 # Not all lists of values should be vertically aligned..
14626                 $dont_align[$depth] =
14627
14628                   # code BLOCKS are handled at a higher level
14629                   ( $block_type ne "" )
14630
14631                   # certain paren lists
14632                   || ( $type eq '(' ) && (
14633
14634                     # it does not usually look good to align a list of
14635                     # identifiers in a parameter list, as in:
14636                     #    my($var1, $var2, ...)
14637                     # (This test should probably be refined, for now I'm just
14638                     # testing for any keyword)
14639                     ( $last_nonblank_type eq 'k' )
14640
14641                     # a trailing '(' usually indicates a non-list
14642                     || ( $next_nonblank_type eq '(' )
14643                   );
14644
14645                 # patch to outdent opening brace of long if/for/..
14646                 # statements (like this one).  See similar coding in
14647                 # set_continuation_breaks.  We also have to catch it here for
14648                 # short line fragments which otherwise will not go through
14649                 # set_continuation_breaks.
14650                 if (
14651                     $block_type
14652
14653                     # if we have the ')' but not its '(' in this batch..
14654                     && ( $last_nonblank_token eq ')' )
14655                     && $mate_index_to_go[$i_last_nonblank_token] < 0
14656
14657                     # and user wants brace to left
14658                     && !$rOpts->{'opening-brace-always-on-right'}
14659
14660                     && ( $type  eq '{' )    # should be true
14661                     && ( $token eq '{' )    # should be true
14662                   )
14663                 {
14664                     set_forced_breakpoint( $i - 1 );
14665                 }
14666             }
14667
14668             #------------------------------------------------------------
14669             # Handle Decreasing Depth..
14670             #
14671             # finish off any old list when depth decreases
14672             # token $i is a ')','}', or ']'
14673             #------------------------------------------------------------
14674             elsif ( $depth < $current_depth ) {
14675
14676                 check_for_new_minimum_depth($depth);
14677
14678                 # force all outer logical containers to break after we see an
14679                 # old breakpoint
14680                 $has_old_logical_breakpoints[$depth] ||=
14681                   $has_old_logical_breakpoints[$current_depth];
14682
14683                 # Patch to break between ') {' if the paren list is broken.
14684                 # There is similar logic in set_continuation_breaks for
14685                 # non-broken lists.
14686                 if (   $token eq ')'
14687                     && $next_nonblank_block_type
14688                     && $interrupted_list[$current_depth]
14689                     && $next_nonblank_type eq '{'
14690                     && !$rOpts->{'opening-brace-always-on-right'} )
14691                 {
14692                     set_forced_breakpoint($i);
14693                 }
14694
14695 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
14696
14697                 # set breaks at commas if necessary
14698                 my ( $bp_count, $do_not_break_apart ) =
14699                   set_comma_breakpoints($current_depth);
14700
14701                 my $i_opening = $opening_structure_index_stack[$current_depth];
14702                 my $saw_opening_structure = ( $i_opening >= 0 );
14703
14704                 # this term is long if we had to break at interior commas..
14705                 my $is_long_term = $bp_count > 0;
14706
14707                 # ..or if the length between opening and closing parens exceeds
14708                 # allowed line length
14709                 if ( !$is_long_term && $saw_opening_structure ) {
14710                     my $i_opening_minus = find_token_starting_list($i_opening);
14711
14712                     # Note: we have to allow for one extra space after a
14713                     # closing token so that we do not strand a comma or
14714                     # semicolon, hence the '>=' here (oneline.t)
14715                     $is_long_term =
14716                       excess_line_length( $i_opening_minus, $i ) >= 0;
14717                 }
14718
14719                 # We've set breaks after all comma-arrows.  Now we have to
14720                 # undo them if this can be a one-line block
14721                 # (the only breakpoints set will be due to comma-arrows)
14722                 if (
14723
14724                     # user doesn't require breaking after all comma-arrows
14725                     ( $rOpts_comma_arrow_breakpoints != 0 )
14726
14727                     # and if the opening structure is in this batch
14728                     && $saw_opening_structure
14729
14730                     # and either on the same old line
14731                     && (
14732                         $old_breakpoint_count_stack[$current_depth] ==
14733                         $last_old_breakpoint_count
14734
14735                         # or user wants to form long blocks with arrows
14736                         || $rOpts_comma_arrow_breakpoints == 2
14737                     )
14738
14739                   # and we made some breakpoints between the opening and closing
14740                     && ( $breakpoint_undo_stack[$current_depth] <
14741                         $forced_breakpoint_undo_count )
14742
14743                     # and this block is short enough to fit on one line
14744                     # Note: use < because need 1 more space for possible comma
14745                     && !$is_long_term
14746
14747                   )
14748                 {
14749                     undo_forced_breakpoint_stack(
14750                         $breakpoint_undo_stack[$current_depth] );
14751                 }
14752
14753                 # now see if we have any comma breakpoints left
14754                 my $has_comma_breakpoints =
14755                   ( $breakpoint_stack[$current_depth] !=
14756                       $forced_breakpoint_count );
14757
14758                 # update broken-sublist flag of the outer container
14759                 $has_broken_sublist[$depth] =
14760                      $has_broken_sublist[$depth]
14761                   || $has_broken_sublist[$current_depth]
14762                   || $is_long_term
14763                   || $has_comma_breakpoints;
14764
14765 # Having come to the closing ')', '}', or ']', now we have to decide if we
14766 # should 'open up' the structure by placing breaks at the opening and
14767 # closing containers.  This is a tricky decision.  Here are some of the
14768 # basic considerations:
14769 #
14770 # -If this is a BLOCK container, then any breakpoints will have already
14771 # been set (and according to user preferences), so we need do nothing here.
14772 #
14773 # -If we have a comma-separated list for which we can align the list items,
14774 # then we need to do so because otherwise the vertical aligner cannot
14775 # currently do the alignment.
14776 #
14777 # -If this container does itself contain a container which has been broken
14778 # open, then it should be broken open to properly show the structure.
14779 #
14780 # -If there is nothing to align, and no other reason to break apart,
14781 # then do not do it.
14782 #
14783 # We will not break open the parens of a long but 'simple' logical expression.
14784 # For example:
14785 #
14786 # This is an example of a simple logical expression and its formatting:
14787 #
14788 #     if ( $bigwasteofspace1 && $bigwasteofspace2
14789 #         || $bigwasteofspace3 && $bigwasteofspace4 )
14790 #
14791 # Most people would prefer this to the 'spacey' version:
14792 #
14793 #     if (
14794 #         $bigwasteofspace1 && $bigwasteofspace2
14795 #         || $bigwasteofspace3 && $bigwasteofspace4
14796 #     )
14797 #
14798 # To illustrate the rules for breaking logical expressions, consider:
14799 #
14800 #             FULLY DENSE:
14801 #             if ( $opt_excl
14802 #                 and ( exists $ids_excl_uc{$id_uc}
14803 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
14804 #
14805 # This is on the verge of being difficult to read.  The current default is to
14806 # open it up like this:
14807 #
14808 #             DEFAULT:
14809 #             if (
14810 #                 $opt_excl
14811 #                 and ( exists $ids_excl_uc{$id_uc}
14812 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
14813 #               )
14814 #
14815 # This is a compromise which tries to avoid being too dense and too spacey.
14816 # A more spaced version would be:
14817 #
14818 #             SPACEY:
14819 #             if (
14820 #                 $opt_excl
14821 #                 and (
14822 #                     exists $ids_excl_uc{$id_uc}
14823 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
14824 #                 )
14825 #               )
14826 #
14827 # Some people might prefer the spacey version -- an option could be added.  The
14828 # innermost expression contains a long block '( exists $ids_...  )'.
14829 #
14830 # Here is how the logic goes: We will force a break at the 'or' that the
14831 # innermost expression contains, but we will not break apart its opening and
14832 # closing containers because (1) it contains no multi-line sub-containers itself,
14833 # and (2) there is no alignment to be gained by breaking it open like this
14834 #
14835 #             and (
14836 #                 exists $ids_excl_uc{$id_uc}
14837 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
14838 #             )
14839 #
14840 # (although this looks perfectly ok and might be good for long expressions).  The
14841 # outer 'if' container, though, contains a broken sub-container, so it will be
14842 # broken open to avoid too much density.  Also, since it contains no 'or's, there
14843 # will be a forced break at its 'and'.
14844
14845                 # set some flags telling something about this container..
14846                 my $is_simple_logical_expression = 0;
14847                 if (   $item_count_stack[$current_depth] == 0
14848                     && $saw_opening_structure
14849                     && $tokens_to_go[$i_opening] eq '('
14850                     && $is_logical_container{ $container_type[$current_depth] }
14851                   )
14852                 {
14853
14854                     # This seems to be a simple logical expression with
14855                     # no existing breakpoints.  Set a flag to prevent
14856                     # opening it up.
14857                     if ( !$has_comma_breakpoints ) {
14858                         $is_simple_logical_expression = 1;
14859                     }
14860
14861                     # This seems to be a simple logical expression with
14862                     # breakpoints (broken sublists, for example).  Break
14863                     # at all 'or's and '||'s.
14864                     else {
14865                         set_logical_breakpoints($current_depth);
14866                     }
14867                 }
14868
14869                 if ( $is_long_term
14870                     && @{ $rfor_semicolon_list[$current_depth] } )
14871                 {
14872                     set_for_semicolon_breakpoints($current_depth);
14873
14874                     # open up a long 'for' or 'foreach' container to allow
14875                     # leading term alignment unless -lp is used.
14876                     $has_comma_breakpoints = 1
14877                       unless $rOpts_line_up_parentheses;
14878                 }
14879
14880                 if (
14881
14882                     # breaks for code BLOCKS are handled at a higher level
14883                     !$block_type
14884
14885                     # we do not need to break at the top level of an 'if'
14886                     # type expression
14887                     && !$is_simple_logical_expression
14888
14889                     ## modification to keep ': (' containers vertically tight;
14890                     ## but probably better to let user set -vt=1 to avoid
14891                     ## inconsistency with other paren types
14892                     ## && ($container_type[$current_depth] ne ':')
14893
14894                     # otherwise, we require one of these reasons for breaking:
14895                     && (
14896
14897                         # - this term has forced line breaks
14898                         $has_comma_breakpoints
14899
14900                        # - the opening container is separated from this batch
14901                        #   for some reason (comment, blank line, code block)
14902                        # - this is a non-paren container spanning multiple lines
14903                         || !$saw_opening_structure
14904
14905                         # - this is a long block contained in another breakable
14906                         #   container
14907                         || (   $is_long_term
14908                             && $container_environment_to_go[$i_opening] ne
14909                             'BLOCK' )
14910                     )
14911                   )
14912                 {
14913
14914                     # For -lp option, we must put a breakpoint before
14915                     # the token which has been identified as starting
14916                     # this indentation level.  This is necessary for
14917                     # proper alignment.
14918                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
14919                     {
14920                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
14921                         if (   $i_opening + 1 < $max_index_to_go
14922                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
14923                         {
14924                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
14925                         }
14926                         if ( defined($item) ) {
14927                             my $i_start_2 = $item->get_STARTING_INDEX();
14928                             if (
14929                                 defined($i_start_2)
14930
14931                                 # we are breaking after an opening brace, paren,
14932                                 # so don't break before it too
14933                                 && $i_start_2 ne $i_opening
14934                               )
14935                             {
14936
14937                                 # Only break for breakpoints at the same
14938                                 # indentation level as the opening paren
14939                                 my $test1 = $nesting_depth_to_go[$i_opening];
14940                                 my $test2 = $nesting_depth_to_go[$i_start_2];
14941                                 if ( $test2 == $test1 ) {
14942                                     set_forced_breakpoint( $i_start_2 - 1 );
14943                                 }
14944                             }
14945                         }
14946                     }
14947
14948                     # break after opening structure.
14949                     # note: break before closing structure will be automatic
14950                     if ( $minimum_depth <= $current_depth ) {
14951
14952                         set_forced_breakpoint($i_opening)
14953                           unless ( $do_not_break_apart
14954                             || is_unbreakable_container($current_depth) );
14955
14956                         # break at '.' of lower depth level before opening token
14957                         if ( $last_dot_index[$depth] ) {
14958                             set_forced_breakpoint( $last_dot_index[$depth] );
14959                         }
14960
14961                         # break before opening structure if preceded by another
14962                         # closing structure and a comma.  This is normally
14963                         # done by the previous closing brace, but not
14964                         # if it was a one-line block.
14965                         if ( $i_opening > 2 ) {
14966                             my $i_prev =
14967                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
14968                               ? $i_opening - 2
14969                               : $i_opening - 1;
14970
14971                             if (   $types_to_go[$i_prev] eq ','
14972                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14973                             {
14974                                 set_forced_breakpoint($i_prev);
14975                             }
14976
14977                             # also break before something like ':('  or '?('
14978                             # if appropriate.
14979                             elsif (
14980                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14981                             {
14982                                 my $token_prev = $tokens_to_go[$i_prev];
14983                                 if ( $want_break_before{$token_prev} ) {
14984                                     set_forced_breakpoint($i_prev);
14985                                 }
14986                             }
14987                         }
14988                     }
14989
14990                     # break after comma following closing structure
14991                     if ( $next_type eq ',' ) {
14992                         set_forced_breakpoint( $i + 1 );
14993                     }
14994
14995                     # break before an '=' following closing structure
14996                     if (
14997                         $is_assignment{$next_nonblank_type}
14998                         && ( $breakpoint_stack[$current_depth] !=
14999                             $forced_breakpoint_count )
15000                       )
15001                     {
15002                         set_forced_breakpoint($i);
15003                     }
15004
15005                     # break at any comma before the opening structure.  Added
15006                     # for -lp, but seems to be good in general.  It isn't
15007                     # obvious how far back to look; the '5' below seems to
15008                     # work well and will catch the comma in something like
15009                     #  push @list, myfunc( $param, $param, ..
15010
15011                     my $icomma = $last_comma_index[$depth];
15012                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
15013                         unless ( $forced_breakpoint_to_go[$icomma] ) {
15014                             set_forced_breakpoint($icomma);
15015                         }
15016                     }
15017                 }    # end logic to open up a container
15018
15019                 # Break open a logical container if it was already open
15020                 elsif ($is_simple_logical_expression
15021                     && $has_old_logical_breakpoints[$current_depth] )
15022                 {
15023                     set_logical_breakpoints($current_depth);
15024                 }
15025
15026                 # Handle long container which does not get opened up
15027                 elsif ($is_long_term) {
15028
15029                     # must set fake breakpoint to alert outer containers that
15030                     # they are complex
15031                     set_fake_breakpoint();
15032                 }
15033             }
15034
15035             #------------------------------------------------------------
15036             # Handle this token
15037             #------------------------------------------------------------
15038
15039             $current_depth = $depth;
15040
15041             # handle comma-arrow
15042             if ( $type eq '=>' ) {
15043                 next if ( $last_nonblank_type eq '=>' );
15044                 next if $rOpts_break_at_old_comma_breakpoints;
15045                 next if $rOpts_comma_arrow_breakpoints == 3;
15046                 $want_comma_break[$depth]   = 1;
15047                 $index_before_arrow[$depth] = $i_last_nonblank_token;
15048                 next;
15049             }
15050
15051             elsif ( $type eq '.' ) {
15052                 $last_dot_index[$depth] = $i;
15053             }
15054
15055             # Turn off alignment if we are sure that this is not a list
15056             # environment.  To be safe, we will do this if we see certain
15057             # non-list tokens, such as ';', and also the environment is
15058             # not a list.  Note that '=' could be in any of the = operators
15059             # (lextest.t). We can't just use the reported environment
15060             # because it can be incorrect in some cases.
15061             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
15062                 && $container_environment_to_go[$i] ne 'LIST' )
15063             {
15064                 $dont_align[$depth]         = 1;
15065                 $want_comma_break[$depth]   = 0;
15066                 $index_before_arrow[$depth] = -1;
15067             }
15068
15069             # now just handle any commas
15070             next unless ( $type eq ',' );
15071
15072             $last_dot_index[$depth]   = undef;
15073             $last_comma_index[$depth] = $i;
15074
15075             # break here if this comma follows a '=>'
15076             # but not if there is a side comment after the comma
15077             if ( $want_comma_break[$depth] ) {
15078
15079                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
15080                     $want_comma_break[$depth]   = 0;
15081                     $index_before_arrow[$depth] = -1;
15082                     next;
15083                 }
15084
15085                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15086
15087                 # break before the previous token if it looks safe
15088                 # Example of something that we will not try to break before:
15089                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
15090                 # Also we don't want to break at a binary operator (like +):
15091                 # $c->createOval(
15092                 #    $x + $R, $y +
15093                 #    $R => $x - $R,
15094                 #    $y - $R, -fill   => 'black',
15095                 # );
15096                 my $ibreak = $index_before_arrow[$depth] - 1;
15097                 if (   $ibreak > 0
15098                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
15099                 {
15100                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
15101                     if ( $types_to_go[$ibreak]  eq 'b' ) { $ibreak-- }
15102                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
15103
15104                         # don't break pointer calls, such as the following:
15105                         #  File::Spec->curdir  => 1,
15106                         # (This is tokenized as adjacent 'w' tokens)
15107                         if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
15108                             set_forced_breakpoint($ibreak);
15109                         }
15110                     }
15111                 }
15112
15113                 $want_comma_break[$depth]   = 0;
15114                 $index_before_arrow[$depth] = -1;
15115
15116                 # handle list which mixes '=>'s and ','s:
15117                 # treat any list items so far as an interrupted list
15118                 $interrupted_list[$depth] = 1;
15119                 next;
15120             }
15121
15122             # break after all commas above starting depth
15123             if ( $depth < $starting_depth && !$dont_align[$depth] ) {
15124                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15125                 next;
15126             }
15127
15128             # add this comma to the list..
15129             my $item_count = $item_count_stack[$depth];
15130             if ( $item_count == 0 ) {
15131
15132                 # but do not form a list with no opening structure
15133                 # for example:
15134
15135                 #            open INFILE_COPY, ">$input_file_copy"
15136                 #              or die ("very long message");
15137
15138                 if ( ( $opening_structure_index_stack[$depth] < 0 )
15139                     && $container_environment_to_go[$i] eq 'BLOCK' )
15140                 {
15141                     $dont_align[$depth] = 1;
15142                 }
15143             }
15144
15145             $comma_index[$depth][$item_count] = $i;
15146             ++$item_count_stack[$depth];
15147             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
15148                 $identifier_count_stack[$depth]++;
15149             }
15150         }
15151
15152         #-------------------------------------------
15153         # end of loop over all tokens in this batch
15154         #-------------------------------------------
15155
15156         # set breaks for any unfinished lists ..
15157         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
15158
15159             $interrupted_list[$dd] = 1;
15160             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
15161             set_comma_breakpoints($dd);
15162             set_logical_breakpoints($dd)
15163               if ( $has_old_logical_breakpoints[$dd] );
15164             set_for_semicolon_breakpoints($dd);
15165
15166             # break open container...
15167             my $i_opening = $opening_structure_index_stack[$dd];
15168             set_forced_breakpoint($i_opening)
15169               unless (
15170                 is_unbreakable_container($dd)
15171
15172                 # Avoid a break which would place an isolated ' or "
15173                 # on a line
15174                 || (   $type eq 'Q'
15175                     && $i_opening >= $max_index_to_go - 2
15176                     && $token =~ /^['"]$/ )
15177               );
15178         }
15179
15180         # Return a flag indicating if the input file had some good breakpoints.
15181         # This flag will be used to force a break in a line shorter than the
15182         # allowed line length.
15183         if ( $has_old_logical_breakpoints[$current_depth] ) {
15184             $saw_good_breakpoint = 1;
15185         }
15186         return $saw_good_breakpoint;
15187     }
15188 }    # end scan_list
15189
15190 sub find_token_starting_list {
15191
15192     # Return the index of the token at which the length estimate for the
15193     # container opening at index $i_opening_paren should start.  When testing
15194     # to see if a block will fit on one line, some previous token(s) may also
15195     # need to be on the line; particularly if this is a sub call.  NOTE: This
15196     # isn't perfect, but not critical, because if we mis-identify a block, it
15197     # will be wrapped and therefore fixed the next time it is formatted.
15198     my $i_opening_paren = shift;
15199     my $i_opening_minus = $i_opening_paren;
15200
15201     # There is nothing to look back at for the first token of the batch, and
15202     # a negative subscript would wrongly read from the end of the array.
15203     return $i_opening_minus if ( $i_opening_paren <= 0 );
15204     my $im1    = $i_opening_paren - 1;
15205     my $im2    = $i_opening_paren - 2;
15206     my $typem1 = $types_to_go[$im1];
15207     my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
15208     if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
15209         $i_opening_minus = $i_opening_paren;    # follows a comma: no lookback
15210     }
15211     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
15212         $i_opening_minus = $im1;
15213         # walk back to improve length estimate
15214         for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
15215             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
15216             $i_opening_minus = $j;
15217         }
15218         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
15219     }
15220     elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
15221     elsif ( $typem1 eq 'b' && $typem2 eq 'k' ) { $i_opening_minus = $im2 }
15222     return $i_opening_minus;
15223 }
15224
15225 {    # begin set_comma_breakpoints_do
15226
15227     my %is_keyword_with_special_leading_term;
15228
15229     BEGIN {
15230
15231         # These keywords have prototypes which allow a special leading item
15232         # followed by a list.  A lexical array is used instead of borrowing @_.
15233         my @q =
15234           qw(formline grep kill map printf sprintf push chmod join pack unshift);
15235         @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
15236     }
15237
15238     sub set_comma_breakpoints_do {
15239
15240         # Given a list with some commas, set breakpoints at some of the
15241         # commas, if necessary, to make it easy to read.  This list is
15242         # an example:
15243         my (
15244             $depth,               $i_opening_paren,  $i_closing_paren,
15245             $item_count,          $identifier_count, $rcomma_index,
15246             $next_nonblank_type,  $list_type,        $interrupted,
15247             $rdo_not_break_apart, $must_break_open,
15248         ) = @_;
15249
15250         # nothing to do if no commas seen
15251         return if ( $item_count < 1 );
15252         my $i_first_comma     = $$rcomma_index[0];
15253         my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
15254         my $i_last_comma      = $i_true_last_comma;
15255         if ( $i_last_comma >= $max_index_to_go ) {
15256             $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
15257             return if ( $item_count < 1 );
15258         }
15259
15260         #---------------------------------------------------------------
15261         # find lengths of all items in the list to calculate page layout
15262         #---------------------------------------------------------------
15263         my $comma_count = $item_count;
15264         my @item_lengths;
15265         my @i_term_begin;
15266         my @i_term_end;
15267         my @i_term_comma;
15268         my $i_prev_plus;
15269         my @max_length = ( 0, 0 );
15270         my $first_term_length;
15271         my $i      = $i_opening_paren;
15272         my $is_odd = 1;
15273
15274         for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
15275             $is_odd      = 1 - $is_odd;
15276             $i_prev_plus = $i + 1;
15277             $i           = $$rcomma_index[$j];
15278
15279             my $i_term_end =
15280               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
15281             my $i_term_begin =
15282               ( $types_to_go[$i_prev_plus] eq 'b' )
15283               ? $i_prev_plus + 1
15284               : $i_prev_plus;
15285             push @i_term_begin, $i_term_begin;
15286             push @i_term_end,   $i_term_end;
15287             push @i_term_comma, $i;
15288
15289             # note: currently adding 2 to all lengths (for comma and space)
15290             my $length =
15291               2 + token_sequence_length( $i_term_begin, $i_term_end );
15292             push @item_lengths, $length;
15293
15294             if ( $j == 0 ) {
15295                 $first_term_length = $length;
15296             }
15297             else {
15298
15299                 if ( $length > $max_length[$is_odd] ) {
15300                     $max_length[$is_odd] = $length;
15301                 }
15302             }
15303         }
15304
15305         # now we have to make a distinction between the comma count and item
15306         # count, because the item count will be one greater than the comma
15307         # count if the last item is not terminated with a comma
15308         my $i_b =
15309           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
15310           ? $i_last_comma + 1
15311           : $i_last_comma;
15312         my $i_e =
15313           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
15314           ? $i_closing_paren - 2
15315           : $i_closing_paren - 1;
15316         my $i_effective_last_comma = $i_last_comma;
15317
15318         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
15319
15320         if ( $last_item_length > 0 ) {
15321
15322             # add 2 to length because other lengths include a comma and a blank
15323             $last_item_length += 2;
15324             push @item_lengths, $last_item_length;
15325             push @i_term_begin, $i_b + 1;
15326             push @i_term_end,   $i_e;
15327             push @i_term_comma, undef;
15328
15329             my $i_odd = $item_count % 2;
15330
15331             if ( $last_item_length > $max_length[$i_odd] ) {
15332                 $max_length[$i_odd] = $last_item_length;
15333             }
15334
15335             $item_count++;
15336             $i_effective_last_comma = $i_e + 1;
15337
15338             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
15339                 $identifier_count++;
15340             }
15341         }
15342
15343         #---------------------------------------------------------------
15344         # End of length calculations
15345         #---------------------------------------------------------------
15346
15347         #---------------------------------------------------------------
15348         # Compound List Rule 1:
15349         # Break at (almost) every comma for a list containing a broken
15350         # sublist.  This has higher priority than the Interrupted List
15351         # Rule.
15352         #---------------------------------------------------------------
15353         if ( $has_broken_sublist[$depth] ) {
15354
15355             # Break at every comma except for a comma between two
15356             # simple, small terms.  This prevents long vertical
15357             # columns of, say, just 0's.
15358             my $small_length = 10;    # 2 + actual maximum length wanted
15359
15360             # We'll insert a break in long runs of small terms to
15361             # allow alignment in uniform tables.
15362             my $skipped_count = 0;
15363             my $columns       = table_columns_available($i_first_comma);
15364             my $fields        = int( $columns / $small_length );
15365             if (   $rOpts_maximum_fields_per_table
15366                 && $fields > $rOpts_maximum_fields_per_table )
15367             {
15368                 $fields = $rOpts_maximum_fields_per_table;
15369             }
15370             my $max_skipped_count = $fields - 1;
15371
15372             my $is_simple_last_term = 0;
15373             my $is_simple_next_term = 0;
15374             foreach my $j ( 0 .. $item_count ) {
15375                 $is_simple_last_term = $is_simple_next_term;
15376                 $is_simple_next_term = 0;
15377                 if (   $j < $item_count
15378                     && $i_term_end[$j] == $i_term_begin[$j]
15379                     && $item_lengths[$j] <= $small_length )
15380                 {
15381                     $is_simple_next_term = 1;
15382                 }
15383                 next if $j == 0;
15384                 if (   $is_simple_last_term
15385                     && $is_simple_next_term
15386                     && $skipped_count < $max_skipped_count )
15387                 {
15388                     $skipped_count++;
15389                 }
15390                 else {
15391                     $skipped_count = 0;
15392                     my $i = $i_term_comma[ $j - 1 ];
15393                     last unless defined $i;
15394                     set_forced_breakpoint($i);
15395                 }
15396             }
15397
15398             # always break at the last comma if this list is
15399             # interrupted; we wouldn't want to leave a terminal '{', for
15400             # example.
15401             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
15402             return;
15403         }
15404
15405 #my ( $a, $b, $c ) = caller();
15406 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
15407 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
15408 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
15409
15410         #---------------------------------------------------------------
15411         # Interrupted List Rule:
15412         # A list is forced to use old breakpoints if it was interrupted
15413         # by side comments or blank lines, or requested by user.
15414         #---------------------------------------------------------------
15415         if (   $rOpts_break_at_old_comma_breakpoints
15416             || $interrupted
15417             || $i_opening_paren < 0 )
15418         {
15419             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
15420             return;
15421         }
15422
15423         #---------------------------------------------------------------
15424         # Looks like a list of items.  We have to look at it and size it up.
15425         #---------------------------------------------------------------
15426
15427         my $opening_token = $tokens_to_go[$i_opening_paren];
15428         my $opening_environment =
15429           $container_environment_to_go[$i_opening_paren];
15430
15431         #-------------------------------------------------------------------
15432         # Return if this will fit on one line
15433         #-------------------------------------------------------------------
15434
15435         my $i_opening_minus = find_token_starting_list($i_opening_paren);
15436         return
15437           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
15438
15439         #-------------------------------------------------------------------
15440         # Now we know that this block spans multiple lines; we have to set
15441         # at least one breakpoint -- real or fake -- as a signal to break
15442         # open any outer containers.
15443         #-------------------------------------------------------------------
15444         set_fake_breakpoint();
15445
15446         # be sure we do not extend beyond the current list length
15447         if ( $i_effective_last_comma >= $max_index_to_go ) {
15448             $i_effective_last_comma = $max_index_to_go - 1;
15449         }
15450
15451         # Set a flag indicating if we need to break open to keep -lp
15452         # items aligned.  This is necessary if any of the list terms
15453         # exceeds the available space after the '('.
15454         my $need_lp_break_open = $must_break_open;
15455         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
15456             my $columns_if_unbroken = $rOpts_maximum_line_length -
15457               total_line_length( $i_opening_minus, $i_opening_paren );
15458             $need_lp_break_open =
15459                  ( $max_length[0] > $columns_if_unbroken )
15460               || ( $max_length[1] > $columns_if_unbroken )
15461               || ( $first_term_length > $columns_if_unbroken );
15462         }
15463
15464         # Specify if the list must have an even number of fields or not.
15465         # It is generally safest to assume an even number, because the
15466         # list items might be a hash list.  But if we can be sure that
15467         # it is not a hash, then we can allow an odd number for more
15468         # flexibility.
15469         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
15470
15471         if (   $identifier_count >= $item_count - 1
15472             || $is_assignment{$next_nonblank_type}
15473             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
15474           )
15475         {
15476             $odd_or_even = 1;
15477         }
15478
15479         # do we have a long first term which should be
15480         # left on a line by itself?
15481         my $use_separate_first_term = (
15482             $odd_or_even == 1       # only if we can use 1 field/line
15483               && $item_count > 3    # need several items
15484               && $first_term_length >
15485               2 * $max_length[0] - 2    # need long first term
15486               && $first_term_length >
15487               2 * $max_length[1] - 2    # need long first term
15488         );
15489
15490         # or do we know from the type of list that the first term should
15491         # be placed alone?
15492         if ( !$use_separate_first_term ) {
15493             if ( $is_keyword_with_special_leading_term{$list_type} ) {
15494                 $use_separate_first_term = 1;
15495
15496                 # should the container be broken open?
15497                 if ( $item_count < 3 ) {
15498                     if ( $i_first_comma - $i_opening_paren < 4 ) {
15499                         $$rdo_not_break_apart = 1;
15500                     }
15501                 }
15502                 elsif ($first_term_length < 20
15503                     && $i_first_comma - $i_opening_paren < 4 )
15504                 {
15505                     my $columns = table_columns_available($i_first_comma);
15506                     if ( $first_term_length < $columns ) {
15507                         $$rdo_not_break_apart = 1;
15508                     }
15509                 }
15510             }
15511         }
15512
15513         # if so,
15514         if ($use_separate_first_term) {
15515
15516             # ..set a break and update starting values
15517             $use_separate_first_term = 1;
15518             set_forced_breakpoint($i_first_comma);
15519             $i_opening_paren = $i_first_comma;
15520             $i_first_comma   = $$rcomma_index[1];
15521             $item_count--;
15522             return if $comma_count == 1;
15523             shift @item_lengths;
15524             shift @i_term_begin;
15525             shift @i_term_end;
15526             shift @i_term_comma;
15527         }
15528
15529         # if not, update the metrics to include the first term
15530         else {
15531             if ( $first_term_length > $max_length[0] ) {
15532                 $max_length[0] = $first_term_length;
15533             }
15534         }
15535
15536         # Field width parameters
15537         my $pair_width = ( $max_length[0] + $max_length[1] );
15538         my $max_width =
15539           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
15540
15541         # Number of free columns across the page width for laying out tables
15542         my $columns = table_columns_available($i_first_comma);
15543
15544         # Estimated maximum number of fields which fit this space
15545         # This will be our first guess
15546         my $number_of_fields_max =
15547           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
15548             $pair_width );
15549         my $number_of_fields = $number_of_fields_max;
15550
15551         # Find the best-looking number of fields
15552         # and make this our second guess if possible
15553         my ( $number_of_fields_best, $ri_ragged_break_list,
15554             $new_identifier_count )
15555           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
15556             $max_width );
15557
15558         if (   $number_of_fields_best != 0
15559             && $number_of_fields_best < $number_of_fields_max )
15560         {
15561             $number_of_fields = $number_of_fields_best;
15562         }
15563
15564         # ----------------------------------------------------------------------
15565         # If we are crowded and the -lp option is being used, try to
15566         # undo some indentation
15567         # ----------------------------------------------------------------------
15568         if (
15569             $rOpts_line_up_parentheses
15570             && (
15571                 $number_of_fields == 0
15572                 || (   $number_of_fields == 1
15573                     && $number_of_fields != $number_of_fields_best )
15574             )
15575           )
15576         {
15577             my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
15578             if ( $available_spaces > 0 ) {
15579
15580                 my $spaces_wanted = $max_width - $columns;    # for 1 field
15581
15582                 if ( $number_of_fields_best == 0 ) {
15583                     $number_of_fields_best =
15584                       get_maximum_fields_wanted( \@item_lengths );
15585                 }
15586
15587                 if ( $number_of_fields_best != 1 ) {
15588                     my $spaces_wanted_2 =
15589                       1 + $pair_width - $columns;             # for 2 fields
15590                     if ( $available_spaces > $spaces_wanted_2 ) {
15591                         $spaces_wanted = $spaces_wanted_2;
15592                     }
15593                 }
15594
15595                 if ( $spaces_wanted > 0 ) {
15596                     my $deleted_spaces =
15597                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
15598
15599                     # redo the math
15600                     if ( $deleted_spaces > 0 ) {
15601                         $columns = table_columns_available($i_first_comma);
15602                         $number_of_fields_max =
15603                           maximum_number_of_fields( $columns, $odd_or_even,
15604                             $max_width, $pair_width );
15605                         $number_of_fields = $number_of_fields_max;
15606
15607                         if (   $number_of_fields_best == 1
15608                             && $number_of_fields >= 1 )
15609                         {
15610                             $number_of_fields = $number_of_fields_best;
15611                         }
15612                     }
15613                 }
15614             }
15615         }
15616
15617         # try for one column if two won't work
15618         if ( $number_of_fields <= 0 ) {
15619             $number_of_fields = int( $columns / $max_width );
15620         }
15621
15622         # The user can place an upper bound on the number of fields,
15623         # which can be useful for doing maintenance on tables
15624         if (   $rOpts_maximum_fields_per_table
15625             && $number_of_fields > $rOpts_maximum_fields_per_table )
15626         {
15627             $number_of_fields = $rOpts_maximum_fields_per_table;
15628         }
15629
15630         # How many columns (characters) and lines would this container take
15631         # if no additional whitespace were added?
15632         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
15633             $i_effective_last_comma + 1 );
15634         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
15635         my $packed_lines = 1 + int( $packed_columns / $columns );
15636
15637         # are we an item contained in an outer list?
15638         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
15639
15640         if ( $number_of_fields <= 0 ) {
15641
15642 #         #---------------------------------------------------------------
15643 #         # We're in trouble.  We can't find a single field width that works.
15644 #         # There is no simple answer here; we may have a single long list
15645 #         # item, or many.
15646 #         #---------------------------------------------------------------
15647 #
15648 #         In many cases, it may be best to not force a break if there is just one
15649 #         comma, because the standard continuation break logic will do a better
15650 #         job without it.
15651 #
15652 #         In the common case that all but one of the terms can fit
15653 #         on a single line, it may look better not to break open the
15654 #         containing parens.  Consider, for example
15655 #
15656 #             $color =
15657 #               join ( '/',
15658 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
15659 #                 keys %colors );
15660 #
15661 #         which will look like this with the container broken:
15662 #
15663 #             $color = join (
15664 #                 '/',
15665 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
15666 #             );
15667 #
15668 #         Here is an example of this rule for a long last term:
15669 #
15670 #             log_message( 0, 256, 128,
15671 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
15672 #
15673 #         And here is an example with a long first term:
15674 #
15675 #         $s = sprintf(
15676 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
15677 #             $r, $pu, $ps, $cu, $cs, $tt
15678 #           )
15679 #           if $style eq 'all';
15680
15681             my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
15682             my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
15683             my $long_first_term =
15684               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
15685
15686             # break at every comma ...
15687             if (
15688
15689                 # if requested by user or is best looking
15690                 $number_of_fields_best == 1
15691
15692                 # or if this is a sublist of a larger list
15693                 || $in_hierarchical_list
15694
15695                 # or if multiple commas and we don't have a long first or last
15696                 # term
15697                 || ( $comma_count > 1
15698                     && !( $long_last_term || $long_first_term ) )
15699               )
15700             {
15701                 foreach ( 0 .. $comma_count - 1 ) {
15702                     set_forced_breakpoint( $$rcomma_index[$_] );
15703                 }
15704             }
15705             elsif ($long_last_term) {
15706
15707                 set_forced_breakpoint($i_last_comma);
15708                 $$rdo_not_break_apart = 1 unless $must_break_open;
15709             }
15710             elsif ($long_first_term) {
15711
15712                 set_forced_breakpoint($i_first_comma);
15713             }
15714             else {
15715
15716                 # let breaks be defined by default bond strength logic
15717             }
15718             return;
15719         }
15720
15721         # --------------------------------------------------------
15722         # We have a tentative field count that seems to work.
15723         # How many lines will this require?
15724         # --------------------------------------------------------
15725         my $formatted_lines = $item_count / ($number_of_fields);
15726         if ( $formatted_lines != int $formatted_lines ) {
15727             $formatted_lines = 1 + int $formatted_lines;
15728         }
15729
15730         # So far we've been trying to fill out to the right margin.  But
15731         # compact tables are easier to read, so let's see if we can use fewer
15732         # fields without increasing the number of lines.
15733         $number_of_fields =
15734           compactify_table( $item_count, $number_of_fields, $formatted_lines,
15735             $odd_or_even );
15736
15737         # How many spaces across the page will we fill?
15738         my $columns_per_line =
15739           ( int $number_of_fields / 2 ) * $pair_width +
15740           ( $number_of_fields % 2 ) * $max_width;
15741
15742         my $formatted_columns;
15743
15744         if ( $number_of_fields > 1 ) {
15745             $formatted_columns =
15746               ( $pair_width * ( int( $item_count / 2 ) ) +
15747                   ( $item_count % 2 ) * $max_width );
15748         }
15749         else {
15750             $formatted_columns = $max_width * $item_count;
15751         }
15752         if ( $formatted_columns < $packed_columns ) {
15753             $formatted_columns = $packed_columns;
15754         }
15755
15756         my $unused_columns = $formatted_columns - $packed_columns;
15757
15758         # set some empirical parameters to help decide if we should try to
15759         # align; high sparsity does not look good, especially with few lines
15760         my $sparsity = ($unused_columns) / ($formatted_columns);
15761         my $max_allowed_sparsity =
15762             ( $item_count < 3 )    ? 0.1
15763           : ( $packed_lines == 1 ) ? 0.15
15764           : ( $packed_lines == 2 ) ? 0.4
15765           :                          0.7;
15766
15767         # Begin check for shortcut methods, which avoid treating a list
15768         # as a table for relatively small parenthesized lists.  These
15769         # are usually easier to read if not formatted as tables.
15770         if (
15771             $packed_lines <= 2    # probably can fit in 2 lines
15772             && $item_count < 9    # doesn't have too many items
15773             && $opening_environment eq 'BLOCK'    # not a sub-container
15774             && $opening_token       eq '('        # is paren list
15775           )
15776         {
15777
15778             # Shortcut method 1: for -lp and just one comma:
15779             # This is a no-brainer, just break at the comma.
15780             if (
15781                 $rOpts_line_up_parentheses        # -lp
15782                 && $item_count == 2               # two items, one comma
15783                 && !$must_break_open
15784               )
15785             {
15786                 my $i_break = $$rcomma_index[0];
15787                 set_forced_breakpoint($i_break);
15788                 $$rdo_not_break_apart = 1;
15789                 set_non_alignment_flags( $comma_count, $rcomma_index );
15790                 return;
15791
15792             }
15793
15794             # method 2 is for most small ragged lists which might look
15795             # best if not displayed as a table.
15796             if (
15797                 ( $number_of_fields == 2 && $item_count == 3 )
15798                 || (
15799                     $new_identifier_count > 0    # isn't all quotes
15800                     && $sparsity > 0.15
15801                 )    # would be fairly spaced gaps if aligned
15802               )
15803             {
15804
15805                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15806                     $ri_ragged_break_list );
15807                 ++$break_count if ($use_separate_first_term);
15808
15809                 # NOTE: we should really use the true break count here,
15810                 # which can be greater if there are large terms and
15811                 # little space, but usually this will work well enough.
15812                 unless ($must_break_open) {
15813
15814                     if ( $break_count <= 1 ) {
15815                         $$rdo_not_break_apart = 1;
15816                     }
15817                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15818                     {
15819                         $$rdo_not_break_apart = 1;
15820                     }
15821                 }
15822                 set_non_alignment_flags( $comma_count, $rcomma_index );
15823                 return;
15824             }
15825
15826         }    # end shortcut methods
15827
15828         # debug stuff
15829
15830         FORMATTER_DEBUG_FLAG_SPARSE && do {
15831             print
15832 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
15833
15834         };
15835
15836         #---------------------------------------------------------------
15837         # Compound List Rule 2:
15838         # If this list is too long for one line, and it is an item of a
15839         # larger list, then we must format it, regardless of sparsity
15840         # (ian.t).  One reason that we have to do this is to trigger
15841         # Compound List Rule 1, above, which causes breaks at all commas of
15842         # all outer lists.  In this way, the structure will be properly
15843         # displayed.
15844         #---------------------------------------------------------------
15845
15846         # Decide if this list is too long for one line unless broken
15847         my $total_columns = table_columns_available($i_opening_paren);
15848         my $too_long      = $packed_columns > $total_columns;
15849
15850         # For a paren list, include the length of the token just before the
15851         # '(' because this is likely a sub call, and we would have to
15852         # include the sub name on the same line as the list.  This is still
15853         # imprecise, but not too bad.  (steve.t)
15854         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
15855
15856             $too_long = excess_line_length( $i_opening_minus,
15857                 $i_effective_last_comma + 1 ) > 0;
15858         }
15859
15860         # FIXME: For an item after a '=>', try to include the length of the
15861         # thing before the '=>'.  This is crude and should be improved by
15862         # actually looking back token by token.
15863         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
15864             my $i_opening_minus = $i_opening_paren - 4;
15865             if ( $i_opening_minus >= 0 ) {
15866                 $too_long = excess_line_length( $i_opening_minus,
15867                     $i_effective_last_comma + 1 ) > 0;
15868             }
15869         }
15870
15871         # Always break lists contained in '[' and '{' if too long for 1 line,
15872         # and always break lists which are too long and part of a more complex
15873         # structure.
15874         my $must_break_open_container = $must_break_open
15875           || ( $too_long
15876             && ( $in_hierarchical_list || $opening_token ne '(' ) );
15877
15878 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
15879
15880         #---------------------------------------------------------------
15881         # The main decision:
15882         # Now decide if we will align the data into aligned columns.  Do not
15883         # attempt to align columns if this is a tiny table or it would be
15884         # too spaced.  It seems that the more packed lines we have, the
15885         # sparser the list that can be allowed and still look ok.
15886         #---------------------------------------------------------------
15887
15888         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
15889             || ( $formatted_lines < 2 )
15890             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
15891           )
15892         {
15893
15894             #---------------------------------------------------------------
15895             # too sparse: would look ugly if aligned in a table;
15896             #---------------------------------------------------------------
15897
15898             # use old breakpoints if this is a 'big' list
15899             # FIXME: goal is to improve set_ragged_breakpoints so that
15900             # this is not necessary.
15901             if ( $packed_lines > 2 && $item_count > 10 ) {
15902                 write_logfile_entry("List sparse: using old breakpoints\n");
15903                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
15904             }
15905
15906             # let the continuation logic handle it if 2 lines
15907             else {
15908
15909                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15910                     $ri_ragged_break_list );
15911                 ++$break_count if ($use_separate_first_term);
15912
15913                 unless ($must_break_open_container) {
15914                     if ( $break_count <= 1 ) {
15915                         $$rdo_not_break_apart = 1;
15916                     }
15917                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15918                     {
15919                         $$rdo_not_break_apart = 1;
15920                     }
15921                 }
15922                 set_non_alignment_flags( $comma_count, $rcomma_index );
15923             }
15924             return;
15925         }
15926
15927         #---------------------------------------------------------------
15928         # go ahead and format as a table
15929         #---------------------------------------------------------------
15930         write_logfile_entry(
15931             "List: auto formatting with $number_of_fields fields/row\n");
15932
15933         my $j_first_break =
15934           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15935
15936         for (
15937             my $j = $j_first_break ;
15938             $j < $comma_count ;
15939             $j += $number_of_fields
15940           )
15941         {
15942             my $i = $$rcomma_index[$j];
15943             set_forced_breakpoint($i);
15944         }
15945         return;
15946     }
15947 }
15948
sub set_non_alignment_flags {

    # Flag the commas of a list so that they will not be aligned.
    #
    #  $comma_count  = number of leading entries of the index list to flag
    #  $rcomma_index = ref to array of comma token indexes
    #
    # Each flagged comma gets a value of 1 in @matching_token_to_go.
    my ( $comma_count, $rcomma_index ) = @_;
    for my $j ( 0 .. $comma_count - 1 ) {
        my $i_comma = $rcomma_index->[$j];
        $matching_token_to_go[$i_comma] = 1;
    }
}
15958
sub study_list_complexity {

    # Look for complex tables which should be formatted with one term per line.
    #
    # Given:
    #  $ri_term_begin = ref to array of the index of the first token of
    #    each item
    #  $ri_term_end   = ref to array of the index of the last token of
    #    each item
    #  $ritem_lengths = ref to array of the length of each item
    #  $max_width     = a width; a single field is only suggested when
    #    this exceeds 12
    #
    # Returns the following list:
    #
    #  $number_of_fields_best = suggested number of fields based on
    #    complexity.  It starts as $rOpts_maximum_fields_per_table and is
    #    set to 1 if more than half of the items are complex (and
    #    $max_width > 12, and the starting value is not 2).  Per the
    #    original note, a value of 0 means that any number may be used.
    #  \@i_ragged_break_list = list of good breakpoints to avoid lines
    #    which are hard to read
    #  $identifier_count = number of items whose first token is not of
    #    type 'q', 'Q', 'w' or '-'
    #
    my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
    my $item_count            = @{$ri_term_begin};
    my $complex_item_count    = 0;
    my $number_of_fields_best = $rOpts_maximum_fields_per_table;
    my $i_max                 = @{$ritem_lengths} - 1;
    ##my @item_complexity;

    # start values are chosen so that neither looks like a recent break
    my $i_last_last_break = -3;
    my $i_last_break      = -2;
    my @i_ragged_break_list;

    # empirical thresholds on the weighted length of an item
    my $definitely_complex = 30;
    my $definitely_simple  = 12;
    my $quote_count        = 0;

    for my $i ( 0 .. $i_max ) {
        my $ib = $ri_term_begin->[$i];
        my $ie = $ri_term_end->[$i];

        # define complexity: start with the actual term length
        my $weighted_length = ( $ritem_lengths->[$i] - 2 );

        ##TBD: join types here and check for variations
        ##my $str=join "", @tokens_to_go[$ib..$ie];

        # items starting with a quote, bareword or '-' all count toward
        # $quote_count, but only true quotes set $is_quote
        my $is_quote = 0;
        if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
            $is_quote = 1;
            $quote_count++;
        }
        elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
            $quote_count++;
        }

        # single token item: complex only if a quote containing whitespace
        # (token indexes are numbers, so compare them numerically)
        if ( $ib == $ie ) {
            if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }
        }

        # multiple token item: complex if it contains a blank, and give
        # extra weight to a range operator
        else {
            my @term_types = @types_to_go[ $ib .. $ie ];
            if ( grep { $_ eq 'b' } @term_types ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }
            if ( grep { $_ eq '..' } @term_types ) {
                $weighted_length += 4;
            }
        }

        # add weight for extra tokens.
        $weighted_length += 2 * ( $ie - $ib );

##        my $BUB = join '', @tokens_to_go[$ib..$ie];
##        print "# COMPLEXITY:$weighted_length   $BUB\n";

##push @item_complexity, $weighted_length;

        # now mark a ragged break after this item if it is 'long and
        # complex':
        if ( $weighted_length >= $definitely_complex ) {

            # if we broke after the previous term
            # then break before it too
            if (   $i_last_break == $i - 1
                && $i > 1
                && $i_last_last_break != $i - 2 )
            {

                ## FIXME: don't strand a small term
                pop @i_ragged_break_list;
                push @i_ragged_break_list, $i - 2;
                push @i_ragged_break_list, $i - 1;
            }

            push @i_ragged_break_list, $i;
            $i_last_last_break = $i_last_break;
            $i_last_break      = $i;
        }

        # don't break before a small last term -- it will
        # not look good on a line by itself.
        elsif ($i == $i_max
            && $i_last_break == $i - 1
            && $weighted_length <= $definitely_simple )
        {
            pop @i_ragged_break_list;
        }
    }

    my $identifier_count = $i_max + 1 - $quote_count;

    # Need more tuning here..
    if (   $max_width > 12
        && $complex_item_count > $item_count / 2
        && $number_of_fields_best != 2 )
    {
        $number_of_fields_best = 1;
    }

    return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
}
16073
sub get_maximum_fields_wanted {

    # Not all tables look good with more than one field of items.
    # This routine looks at the item lengths of a table and decides if it
    # should be formatted with just one field or not.
    # This coding is still under development.
    #
    # Given:
    #   $ritem_lengths = ref to array of item lengths
    # Returns:
    #   1 if a single field is suggested, or
    #   0 if no particular number of fields is suggested
    my ($ritem_lengths) = @_;

    my $number_of_fields_best = 0;

    # For just a few items, we tentatively assume just 1 field.
    my $item_count = @{$ritem_lengths};
    if ( $item_count <= 5 ) {
        $number_of_fields_best = 1;
    }

    # For larger tables, look at it both ways and see what looks best
    else {

        # $total_variation_1 sums the length changes between neighboring
        # items, as seen when all items are stacked in a single field.
        # @variation_by_parity sums the length changes between every other
        # item, as seen down each column of a two field table.
        my $is_odd              = 1;
        my $last_length         = undef;
        my @last_length_2       = ( undef, undef );
        my $total_variation_1   = 0;
        my @variation_by_parity = ( 0, 0 );

        foreach my $length ( @{$ritem_lengths} ) {

            # toggles 0,1,0,1,... starting with 0 for the first item
            $is_odd = 1 - $is_odd;

            if ( defined($last_length) ) {
                $total_variation_1 += abs( $length - $last_length );
            }
            $last_length = $length;

            my $ll = $last_length_2[$is_odd];
            if ( defined($ll) ) {
                $variation_by_parity[$is_odd] += abs( $length - $ll );
            }
            $last_length_2[$is_odd] = $length;
        }
        my $total_variation_2 =
          $variation_by_parity[0] + $variation_by_parity[1];

        # We only get here with more than 5 items.  Tables of 6 to 10 items
        # must show a clearer improvement to avoid a single field.
        my $factor = ( $item_count > 10 ) ? 1 : 0.75;
        unless ( $total_variation_2 < $factor * $total_variation_1 ) {
            $number_of_fields_best = 1;
        }
    }
    return ($number_of_fields_best);
}
16134
sub table_columns_available {

    # Return the number of columns available for a table, given the index
    # of a token whose leading space count (as reported by
    # leading_spaces_to_go) is subtracted from the maximum line length.
    my ($i_token) = @_;

    # Patch: the vertical formatter does not line up lines whose lengths
    # exactly equal the available line length because of allowances
    # that must be made for side comments.  Therefore, the number of
    # available columns is reduced by 1 character.
    my $side_comment_allowance = 1;

    return $rOpts_maximum_line_length -
      leading_spaces_to_go($i_token) -
      $side_comment_allowance;
}
16147
sub maximum_number_of_fields {

    # How many fields will fit in the available space?
    #
    #  $columns     = number of columns available
    #  $odd_or_even = 1 if an odd number of fields is allowed
    #  $max_width   = width needed by one extra (unpaired) field
    #  $pair_width  = width needed by a pair of fields
    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;

    # first see how many complete pairs of fields fit
    my $whole_pairs = int( $columns / $pair_width );

    # then see if one more field may be, and can be, squeezed in
    my $room_for_extra_field = $odd_or_even == 1
      && $whole_pairs * $pair_width + $max_width <= $columns;

    return $room_for_extra_field ? 2 * $whole_pairs + 1 : 2 * $whole_pairs;
}
16161
sub compactify_table {

    # Given a table with a certain number of fields and a certain number
    # of lines, see if reducing the number of fields will make it look
    # better.  Returns the (possibly reduced) number of fields.
    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;

    # nothing to do unless there is room to remove at least one step
    return $number_of_fields
      unless ( $number_of_fields >= $odd_or_even * 2
        && $formatted_lines > 0 );

    # Step the field count down while the same number of lines can still
    # hold all of the items; keep the last count which worked.
    my $trial_fields = $number_of_fields;
    while ($trial_fields >= $odd_or_even
        && $trial_fields * $formatted_lines >= $item_count )
    {
        $number_of_fields = $trial_fields;
        $trial_fields -= $odd_or_even;
    }
    return $number_of_fields;
}
16183
sub set_ragged_breakpoints {

    # Set breakpoints in a list that cannot be formatted nicely as a
    # table.
    #
    #  $ri_term_comma        = ref to array giving, for each term, the
    #    token index at which to break after it
    #  $ri_ragged_break_list = ref to array of the term numbers to break at
    #
    # Returns the number of breakpoints requested.  Terms with an
    # undefined or zero index in $ri_term_comma are skipped.
    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;

    my $break_count = 0;
    for my $i_term ( @{$ri_ragged_break_list} ) {
        my $i_comma = $ri_term_comma->[$i_term];
        next unless ($i_comma);
        set_forced_breakpoint($i_comma);
        $break_count++;
    }
    return $break_count;
}
16200
sub copy_old_breakpoints {

    # Request a forced breakpoint at every token index in the range
    # $i_first_comma .. $i_last_comma which is flagged in
    # @old_breakpoint_to_go.
    my ( $i_first_comma, $i_last_comma ) = @_;
    for my $i_token ( $i_first_comma .. $i_last_comma ) {
        next unless ( $old_breakpoint_to_go[$i_token] );
        set_forced_breakpoint($i_token);
    }
}
16209
sub set_nobreaks {

    # Set the nobreak flag for all tokens with index $i through $j.
    # Nothing is changed unless 0 <= $i <= $j <= $max_index_to_go.
    my ( $i, $j ) = @_;

    my $range_is_valid = $i >= 0 && $i <= $j && $j <= $max_index_to_go;

    # shouldn't happen; non-critical error
    unless ($range_is_valid) {
        FORMATTER_DEBUG_FLAG_NOBREAK && do {
            my ( $a, $b, $c ) = caller();
            print(
"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
            );
        };
        return;
    }

    FORMATTER_DEBUG_FLAG_NOBREAK && do {
        my ( $a, $b, $c ) = caller();
        print(
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
        );
    };

    foreach my $i_token ( $i .. $j ) {
        $nobreak_to_go[$i_token] = 1;
    }
    return;
}
16234
sub set_fake_breakpoint {

    # Signal that there are breaks, without saying where, by bumping up
    # the forced breakpoint count.  This is useful if we have breaks but
    # may want to postpone deciding where to make them.
    return $forced_breakpoint_count++;
}
16242
sub set_forced_breakpoint {

    # Force a line break at the token with the given index.  The break is
    # recorded on the undo stack so that it can be removed later.  Nothing
    # happens if the index is undefined, out of range, or the token has
    # its nobreak flag set.
    my ($i_break) = @_;

    return unless defined $i_break && $i_break >= 0;

    # When called with certain tokens, consult %want_break_before to
    # decide if we break before or after it.  Breaks are always forced
    # before 'if' and 'unless'.
    my $token = $tokens_to_go[$i_break];
    my $break_before =
      ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ )
      ? $want_break_before{$token}
      : $is_if_unless{$token};
    $i_break-- if ($break_before);

    return unless ( $i_break >= 0 && $i_break <= $max_index_to_go );

    # back up over a blank so that the break goes on the previous token
    my $i_nonblank = $i_break;
    $i_nonblank-- if ( $types_to_go[$i_break] eq 'b' );

    FORMATTER_DEBUG_FLAG_FORCE && do {
        my ( $a, $b, $c ) = caller();
        print
"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
    };

    # give up if we backed off the start or a break is not allowed here
    return if ( $i_nonblank < 0 );
    return unless ( $nobreak_to_go[$i_nonblank] == 0 );

    $forced_breakpoint_to_go[$i_nonblank] = 1;
    $index_max_forced_break = $i_nonblank
      if ( $i_nonblank > $index_max_forced_break );
    $forced_breakpoint_count++;

    # remember this break so that it can be undone
    $forced_breakpoint_undo_stack[$forced_breakpoint_undo_count] =
      $i_nonblank;
    $forced_breakpoint_undo_count++;

    # if we break at an opening container..break at the closing
    set_closing_breakpoint($i_nonblank)
      if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ );

    return;
}
16285
sub clear_breakpoint_undo_stack {

    # Empty the undo stack of forced breakpoints by resetting its count;
    # breakpoints which have already been set are not changed.
    return ( $forced_breakpoint_undo_count = 0 );
}
16289
sub undo_forced_breakpoint_stack {

    # Remove forced breakpoints, most recent first, until the undo stack
    # has been reduced to $i_start entries.
    #
    #  $i_start = the number of entries to leave on the undo stack;
    #    a negative value is reported as a program bug and treated as 0.
    my $i_start = shift;
    if ( $i_start < 0 ) {

        # report the bad value before replacing it, otherwise the
        # message would always show i=0
        my ( $a, $b, $c ) = caller();
        warning(
"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
        );
        $i_start = 0;
    }

    while ( $forced_breakpoint_undo_count > $i_start ) {
        my $i =
          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
        if ( $i >= 0 && $i <= $max_index_to_go ) {

            # clear the break and keep the total count in step
            $forced_breakpoint_to_go[$i] = 0;
            $forced_breakpoint_count--;

            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $a, $b, $c ) = caller();
                print(
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
                );
            };
        }

        # shouldn't happen, but not a critical error
        else {
            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $a, $b, $c ) = caller();
                print(
"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
                );
            };
        }
    }
}
16327
16328 {    # begin recombine_breakpoints
16329
16330     my %is_amp_amp;
16331     my %is_ternary;
16332     my %is_math_op;
16333
16334     BEGIN {
16335
16336         @_ = qw( && || );
16337         @is_amp_amp{@_} = (1) x scalar(@_);
16338
16339         @_ = qw( ? : );
16340         @is_ternary{@_} = (1) x scalar(@_);
16341
16342         @_ = qw( + - * / );
16343         @is_math_op{@_} = (1) x scalar(@_);
16344     }
16345
16346     sub recombine_breakpoints {
16347
16348         # sub set_continuation_breaks is very liberal in setting line breaks
16349         # for long lines, always setting breaks at good breakpoints, even
16350         # when that creates small lines.  Occasionally small line fragments
16351         # are produced which would look better if they were combined.
16352         # That's the task of this routine, recombine_breakpoints.
16353         #
16354         # $ri_beg = ref to array of BEGinning indexes of each line
16355         # $ri_end = ref to array of ENDing indexes of each line
16356         my ( $ri_beg, $ri_end ) = @_;
16357
16358         my $more_to_do = 1;
16359
16360         # We keep looping over all of the lines of this batch
16361         # until there are no more possible recombinations
16362         my $nmax_last = @$ri_end;
16363         while ($more_to_do) {
16364             my $n_best = 0;
16365             my $bs_best;
16366             my $n;
16367             my $nmax = @$ri_end - 1;
16368
16369             # safety check for infinite loop
16370             unless ( $nmax < $nmax_last ) {
16371
16372             # shouldn't happen because splice below decreases nmax on each pass:
16373             # but i get paranoid sometimes
16374                 die "Program bug-infinite loop in recombine breakpoints\n";
16375             }
16376             $nmax_last  = $nmax;
16377             $more_to_do = 0;
16378             my $previous_outdentable_closing_paren;
16379             my $leading_amp_count = 0;
16380             my $this_line_is_semicolon_terminated;
16381
16382             # loop over all remaining lines in this batch
16383             for $n ( 1 .. $nmax ) {
16384
16385                 #----------------------------------------------------------
16386                 # If we join the current pair of lines,
16387                 # line $n-1 will become the left part of the joined line
16388                 # line $n will become the right part of the joined line
16389                 #
16390                 # Here are Indexes of the endpoint tokens of the two lines:
16391                 #
16392                 #  -----line $n-1--- | -----line $n-----
16393                 #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
16394                 #                    ^
16395                 #                    |
16396                 # We want to decide if we should remove the line break
16397                 # betwen the tokens at $iend_1 and $ibeg_2
16398                 #
16399                 # We will apply a number of ad-hoc tests to see if joining
16400                 # here will look ok.  The code will just issue a 'next'
16401                 # command if the join doesn't look good.  If we get through
16402                 # the gauntlet of tests, the lines will be recombined.
16403                 #----------------------------------------------------------
16404                 #
16405                 # beginning and ending tokens of the lines we are working on
16406                 my $ibeg_1 = $$ri_beg[ $n - 1 ];
16407                 my $iend_1 = $$ri_end[ $n - 1 ];
16408                 my $iend_2 = $$ri_end[$n];
16409                 my $ibeg_2 = $$ri_beg[$n];
16410
16411                 my $ibeg_nmax = $$ri_beg[$nmax];
16412
16413                 # some beginning indexes of other lines, which may not exist
16414                 my $ibeg_0 = $n > 1          ? $$ri_beg[ $n - 2 ] : -1;
16415                 my $ibeg_3 = $n < $nmax      ? $$ri_beg[ $n + 1 ] : -1;
16416                 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
16417
16418                 my $bs_tweak = 0;
16419
16420                 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
16421                 #        $nesting_depth_to_go[$ibeg_1] );
16422
16423 ##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
16424
16425                 # If line $n is the last line, we set some flags and
16426                 # do any special checks for it
16427                 if ( $n == $nmax ) {
16428
16429                     # a terminal '{' should stay where it is
16430                     next if $types_to_go[$ibeg_2] eq '{';
16431
16432                     # set flag if statement $n ends in ';'
16433                     $this_line_is_semicolon_terminated =
16434                       $types_to_go[$iend_2] eq ';'
16435
16436                       # with possible side comment
16437                       || ( $types_to_go[$iend_2] eq '#'
16438                         && $iend_2 - $ibeg_2 >= 2
16439                         && $types_to_go[ $iend_2 - 2 ] eq ';'
16440                         && $types_to_go[ $iend_2 - 1 ] eq 'b' );
16441                 }
16442
16443                 #----------------------------------------------------------
16444                 # Section 1: examine token at $iend_1 (right end of first line
16445                 # of pair)
16446                 #----------------------------------------------------------
16447
16448                 # an isolated '}' may join with a ';' terminated segment
16449                 if ( $types_to_go[$iend_1] eq '}' ) {
16450
16451                     # Check for cases where combining a semicolon terminated
16452                     # statement with a previous isolated closing paren will
16453                     # allow the combined line to be outdented.  This is
16454                     # generally a good move.  For example, we can join up
16455                     # the last two lines here:
16456                     #  (
16457                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16458                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16459                     #    )
16460                     #    = stat($file);
16461                     #
16462                     # to get:
16463                     #  (
16464                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16465                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16466                     #  ) = stat($file);
16467                     #
16468                     # which makes the parens line up.
16469                     #
16470                     # Another example, from Joe Matarazzo, probably looks best
16471                     # with the 'or' clause appended to the trailing paren:
16472                     #  $self->some_method(
16473                     #      PARAM1 => 'foo',
16474                     #      PARAM2 => 'bar'
16475                     #  ) or die "Some_method didn't work";
16476                     #
16477                     $previous_outdentable_closing_paren =
16478                       $this_line_is_semicolon_terminated    # ends in ';'
16479                       && $ibeg_1 == $iend_1    # only one token on last line
16480                       && $tokens_to_go[$iend_1] eq
16481                       ')'                      # must be structural paren
16482
16483                       # only &&, ||, and : if no others seen
16484                       # (but note: our count made below could be wrong
16485                       # due to intervening comments)
16486                       && ( $leading_amp_count == 0
16487                         || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
16488
16489                       # but leading colons probably line up with with a
16490                       # previous colon or question (count could be wrong).
16491                       && $types_to_go[$ibeg_2] ne ':'
16492
16493                       # only one step in depth allowed.  this line must not
16494                       # begin with a ')' itself.
16495                       && ( $nesting_depth_to_go[$iend_1] ==
16496                         $nesting_depth_to_go[$iend_2] + 1 );
16497
16498                     # YVES patch 2 of 2:
16499                     # Allow cuddled eval chains, like this:
16500                     #   eval {
16501                     #       #STUFF;
16502                     #       1; # return true
16503                     #   } or do {
16504                     #       #handle error
16505                     #   };
16506                     # This patch works together with a patch in
16507                     # setting adjusted indentation (where the closing eval
16508                     # brace is outdented if possible).
16509                     # The problem is that an 'eval' block has continuation
16510                     # indentation and it looks better to undo it in some
16511                     # cases.  If we do not use this patch we would get:
16512                     #   eval {
16513                     #       #STUFF;
16514                     #       1; # return true
16515                     #       }
16516                     #       or do {
16517                     #       #handle error
16518                     #     };
16519                     # The alternative, for uncuddled style, is to create
16520                     # a patch in set_adjusted_indentation which undoes
16521                     # the indentation of a leading line like 'or do {'.
16522                     # This doesn't work well with -icb through
16523                     if (
16524                            $block_type_to_go[$iend_1] eq 'eval'
16525                         && !$rOpts->{'line-up-parentheses'}
16526                         && !$rOpts->{'indent-closing-brace'}
16527                         && $tokens_to_go[$iend_2] eq '{'
16528                         && (
16529                             ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ )
16530                             || (   $types_to_go[$ibeg_2] eq 'k'
16531                                 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16532                             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16533                         )
16534                       )
16535                     {
16536                         $previous_outdentable_closing_paren ||= 1;
16537                     }
16538
16539                     next
16540                       unless (
16541                         $previous_outdentable_closing_paren
16542
16543                         # handle '.' and '?' specially below
16544                         || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
16545                       );
16546                 }
16547
16548                 # YVES
16549                 # honor breaks at opening brace
16550                 # Added to prevent recombining something like this:
16551                 #  } || eval { package main;
16552                 elsif ( $types_to_go[$iend_1] eq '{' ) {
16553                     next if $forced_breakpoint_to_go[$iend_1];
16554                 }
16555
16556                 # do not recombine lines with ending &&, ||,
16557                 elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
16558                     next unless $want_break_before{ $types_to_go[$iend_1] };
16559                 }
16560
16561                 # keep a terminal colon
16562                 elsif ( $types_to_go[$iend_1] eq ':' ) {
16563                     next unless $want_break_before{ $types_to_go[$iend_1] };
16564                 }
16565
16566                 # Identify and recombine a broken ?/: chain
16567                 elsif ( $types_to_go[$iend_1] eq '?' ) {
16568
16569                     # Do not recombine different levels
16570                     next
16571                       if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16572
16573                     # do not recombine unless next line ends in :
16574                     next unless $types_to_go[$iend_2] eq ':';
16575                 }
16576
16577                 # for lines ending in a comma...
16578                 elsif ( $types_to_go[$iend_1] eq ',' ) {
16579
16580                     # Do not recombine at comma which is following the
16581                     # input bias.
16582                     # TODO: might be best to make a special flag
16583                     next if ( $old_breakpoint_to_go[$iend_1] );
16584
16585                  # an isolated '},' may join with an identifier + ';'
16586                  # this is useful for the class of a 'bless' statement (bless.t)
16587                     if (   $types_to_go[$ibeg_1] eq '}'
16588                         && $types_to_go[$ibeg_2] eq 'i' )
16589                     {
16590                         next
16591                           unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16592                             && ( $iend_2 == ( $ibeg_2 + 1 ) )
16593                             && $this_line_is_semicolon_terminated );
16594
16595                         # override breakpoint
16596                         $forced_breakpoint_to_go[$iend_1] = 0;
16597                     }
16598
16599                     # but otherwise ..
16600                     else {
16601
16602                         # do not recombine after a comma unless this will leave
16603                         # just 1 more line
16604                         next unless ( $n + 1 >= $nmax );
16605
16606                     # do not recombine if there is a change in indentation depth
16607                         next
16608                           if (
16609                             $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
16610
16611                         # do not recombine a "complex expression" after a
16612                         # comma.  "complex" means it contains a '('.
16613                         my $saw_paren;
16614                         foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16615                             if ( $tokens_to_go[$ii] eq '(' ) {
16616                                 $saw_paren = 1;
16617                                 last;
16618                             }
16619                         }
16620                         next if $saw_paren;
16621                     }
16622                 }
16623
16624                 # opening paren..
16625                 elsif ( $types_to_go[$iend_1] eq '(' ) {
16626
16627                     # No longer doing this
16628                 }
16629
16630                 elsif ( $types_to_go[$iend_1] eq ')' ) {
16631
16632                     # No longer doing this
16633                 }
16634
16635                 # keep a terminal for-semicolon
16636                 elsif ( $types_to_go[$iend_1] eq 'f' ) {
16637                     next;
16638                 }
16639
16640                 # if '=' at end of line ...
16641                 elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
16642
16643                     # keep break after = if it was in input stream
16644                     # this helps prevent 'blinkers'
16645                     next if $old_breakpoint_to_go[$iend_1]
16646
16647                       # don't strand an isolated '='
16648                       && $iend_1 != $ibeg_1;
16649
16650                     my $is_short_quote =
16651                       (      $types_to_go[$ibeg_2] eq 'Q'
16652                           && $ibeg_2 == $iend_2
16653                           && length( $tokens_to_go[$ibeg_2] ) <
16654                           $rOpts_short_concatenation_item_length );
16655                     my $is_ternary =
16656                       ( $types_to_go[$ibeg_1] eq '?'
16657                           && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
16658
16659                     # always join an isolated '=', a short quote, or if this
16660                     # will put ?/: at start of adjacent lines
16661                     if (   $ibeg_1 != $iend_1
16662                         && !$is_short_quote
16663                         && !$is_ternary )
16664                     {
16665                         next
16666                           unless (
16667                             (
16668
16669                                 # unless we can reduce this to two lines
16670                                 $nmax < $n + 2
16671
16672                              # or three lines, the last with a leading semicolon
16673                                 || (   $nmax == $n + 2
16674                                     && $types_to_go[$ibeg_nmax] eq ';' )
16675
16676                                 # or the next line ends with a here doc
16677                                 || $types_to_go[$iend_2] eq 'h'
16678
16679                                # or the next line ends in an open paren or brace
16680                                # and the break hasn't been forced [dima.t]
16681                                 || (  !$forced_breakpoint_to_go[$iend_1]
16682                                     && $types_to_go[$iend_2] eq '{' )
16683                             )
16684
16685                             # do not recombine if the two lines might align well
16686                             # this is a very approximate test for this
16687                             && (   $ibeg_3 >= 0
16688                                 && $types_to_go[$ibeg_2] ne
16689                                 $types_to_go[$ibeg_3] )
16690                           );
16691
16692                         # -lp users often prefer this:
16693                         #  my $title = function($env, $env, $sysarea,
16694                         #                       "bubba Borrower Entry");
16695                         #  so we will recombine if -lp is used and we have an
16696                         #  ending comma
16697                         if (  !$rOpts_line_up_parentheses
16698                             || $types_to_go[$iend_2] ne ',' )
16699                         {
16700
16701                            # otherwise, scan the rhs line up to last token for
16702                            # complexity.  Note that we are not counting the last
16703                            # token in case it is an opening paren.
16704                             my $tv    = 0;
16705                             my $depth = $nesting_depth_to_go[$ibeg_2];
16706                             for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
16707                                 if ( $nesting_depth_to_go[$i] != $depth ) {
16708                                     $tv++;
16709                                     last if ( $tv > 1 );
16710                                 }
16711                                 $depth = $nesting_depth_to_go[$i];
16712                             }
16713
16714                          # ok to recombine if no level changes before last token
16715                             if ( $tv > 0 ) {
16716
16717                                 # otherwise, do not recombine if more than one
16718                                 # level change.
16719                                 next if ( $tv > 1 );
16720
16721                               # check total complexity of the two adjacent lines
16722                               # that will occur if we do this join
16723                                 my $istop =
16724                                   ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
16725                                 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
16726                                     if ( $nesting_depth_to_go[$i] != $depth ) {
16727                                         $tv++;
16728                                         last if ( $tv > 2 );
16729                                     }
16730                                     $depth = $nesting_depth_to_go[$i];
16731                                 }
16732
16733                         # do not recombine if total is more than 2 level changes
16734                                 next if ( $tv > 2 );
16735                             }
16736                         }
16737                     }
16738
16739                     unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16740                         $forced_breakpoint_to_go[$iend_1] = 0;
16741                     }
16742                 }
16743
16744                 # for keywords..
16745                 elsif ( $types_to_go[$iend_1] eq 'k' ) {
16746
16747                     # make major control keywords stand out
16748                     # (recombine.t)
16749                     next
16750                       if (
16751
16752                         #/^(last|next|redo|return)$/
16753                         $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16754
16755                         # but only if followed by multiple lines
16756                         && $n < $nmax
16757                       );
16758
16759                     if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16760                         next
16761                           unless $want_break_before{ $tokens_to_go[$iend_1] };
16762                     }
16763                 }
16764
16765                 # handle trailing + - * /
16766                 elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
16767
16768                     # combine lines if next line has single number
16769                     # or a short term followed by same operator
16770                     my $i_next_nonblank = $ibeg_2;
16771                     my $i_next_next     = $i_next_nonblank + 1;
16772                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16773                     my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
16774                       && (
16775                         $i_next_nonblank == $iend_2
16776                         || (   $i_next_next == $iend_2
16777                             && $is_math_op{ $types_to_go[$i_next_next] } )
16778                         || $types_to_go[$i_next_next] eq ';'
16779                       );
16780
16781                     # find token before last operator of previous line
16782                     my $iend_1_minus = $iend_1;
16783                     $iend_1_minus--
16784                       if ( $iend_1_minus > $ibeg_1 );
16785                     $iend_1_minus--
16786                       if ( $types_to_go[$iend_1_minus] eq 'b'
16787                         && $iend_1_minus > $ibeg_1 );
16788
16789                     my $short_term_follows =
16790                       (      $types_to_go[$iend_2] eq $types_to_go[$iend_1]
16791                           && $types_to_go[$iend_1_minus] =~ /^[in]$/
16792                           && $iend_2 <= $ibeg_2 + 2
16793                           && length( $tokens_to_go[$ibeg_2] ) <
16794                           $rOpts_short_concatenation_item_length );
16795
16796                     next
16797                       unless ( $number_follows || $short_term_follows );
16798                 }
16799
16800                 #----------------------------------------------------------
16801                 # Section 2: Now examine token at $ibeg_2 (left end of second
16802                 # line of pair)
16803                 #----------------------------------------------------------
16804
16805                 # join lines identified above as capable of
16806                 # causing an outdented line with leading closing paren
16807                 if ($previous_outdentable_closing_paren) {
16808                     $forced_breakpoint_to_go[$iend_1] = 0;
16809                 }
16810
16811                 # do not recombine lines with leading :
16812                 elsif ( $types_to_go[$ibeg_2] eq ':' ) {
16813                     $leading_amp_count++;
16814                     next if $want_break_before{ $types_to_go[$ibeg_2] };
16815                 }
16816
16817                 # handle lines with leading &&, ||
16818                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16819
16820                     $leading_amp_count++;
16821
16822                     # ok to recombine if it follows a ? or :
16823                     # and is followed by an open paren..
16824                     my $ok =
16825                       (      $is_ternary{ $types_to_go[$ibeg_1] }
16826                           && $tokens_to_go[$iend_2] eq '(' )
16827
16828                     # or is followed by a ? or : at same depth
16829                     #
16830                     # We are looking for something like this. We can
16831                     # recombine the && line with the line above to make the
16832                     # structure more clear:
16833                     #  return
16834                     #    exists $G->{Attr}->{V}
16835                     #    && exists $G->{Attr}->{V}->{$u}
16836                     #    ? %{ $G->{Attr}->{V}->{$u} }
16837                     #    : ();
16838                     #
16839                     # We should probably leave something like this alone:
16840                     #  return
16841                     #       exists $G->{Attr}->{E}
16842                     #    && exists $G->{Attr}->{E}->{$u}
16843                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
16844                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16845                     #    : ();
16846                     # so that we either have all of the &&'s (or ||'s)
16847                     # on one line, as in the first example, or break at
16848                     # each one as in the second example.  However, it
16849                     # sometimes makes things worse to check for this because
16850                     # it prevents multiple recombinations.  So this is not done.
16851                       || ( $ibeg_3 >= 0
16852                         && $is_ternary{ $types_to_go[$ibeg_3] }
16853                         && $nesting_depth_to_go[$ibeg_3] ==
16854                         $nesting_depth_to_go[$ibeg_2] );
16855
16856                     next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
16857                     $forced_breakpoint_to_go[$iend_1] = 0;
16858
16859                     # tweak the bond strength to give this joint priority
16860                     # over ? and :
16861                     $bs_tweak = 0.25;
16862                 }
16863
16864                 # Identify and recombine a broken ?/: chain
16865                 elsif ( $types_to_go[$ibeg_2] eq '?' ) {
16866
16867                     # Do not recombine different levels
16868                     my $lev = $levels_to_go[$ibeg_2];
16869                     next if ( $lev ne $levels_to_go[$ibeg_1] );
16870
16871                     # Do not recombine a '?' if either next line or
16872                     # previous line does not start with a ':'.  The reasons
16873                     # are that (1) no alignment of the ? will be possible
16874                     # and (2) the expression is somewhat complex, so the
16875                     # '?' is harder to see in the interior of the line.
16876                     my $follows_colon =
16877                       $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
16878                     my $precedes_colon =
16879                       $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16880                     next unless ( $follows_colon || $precedes_colon );
16881
16882                     # we will always combine a ? line following a : line
16883                     if ( !$follows_colon ) {
16884
16885                         # ...otherwise recombine only if it looks like a chain.
16886                         # we will just look at a few nearby lines to see if
16887                         # this looks like a chain.
16888                         my $local_count = 0;
16889                         foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
16890                             $local_count++
16891                               if $ii >= 0
16892                               && $types_to_go[$ii] eq ':'
16893                               && $levels_to_go[$ii] == $lev;
16894                         }
16895                         next unless ( $local_count > 1 );
16896                     }
16897                     $forced_breakpoint_to_go[$iend_1] = 0;
16898                 }
16899
16900                 # do not recombine lines with leading '.'
16901                 elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
16902                     my $i_next_nonblank = $ibeg_2 + 1;
16903                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16904                         $i_next_nonblank++;
16905                     }
16906
16907                     next
16908                       unless (
16909
16910                    # ... unless there is just one and we can reduce
16911                    # this to two lines if we do.  For example, this
16912                    #
16913                    #
16914                    #  $bodyA .=
16915                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16916                    #
16917                    #  looks better than this:
16918                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16919                    #    . '$args .= $pat;'
16920
16921                         (
16922                                $n == 2
16923                             && $n == $nmax
16924                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
16925                         )
16926
16927                         #  ... or this would strand a short quote , like this
16928                         #                . "some long quote"
16929                         #                . "\n";
16930                         || (   $types_to_go[$i_next_nonblank] eq 'Q'
16931                             && $i_next_nonblank >= $iend_2 - 1
16932                             && length( $tokens_to_go[$i_next_nonblank] ) <
16933                             $rOpts_short_concatenation_item_length )
16934                       );
16935                 }
16936
16937                 # handle leading keyword..
16938                 elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
16939
16940                     # handle leading "or"
16941                     if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16942                         next
16943                           unless (
16944                             $this_line_is_semicolon_terminated
16945                             && (
16946
16947                                 # following 'if' or 'unless'
16948                                 $types_to_go[$ibeg_1] eq 'k'
16949                                 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16950
16951                                 # important: only combine a very simple or
16952                                 # statement because the step below may have
16953                                 # combined a trailing 'and' with this or,
16954                                 # and we do not want to then combine
16955                                 # everything together
16956                                 && ( $iend_2 - $ibeg_2 <= 7 )
16957                             )
16958                           );
16959                     }
16960
16961                     # handle leading 'and'
16962                     elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16963
16964                         # Decide if we will combine a single terminal 'and'
16965                         # after an 'if' or 'unless'.
16966
16967                         #     This looks best with the 'and' on the same
16968                         #     line as the 'if':
16969                         #
16970                         #         $a = 1
16971                         #           if $seconds and $nu < 2;
16972                         #
16973                         #     But this looks better as shown:
16974                         #
16975                         #         $a = 1
16976                         #           if !$this->{Parents}{$_}
16977                         #           or $this->{Parents}{$_} eq $_;
16978                         #
16979                         next
16980                           unless (
16981                             $this_line_is_semicolon_terminated
16982                             && (
16983
16984                                 # following 'if' or 'unless' or 'or'
16985                                 $types_to_go[$ibeg_1] eq 'k'
16986                                 && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
16987                                     || $tokens_to_go[$ibeg_1] eq 'or' )
16988                             )
16989                           );
16990                     }
16991
16992                     # handle leading "if" and "unless"
16993                     elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16994
16995                       # FIXME: This is still experimental..may not be too useful
16996                         next
16997                           unless (
16998                             $this_line_is_semicolon_terminated
16999
17000                             #  previous line begins with 'and' or 'or'
17001                             && $types_to_go[$ibeg_1] eq 'k'
17002                             && $is_and_or{ $tokens_to_go[$ibeg_1] }
17003
17004                           );
17005                     }
17006
17007                     # handle all other leading keywords
17008                     else {
17009
17010                         # keywords look best at start of lines,
17011                         # but combine things like "1 while"
17012                         unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
17013                             next
17014                               if ( ( $types_to_go[$iend_1] ne 'k' )
17015                                 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
17016                         }
17017                     }
17018                 }
17019
17020                 # similar treatment of && and || as above for 'and' and 'or':
17021                 # NOTE: This block of code is currently bypassed because
17022                 # of a previous block but is retained for possible future use.
17023                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
17024
17025                     # maybe looking at something like:
17026                     # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
17027
17028                     next
17029                       unless (
17030                         $this_line_is_semicolon_terminated
17031
17032                         # previous line begins with an 'if' or 'unless' keyword
17033                         && $types_to_go[$ibeg_1] eq 'k'
17034                         && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17035
17036                       );
17037                 }
17038
17039                 # handle leading + - * /
17040                 elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
17041                     my $i_next_nonblank = $ibeg_2 + 1;
17042                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
17043                         $i_next_nonblank++;
17044                     }
17045
17046                     my $i_next_next = $i_next_nonblank + 1;
17047                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
17048
17049                     my $is_number = (
17050                         $types_to_go[$i_next_nonblank] eq 'n'
17051                           && ( $i_next_nonblank >= $iend_2 - 1
17052                             || $types_to_go[$i_next_next] eq ';' )
17053                     );
17054
17055                     my $iend_1_nonblank =
17056                       $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
17057                     my $iend_2_nonblank =
17058                       $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
17059
17060                     my $is_short_term =
17061                       (      $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
17062                           && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
17063                           && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
17064                           && $iend_2_nonblank <= $ibeg_2 + 2
17065                           && length( $tokens_to_go[$iend_2_nonblank] ) <
17066                           $rOpts_short_concatenation_item_length );
17067
17068                     # Combine these lines if this line is a single
17069                     # number, or if it is a short term with same
17070                     # operator as the previous line.  For example, in
17071                     # the following code we will combine all of the
17072                     # short terms $A, $B, $C, $D, $E, $F, together
17073                     # instead of leaving them one per line:
17074                     #  my $time =
17075                     #    $A * $B * $C * $D * $E * $F *
17076                     #    ( 2. * $eps * $sigma * $area ) *
17077                     #    ( 1. / $tcold**3 - 1. / $thot**3 );
17078                     # This can be important in math-intensive code.
17079                     next
17080                       unless (
17081                            $is_number
17082                         || $is_short_term
17083
17084                         # or if we can reduce this to two lines if we do.
17085                         || (   $n == 2
17086                             && $n == $nmax
17087                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
17088                       );
17089                 }
17090
17091                 # handle line with leading = or similar
17092                 elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
17093                     next unless ( $n == 1 || $n == $nmax );
17094                     next
17095                       unless (
17096
17097                         # unless we can reduce this to two lines
17098                         $nmax == 2
17099
17100                         # or three lines, the last with a leading semicolon
17101                         || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
17102
17103                         # or the next line ends with a here doc
17104                         || $types_to_go[$iend_2] eq 'h'
17105
17106                         # or this is a short line ending in ;
17107                         || ( $n == $nmax && $this_line_is_semicolon_terminated )
17108                       );
17109                     $forced_breakpoint_to_go[$iend_1] = 0;
17110                 }
17111
17112                 #----------------------------------------------------------
17113                 # Section 3:
17114                 # Combine the lines if we arrive here and it is possible
17115                 #----------------------------------------------------------
17116
17117                 # honor hard breakpoints
17118                 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
17119
17120                 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
17121
17122                 # combined line cannot be too long
17123                 my $excess = excess_line_length( $ibeg_1, $iend_2 );
17124                 next if ( $excess > 0 );
17125
17126                 # Require a few extra spaces before recombining lines if we are
17127                 # at an old breakpoint unless this is a simple list or terminal
17128                 # line.  The goal is to avoid oscillating between two
17129                 # quasi-stable end states.  For example this snippet caused
17130                 # problems:
17131 ##    my $this =
17132 ##    bless {
17133 ##        TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
17134 ##      },
17135 ##      $type;
17136                 next
17137                   if ( $old_breakpoint_to_go[$iend_1]
17138                     && !$this_line_is_semicolon_terminated
17139                     && $n < $nmax
17140                     && $excess + 4 > 0
17141                     && $types_to_go[$iend_2] ne ',' );
17142
17143                 # do not recombine if we would skip in indentation levels
17144                 if ( $n < $nmax ) {
17145                     my $if_next = $$ri_beg[ $n + 1 ];
17146                     next
17147                       if (
17148                            $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
17149                         && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
17150
17151                         # but an isolated 'if (' is undesirable
17152                         && !(
17153                                $n == 1
17154                             && $iend_1 - $ibeg_1 <= 2
17155                             && $types_to_go[$ibeg_1]  eq 'k'
17156                             && $tokens_to_go[$ibeg_1] eq 'if'
17157                             && $tokens_to_go[$iend_1] ne '('
17158                         )
17159                       );
17160                 }
17161
17162                 # honor no-break's
17163                 next if ( $bs == NO_BREAK );
17164
17165                 # remember the pair with the greatest bond strength
17166                 if ( !$n_best ) {
17167                     $n_best  = $n;
17168                     $bs_best = $bs;
17169                 }
17170                 else {
17171
17172                     if ( $bs > $bs_best ) {
17173                         $n_best  = $n;
17174                         $bs_best = $bs;
17175                     }
17176                 }
17177             }
17178
17179             # recombine the pair with the greatest bond strength
17180             if ($n_best) {
17181                 splice @$ri_beg, $n_best, 1;
17182                 splice @$ri_end, $n_best - 1, 1;
17183
17184                 # keep going if we are still making progress
17185                 $more_to_do++;
17186             }
17187         }
17188         return ( $ri_beg, $ri_end );
17189     }
17190 }    # end recombine_breakpoints
17191
sub break_all_chain_tokens {

    # Make the breaks in a chain of operators uniform: if any line of the
    # current set already begins or ends with a "chain operator"
    # (. : && || + etc), then request a break at every interior token of
    # that same type which is in the same container.
    #
    # Given:
    #   $ri_left  = ref to array of the index of the first token of each line
    #   $ri_right = ref to array of the index of the last token of each line
    # Any new break indexes are handed to sub insert_additional_breaks
    # together with these two array refs.
    my ( $ri_left, $ri_right ) = @_;

    my $nmax = $#{$ri_right};

    # '-' is counted as '+' and '/' is counted as '*' when types are compared
    my $chain_family = sub {
        my ($type) = @_;
        return
            $type eq '-' ? '+'
          : $type eq '/' ? '*'
          :                $type;
    };

    #-------------------------------------------------------
    # Step 1: collect the chain operators at the line ends
    #-------------------------------------------------------
    my %saw_chain_type;      # chain types found at an end of some line
    my %left_chain_type;     # type => [ indexes of leading tokens ]
    my %right_chain_type;    # type => [ indexes of trailing tokens ]
    foreach my $n ( 0 .. $nmax ) {
        my ( $il, $ir ) = ( $ri_left->[$n], $ri_right->[$n] );
        my $typel = $chain_family->( $types_to_go[$il] );
        my $typer = $chain_family->( $types_to_go[$ir] );

        # a leading operator counts if its type is a break-before type
        if (   $is_chain_operator{ $tokens_to_go[$il] }
            && $want_break_before{$typel} )
        {

            # a leading '?' ends the examination of this line; its right
            # end is not looked at
            next if ( $typel eq '?' );
            push @{ $left_chain_type{$typel} }, $il;
            $saw_chain_type{$typel} = 1;
        }

        # a trailing operator counts if its type is not a break-before type
        if (   $is_chain_operator{ $tokens_to_go[$ir] }
            && !$want_break_before{$typer}
            && $typer ne '?' )
        {
            push @{ $right_chain_type{$typer} }, $ir;
            $saw_chain_type{$typer} = 1;
        }
    }

    # nothing to do unless some line end has a chain operator
    return unless (%saw_chain_type);

    #-------------------------------------------------------
    # Step 2: collect interior tokens of the types just seen
    #-------------------------------------------------------
    my %interior_chain_type;    # type => [ indexes of interior tokens ]
    foreach my $n ( 0 .. $nmax ) {
        foreach my $i ( $ri_left->[$n] + 1 .. $ri_right->[$n] - 1 ) {
            my $type = $chain_family->( $types_to_go[$i] );
            next unless ( $saw_chain_type{$type} );
            push @{ $interior_chain_type{$type} }, $i;
        }
    }
    return unless (%interior_chain_type);

    #-------------------------------------------------------
    # Step 3: make a list of all new break points
    #-------------------------------------------------------
    my @insert_list;

  TYPE:
    foreach my $type ( keys %saw_chain_type ) {

        # quit if just ONE continuation line with leading .  For example--
        # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
        #  . $contents;
        last TYPE if ( $nmax == 1 && $type =~ /^[\.\+]$/ );

        # The end tokens of this type are tried in two passes: first the
        # leading tokens, then the trailing tokens.  The offset is added
        # to a token index to get the index which goes into the list:
        # -1 for a leading (break-before) type, 0 for a trailing type.
        my @passes = (
            [ $left_chain_type{$type},  -1 ],
            [ $right_chain_type{$type}, 0 ],
        );

      INTERIOR:
        foreach my $itest ( @{ $interior_chain_type{$type} } ) {

          PASS:
            foreach my $rpass (@passes) {
                my ( $ri_ends, $offset ) = @{$rpass};
                next PASS unless ($ri_ends);

                # a nobreak flag here ends all work on this interior token
                my $i_break = $itest + $offset;
                next INTERIOR if ( $nobreak_to_go[$i_break] );

                # use the first end token which is in the same container
                foreach my $i_end ( @{$ri_ends} ) {
                    next unless in_same_container( $i_end, $itest );
                    push @insert_list, $i_break;

                    # Break at matching ? if this : is at a different level.
                    # For example, the ? before $THRf_DEAD in the following
                    # should get a break if its : gets a break.
                    #
                    # my $flags =
                    #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
                    #   : ( $_ & 4 ) ? $THRf_R_DETACHED
                    #   :              $THRf_R_JOINABLE;
                    if (   $type eq ':'
                        && $levels_to_go[$i_end] != $levels_to_go[$itest] )
                    {
                        my $i_question = $mate_index_to_go[$itest];

                        # the index to be listed must not be negative
                        if ( $i_question >= -$offset ) {
                            push @insert_list, $i_question + $offset;
                        }
                    }
                    last;
                }
            }
        }
    }

    # insert any new break points
    if (@insert_list) {
        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
    }
}
17321
sub break_equals {

    # Look for an assignment operator in the first line which could use a
    # breakpoint.  For example, in the following snippet
    #
    #    $HOME = $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # we could break at the = to get this, which is a little nicer:
    #    $HOME =
    #         $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # The logic here follows the logic in set_logical_padding, which
    # will add the padding in the second line to improve alignment.
    #
    # Given:
    #   $ri_left  = ref to array of the index of the first token of each line
    #   $ri_right = ref to array of the index of the last token of each line
    # Any new break indexes are handed to sub insert_additional_breaks
    # together with these two array refs.
    my ( $ri_left, $ri_right ) = @_;

    # there must be a first line plus at least two more lines
    return unless ( @{$ri_right} >= 3 );

    # The second and third lines must each begin with a chain operator.
    # The lookup key is the token itself if it begins with a word
    # character (a word operator like 'and' or 'or'); otherwise it is the
    # token type (+ - * / : ? && ||).
    my ( $i_second, $i_third ) = @{$ri_left}[ 1, 2 ];
    foreach my $i_lead ( $i_second, $i_third ) {
        my $tok = $tokens_to_go[$i_lead];
        my $key = ( $tok =~ /^\w/ ) ? $tok : $types_to_go[$i_lead];
        return unless ( $is_chain_operator{$key} );
    }

    # ... and these two leading tokens must be identical and at the same
    # nesting depth
    my $depth_beg = $nesting_depth_to_go[$i_third];
    return
      unless ( $tokens_to_go[$i_third] eq $tokens_to_go[$i_second]
        && $nesting_depth_to_go[$i_second] eq $depth_beg );

    # Scan the interior of the first line, from right to left, and list a
    # break at each assignment operator which is at that same depth.
    my ( $il, $ir ) = ( $ri_left->[0], $ri_right->[0] );
    my @insert_list;
    foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
        my $type = $types_to_go[$i];
        next
          unless ( $is_assignment{$type}
            && $nesting_depth_to_go[$i] eq $depth_beg );

        # list the previous index for a break-before type
        push @insert_list, $want_break_before{$type} ? $i - 1 : $i;
    }

    # Break after a 'return' followed by a chain of operators
    #  return ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    # To give:
    #  return
    #       ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    # Note that the token tested here is the one at index 0.
    if (   $ir > $il
        && $types_to_go[0] eq 'k'
        && $tokens_to_go[0] eq 'return'
        && $nesting_depth_to_go[0] eq $depth_beg )
    {
        push @insert_list, 0;
    }

    return unless (@insert_list);

    # One final check...
    # scan the second and third lines, past their leading tokens, and be
    # sure there are no assignments at this depth.  We want to avoid
    # breaking at an = to make something like this:
    #    unless ( $icon =
    #           $html_icons{"$type-$state"}
    #        or $icon = $html_icons{$type}
    #        or $icon = $html_icons{$state} )
    foreach my $n ( 1, 2 ) {
        return
          if grep {
                 $is_assignment{ $types_to_go[$_] }
              && $nesting_depth_to_go[$_] eq $depth_beg
          } ( $ri_left->[$n] + 1 .. $ri_right->[$n] );
    }

    # ok, insert the new break points (the list is not empty here)
    insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
17432
sub insert_final_breaks {

    # For long ternary chains: find the first ':' which is at either end
    # of a line, and if its matching '?' has a positive index then look
    # backward from that '?' for one good place to add a break.
    #
    # Given:
    #   $ri_left  = ref to array of the index of the first token of each line
    #   $ri_right = ref to array of the index of the last token of each line
    # A new break index, if any, is handed to sub insert_additional_breaks
    # together with these two array refs.
    my ( $ri_left, $ri_right ) = @_;

    my $nmax = @$ri_right - 1;

    # Scan the left and right end tokens of the lines, in order, for the
    # first ':'.  Give up if a '?' is seen at a line end on the way.
    my $i_first_colon = -1;
    for my $n ( 0 .. $nmax ) {
        my $il    = $$ri_left[$n];
        my $ir    = $$ri_right[$n];
        my $typel = $types_to_go[$il];
        my $typer = $types_to_go[$ir];
        return if ( $typel eq '?' );
        return if ( $typer eq '?' );
        if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
        elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
    }
    return unless ( $i_first_colon > 0 );

    # the '?' which goes with this ':' must have a positive index
    my $i_question = $mate_index_to_go[$i_first_colon];
    return unless ( $i_question > 0 );

    # Work backward from the token just before the '?' and stop at the
    # first good break which is in the same container as the '?'.
    my @insert_list;
    for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii-- ) {
        my $token = $tokens_to_go[$ii];
        my $type  = $types_to_go[$ii];

        # For now, a good break is either a comma or a 'return'.
        if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
            && in_same_container( $ii, $i_question ) )
        {
            push @insert_list, $ii;
            last;
        }
    }

    # insert any new break points
    if (@insert_list) {
        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
    }
}
17481
sub in_same_container {

    # Return 1 if the tokens at indexes $i1 and $i2 are in the same
    # container and are not separated by a comma, ? or : (see the loop
    # below for the exact separators).  Otherwise return nothing.
    my ( $i1, $i2 ) = @_;

    # The reference type and depth are those of the first index, as given
    # by the caller, even if it turns out to be the larger index.
    my $type  = $types_to_go[$i1];
    my $depth = $nesting_depth_to_go[$i1];
    return unless ( $nesting_depth_to_go[$i2] == $depth );

    # put the two indexes in increasing order
    my ( $i_lo, $i_hi ) = ( $i2 < $i1 ) ? ( $i2, $i1 ) : ( $i1, $i2 );

    ###########################################################
    # This is potentially a very slow routine and not critical.
    # For safety just give up for large differences.
    # See test file 'infinite_loop.txt'
    # TODO: replace this loop with a data structure
    ###########################################################
    return if ( $i_hi - $i_lo > 200 );

    my $is_colon = ( $type eq ':' );
    foreach my $i ( $i_lo + 1 .. $i_hi - 1 ) {

        # skip tokens which are nested deeper; a shallower token means
        # that we have left the container
        my $depth_i = $nesting_depth_to_go[$i];
        next   if ( $depth_i > $depth );
        return if ( $depth_i < $depth );

        my $tok = $tokens_to_go[$i];
        $tok = ',' if $tok eq '=>';    # treat => same as ,

        # a comma is a separator for any reference type
        return if ( $tok =~ /^[\,]$/ );

        # and that is the only separator when the reference type is ':'
        next if ($is_colon);

        # Example: we would not want to break at any of these .'s
        #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
        return if ( $tok =~ /^[\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
    }
    return 1;
}
17518
17519 sub set_continuation_breaks {
17520
17521     # Define an array of indexes for inserting newline characters to
17522     # keep the line lengths below the maximum desired length.  There is
17523     # an implied break after the last token, so it need not be included.
17524
17525     # Method:
17526     # This routine is part of series of routines which adjust line
17527     # lengths.  It is only called if a statement is longer than the
17528     # maximum line length, or if a preliminary scanning located
17529     # desirable break points.   Sub scan_list has already looked at
17530     # these tokens and set breakpoints (in array
17531     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17532     # after commas, after opening parens, and before closing parens).
17533     # This routine will honor these breakpoints and also add additional
17534     # breakpoints as necessary to keep the line length below the maximum
17535     # requested.  It bases its decision on where the 'bond strength' is
17536     # lowest.
17537
17538     # Output: returns references to the arrays:
17539     #  @i_first
17540     #  @i_last
17541     # which contain the indexes $i of the first and last tokens on each
17542     # line.
17543
17544     # In addition, the array:
17545     #   $forced_breakpoint_to_go[$i]
17546     # may be updated to be =1 for any index $i after which there must be
17547     # a break.  This signals later routines not to undo the breakpoint.
17548
17549     my $saw_good_break = shift;
17550     my @i_first        = ();      # the first index to output
17551     my @i_last         = ();      # the last index to output
17552     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
17553     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17554
17555     set_bond_strengths();
17556
17557     my $imin = 0;
17558     my $imax = $max_index_to_go;
17559     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17560     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17561     my $i_begin = $imin;          # index for starting next iteration
17562
17563     my $leading_spaces          = leading_spaces_to_go($imin);
17564     my $line_count              = 0;
17565     my $last_break_strength     = NO_BREAK;
17566     my $i_last_break            = -1;
17567     my $max_bias                = 0.001;
17568     my $tiny_bias               = 0.0001;
17569     my $leading_alignment_token = "";
17570     my $leading_alignment_type  = "";
17571
17572     # see if any ?/:'s are in order
17573     my $colons_in_order = 1;
17574     my $last_tok        = "";
17575     my @colon_list  = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
17576     my $colon_count = @colon_list;
17577     foreach (@colon_list) {
17578         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17579         $last_tok = $_;
17580     }
17581
17582     # This is a sufficient but not necessary condition for colon chain
17583     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
17584
17585     #-------------------------------------------------------
17586     # BEGINNING of main loop to set continuation breakpoints
17587     # Keep iterating until we reach the end
17588     #-------------------------------------------------------
17589     while ( $i_begin <= $imax ) {
17590         my $lowest_strength        = NO_BREAK;
17591         my $starting_sum           = $lengths_to_go[$i_begin];
17592         my $i_lowest               = -1;
17593         my $i_test                 = -1;
17594         my $lowest_next_token      = '';
17595         my $lowest_next_type       = 'b';
17596         my $i_lowest_next_nonblank = -1;
17597
17598         #-------------------------------------------------------
17599         # BEGINNING of inner loop to find the best next breakpoint
17600         #-------------------------------------------------------
17601         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17602             my $type       = $types_to_go[$i_test];
17603             my $token      = $tokens_to_go[$i_test];
17604             my $next_type  = $types_to_go[ $i_test + 1 ];
17605             my $next_token = $tokens_to_go[ $i_test + 1 ];
17606             my $i_next_nonblank =
17607               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
17608             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
17609             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
17610             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17611             my $strength                 = $bond_strength_to_go[$i_test];
17612
17613             # use old breaks as a tie-breaker.  For example to
17614             # prevent blinkers with -pbp in this code:
17615
17616 ##@keywords{
17617 ##    qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
17618 ##    = ();
17619
17620             # At the same time try to prevent a leading * in this code
17621             # with the default formatting:
17622             #
17623 ##                return
17624 ##                    factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
17625 ##                  * ( $x**( $a - 1 ) )
17626 ##                  * ( ( 1 - $x )**( $b - 1 ) );
17627
17628             # reduce strength a bit to break ties at an old breakpoint ...
17629             $strength -= $tiny_bias
17630               if $old_breakpoint_to_go[$i_test]
17631
17632               # which is a 'good' breakpoint, meaning ...
17633               # we don't want to break before it
17634               && !$want_break_before{$type}
17635
17636               # and either we want to break before the next token
17637               # or the next token is not short (i.e. not a '*', '/' etc.)
17638               && $i_next_nonblank <= $imax
17639               && (
17640                 $want_break_before{$next_nonblank_type}
17641                 || ( $lengths_to_go[ $i_next_nonblank + 1 ] -
17642                     $lengths_to_go[$i_next_nonblank] > 2 )
17643                 || $next_nonblank_type =~ /^[\(\[\{L]$/
17644               );
17645
17646             my $must_break = 0;
17647
17648             # FIXME: Might want to be able to break after these
17649             # force an immediate break at certain operators
17650             # with lower level than the start of the line
17651             if (
17652                 (
17653                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17654                     || (   $next_nonblank_type eq 'k'
17655                         && $next_nonblank_token =~ /^(and|or)$/ )
17656                 )
17657                 && ( $nesting_depth_to_go[$i_begin] >
17658                     $nesting_depth_to_go[$i_next_nonblank] )
17659               )
17660             {
17661                 set_forced_breakpoint($i_next_nonblank);
17662             }
17663
17664             if (
17665
17666                 # Try to put a break where requested by scan_list
17667                 $forced_breakpoint_to_go[$i_test]
17668
17669                 # break between ) { in a continued line so that the '{' can
17670                 # be outdented
17671                 # See similar logic in scan_list which catches instances
17672                 # where a line is just something like ') {'
17673                 || (   $line_count
17674                     && ( $token              eq ')' )
17675                     && ( $next_nonblank_type eq '{' )
17676                     && ($next_nonblank_block_type)
17677                     && !$rOpts->{'opening-brace-always-on-right'} )
17678
17679                 # There is an implied forced break at a terminal opening brace
17680                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17681               )
17682             {
17683
17684                 # Forced breakpoints must sometimes be overridden, for example
17685                 # because of a side comment causing a NO_BREAK.  It is easier
17686                 # to catch this here than when they are set.
17687                 if ( $strength < NO_BREAK ) {
17688                     $strength   = $lowest_strength - $tiny_bias;
17689                     $must_break = 1;
17690                 }
17691             }
17692
17693             # quit if a break here would put a good terminal token on
17694             # the next line and we already have a possible break
17695             if (
17696                    !$must_break
17697                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
17698                 && (
17699                     (
17700                         $leading_spaces +
17701                         $lengths_to_go[ $i_next_nonblank + 1 ] -
17702                         $starting_sum
17703                     ) > $rOpts_maximum_line_length
17704                 )
17705               )
17706             {
17707                 last if ( $i_lowest >= 0 );
17708             }
17709
17710             # Avoid a break which would strand a single punctuation
17711             # token.  For example, we do not want to strand a leading
17712             # '.' which is followed by a long quoted string.
17713             # But note that we do want to do this with -extrude (l=1)
17714             # so please test any changes to this code on -extrude.
17715             if (
17716                    !$must_break
17717                 && ( $i_test == $i_begin )
17718                 && ( $i_test < $imax )
17719                 && ( $token eq $type )
17720                 && (
17721                     (
17722                         $leading_spaces +
17723                         $lengths_to_go[ $i_test + 1 ] -
17724                         $starting_sum
17725                     ) < $rOpts_maximum_line_length
17726                 )
17727               )
17728             {
17729                 $i_test++;
17730
17731                 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
17732                     $i_test++;
17733                 }
17734                 redo;
17735             }
17736
17737             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17738             {
17739
17740                 # break at previous best break if it would have produced
17741                 # a leading alignment of certain common tokens, and it
17742                 # is different from the latest candidate break
17743                 last
17744                   if ($leading_alignment_type);
17745
17746                 # Force at least one breakpoint if old code had good
17747                 # break It is only called if a breakpoint is required or
17748                 # desired.  This will probably need some adjustments
17749                 # over time.  A goal is to try to be sure that, if a new
17750                 # side comment is introduced into formated text, then
17751                 # the same breakpoints will occur.  scbreak.t
17752                 last
17753                   if (
17754                     $i_test == $imax                # we are at the end
17755                     && !$forced_breakpoint_count    #
17756                     && $saw_good_break              # old line had good break
17757                     && $type =~ /^[#;\{]$/          # and this line ends in
17758                                                     # ';' or side comment
17759                     && $i_last_break < 0        # and we haven't made a break
17760                     && $i_lowest > 0            # and we saw a possible break
17761                     && $i_lowest < $imax - 1    # (but not just before this ;)
17762                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17763                   );
17764
17765                 $lowest_strength        = $strength;
17766                 $i_lowest               = $i_test;
17767                 $lowest_next_token      = $next_nonblank_token;
17768                 $lowest_next_type       = $next_nonblank_type;
17769                 $i_lowest_next_nonblank = $i_next_nonblank;
17770                 last if $must_break;
17771
17772                 # set flags to remember if a break here will produce a
17773                 # leading alignment of certain common tokens
17774                 if (   $line_count > 0
17775                     && $i_test < $imax
17776                     && ( $lowest_strength - $last_break_strength <= $max_bias )
17777                   )
17778                 {
17779                     my $i_last_end = $i_begin - 1;
17780                     if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
17781                     my $tok_beg  = $tokens_to_go[$i_begin];
17782                     my $type_beg = $types_to_go[$i_begin];
17783                     if (
17784
17785                         # check for leading alignment of certain tokens
17786                         (
17787                                $tok_beg eq $next_nonblank_token
17788                             && $is_chain_operator{$tok_beg}
17789                             && (   $type_beg eq 'k'
17790                                 || $type_beg eq $tok_beg )
17791                             && $nesting_depth_to_go[$i_begin] >=
17792                             $nesting_depth_to_go[$i_next_nonblank]
17793                         )
17794
17795                         || (   $tokens_to_go[$i_last_end] eq $token
17796                             && $is_chain_operator{$token}
17797                             && ( $type eq 'k' || $type eq $token )
17798                             && $nesting_depth_to_go[$i_last_end] >=
17799                             $nesting_depth_to_go[$i_test] )
17800                       )
17801                     {
17802                         $leading_alignment_token = $next_nonblank_token;
17803                         $leading_alignment_type  = $next_nonblank_type;
17804                     }
17805                 }
17806             }
17807
17808             my $too_long =
17809               ( $i_test >= $imax )
17810               ? 1
17811               : (
17812                 (
17813                     $leading_spaces +
17814                       $lengths_to_go[ $i_test + 2 ] -
17815                       $starting_sum
17816                 ) > $rOpts_maximum_line_length
17817               );
17818
17819             FORMATTER_DEBUG_FLAG_BREAK
17820               && print
17821 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
17822
17823             # allow one extra terminal token after exceeding line length
17824             # if it would strand this token.
17825             if (   $rOpts_fuzzy_line_length
17826                 && $too_long
17827                 && ( $i_lowest == $i_test )
17828                 && ( length($token) > 1 )
17829                 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
17830             {
17831                 $too_long = 0;
17832             }
17833
17834             last
17835               if (
17836                 ( $i_test == $imax )    # we're done if no more tokens,
17837                 || (
17838                     ( $i_lowest >= 0 )    # or no more space and we have a break
17839                     && $too_long
17840                 )
17841               );
17842         }
17843
17844         #-------------------------------------------------------
17845         # END of inner loop to find the best next breakpoint
17846         # Now decide exactly where to put the breakpoint
17847         #-------------------------------------------------------
17848
17849         # it's always ok to break at imax if no other break was found
17850         if ( $i_lowest < 0 ) { $i_lowest = $imax }
17851
17852         # semi-final index calculation
17853         my $i_next_nonblank = (
17854             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17855             ? $i_lowest + 2
17856             : $i_lowest + 1
17857         );
17858         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17859         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17860
17861         #-------------------------------------------------------
17862         # ?/: rule 1 : if a break here will separate a '?' on this
17863         # line from its closing ':', then break at the '?' instead.
17864         #-------------------------------------------------------
17865         my $i;
17866         foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17867             next unless ( $tokens_to_go[$i] eq '?' );
17868
17869             # do not break if probable sequence of ?/: statements
17870             next if ($is_colon_chain);
17871
17872             # do not break if statement is broken by side comment
17873             next
17874               if (
17875                 $tokens_to_go[$max_index_to_go] eq '#'
17876                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
17877                     $max_index_to_go ) !~ /^[\;\}]$/
17878               );
17879
17880             # no break needed if matching : is also on the line
17881             next
17882               if ( $mate_index_to_go[$i] >= 0
17883                 && $mate_index_to_go[$i] <= $i_next_nonblank );
17884
17885             $i_lowest = $i;
17886             if ( $want_break_before{'?'} ) { $i_lowest-- }
17887             last;
17888         }
17889
17890         #-------------------------------------------------------
17891         # END of inner loop to find the best next breakpoint:
17892         # Break the line after the token with index i=$i_lowest
17893         #-------------------------------------------------------
17894
17895         # final index calculation
17896         $i_next_nonblank = (
17897             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17898             ? $i_lowest + 2
17899             : $i_lowest + 1
17900         );
17901         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17902         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17903
17904         FORMATTER_DEBUG_FLAG_BREAK
17905           && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
17906
17907         #-------------------------------------------------------
17908         # ?/: rule 2 : if we break at a '?', then break at its ':'
17909         #
17910         # Note: this rule is also in sub scan_list to handle a break
17911         # at the start and end of a line (in case breaks are dictated
17912         # by side comments).
17913         #-------------------------------------------------------
17914         if ( $next_nonblank_type eq '?' ) {
17915             set_closing_breakpoint($i_next_nonblank);
17916         }
17917         elsif ( $types_to_go[$i_lowest] eq '?' ) {
17918             set_closing_breakpoint($i_lowest);
17919         }
17920
17921         #-------------------------------------------------------
17922         # ?/: rule 3 : if we break at a ':' then we save
17923         # its location for further work below.  We may need to go
17924         # back and break at its '?'.
17925         #-------------------------------------------------------
17926         if ( $next_nonblank_type eq ':' ) {
17927             push @i_colon_breaks, $i_next_nonblank;
17928         }
17929         elsif ( $types_to_go[$i_lowest] eq ':' ) {
17930             push @i_colon_breaks, $i_lowest;
17931         }
17932
17933         # here we should set breaks for all '?'/':' pairs which are
17934         # separated by this line
17935
17936         $line_count++;
17937
17938         # save this line segment, after trimming blanks at the ends
17939         push( @i_first,
17940             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17941         push( @i_last,
17942             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17943
17944         # set a forced breakpoint at a container opening, if necessary, to
17945         # signal a break at a closing container.  Excepting '(' for now.
17946         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
17947             && !$forced_breakpoint_to_go[$i_lowest] )
17948         {
17949             set_closing_breakpoint($i_lowest);
17950         }
17951
17952         # get ready to go again
17953         $i_begin                 = $i_lowest + 1;
17954         $last_break_strength     = $lowest_strength;
17955         $i_last_break            = $i_lowest;
17956         $leading_alignment_token = "";
17957         $leading_alignment_type  = "";
17958         $lowest_next_token       = '';
17959         $lowest_next_type        = 'b';
17960
17961         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17962             $i_begin++;
17963         }
17964
17965         # update indentation size
17966         if ( $i_begin <= $imax ) {
17967             $leading_spaces = leading_spaces_to_go($i_begin);
17968         }
17969     }
17970
17971     #-------------------------------------------------------
17972     # END of main loop to set continuation breakpoints
17973     # Now go back and make any necessary corrections
17974     #-------------------------------------------------------
17975
17976     #-------------------------------------------------------
17977     # ?/: rule 4 -- if we broke at a ':', then break at
17978     # corresponding '?' unless this is a chain of ?: expressions
17979     #-------------------------------------------------------
17980     if (@i_colon_breaks) {
17981
17982         # using a simple method for deciding if we are in a ?/: chain --
17983         # this is a chain if it has multiple ?/: pairs all in order;
17984         # otherwise not.
17985         # Note that if line starts in a ':' we count that above as a break
17986         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17987
17988         unless ($is_chain) {
17989             my @insert_list = ();
17990             foreach (@i_colon_breaks) {
17991                 my $i_question = $mate_index_to_go[$_];
17992                 if ( $i_question >= 0 ) {
17993                     if ( $want_break_before{'?'} ) {
17994                         $i_question--;
17995                         if (   $i_question > 0
17996                             && $types_to_go[$i_question] eq 'b' )
17997                         {
17998                             $i_question--;
17999                         }
18000                     }
18001
18002                     if ( $i_question >= 0 ) {
18003                         push @insert_list, $i_question;
18004                     }
18005                 }
18006                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
18007             }
18008         }
18009     }
18010     return ( \@i_first, \@i_last, $colon_count );
18011 }
18012
sub insert_additional_breaks {

    # Add line breaks at requested token indexes after sub
    # set_continuation_breaks has made its preliminary breaks.
    #
    # Given:
    #   $ri_break_list = ref to a list of token indexes; a break is
    #                    wanted after each of these tokens
    #   $ri_first      = ref to list of the first token index of each line
    #   $ri_last       = ref to list of the last token index of each line
    #
    # The lists @$ri_first and @$ri_last are updated in place: a line
    # which contains a requested break is replaced by the two lines on
    # either side of the break.  A request which would not actually split
    # a line is ignored.  The break list itself is never modified.
    #
    # Reads the global array @types_to_go to avoid leaving a blank token
    # (type 'b') at either side of a new break.

    my ( $ri_break_list, $ri_first, $ri_last ) = @_;
    my $line_number = 0;

    # Requests are handled in increasing index order so that the line
    # search below only ever has to move forward.
    foreach my $i_requested ( sort { $a <=> $b } @{$ri_break_list} ) {

        # Work on a private copy.  The list returned by 'sort' aliases
        # the caller's elements, so adjusting the loop variable itself
        # would silently rewrite the caller's break list.
        my $i_break_left = $i_requested;

        # find the line which contains this index
        my $i_f = $ri_first->[$line_number];
        my $i_l = $ri_last->[$line_number];
        while ( $i_break_left >= $i_l ) {
            $line_number++;

            # shouldn't happen unless caller passes bad indexes
            if ( $line_number >= @{$ri_last} ) {
                warning(
"Non-fatal program bug: couldn't set break at $i_break_left\n"
                );
                report_definite_bug();
                return;
            }
            $i_f = $ri_first->[$line_number];
            $i_l = $ri_last->[$line_number];
        }

        # Do not leave a blank at the end of a line; back up if necessary
        if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }

        # ... nor a blank at the start of the following line
        my $i_break_right = $i_break_left + 1;
        if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }

        # Split the line only if both pieces lie within it
        if (   $i_break_left >= $i_f
            && $i_break_left < $i_l
            && $i_break_right > $i_f
            && $i_break_right <= $i_l )
        {
            splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
            splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
        }
    }
    return;
}
18058
sub set_closing_breakpoint {

    # Request a breakpoint at the closing token which matches the opening
    # token at index $i_break.  This is used for a '?' (to break at its
    # matching ':') and for an opening '{' or '[' at which a line has been
    # broken.
    #
    # If the index of the matching token is known (non-negative entry in
    # @mate_index_to_go), a forced breakpoint is set now.  Otherwise, if
    # the token has a type sequence number, that number is flagged in
    # %postponed_breakpoint.
    my $i_break = shift;

    my $i_mate = $mate_index_to_go[$i_break];
    if ( $i_mate >= 0 ) {

        # CAUTION: infinite recursion possible here:
        #   set_closing_breakpoint calls set_forced_breakpoint, and
        #   set_forced_breakpoint call set_closing_breakpoint
        #   ( test files attrib.t, BasicLyx.pm.html).
        # Don't reduce the '2' in the statement below
        if ( $i_mate > $i_break + 2 ) {

            # break before } ] and ), but sub set_forced_breakpoint will decide
            # to break before or after a ? and :
            my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
            set_forced_breakpoint( $i_mate - $inc );
        }
    }
    else {
        my $type_sequence = $type_sequence_to_go[$i_break];
        if ($type_sequence) {
            $postponed_breakpoint{$type_sequence} = 1;
        }
    }
    return;
}
18088
18089 # check to see if output line tabbing agrees with input line
18090 # this can be very useful for debugging a script which has an extra
18091 # or missing brace
sub compare_indentation_levels {

    # Compare the two given indentation levels and keep track of
    # stretches of input in which they differ.  The start and the end of
    # each stretch are noted in the logfile until MAX_NAG_MESSAGES
    # stretches have been seen.
    #
    # Updates the globals $last_tabbing_disagreement,
    # $first_tabbing_disagreement, $in_tabbing_disagreement and
    # $tabbing_disagreement_count.

    my ( $python_indentation_level, $structural_indentation_level ) = @_;

    #---------------------------------------------------
    # Levels agree: close any open disagreement stretch
    #---------------------------------------------------
    if ( $python_indentation_level eq $structural_indentation_level ) {

        return unless ($in_tabbing_disagreement);

        if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
            write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
            );

            if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
                write_logfile_entry(
                    "No further tabbing disagreements will be noted\n");
            }
        }
        $in_tabbing_disagreement = 0;
        return;
    }

    #---------------------------------------------------
    # Levels disagree
    #---------------------------------------------------
    $last_tabbing_disagreement = $input_line_number;

    # nothing more to do if we are already inside a stretch
    return if ($in_tabbing_disagreement);

    # otherwise this line starts a new stretch
    $tabbing_disagreement_count++;

    if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
        write_logfile_entry(
"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
        );
    }
    $in_tabbing_disagreement = $input_line_number;

    # remember where the very first disagreement was seen
    $first_tabbing_disagreement ||= $in_tabbing_disagreement;
    return;
}
18131
18132 #####################################################################
18133 #
18134 # the Perl::Tidy::IndentationItem class supplies items which contain
18135 # how much whitespace should be used at the start of a line
18136 #
18137 #####################################################################
18138
package Perl::Tidy::IndentationItem;

# An indentation item is a blessed anonymous array.  These constants name
# the slots of that array.
use constant SPACES             => 0;     # total leading white spaces
use constant LEVEL              => 1;     # the indentation 'level'
use constant CI_LEVEL           => 2;     # the 'continuation level'
use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
                                          # for this level
use constant CLOSED             => 4;     # index where we saw closing '}'
use constant COMMA_COUNT        => 5;     # how many commas at this level?
use constant SEQUENCE_NUMBER    => 6;     # output batch number
use constant INDEX              => 7;     # index in output batch list
use constant HAVE_CHILD         => 8;     # any dependents?
use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
                                          # we would like to move to get
                                          # alignment (negative if left)
use constant ALIGN_PAREN        => 10;    # do we want to try to align
                                          # with an opening structure?
use constant MARKED             => 11;    # if visited by corrector logic
use constant STACK_DEPTH        => 12;    # indentation nesting depth
use constant STARTING_INDEX     => 13;    # first token index of this level
use constant ARROW_COUNT        => 14;    # how many =>'s

sub new {

    # Constructor: make an 'indentation_item' describing one level of
    # leading whitespace when the '-lp' indentation is used.
    #
    # The caller supplies, in order: spaces, level, ci_level,
    # available_spaces, index, gnu_sequence_number, align_paren,
    # stack_depth and starting_index.  The remaining slots start out as
    # CLOSED = -1 and zero for the counts and flags.
    #
    # Returns a reference to a blessed anonymous array; see the constants
    # above for the storage scheme.
    my (
        $class,               $spaces,           $level,
        $ci_level,            $available_spaces, $index,
        $gnu_sequence_number, $align_paren,      $stack_depth,
        $starting_index,
    ) = @_;

    my @item;

    # values given by the caller
    $item[SPACES]           = $spaces;
    $item[LEVEL]            = $level;
    $item[CI_LEVEL]         = $ci_level;
    $item[AVAILABLE_SPACES] = $available_spaces;
    $item[SEQUENCE_NUMBER]  = $gnu_sequence_number;
    $item[INDEX]            = $index;
    $item[ALIGN_PAREN]      = $align_paren;
    $item[STACK_DEPTH]      = $stack_depth;
    $item[STARTING_INDEX]   = $starting_index;

    # initial state of everything else
    $item[CLOSED]             = -1;
    $item[COMMA_COUNT]        = 0;
    $item[HAVE_CHILD]         = 0;
    $item[RECOVERABLE_SPACES] = 0;
    $item[MARKED]             = 0;
    $item[ARROW_COUNT]        = 0;

    return bless \@item, $class;
}

sub permanently_decrease_AVAILABLE_SPACES {

    # Permanently give up indentation spaces at one indentation item.
    # At most the currently available number of spaces is removed.  The
    # recoverable space count is reset to zero.  Returns the number of
    # spaces actually removed.
    # NOTE: if there are child nodes, their total SPACES must be reduced
    # by the caller.

    my ( $item, $spaces_needed ) = @_;

    my $deleted_spaces = $item->get_AVAILABLE_SPACES();
    if ( $deleted_spaces > $spaces_needed ) {
        $deleted_spaces = $spaces_needed;
    }

    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
    $item->decrease_SPACES($deleted_spaces);
    $item->set_RECOVERABLE_SPACES(0);

    return $deleted_spaces;
}

sub tentatively_decrease_AVAILABLE_SPACES {

    # Tentatively give up indentation spaces at one indentation item.
    # At most the currently available number of spaces is removed, and
    # the amount removed is added to the recoverable space count so that
    # it may be undone later.  Returns the number of spaces removed.
    # NOTE: if there are child nodes, their total SPACES must be reduced
    # by the caller.

    my ( $item, $spaces_needed ) = @_;

    my $deleted_spaces = $item->get_AVAILABLE_SPACES();
    if ( $deleted_spaces > $spaces_needed ) {
        $deleted_spaces = $spaces_needed;
    }

    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
    $item->decrease_SPACES($deleted_spaces);
    $item->increase_RECOVERABLE_SPACES($deleted_spaces);

    return $deleted_spaces;
}

#--------------------------------------------------------------------
# Access methods.  A 'get_' method returns the value of a slot.  A
# 'set_', 'increase_' or 'decrease_' method changes the slot only if
# given a defined value, and always returns the resulting slot value.
#--------------------------------------------------------------------

sub get_STACK_DEPTH {
    my ($self) = @_;
    return $self->[STACK_DEPTH];
}

sub get_SPACES {
    my ($self) = @_;
    return $self->[SPACES];
}

sub get_MARKED {
    my ($self) = @_;
    return $self->[MARKED];
}

sub set_MARKED {
    my ( $self, $value ) = @_;
    $self->[MARKED] = $value if ( defined($value) );
    return $self->[MARKED];
}

sub get_AVAILABLE_SPACES {
    my ($self) = @_;
    return $self->[AVAILABLE_SPACES];
}

sub decrease_SPACES {
    my ( $self, $value ) = @_;
    $self->[SPACES] -= $value if ( defined($value) );
    return $self->[SPACES];
}

sub decrease_AVAILABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[AVAILABLE_SPACES] -= $value if ( defined($value) );
    return $self->[AVAILABLE_SPACES];
}

sub get_ALIGN_PAREN {
    my ($self) = @_;
    return $self->[ALIGN_PAREN];
}

sub get_RECOVERABLE_SPACES {
    my ($self) = @_;
    return $self->[RECOVERABLE_SPACES];
}

sub set_RECOVERABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[RECOVERABLE_SPACES] = $value if ( defined($value) );
    return $self->[RECOVERABLE_SPACES];
}

sub increase_RECOVERABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[RECOVERABLE_SPACES] += $value if ( defined($value) );
    return $self->[RECOVERABLE_SPACES];
}

sub get_CI_LEVEL {
    my ($self) = @_;
    return $self->[CI_LEVEL];
}

sub get_LEVEL {
    my ($self) = @_;
    return $self->[LEVEL];
}

sub get_SEQUENCE_NUMBER {
    my ($self) = @_;
    return $self->[SEQUENCE_NUMBER];
}

sub get_INDEX {
    my ($self) = @_;
    return $self->[INDEX];
}

sub get_STARTING_INDEX {
    my ($self) = @_;
    return $self->[STARTING_INDEX];
}

sub set_HAVE_CHILD {
    my ( $self, $value ) = @_;
    $self->[HAVE_CHILD] = $value if ( defined($value) );
    return $self->[HAVE_CHILD];
}

sub get_HAVE_CHILD {
    my ($self) = @_;
    return $self->[HAVE_CHILD];
}

sub set_ARROW_COUNT {
    my ( $self, $value ) = @_;
    $self->[ARROW_COUNT] = $value if ( defined($value) );
    return $self->[ARROW_COUNT];
}

sub get_ARROW_COUNT {
    my ($self) = @_;
    return $self->[ARROW_COUNT];
}

sub set_COMMA_COUNT {
    my ( $self, $value ) = @_;
    $self->[COMMA_COUNT] = $value if ( defined($value) );
    return $self->[COMMA_COUNT];
}

sub get_COMMA_COUNT {
    my ($self) = @_;
    return $self->[COMMA_COUNT];
}

sub set_CLOSED {
    my ( $self, $value ) = @_;
    $self->[CLOSED] = $value if ( defined($value) );
    return $self->[CLOSED];
}

sub get_CLOSED {
    my ($self) = @_;
    return $self->[CLOSED];
}
18372
18373 #####################################################################
18374 #
18375 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
18376 # contain a single output line
18377 #
18378 #####################################################################
18379
package Perl::Tidy::VerticalAligner::Line;

{

    use strict;
    use Carp;

    # A Line object is a blessed anonymous array.  These constants name
    # the slots of that array.
    use constant JMAX                      => 0;
    use constant JMAX_ORIGINAL_LINE        => 1;
    use constant RTOKENS                   => 2;
    use constant RFIELDS                   => 3;
    use constant RPATTERNS                 => 4;
    use constant INDENTATION               => 5;
    use constant LEADING_SPACE_COUNT       => 6;
    use constant OUTDENT_LONG_LINES        => 7;
    use constant LIST_TYPE                 => 8;
    use constant IS_HANGING_SIDE_COMMENT   => 9;
    use constant RALIGNMENTS               => 10;
    use constant MAXIMUM_LINE_LENGTH       => 11;
    use constant RVERTICAL_TIGHTNESS_FLAGS => 12;

    # Correspondence between constructor argument names and array indexes
    my %_index_map = (
        jmax                      => JMAX,
        jmax_original_line        => JMAX_ORIGINAL_LINE,
        rtokens                   => RTOKENS,
        rfields                   => RFIELDS,
        rpatterns                 => RPATTERNS,
        indentation               => INDENTATION,
        leading_space_count       => LEADING_SPACE_COUNT,
        outdent_long_lines        => OUTDENT_LONG_LINES,
        list_type                 => LIST_TYPE,
        is_hanging_side_comment   => IS_HANGING_SIDE_COMMENT,
        ralignments               => RALIGNMENTS,
        maximum_line_length       => MAXIMUM_LINE_LENGTH,
        rvertical_tightness_flags => RVERTICAL_TIGHTNESS_FLAGS,
    );

    # Default values for slots which are not given to the constructor.
    # There is deliberately no default for RALIGNMENTS: that slot holds an
    # array reference, and every object must get its own new array (see
    # sub new) rather than a reference to one array shared by all.
    my @_default_data = ();
    $_default_data[JMAX]                      = undef;
    $_default_data[JMAX_ORIGINAL_LINE]        = undef;
    $_default_data[RTOKENS]                   = undef;
    $_default_data[RFIELDS]                   = undef;
    $_default_data[RPATTERNS]                 = undef;
    $_default_data[INDENTATION]               = undef;
    $_default_data[LEADING_SPACE_COUNT]       = undef;
    $_default_data[OUTDENT_LONG_LINES]        = undef;
    $_default_data[LIST_TYPE]                 = undef;
    $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
    $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
    $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;

    {

        # methods to count object population
        my $_count = 0;
        sub get_count        { $_count; }
        sub _increment_count { ++$_count }
        sub _decrement_count { --$_count }
    }

    sub new {

        # Constructor; may be called as a class method or as an object
        # method.  Arguments are name => value pairs using the names in
        # %_index_map.  For each slot the value is taken from, in order
        # of preference:
        #   1. the named argument, if given,
        #   2. the invoking object, if called as an object method,
        #   3. the default value.
        # The default for the alignment list is a new empty array.
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $class         = $caller_is_obj || $caller;
        my $self          = bless [], $class;

        foreach my $key ( keys %_index_map ) {
            my $index = $_index_map{$key};
            if    ( exists $arg{$key} ) { $self->[$index] = $arg{$key} }
            elsif ($caller_is_obj)      { $self->[$index] = $caller->[$index] }
            elsif ( $index == RALIGNMENTS ) {

                # A private array for each object.  Handing out one
                # common array reference here would make a change to the
                # alignments of one line appear in every other line.
                $self->[$index] = [];
            }
            else { $self->[$index] = $_default_data[$index] }
        }

        $self->_increment_count();
        return $self;
    }

    sub DESTROY {
        $_[0]->_decrement_count();
    }

    # simple 'get' methods; each returns the value of one slot
    sub get_jmax                      { $_[0]->[JMAX] }
    sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
    sub get_rtokens                   { $_[0]->[RTOKENS] }
    sub get_rfields                   { $_[0]->[RFIELDS] }
    sub get_rpatterns                 { $_[0]->[RPATTERNS] }
    sub get_indentation               { $_[0]->[INDENTATION] }
    sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
    sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
    sub get_list_type                 { $_[0]->[LIST_TYPE] }
    sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
    sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }

    # Methods which work on the alignment list.  In the methods taking an
    # index $j, the call is passed on to the alignment object stored at
    # index $j, which must exist.
    sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
    sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
    sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
    sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }

    sub get_starting_column {
        $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
    }

    sub increment_column {
        $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
    }

    # replace the contents of this line's alignment list with the
    # given list of alignments
    sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }

    sub current_field_width {

        # Width of field $j: the difference between the column of
        # alignment $j and the column of the alignment before it
        # (or just the column itself for the first field).
        my $self = shift;
        my ($j) = @_;
        if ( $j == 0 ) {
            return $self->get_column($j);
        }
        else {
            return $self->get_column($j) - $self->get_column( $j - 1 );
        }
    }

    sub field_width_growth {

        # How far alignment $j has moved from its starting column
        my $self = shift;
        my $j    = shift;
        return $self->get_column($j) - $self->get_starting_column($j);
    }

    sub starting_field_width {

        # Same as current_field_width, but using the starting columns
        my $self = shift;
        my $j    = shift;
        if ( $j == 0 ) {
            return $self->get_starting_column($j);
        }
        else {
            return $self->get_starting_column($j) -
              $self->get_starting_column( $j - 1 );
        }
    }

    sub increase_field_width {

        # Widen field $j by $pad columns; this moves alignment $j and all
        # alignments after it, up to index jmax.
        my $self = shift;
        my ( $j, $pad ) = @_;
        my $jmax = $self->get_jmax();
        for my $k ( $j .. $jmax ) {
            $self->increment_column( $k, $pad );
        }
    }

    sub get_available_space_on_right {

        # The maximum line length minus the column of the last alignment
        my $self = shift;
        my $jmax = $self->get_jmax();
        return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
    }

    # simple 'set' methods; each stores its argument in one slot
    sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
    sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
    sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
    sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
    sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
    sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
    sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
    sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
    sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
    sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
    sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }

}
18550
18551 #####################################################################
18552 #
18553 # the Perl::Tidy::VerticalAligner::Alignment class holds information
18554 # on a single column being aligned
18555 #
18556 #####################################################################
package Perl::Tidy::VerticalAligner::Alignment;

{

    use strict;

    #use Carp;

    # An Alignment object is a blessed anonymous array.  These constants
    # name the slots of that array.
    use constant COLUMN          => 0;    # the current column number
    use constant STARTING_COLUMN => 1;    # column number when created
    use constant MATCHING_TOKEN  => 2;    # what token we are matching
    use constant STARTING_LINE   => 3;    # the line index of creation
    use constant ENDING_LINE     => 4;    # the most recent line to use it
    use constant SAVED_COLUMN    => 5;    # copy of COLUMN made by save_column
    use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
                                          # (just its index in an array)

    # Correspondence between constructor argument names and array indexes
    my %_index_map = (
        column          => COLUMN,
        starting_column => STARTING_COLUMN,
        matching_token  => MATCHING_TOKEN,
        starting_line   => STARTING_LINE,
        ending_line     => ENDING_LINE,
        saved_column    => SAVED_COLUMN,
        serial_number   => SERIAL_NUMBER,
    );

    # Default values for slots which are not given to the constructor
    my @_default_data = ();
    $_default_data[COLUMN]          = undef;
    $_default_data[STARTING_COLUMN] = undef;
    $_default_data[MATCHING_TOKEN]  = undef;
    $_default_data[STARTING_LINE]   = undef;
    $_default_data[ENDING_LINE]     = undef;
    $_default_data[SAVED_COLUMN]    = undef;
    $_default_data[SERIAL_NUMBER]   = undef;

    # class population count
    {
        my $_count = 0;
        sub get_count        { $_count; }
        sub _increment_count { ++$_count }
        sub _decrement_count { --$_count }
    }

    sub new {

        # Constructor; may be called as a class method or as an object
        # method.  Arguments are name => value pairs using the names in
        # %_index_map.  For each slot the value is taken from, in order
        # of preference:
        #   1. the named argument, if given,
        #   2. the invoking object, if called as an object method,
        #   3. the default value.
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $class         = $caller_is_obj || $caller;
        my $self          = bless [], $class;

        foreach my $key ( keys %_index_map ) {
            my $index = $_index_map{$key};
            if    ( exists $arg{$key} ) { $self->[$index] = $arg{$key} }
            elsif ($caller_is_obj)      { $self->[$index] = $caller->[$index] }
            else { $self->[$index] = $_default_data[$index] }
        }
        $self->_increment_count();
        return $self;
    }

    sub DESTROY {
        $_[0]->_decrement_count();
    }

    # simple 'get' methods; each returns the value of one slot
    sub get_column          { return $_[0]->[COLUMN] }
    sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
    sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
    sub get_starting_line   { return $_[0]->[STARTING_LINE] }
    sub get_ending_line     { return $_[0]->[ENDING_LINE] }
    sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }

    # simple 'set' methods; each stores its argument in one slot
    sub set_column          { $_[0]->[COLUMN]          = $_[1] }
    sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
    sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
    sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
    sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }

    # move the column to the right by the given amount
    sub increment_column { $_[0]->[COLUMN] += $_[1] }

    # remember the current column, and go back to the remembered column
    sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
    sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }

}
18642
18643 package Perl::Tidy::VerticalAligner;
18644
18645 # The Perl::Tidy::VerticalAligner package collects output lines and
18646 # attempts to line up certain common tokens, such as => and #, which are
18647 # identified by the calling routine.
18648 #
18649 # There are two main routines: append_line and flush.  Append acts as a
18650 # storage buffer, collecting lines into a group which can be vertically
18651 # aligned.  When alignment is no longer possible or desirable, it dumps
18652 # the group to flush.
18653 #
18654 #     append_line -----> flush
18655 #
18656 #     collects          writes
18657 #     vertical          one
18658 #     groups            group
18659
BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts

    use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
    use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;

    # Print a notice for each debug flag which has been switched on, so
    # that an active flag is not overlooked.
    my $debug_warning = sub {
        print "VALIGN_DEBUGGING with key $_[0]\n";
    };

    VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
    VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
    VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');

}
18677
18678 use vars qw(
18679   $vertical_aligner_self
18680   $current_line
18681   $maximum_alignment_index
18682   $ralignment_list
18683   $maximum_jmax_seen
18684   $minimum_jmax_seen
18685   $previous_minimum_jmax_seen
18686   $previous_maximum_jmax_seen
18687   $maximum_line_index
18688   $group_level
18689   $group_type
18690   $group_maximum_gap
18691   $marginal_match
18692   $last_group_level_written
18693   $last_leading_space_count
18694   $extra_indent_ok
18695   $zero_count
18696   @group_lines
18697   $last_comment_column
18698   $last_side_comment_line_number
18699   $last_side_comment_length
18700   $last_side_comment_level
18701   $outdented_line_count
18702   $first_outdented_line_at
18703   $last_outdented_line_at
18704   $diagnostics_object
18705   $logger_object
18706   $file_writer_object
18707   @side_comment_history
18708   $comment_leading_space_count
18709   $is_matching_terminal_line
18710
18711   $cached_line_text
18712   $cached_line_type
18713   $cached_line_flag
18714   $cached_seqno
18715   $cached_line_valid
18716   $cached_line_leading_space_count
18717   $cached_seqno_string
18718
18719   $seqno_string
18720   $last_nonblank_seqno_string
18721
18722   $rOpts
18723
18724   $rOpts_maximum_line_length
18725   $rOpts_continuation_indentation
18726   $rOpts_indent_columns
18727   $rOpts_tabs
18728   $rOpts_entab_leading_whitespace
18729   $rOpts_valign
18730
18731   $rOpts_fixed_position_side_comment
18732   $rOpts_minimum_space_to_comment
18733
18734 );
18735
sub initialize {

    # Set up the vertical aligner for a new file.
    #
    # Arguments, in calling order:
    #     $class              = class name used to bless the returned object
    #     $rOpts              = reference to the hash of option values
    #     $file_writer_object = saved in the package variable of that name
    #     $logger_object      = target of the warning / logfile wrappers
    #     $diagnostics_object = target of sub write_diagnostics
    #
    # All state is kept in package variables, so every one of them is reset
    # here.  Returns a new (empty) blessed hash, which is also remembered in
    # $vertical_aligner_self.
    my $class = shift;
    ( $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) = @_;

    # frequently used parameters, cached from the options hash
    $rOpts_valign                   = $rOpts->{'valign'};
    $rOpts_tabs                     = $rOpts->{'tabs'};
    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
    $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
    $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
    $rOpts_fixed_position_side_comment =
      $rOpts->{'fixed-position-side-comment'};

    # state describing the entire group of lines
    $ralignment_list          = [];
    @group_lines              = ();    # list of all lines in group
    $group_level              = 0;
    $last_group_level_written = -1;
    $extra_indent_ok          = 0;     # can we move all lines to the right?

    # smallest and largest field counts seen
    $maximum_jmax_seen          = $minimum_jmax_seen          = 0;
    $previous_minimum_jmax_seen = $previous_maximum_jmax_seen = 0;

    # bookkeeping for outdented lines
    $outdented_line_count    = 0;
    $first_outdented_line_at = 0;
    $last_outdented_line_at  = 0;

    # side comment memory
    $last_side_comment_length      = 0;
    $last_side_comment_line_number = 0;
    $last_side_comment_level       = -1;
    $is_matching_terminal_line     = 0;

    # most recent 3 side comments; [ line number, column ]
    @side_comment_history[ 0 .. 2 ] = ( [ -300, 0 ], [ -200, 0 ], [ -100, 0 ] );

    # write_leader_and_string cache:
    $cached_line_text    = "";
    $cached_seqno_string = "";
    $cached_line_type    = 0;
    $cached_line_flag    = 0;
    $cached_seqno        = 0;
    $cached_line_valid   = 0;
    $cached_line_leading_space_count = 0;

    # strings of sequence numbers joined together
    $seqno_string               = "";
    $last_nonblank_seqno_string = "";

    # these two subs reset the remaining package variables
    forget_side_comment();
    initialize_for_new_group();

    $vertical_aligner_self = bless {}, $class;
    return $vertical_aligner_self;
}
18800
sub initialize_for_new_group {

    # Reset the package variables which describe the group of lines
    # currently being collected.

    # no line is being matched for alignment, and the group has no type
    $current_line = undef;
    $group_type   = "";

    # no lines and no alignments in the current group
    $maximum_line_index = $maximum_alignment_index = -1;

    # counters and flags which all start at zero:
    #   $zero_count        = count of consecutive lines without tokens
    #   $group_maximum_gap = largest gap introduced
    $zero_count = $group_maximum_gap = $marginal_match = 0;
    $comment_leading_space_count = $last_leading_space_count = 0;
}
18812
18813 # interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {

    # Pass all arguments along to the diagnostics object saved by sub
    # initialize; do nothing if there is no such object.
    $diagnostics_object->write_diagnostics(@_) if ($diagnostics_object);
}
18819
18820 # interface to Perl::Tidy::Logger routines
sub warning {

    # Pass all arguments along to the 'warning' method of the logger object
    # saved by sub initialize; do nothing if there is no logger.
    $logger_object->warning(@_) if ($logger_object);
}
18826
sub write_logfile_entry {

    # Pass all arguments along to the 'write_logfile_entry' method of the
    # logger object saved by sub initialize; do nothing if there is no logger.
    $logger_object->write_logfile_entry(@_) if ($logger_object);
}
18832
sub report_definite_bug {

    # Call the 'report_definite_bug' method of the logger object saved by
    # sub initialize; do nothing if there is no logger.  Any arguments given
    # to this sub are not passed on.
    $logger_object->report_definite_bug() if ($logger_object);
}
18838
sub get_SPACES {

    # Return the number of leading spaces associated with an indentation.
    # The single argument is either a plain number of spaces, which is
    # returned unchanged, or an object (any reference), in which case the
    # result of its get_SPACES method is returned.
    my ($indentation) = @_;
    return $indentation unless ref($indentation);
    return $indentation->get_SPACES();
}
18847
sub get_RECOVERABLE_SPACES {

    # Return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same indentation
    # to get them to line up with their opening parens.
    #
    # Only an indentation object can supply this value (through its
    # get_RECOVERABLE_SPACES method); for a plain number the result is 0.
    my ($indentation) = @_;
    return 0 unless ref($indentation);
    return $indentation->get_RECOVERABLE_SPACES();
}
18856
sub get_STACK_DEPTH {

    # Return the result of the get_STACK_DEPTH method of an indentation
    # object, or 0 if the argument is a plain number rather than an object.
    my ($indentation) = @_;
    return 0 unless ref($indentation);
    return $indentation->get_STACK_DEPTH();
}
18862
sub make_alignment {

    # Make one new alignment at column $col which aligns token $token.
    #
    # The new alignment object starts and ends on the current last line of
    # the group ($maximum_line_index).  It is stored at the end of the
    # package list $ralignment_list, and its serial number is its index in
    # that list.  Returns the new alignment object.
    my ( $col, $token ) = @_;

    ++$maximum_alignment_index;

    # Use an explicit class method call; the indirect object form
    # ('new Class(...)') is ambiguous to the parser and is discouraged.
    my $alignment = Perl::Tidy::VerticalAligner::Alignment->new(
        column          => $col,
        starting_column => $col,
        matching_token  => $token,
        starting_line   => $maximum_line_index,
        ending_line     => $maximum_line_index,
        serial_number   => $maximum_alignment_index,
    );
    $ralignment_list->[$maximum_alignment_index] = $alignment;
    return $alignment;
}
18879
sub dump_alignments {

    # Debugging aid: print a tab-separated table, with a header line,
    # showing one row for each alignment with index 0 through
    # $maximum_alignment_index in the package list $ralignment_list.
    print
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";

    my $i = 0;
    while ( $i <= $maximum_alignment_index ) {

        # look up the alignment object just once for this row
        my $alignment       = $ralignment_list->[$i];
        my $column          = $alignment->get_column();
        my $starting_column = $alignment->get_starting_column();
        my $matching_token  = $alignment->get_matching_token();
        my $starting_line   = $alignment->get_starting_line();
        my $ending_line     = $alignment->get_ending_line();
        print
"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
        $i++;
    }
}
18893
sub save_alignment_columns {

    # Call the save_column method of each alignment with index 0 through
    # $maximum_alignment_index in the package list $ralignment_list.
    foreach
      my $alignment ( @{$ralignment_list}[ 0 .. $maximum_alignment_index ] )
    {
        $alignment->save_column();
    }
}
18899
sub restore_alignment_columns {

    # Call the restore_column method of each alignment with index 0 through
    # $maximum_alignment_index in the package list $ralignment_list.
    foreach
      my $alignment ( @{$ralignment_list}[ 0 .. $maximum_alignment_index ] )
    {
        $alignment->restore_column();
    }
}
18905
sub forget_side_comment {

    # Discard the remembered side comment column by setting the package
    # variable $last_comment_column back to zero.
    return $last_comment_column = 0;
}
18909
sub append_line {

    # sub append_line is called to place one line in the current vertical
    # group.
    #
    # The input parameters are, in calling order:
    #     $level       = indentation level of this line
    #     $level_end   = level passed on to sub make_side_comment, which
    #                    uses it to decide when to forget the location of
    #                    the previous side comment
    #     $indentation = either a constant number of leading spaces or an
    #                    object with a get_SPACES method
    #     $rfields     = reference to array of fields
    #     $rtokens     = reference to array of tokens starting fields 1,2,..
    #     $rpatterns   = reference to array of patterns, one per field
    #     $is_forced_break = true if this line should be tested to see if
    #                    it is an item of a list (see sub decide_if_list)
    #     $outdent_long_lines = flag stored with the line; it must also be
    #                    set for block comments to be collected in a
    #                    'COMMENT' group
    #     $is_terminal_ternary = true if dummy fields should be added with
    #                    sub fix_terminal_ternary
    #     $is_terminal_statement = used when deciding if a line flushed by
    #                    a level change may keep the extra_indent_ok flag
    #     $do_not_pad  = true to flush the current group before handling
    #                    this line
    #     $rvertical_tightness_flags = reference to array of flags, or a
    #                    false value; stored with the line
    #     $level_jump  = if nonzero, a cached opening block brace will not
    #                    be joined (unless its flag is 2 or more) and no
    #                    dummy fields are added for an 'else'
    #
    # Here is an example of what this package does.  In this example,
    # we are trying to line up both the '=>' and the '#'.
    #
    #         '18' => 'grave',    #   \`
    #         '19' => 'acute',    #   `'
    #         '20' => 'caron',    #   \v
    # <-tabs-><f1-><--field 2 ---><-f3->
    # |            |              |    |
    # |            |              |    |
    # col1        col2         col3 col4
    #
    # The calling routine has already broken the entire line into 3 fields as
    # indicated.  (So the work of identifying promising common tokens has
    # already been done).
    #
    # In this example, there will be 2 tokens being matched: '=>' and '#'.
    # They are the leading parts of fields 2 and 3, but we do need to know
    # what they are so that we can dump a group of lines when these tokens
    # change.
    #
    # The fields contain the actual characters of each field.  The patterns
    # are like the fields, but they contain mainly token types instead
    # of tokens, so they have fewer characters.  They are used to be
    # sure we are matching fields of similar type.
    #
    # In this example, there will be 4 column indexes being adjusted.  The
    # first one is always at zero.  The interior columns are at the start of
    # the matching tokens, and the last one tracks the maximum line length.
    #
    # Basically, each time a new line comes in, it joins the current vertical
    # group if possible.  Otherwise it causes the current group to be dumped
    # and a new group is started.
    #
    # For each new group member, the column locations are increased, as
    # necessary, to make room for the new fields.  When the group is finally
    # output, these column numbers are used to compute the amount of spaces of
    # padding needed for each field.
    #
    # Programming note: the fields are assumed not to have any tab characters.
    # Tabs have been previously removed except for tabs in quoted strings and
    # side comments.  Tabs in these fields can mess up the column counting.
    # The log file warns the user if there are any such tabs.

    my (
        $level,               $level_end,
        $indentation,         $rfields,
        $rtokens,             $rpatterns,
        $is_forced_break,     $outdent_long_lines,
        $is_terminal_ternary, $is_terminal_statement,
        $do_not_pad,          $rvertical_tightness_flags,
        $level_jump,
    ) = @_;

    # number of fields is $jmax
    # number of tokens between fields is $jmax-1
    my $jmax = $#{$rfields};

    my $leading_space_count = get_SPACES($indentation);

    # set outdented flag to be sure we either align within statements or
    # across statement boundaries, but not both.
    my $is_outdented = $last_leading_space_count > $leading_space_count;
    $last_leading_space_count = $leading_space_count;

    # Patch: undo for hanging side comment
    my $is_hanging_side_comment =
      ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
    $is_outdented = 0 if $is_hanging_side_comment;

    VALIGN_DEBUG_FLAG_APPEND0 && do {
        print
"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
    };

    # Validate cached line if necessary: If we can produce a container
    # with just 2 lines total by combining an existing cached opening
    # token with the closing token to follow, then we will mark both
    # cached flags as valid.
    if ($rvertical_tightness_flags) {
        if (   $maximum_line_index <= 0
            && $cached_line_type
            && $cached_seqno
            && $rvertical_tightness_flags->[2]
            && $rvertical_tightness_flags->[2] == $cached_seqno )
        {
            $rvertical_tightness_flags->[3] ||= 1;
            $cached_line_valid ||= 1;
        }
    }

    # do not join an opening block brace with an unbalanced line
    # unless requested with a flag value of 2
    if (   $cached_line_type == 3
        && $maximum_line_index < 0
        && $cached_line_flag < 2
        && $level_jump != 0 )
    {
        $cached_line_valid = 0;
    }

    # patch until new aligner is finished
    if ($do_not_pad) { my_flush() }

    # shouldn't happen:
    if ( $level < 0 ) { $level = 0 }

    # do not align code across indentation level changes
    # or if vertical alignment is turned off for debugging
    if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {

        # we are allowed to shift a group of lines to the right if its
        # level is greater than the previous and next group
        $extra_indent_ok =
          ( $level < $group_level && $last_group_level_written < $group_level );

        my_flush();

        # If we know that this line will get flushed out by itself because
        # of level changes, we can leave the extra_indent_ok flag set.
        # That way, if we get an external flush call, we will still be
        # able to do some -lp alignment if necessary.
        $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );

        $group_level = $level;

        # wait until after the above flush to get the leading space
        # count because it may have been changed if the -icp flag is in
        # effect
        $leading_space_count = get_SPACES($indentation);

    }

    # --------------------------------------------------------------------
    # Patch to collect outdentable block COMMENTS
    # --------------------------------------------------------------------
    my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
    if ( $group_type eq 'COMMENT' ) {

        # another block comment with the same leading space count joins
        # the current COMMENT group; anything else ends the group
        if (   $is_block_comment
            && $outdent_long_lines
            && $leading_space_count == $comment_leading_space_count )
        {
            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
            return;
        }
        else {
            my_flush();
        }
    }

    # --------------------------------------------------------------------
    # add dummy fields for terminal ternary
    # --------------------------------------------------------------------
    my $j_terminal_match;
    if ( $is_terminal_ternary && $current_line ) {
        $j_terminal_match =
          fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
        $jmax = @{$rfields} - 1;
    }

    # --------------------------------------------------------------------
    # add dummy fields for else statement
    # --------------------------------------------------------------------
    if (   $rfields->[0] =~ /^else\s*$/
        && $current_line
        && $level_jump == 0 )
    {
        $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
        $jmax = @{$rfields} - 1;
    }

    # --------------------------------------------------------------------
    # Handle simple line of code with no fields to match.
    # --------------------------------------------------------------------
    if ( $jmax <= 0 ) {
        $zero_count++;

        if ( $maximum_line_index >= 0
            && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
        {

            # flush the current group if it has some aligned columns..
            if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }

            # flush current group if we are just collecting side comments..
            elsif (

                # ...and we haven't seen a comment lately
                ( $zero_count > 3 )

                # ..or if this new line doesn't fit to the left of the comments
                || ( ( $leading_space_count + length( $rfields->[0] ) ) >
                    $group_lines[0]->get_column(0) )
              )
            {
                my_flush();
            }
        }

        # patch to start new COMMENT group if this comment may be outdented
        if (   $is_block_comment
            && $outdent_long_lines
            && $maximum_line_index < 0 )
        {
            $group_type                           = 'COMMENT';
            $comment_leading_space_count          = $leading_space_count;
            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
            return;
        }

        # just write this line directly if no current group, no side comment,
        # and no space recovery is needed.
        if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
        {
            write_leader_and_string( $leading_space_count, $rfields->[0], 0,
                $outdent_long_lines, $rvertical_tightness_flags );
            return;
        }
    }
    else {
        $zero_count = 0;
    }

    # programming check: (shouldn't happen)
    # an error here implies an incorrect call was made
    if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
        warning(
"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
        );
        report_definite_bug();
    }

    # --------------------------------------------------------------------
    # create an object to hold this line
    # --------------------------------------------------------------------
    my $new_line = Perl::Tidy::VerticalAligner::Line->new(
        jmax                      => $jmax,
        jmax_original_line        => $jmax,
        rtokens                   => $rtokens,
        rfields                   => $rfields,
        rpatterns                 => $rpatterns,
        indentation               => $indentation,
        leading_space_count       => $leading_space_count,
        outdent_long_lines        => $outdent_long_lines,
        list_type                 => "",
        is_hanging_side_comment   => $is_hanging_side_comment,
        maximum_line_length       => $rOpts->{'maximum-line-length'},
        rvertical_tightness_flags => $rvertical_tightness_flags,
    );

    # Initialize a global flag saying if the last line of the group should
    # match end of group and also terminate the group.  There should be no
    # returns between here and where the flag is handled at the bottom.
    my $col_matching_terminal = 0;
    if ( defined($j_terminal_match) ) {

        # remember the column of the terminal ? or { to match with
        $col_matching_terminal = $current_line->get_column($j_terminal_match);

        # set global flag for sub decide_if_aligned
        $is_matching_terminal_line = 1;
    }

    # --------------------------------------------------------------------
    # It simplifies things to create a zero length side comment
    # if none exists.
    # --------------------------------------------------------------------
    make_side_comment( $new_line, $level_end );

    # --------------------------------------------------------------------
    # Decide if this is a simple list of items.
    # There are 3 list types: none, comma, comma-arrow.
    # We use this below to be less restrictive in deciding what to align.
    # --------------------------------------------------------------------
    if ($is_forced_break) {
        decide_if_list($new_line);
    }

    if ($current_line) {

        # --------------------------------------------------------------------
        # Allow hanging side comment to join current group, if any
        # This will help keep side comments aligned, because otherwise we
        # will have to start a new group, making alignment less likely.
        # --------------------------------------------------------------------
        join_hanging_comment( $new_line, $current_line )
          if $is_hanging_side_comment;

        # --------------------------------------------------------------------
        # If there is just one previous line, and it has more fields
        # than the new line, try to join fields together to get a match with
        # the new line.  At the present time, only a single leading '=' is
        # allowed to be compressed out.  This is useful in rare cases where
        # a table is forced to use old breakpoints because of side comments,
        # and the table starts out something like this:
        #   my %MonthChars = ('0', 'Jan',   # side comment
        #                     '1', 'Feb',
        #                     '2', 'Mar',
        # Eliminating the '=' field will allow the remaining fields to line up.
        # This situation does not occur if there are no side comments
        # because scan_list would put a break after the opening '('.
        # --------------------------------------------------------------------
        eliminate_old_fields( $new_line, $current_line );

        # --------------------------------------------------------------------
        # If the new line has more fields than the current group,
        # see if we can match the first fields and combine the remaining
        # fields of the new line.
        # --------------------------------------------------------------------
        eliminate_new_fields( $new_line, $current_line );

        # --------------------------------------------------------------------
        # Flush previous group unless all common tokens and patterns match..
        # --------------------------------------------------------------------
        check_match( $new_line, $current_line );

        # --------------------------------------------------------------------
        # See if there is space for this line in the current group (if any)
        # --------------------------------------------------------------------
        if ($current_line) {
            check_fit( $new_line, $current_line );
        }
    }

    # --------------------------------------------------------------------
    # Append this line to the current group (or start new group)
    # --------------------------------------------------------------------
    accept_line($new_line);

    # Future update to allow this to vary:
    $current_line = $new_line if ( $maximum_line_index == 0 );

    # output this group if it ends in a terminal else or ternary line
    if ( defined($j_terminal_match) ) {

        # if there is only one line in the group (maybe due to failure to match
        # perfectly with previous lines), then align the ? or { of this
        # terminal line with the previous one unless that would make the line
        # too long
        if ( $maximum_line_index == 0 ) {
            my $col_now = $current_line->get_column($j_terminal_match);
            my $pad     = $col_matching_terminal - $col_now;
            my $padding_available =
              $current_line->get_available_space_on_right();
            if ( $pad > 0 && $pad <= $padding_available ) {
                $current_line->increase_field_width( $j_terminal_match, $pad );
            }
        }
        my_flush();
        $is_matching_terminal_line = 0;
    }

    # --------------------------------------------------------------------
    # Some old debugging stuff
    # --------------------------------------------------------------------
    VALIGN_DEBUG_FLAG_APPEND && do {
        print "APPEND fields:";
        dump_array( @{$rfields} );
        print "APPEND tokens:";
        dump_array( @{$rtokens} );
        print "APPEND patterns:";
        dump_array( @{$rpatterns} );
        dump_alignments();
    };

    return;
}
19292
sub join_hanging_comment {

    # Try to give a hanging side comment line the same number of fields as
    # the old line, so that its comment ends up in the last field.
    #
    # Arguments:
    #     $line     = the new line, a candidate hanging side comment
    #     $old_line = the line that it is to be matched against
    #
    # Returns 1 if the new line was modified, 0 if nothing was done.
    my ( $line, $old_line ) = @_;

    # The new line must have exactly 2 fields...
    my $jmax = $line->get_jmax();
    return 0 if ( $jmax != 1 );

    # ...the second field must be a comment...
    my $rtokens = $line->get_rtokens();
    return 0 if ( $rtokens->[0] ne '#' );

    # ...the first field must be empty...
    my $rfields = $line->get_rfields();
    return 0 if ( $rfields->[0] !~ /^\s*$/ );

    # ...and the old line must have more fields
    my $jmax_old = $old_line->get_jmax();
    return 0 if ( $jmax_old <= $jmax );

    my $rpatterns = $line->get_rpatterns();

    $line->set_is_hanging_side_comment(1);
    $line->set_jmax($jmax_old);

    # Move the comment to the last field.  This must be done before the
    # loop below, which overwrites the original token and pattern.
    $rfields->[$jmax_old]         = $rfields->[1];
    $rtokens->[ $jmax_old - 1 ]   = $rtokens->[0];
    $rpatterns->[ $jmax_old - 1 ] = $rpatterns->[0];

    # Fill the interior fields with a single blank and empty the tokens
    # and patterns which precede them.
    foreach my $j ( 1 .. $jmax_old - 1 ) {
        $rfields->[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
        $rtokens->[ $j - 1 ]   = "";
        $rpatterns->[ $j - 1 ] = "";
    }
    return 1;
}
19321
sub eliminate_old_fields {

    # If the current group consists of just one line, and that line has
    # more fields than the new line, try to join fields of the old line
    # together so that its tokens match those of the new line.
    #
    # Arguments:
    #     $new_line = the line which is about to be added
    #     $old_line = the single line of the current group
    #
    # As a side effect, the package variables $maximum_jmax_seen and
    # $minimum_jmax_seen are always updated with the field count of the
    # new line.  The old line is modified only if the match succeeds.
    my ( $new_line, $old_line ) = @_;

    my $jmax = $new_line->get_jmax();
    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }

    # there must be one previous line
    return unless ( $maximum_line_index == 0 );

    my $maximum_field_index = $old_line->get_jmax();

    ###############################################
    # this line must have fewer fields
    return unless $maximum_field_index > $jmax;
    ###############################################

    # Identify specific cases where field elimination is allowed:
    # case=1: both lines have comma-separated lists, and the first
    #         line has an equals
    # case=2: both lines have leading equals

    # case 1 is the default
    my $case = 1;

    # See if case 2: both lines have leading '='
    # We'll require similar leading patterns in this case
    my $old_rtokens   = $old_line->get_rtokens();
    my $rtokens       = $new_line->get_rtokens();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();
    if (   $rtokens->[0] =~ /^=\d*$/
        && $old_rtokens->[0]   eq $rtokens->[0]
        && $old_rpatterns->[0] eq $rpatterns->[0] )
    {
        $case = 2;
    }

    # not too many fewer fields in new line for case 1
    return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );

    # case 1 must have side comment
    my $old_rfields = $old_line->get_rfields();
    return
      if ( $case == 1
        && length( $old_rfields->[$maximum_field_index] ) == 0 );

    my $hid_equals = 0;

    my @new_alignments        = ();
    my @new_fields            = ();
    my @new_matching_patterns = ();
    my @new_matching_tokens   = ();

    my $j               = 0;     # index of the next new token to be matched
    my $current_field   = '';    # old fields joined since the last match
    my $current_pattern = '';    # old patterns joined since the last match

    # loop over all old tokens
    my $in_match = 0;
    for my $k ( 0 .. $maximum_field_index - 1 ) {
        $current_field   .= $old_rfields->[$k];
        $current_pattern .= $old_rpatterns->[$k];
        last if ( $j > $jmax - 1 );

        if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {
            $in_match                  = 1;
            $new_fields[$j]            = $current_field;
            $new_matching_patterns[$j] = $current_pattern;
            $current_field             = '';
            $current_pattern           = '';
            $new_matching_tokens[$j]   = $old_rtokens->[$k];
            $new_alignments[$j]        = $old_line->get_alignment($k);
            $j++;
        }
        else {

            if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
                last if ( $case == 2 );    # avoid problems with stuff
                                           # like:   $a=$b=$c=$d;
                $hid_equals = 1;
            }
            last
              if ( $in_match && $case == 1 )
              ;    # disallow gaps in matching field types in case 1
        }
    }

    # Modify the current state if we are successful.
    # We must exactly reach the ends of both lists for success.
    if (   ( $j == $jmax )
        && ( $current_field eq '' )
        && ( $case != 1 || $hid_equals ) )
    {

        # the last old field (the side comment) becomes the last new field
        my $k = $maximum_field_index;
        $current_field   .= $old_rfields->[$k];
        $current_pattern .= $old_rpatterns->[$k];
        $new_fields[$j]            = $current_field;
        $new_matching_patterns[$j] = $current_pattern;

        $new_alignments[$j] = $old_line->get_alignment($k);

        $old_line->set_alignments(@new_alignments);
        $old_line->set_jmax($jmax);
        $old_line->set_rtokens( \@new_matching_tokens );
        $old_line->set_rfields( \@new_fields );

        # NOTE(review): the old line is given the pattern array of the NEW
        # line here, so @new_matching_patterns (built above) is never
        # used.  This is kept as found; confirm whether it is intended.
        $old_line->set_rpatterns( \@{$rpatterns} );
    }
}
19436
19437 # create an empty side comment if none exists
sub make_side_comment {

    # Make sure that a line ends with a side comment field.
    #
    # Arguments:
    #     $new_line  = the line object to be checked
    #     $level_end = level which is remembered in the package variable
    #                  $last_side_comment_level if the line has a comment
    #
    # A line without a side comment gets an empty one added, so that the
    # number of its fields increases by one.  For a line which already has
    # a side comment, the memory of the previous side comment location is
    # updated instead.
    my ( $new_line, $level_end ) = @_;

    my $jmax    = $new_line->get_jmax();
    my $rtokens = $new_line->get_rtokens();

    # a side comment is marked by a final token '#'
    my $has_side_comment = ( $jmax != 0 ) && ( $rtokens->[ $jmax - 1 ] eq '#' );

    if ($has_side_comment) {

        my $line_number = $vertical_aligner_self->get_output_line_number();
        my $rfields     = $new_line->get_rfields();

        # don't remember old side comment location for very long
        my $is_too_old = $line_number - $last_side_comment_line_number > 12;

        # and don't remember comment location across block level changes
        my $is_block_end =
          ( $level_end < $last_side_comment_level && $rfields->[0] =~ /^}/ );

        forget_side_comment() if ( $is_too_old || $is_block_end );

        $last_side_comment_line_number = $line_number;
        $last_side_comment_level       = $level_end;
    }

    # if line does not have a side comment...
    else {
        my $rfields   = $new_line->get_rfields();
        my $rpatterns = $new_line->get_rpatterns();

        # ...append an empty field with token and pattern '#'
        my $jmax_new = $jmax + 1;
        $rtokens->[$jmax]       = '#';
        $rfields->[$jmax_new]   = '';
        $rpatterns->[$jmax_new] = '#';
        $new_line->set_jmax($jmax_new);
        $new_line->set_jmax_original_line($jmax_new);
    }
}
19474
sub decide_if_list {

    # Decide if a line is an item of a list, and if so set its list type.
    #
    # A list will be taken to be a line with a forced break in which all
    # of the field separators are commas or comma-arrows (except for the
    # trailing #)
    #
    # List separator tokens are things like ',3'   or '=>2',
    # where the trailing digit is the nesting depth.  Allow braces
    # to allow nested list items.
    #
    # The list type is the first token of the line if all of the tokens
    # qualify; otherwise it is set to an empty string.  Nothing is done if
    # the first token is not a comma or comma-arrow.
    my ($line) = @_;

    my $rtokens   = $line->get_rtokens();
    my $list_type = $rtokens->[0];

    if ( $list_type =~ /^(\,|=>)/ ) {

        # examine the tokens after the first, up to index $jmax - 2
        my $j_last = $line->get_jmax() - 2;
        my $j      = 1;
        while ( $j <= $j_last ) {
            if ( $rtokens->[$j] !~ /^(\,|=>|\{)/ ) {
                $list_type = "";
                last;
            }
            $j++;
        }
        $line->set_list_type($list_type);
    }
}
19501
sub eliminate_new_fields {

    # Called as: eliminate_new_fields( $new_line, $old_line )
    #
    # If the new line has more alignment fields than the old line, and
    # its leading tokens and patterns agree with those of the old line,
    # merge the surplus fields of the new line into a single field so
    # that both lines end up with the same number of fields.  The
    # arrays of $new_line are edited in place and its jmax is updated.

    # nothing to do unless a group is in progress
    return if ( $maximum_line_index < 0 );

    my $new_line = shift;
    my $old_line = shift;

    my $jmax        = $new_line->get_jmax();
    my $old_rtokens = $old_line->get_rtokens();
    my $rtokens     = $new_line->get_rtokens();

    # an assignment: both lines start with the same '=' token
    my $is_assignment =
      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );

    # the field count must vary monotonically
    return if ( !$is_assignment && $previous_maximum_jmax_seen > $jmax );

    # the new line must have more fields than the old one
    my $maximum_field_index = $old_line->get_jmax();
    return if ( $maximum_field_index >= $jmax );

    if ( !$is_assignment ) {

        # only if monotonic
        return
          if ( $old_line->get_jmax_original_line() != $minimum_jmax_seen );

        # never combine fields of a comma list
        return
          if ( $maximum_field_index <= 1
            || $new_line->get_list_type() =~ /^,/ );
    }

    my $rfields       = $new_line->get_rfields();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();

    # Compare every OLD token and pattern, except the side comment one,
    # with its counterpart in the new line
    my $match = 1;
    foreach my $k ( 0 .. $maximum_field_index - 2 ) {
        next
          if ( $old_rtokens->[$k] eq $rtokens->[$k]
            && $old_rpatterns->[$k] eq $rpatterns->[$k] );
        $match = 0;
        last;
    }

    # Leading tokens agree, so fold the extra new fields together
    if ($match) {
        my $jkeep = $maximum_field_index - 1;
        my @extra = ( $maximum_field_index .. $jmax - 1 );

        # append the text and patterns of the surplus fields to the last
        # field which is kept, then blank the surplus entries
        $rfields->[$jkeep]   .= join( "", @{$rfields}[@extra] );
        $rpatterns->[$jkeep] .= join( "", @{$rpatterns}[@extra] );
        @{$rfields}[@extra]   = ("") x @extra;
        @{$rpatterns}[@extra] = ("") x @extra;

        # the side comment moves down to follow the merged field
        $rtokens->[$jkeep]                 = '#';
        $rfields->[$maximum_field_index]   = $rfields->[$jmax];
        $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax];
        $jmax                              = $maximum_field_index;
    }
    $new_line->set_jmax($jmax);
}
19564
sub fix_terminal_ternary {

    # Add empty fields as necessary to align a ternary term
    # like this:
    #
    #  my $leapyear =
    #      $year % 4   ? 0
    #    : $year % 100 ? 1
    #    : $year % 400 ? 0
    #    :               1;
    #
    # Called as: fix_terminal_ternary( $rfields, $rtokens, $rpatterns )
    # where the arguments are references to the field, token and pattern
    # arrays of the terminal line.  On success all three arrays are
    # rewritten in place; on failure they are left untouched.
    #
    # Returns the index of the matching '?' token in the last line of
    # the current group.  This index can be 0, so the result must be
    # tested with 'defined' rather than for truth.  Returns nothing if
    # the line could not be adapted.

    my ( $rfields, $rtokens, $rpatterns ) = @_;

    my $jmax        = @{$rfields} - 1;
    my $old_line    = $group_lines[$maximum_line_index];
    my $rfields_old = $old_line->get_rfields();

    my $rpatterns_old       = $old_line->get_rpatterns();
    my $rtokens_old         = $old_line->get_rtokens();
    my $maximum_field_index = $old_line->get_jmax();

    # look for the question mark after the :
    my ($jquestion);
    my $depth_question;
    my $pad = "";
    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok =~ /^\?(\d+)$/ ) {
            $depth_question = $1;

            # depth must be correct
            next unless ( $depth_question eq $group_level );

            $jquestion = $j;

            # the pad is as wide as the '?' plus its trailing whitespace
            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
                $pad = " " x length($1);
            }
            else {
                return;    # shouldn't happen
            }
            last;
        }
    }
    return unless ( defined($jquestion) );    # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the else line to ensure a match.  Add empty fields
    # as necessary.
    my $jadd = $jquestion;

    # Work on copies of the actual arrays in case we have
    # to return due to an error
    my @fields   = @{$rfields};
    my @patterns = @{$rpatterns};
    my @tokens   = @{$rtokens};

    VALIGN_DEBUG_FLAG_TERNARY && do {
        local $" = '><';
        print "CURRENT FIELDS=<@{$rfields_old}>\n";
        print "CURRENT TOKENS=<@{$rtokens_old}>\n";
        print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
        print "UNMODIFIED FIELDS=<@{$rfields}>\n";
        print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
        print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
    };

    # handle cases of leading colon on this line
    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {

        my ( $colon, $therest ) = ( $1, $2 );

        # Handle sub-case of first field with leading colon plus additional code
        # This is the usual situation as at the '1' below:
        #  ...
        #  : $year % 400 ? 0
        #  :               1;
        #
        # The test is on length, not truth, so that a terminal value
        # consisting of just '0' is still recognized as additional code.
        if ( length($therest) ) {

            # Split the first field after the leading colon and insert padding.
            # Note that this padding will remain even if the terminal value goes
            # out on a separate line.  This does not seem to look too bad, so no
            # mechanism has been included to undo it.
            shift @fields;
            unshift @fields, ( $colon, $pad . $therest );

            # change the leading pattern from : to ?
            return unless ( $patterns[0] =~ s/^\:/?/ );

            # install leading tokens and patterns of existing line
            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

            # insert appropriate number of empty fields
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }

        # handle sub-case of first field just equal to leading colon.
        # This can happen for example in the example below where
        # the leading '(' would create a new alignment token
        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
        # :                        ( $mname = $name . '->' );
        else {

            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen

            # prepend a leading ? onto the second pattern
            $patterns[1] = "?b" . $patterns[1];

            # pad the second field
            $fields[1] = $pad . $fields[1];

            # install leading tokens and patterns of existing line, replacing
            # leading token and inserting appropriate number of empty fields
            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }
    }

    # Handle case of no leading colon on this line.  This will
    # be the case when -wba=':' is used.  For example,
    #  $year % 400 ? 0 :
    #                1;
    else {

        # install leading tokens and patterns of existing line
        $patterns[0] = '?' . 'b' . $patterns[0];
        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

        # insert appropriate number of empty fields
        $jadd = $jquestion + 1;
        $fields[0] = $pad . $fields[0];
        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
    }

    VALIGN_DEBUG_FLAG_TERNARY && do {
        local $" = '><';
        print "MODIFIED TOKENS=<@tokens>\n";
        print "MODIFIED PATTERNS=<@patterns>\n";
        print "MODIFIED FIELDS=<@fields>\n";
    };

    # all ok .. update the arrays
    @{$rfields}   = @fields;
    @{$rtokens}   = @tokens;
    @{$rpatterns} = @patterns;

    # force a flush after this line
    return $jquestion;
}
19718
sub fix_terminal_else {

    # Add empty fields as necessary to align a balanced terminal
    # else block to a previous if/elsif/unless block,
    # like this:
    #
    #  if   ( 1 || $x ) { print "ok 13\n"; }
    #  else             { print "not ok 13\n"; }
    #
    # Called as: fix_terminal_else( $rfields, $rtokens, $rpatterns )
    # where the arguments are references to the field, token and pattern
    # arrays of the 'else' line.  On success the three arrays are
    # modified in place.
    #
    # Returns the index of the opening block brace token in
    # $current_line, or nothing if the else line cannot be aligned.
    #
    my ( $rfields, $rtokens, $rpatterns ) = @_;
    my $jmax = @{$rfields} - 1;
    return unless ( $jmax > 0 );

    # check for balanced else block following if/elsif/unless
    my $rfields_old = $current_line->get_rfields();

    # TBD: add handling for 'case'
    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );

    # look for the opening brace after the else, and extract the depth
    my $tok_brace = $rtokens->[0];
    my $depth_brace;
    if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }

    # probably:  "else # side_comment"
    else { return }

    my $rpatterns_old       = $current_line->get_rpatterns();
    my $rtokens_old         = $current_line->get_rtokens();
    my $maximum_field_index = $current_line->get_jmax();

    # be sure the previous if/elsif is followed by an opening paren
    my $jparen    = 0;
    my $tok_paren = '(' . $depth_brace;
    my $tok_test  = $rtokens_old->[$jparen];
    return unless ( $tok_test eq $tok_paren );    # shouldn't happen

    # Now find the opening block brace
    my ($jbrace);
    for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok eq $tok_brace ) {
            $jbrace = $j;
            last;
        }
    }
    return unless ( defined($jbrace) );           # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the else line to ensure a match.  Add empty fields
    # as necessary.
    my $jadd = $jbrace - $jparen;
    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
    splice( @{$rfields}, 1, 0, ('') x $jadd );

    # force a flush after this line if it does not follow a case
    return $jbrace
      unless ( $rfields_old->[0] =~ /^case\s*$/ );

    # A line following a 'case' gets no forced flush.  The
    # if/elsif/unless guard above means this point cannot be reached
    # until 'case' handling is added; return explicitly so that the
    # result is well defined when it is.
    return;
}
19781
{    # sub check_match
    my %is_good_alignment;

    BEGIN {

        # Vertically aligning on certain "good" tokens is usually okay
        # so we can be less restrictive in marginal cases.
        my @good_tokens = ( qw( { ? => = ), ',' );
        @is_good_alignment{@good_tokens} = (1) x scalar(@good_tokens);
    }

    sub check_match {

        # See if the current line matches the current vertical alignment group.
        # If not, flush the current group.
        #
        # Called as: check_match( $new_line, $old_line )
        #   $new_line - the line being considered for the group
        #   $old_line - the line of the group to compare against
        #
        # On a match the sub simply returns; a new line with fewer fields
        # than the group has its arrays padded with the group's trailing
        # tokens and patterns.  On a mismatch my_flush() is called.
        my $new_line = shift;
        my $old_line = shift;

        # uses global variables:
        #  $previous_minimum_jmax_seen
        #  $maximum_jmax_seen
        #  $maximum_line_index
        #  $marginal_match
        my $jmax                = $new_line->get_jmax();
        my $maximum_field_index = $old_line->get_jmax();

        # flush if this line has too many fields
        if ( $jmax > $maximum_field_index ) { goto NO_MATCH }

        # flush if adding this line would make a non-monotonic field count
        if (
            ( $maximum_field_index > $jmax )    # this has too few fields
            && (
                ( $previous_minimum_jmax_seen <
                    $jmax )                     # and wouldn't be monotonic
                || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
            )
          )
        {
            goto NO_MATCH;
        }

        # otherwise see if this line matches the current group
        my $jmax_original_line      = $new_line->get_jmax_original_line();
        my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
        my $rtokens                 = $new_line->get_rtokens();
        my $rfields                 = $new_line->get_rfields();
        my $rpatterns               = $new_line->get_rpatterns();
        my $list_type               = $new_line->get_list_type();

        my $group_list_type = $old_line->get_list_type();
        my $old_rpatterns   = $old_line->get_rpatterns();
        my $old_rtokens     = $old_line->get_rtokens();

        # $jlimit is the index of the last token to be compared
        my $jlimit = $jmax - 1;
        if ( $maximum_field_index > $jmax ) {
            $jlimit = $jmax_original_line;
            --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
        }

        # handle comma-separated lists ..
        if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
            for my $j ( 0 .. $jlimit ) {
                my $old_tok = $$old_rtokens[$j];
                next unless $old_tok;
                my $new_tok = $$rtokens[$j];
                next unless $new_tok;

                # lists always match ...
                # unless they would align any '=>'s with ','s
                goto NO_MATCH
                  if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
                    || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
            }
        }

        # do detailed check for everything else except hanging side comments
        elsif ( !$is_hanging_side_comment ) {

            my $leading_space_count = $new_line->get_leading_space_count();

            my $max_pad = 0;
            my $min_pad = 0;
            my $saw_good_alignment;

            for my $j ( 0 .. $jlimit ) {

                my $old_tok = $$old_rtokens[$j];
                my $new_tok = $$rtokens[$j];

                # Note on encoding used for alignment tokens:
                # -------------------------------------------
                # Tokens are "decorated" with information which can help
                # prevent unwanted alignments.  Consider for example the
                # following two lines:
                #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
                #   local ( $i, $f ) = &'bdiv( $xn, $xd );
                # There are three alignment tokens in each line, a comma,
                # an =, and a comma.  In the first line these three tokens
                # are encoded as:
                #    ,4+local-18     =3      ,4+split-7
                # and in the second line they are encoded as
                #    ,4+local-18     =3      ,4+&'bdiv-8
                # Tokens always at least have token name and nesting
                # depth.  So in this example the ='s are at depth 3 and
                # the ,'s are at depth 4.  This prevents aligning tokens
                # of different depths.  Commas contain additional
                # information, as follows:
                # ,  {depth} + {container name} - {spaces to opening paren}
                # This allows us to reject matching the rightmost commas
                # in the above two lines, since they are for different
                # function calls.  This encoding is done in
                # 'sub send_lines_to_vertical_aligner'.

                # Pick off actual token.
                # Everything up to the first digit is the actual token.
                my $alignment_token = $new_tok;
                if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }

                # see if the decorated tokens match
                my $tokens_match = $new_tok eq $old_tok

                  # Exception for matching terminal : of ternary statement..
                  # consider containers prefixed by ? and : a match
                  || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );

                # No match if the alignment tokens differ...
                if ( !$tokens_match ) {

                    # ...Unless this is a side comment
                    if (
                        $j == $jlimit

                        # and there is either at least one alignment token
                        # or this is a single item following a list.  This
                        # latter rule is required for 'December' to join
                        # the following list:
                        # my (@months) = (
                        #     '',       'January',   'February', 'March',
                        #     'April',  'May',       'June',     'July',
                        #     'August', 'September', 'October',  'November',
                        #     'December'
                        # );
                        # If it doesn't then the -lp formatting will fail.
                        && ( $j > 0 || $old_tok =~ /^,/ )
                      )
                    {
                        $marginal_match = 1
                          if ( $marginal_match == 0
                            && $maximum_line_index == 0 );
                        last;
                    }

                    goto NO_MATCH;
                }

                # Calculate amount of padding required to fit this in.
                # $pad is the number of spaces by which we must increase
                # the current field to squeeze in this field.
                my $pad =
                  length( $$rfields[$j] ) - $old_line->current_field_width($j);
                if ( $j == 0 ) { $pad += $leading_space_count; }

                # remember max pads to limit marginal cases
                if ( $alignment_token ne '#' ) {
                    if ( $pad > $max_pad ) { $max_pad = $pad }
                    if ( $pad < $min_pad ) { $min_pad = $pad }
                }
                if ( $is_good_alignment{$alignment_token} ) {
                    $saw_good_alignment = 1;
                }

                # If patterns don't match, we have to be careful...
                if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {

                    # flag this as a marginal match since patterns differ
                    $marginal_match = 1
                      if ( $marginal_match == 0 && $maximum_line_index == 0 );

                    # We have to be very careful about aligning commas
                    # when the patterns don't match, because it can be
                    # worse to create an alignment where none is needed
                    # than to omit one.  Here's an example where the ','s
                    # are not in named containers.  The first line below
                    # should not match the next two:
                    #   ( $a, $b ) = ( $b, $r );
                    #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
                    #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
                    if ( $alignment_token eq ',' ) {

                       # do not align commas unless they are in named containers
                        goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
                    }

                    # do not align parens unless patterns match;
                    # large ugly spaces can occur in math expressions.
                    elsif ( $alignment_token eq '(' ) {

                        # But we can allow a match if the parens don't
                        # require any padding.
                        if ( $pad != 0 ) { goto NO_MATCH }
                    }

                    # Handle an '=' alignment with different patterns to
                    # the left.
                    elsif ( $alignment_token eq '=' ) {

                        # It is best to be a little restrictive when
                        # aligning '=' tokens.  Here is an example of
                        # two lines that we will not align:
                        #       my $variable=6;
                        #       $bb=4;
                        # The problem is that one is a 'my' declaration,
                        # and the other isn't, so they're not very similar.
                        # We will filter these out by comparing the first
                        # letter of the pattern.  This is crude, but works
                        # well enough.
                        if (
                            substr( $$old_rpatterns[$j], 0, 1 ) ne
                            substr( $$rpatterns[$j], 0, 1 ) )
                        {
                            goto NO_MATCH;
                        }

                        # If we pass that test, we'll call it a marginal match.
                        # Here is an example of a marginal match:
                        #       $done{$$op} = 1;
                        #       $op         = compile_bblock($op);
                        # The left tokens are both identifiers, but
                        # one accesses a hash and the other doesn't.
                        # We'll let this be a tentative match and undo
                        # it later if we don't find more than 2 lines
                        # in the group.
                        elsif ( $maximum_line_index == 0 ) {
                            $marginal_match =
                              2;    # =2 prevents being undone below
                        }
                    }
                }

                # Don't let line with fewer fields increase column widths
                # ( align3.t )
                if ( $maximum_field_index > $jmax ) {

                    # Exception: suspend this rule to allow last lines to join
                    if ( $pad > 0 ) { goto NO_MATCH; }
                }
            } ## end for my $j ( 0 .. $jlimit)

            # Turn off the "marginal match" flag in some cases...
            # A "marginal match" occurs when the alignment tokens agree
            # but there are differences in the other tokens (patterns).
            # If we leave the marginal match flag set, then the rule is that we
            # will align only if there are more than two lines in the group.
            # We will turn off the flag if we almost have a match
            # and either we have seen a good alignment token or we
            # just need a small pad (2 spaces) to fit.  These rules are
            # the result of experimentation.  Tokens which are misaligned by
            # just one or two characters are annoying.  On the other hand,
            # large gaps to less important alignment tokens are also annoying.
            if (   $marginal_match == 1
                && $jmax == $maximum_field_index
                && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
              )
            {
                $marginal_match = 0;
            }
            ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
        }

        # We have a match (even if marginal).
        # If the current line has fewer fields than the current group
        # but otherwise matches, copy the remaining group fields to
        # make it a perfect match.
        if ( $maximum_field_index > $jmax ) {
            my $comment = $$rfields[$jmax];

            # This loop uses its own iterator.  An earlier form iterated
            # with $jmax itself and incremented it in the body; a foreach
            # variable is localized to its loop, so $jmax always had its
            # pre-loop value again in the two statements which follow.
            # That effective behavior is retained here.
            for my $jj ( $jlimit .. $maximum_field_index ) {
                $$rtokens[$jj]         = $$old_rtokens[$jj];
                $$rfields[ $jj + 1 ]   = '';
                $$rpatterns[ $jj + 1 ] = $$old_rpatterns[ $jj + 1 ];
            }

            # restore the last field of the line, which the loop may have
            # blanked
            $$rfields[$jmax] = $comment;
            $new_line->set_jmax($jmax);
        }
        return;

      NO_MATCH:
        ##print "BUBBA: no match jmax=$jmax  max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
        my_flush();
        return;
    }
}
20075
sub check_fit {

    # See if a new line will fit into the columns of the current group.
    #
    # Called as: check_fit( $new_line, $old_line )
    #   $new_line - the line being added
    #   $old_line - the group line whose field widths are compared with,
    #               and widened for, the new line
    #
    # Fields of $old_line are widened as needed to hold the fields of
    # $new_line.  If a field needs more space than is available, the
    # saved alignment columns are restored and the group is flushed.
    # $group_maximum_gap is updated with the largest gap seen, excluding
    # the gap to the side comment.

    return unless ( $maximum_line_index >= 0 );
    my ( $new_line, $old_line ) = @_;

    my $jmax                = $new_line->get_jmax();
    my $leading_space_count = $new_line->get_leading_space_count();
    my $rfields             = $new_line->get_rfields();

    my $padding_available = $old_line->get_available_space_on_right();

    # save current columns in case this doesn't work
    save_alignment_columns();

    my $maximum_field_index = $old_line->get_jmax();
    foreach my $j ( 0 .. $jmax ) {

        # $pad is the number of spaces by which field $j must grow
        my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
        $pad += $leading_space_count if ( $j == 0 );

        # the first field and the gap to the side comment do not count
        # towards the largest gap of the group
        my $counts_as_gap = ( $j > 0 && $j < $jmax - 1 );

        # the field already fits; just remember the largest gap
        if ( $pad < 0 ) {
            if ( $counts_as_gap && $group_maximum_gap < -$pad ) {
                $group_maximum_gap = -$pad;
            }
            next;
        }

        ## This patch helps sometimes, but it doesn't check to see if
        ## the line is too long even without the side comment.  It needs
        ## to be reworked.
        ##don't let a long token with no trailing side comment push
        ##side comments out, or end a group.  (sidecmt1.t)
        ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);

        # This line will need space; lets see if we want to accept it..
        # Not if this won't fit.  (Previously, there were upper bounds
        # placed on padding here (maximum_whitespace_columns), but they
        # were not really helpful.)
        if ( $pad > $padding_available ) {

            # revert to starting state then flush; things didn't work out
            restore_alignment_columns();
            my_flush();
            last;
        }

        # patch to avoid excessive gaps in previous lines,
        # due to a line of fewer fields.
        #   return join( ".",
        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
        next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );

        # looks ok, squeeze this field in
        $old_line->increase_field_width( $j, $pad );
        $padding_available -= $pad;

        # remember largest gap of the group, excluding gap to side comment
        if ( $counts_as_gap && $pad > $group_maximum_gap ) {
            $group_maximum_gap = $pad;
        }
    }
}
20160
sub accept_line {

    # Store a line in the current alignment group, starting a new group
    # if none is in progress.
    #
    # Called as: accept_line( $new_line )
    my ($new_line) = @_;

    $maximum_line_index++;
    $group_lines[$maximum_line_index] = $new_line;

    if ( $maximum_line_index > 0 ) {

        # The group already exists: share the alignments of the line
        # stored just before this one
        my $previous_line = $group_lines[ $maximum_line_index - 1 ];
        $new_line->set_alignments( $previous_line->get_alignments() );
    }
    elsif ( $maximum_line_index == 0 ) {

        # First line of a new group: create an alignment at the ending
        # column of each field
        my $jmax    = $new_line->get_jmax();
        my $rfields = $new_line->get_rfields();
        my $rtokens = $new_line->get_rtokens();
        my $column  = $new_line->get_leading_space_count();

        foreach my $j ( 0 .. $jmax ) {
            $column += length( $rfields->[$j] );

            # the last field has no alignment token after it
            my $token = ( $j < $jmax ) ? $rtokens->[$j] : "";
            $new_line->set_alignment( $j, make_alignment( $column, $token ) );
        }

        $maximum_jmax_seen = $jmax;
        $minimum_jmax_seen = $jmax;
    }
    else {

        # a negative index after the increment is treated like an
        # existing group
        my $previous_line = $group_lines[ $maximum_line_index - 1 ];
        $new_line->set_alignments( $previous_line->get_alignments() );
    }

    # remember group jmax extremes for next call to append_line
    $previous_minimum_jmax_seen = $minimum_jmax_seen;
    $previous_maximum_jmax_seen = $maximum_jmax_seen;
}
20202
sub dump_array {

    # Debug routine: print the arguments on one line, each one wrapped
    # in parentheses, e.g. dump_array( 'a', 'b' ) prints "(a)(b)".
    print '(' . join( ')(', @_ ) . ")\n";
}
20209
# flush() sends the current Perl::Tidy::VerticalAligner group down the
# pipeline to Perl::Tidy::FileWriter.

# This is the external flush.  In addition to flushing any group of
# lines in progress, it writes out and empties the cached line when
# there is no group.
sub flush {

    if ( $maximum_line_index >= 0 ) {

        # a group is in progress; the internal flush handles it
        my_flush();
    }
    elsif ($cached_line_type) {

        # no group, but a line is waiting in the cache: write it out
        # and then clear the cache
        $seqno_string = $cached_seqno_string;
        entab_and_output(
            $cached_line_text,
            $cached_line_leading_space_count,
            $last_group_level_written
        );
        $cached_line_type    = 0;
        $cached_line_text    = "";
        $cached_seqno_string = "";
    }
}
20231
20232 # This is the internal flush, which leaves the cache intact
sub my_flush {

    # Internal flush: write all lines of the current group and then start a
    # new group.  Nothing happens if the group is empty.  A group is either
    # a block of comment lines (plain strings) or a block of code lines
    # (line objects which may be vertically aligned).
    return if ( $maximum_line_index < 0 );

    my @line_numbers = ( 0 .. $maximum_line_index );

    #----------------------------------
    # Case 1: a group of comment lines
    #----------------------------------
    if ( $group_type eq 'COMMENT' ) {

        VALIGN_DEBUG_FLAG_APPEND0 && do {
            my ( $a, $b, $c ) = caller();
            print
"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";

        };

        my $indent = $comment_leading_space_count;

        # NOTE: the returned string is not used below; the call is kept so
        # that the effects of this routine remain exactly as before.
        my $leading_string = get_leading_string($indent);

        # Find the largest amount by which any comment, at the nominal
        # indentation, would run past the maximum line length
        my $max_excess = 0;
        foreach my $i (@line_numbers) {
            my $excess =
              length( $group_lines[$i] ) + $indent - $rOpts_maximum_line_length;
            $max_excess = $excess if ( $excess > $max_excess );
        }

        # If something is too long, pull the whole group to the left by
        # that amount (but not past column 0) and record the outdenting
        if ( $max_excess > 0 ) {
            $indent -= $max_excess;
            $indent = 0 if ( $indent < 0 );

            $last_outdented_line_at =
              $file_writer_object->get_output_line_number();
            $first_outdented_line_at = $last_outdented_line_at
              unless ($outdented_line_count);
            $outdented_line_count += ( $maximum_line_index + 1 );
        }

        # Write the comments: no side comment length, no further outdenting
        # of long lines, and no vertical tightness flags
        foreach my $i (@line_numbers) {
            write_leader_and_string( $indent, $group_lines[$i], 0, 0, "" );
        }
    }

    #-------------------------------
    # Case 2: a group of code lines
    #-------------------------------
    else {

        VALIGN_DEBUG_FLAG_APPEND0 && do {
            my $group_list_type = $group_lines[0]->get_list_type();
            my ( $a, $b, $c ) = caller();
            my $maximum_field_index = $group_lines[0]->get_jmax();
            print
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";

        };

        # The following steps are order dependent.

        # 1. some small groups are best left unaligned
        my $do_not_align = decide_if_aligned();

        # 2. optimize the side comment location
        $do_not_align = adjust_side_comment($do_not_align);

        # 3. recover spaces for the -lp option if possible
        my $extra_leading_spaces = get_extra_leading_spaces();

        # 4. all lines of this group have the same basic leading spacing,
        # which is taken from the first line
        my $group_leader_length = $group_lines[0]->get_leading_space_count();

        # 5. add extra leading spaces if helpful
        my $min_ci_gap =
          improve_continuation_indentation( $do_not_align,
            $group_leader_length );

        # 6. output all of the lines
        foreach my $i (@line_numbers) {
            write_vertically_aligned_line( $group_lines[$i], $min_ci_gap,
                $do_not_align, $group_leader_length, $extra_leading_spaces );
        }
    }

    initialize_for_new_group();
}
20316
sub decide_if_aligned {

    # Decide if a group of exactly two lines should be left unaligned
    # because the lines are not really similar.
    #
    # Returns a true value if the group should NOT be aligned.  Returns
    # nothing for groups which do not have exactly two lines, and for a
    # matching terminal line.
    return unless $maximum_line_index == 1;
    return if ($is_matching_terminal_line);

    my $first_line = $group_lines[0];
    my $is_list    = $first_line->get_list_type();

    # Lists are always aligned.  Anything else is left unaligned if
    #  - the match was only marginal, or
    #  - the two lines have a big gap, or
    #  - the lines have differing numbers of alignment tokens.
    #    TODO: this last test could be improved.  It occasionally rejects
    #    good matches.
    my $do_not_align = (
        !$is_list
          && ( $marginal_match
            || $group_maximum_gap > 12
            || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen )
    );

    # But if the first line has a side comment, try to convert the pair
    # into a simple comment group by merging all of the code fields.
    # (The group is known to have two lines at this point.)
    my $rfields = $first_line->get_rfields();
    my $jmax    = $first_line->get_jmax();
    if ( $do_not_align && length( $rfields->[$jmax] ) > 0 ) {
        combine_fields();
        $do_not_align = 0;
    }
    return $do_not_align;
}
20358
sub adjust_side_comment {

    my $do_not_align = shift;

    # See if the side comment field can be moved out a little to improve
    # readability.  The last field of a line is always the side comment
    # field.  Returns the (possibly cleared) $do_not_align flag.
    my $jmax = $group_lines[0]->get_jmax();

    # Find the first line of the group which has a side comment
    my $first_side_comment_line = -1;
    foreach my $i ( 0 .. $maximum_line_index ) {
        if ( length( $group_lines[$i]->get_rfields()->[$jmax] ) ) {
            $first_side_comment_line = $i;
            last;
        }
    }

    # Nothing to do if there are no side comments in this group
    return $do_not_align if ( $first_side_comment_line < 0 );

    # All adjustments are made through the first line of the group
    my $line = $group_lines[0];

    # The field just before the side comment; widening it pushes the side
    # comment to the right
    my $gap_field   = $jmax - 1;
    my $have_fields = ( $jmax + 1 > 0 );

    # the maximum space without exceeding the line length:
    my $avail = $line->get_available_space_on_right();

    # try to use the previous comment column
    my $move = $last_comment_column - $line->get_column($gap_field);

    # FUTURE UPDATES (using the side comment history buffer, whose entries
    # are [ line number, column ] pairs):
    # Be sure to ignore 'do not align' and  '} # end comments'
    # Find first $move > 0 and $move <= $avail as follows:
    # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
    # 2. try sc_col2 if (line-sc_line2) < 12
    # 3. try min possible space, plus up to 8,
    # 4. try min possible space

    my $min_space = $rOpts_minimum_space_to_comment - 1;

    if ( $have_fields && !$do_not_align ) {

        # if the previous column doesn't work, give up and use the minimum
        # space
        $move = $min_space if ( $move > $avail );

        # we want some minimum space to the comment, except when continuing
        # a previous side comment at the same level from the first line
        my $min_move = $min_space;
        if (   $move >= 0
            && $last_side_comment_length > 0
            && $first_side_comment_line == 0
            && $group_level == $last_group_level_written )
        {
            $min_move = 0;
        }
        $move = $min_move if ( $move < $min_move );

        # previously, an upper bound was placed on $move here,
        # (maximum_space_to_comment), but it was not helpful

        # don't exceed the available space
        $move = $avail if ( $move > $avail );

        # we can only increase space, never decrease
        $line->increase_field_width( $gap_field, $move ) if ( $move > 0 );

        # remember this column for the next group
        $last_comment_column = $line->get_column($gap_field);
    }

    # otherwise, try to at least line up the existing side comment location
    elsif ( $have_fields && $move > 0 && $move < $avail ) {
        $line->increase_field_width( $gap_field, $move );
        $do_not_align = 0;
    }

    # reset side comment column if we can't align
    else {
        forget_side_comment();
    }

    return $do_not_align;
}
20457
sub improve_continuation_indentation {
    my ( $do_not_align, $group_leader_length ) = @_;

    # Return the extra continuation indentation ('$min_ci_gap') which the
    # caller may add to lines of the group which already have some
    # continuation indentation.
    #
    # Parameters (both currently unused, kept for the existing callers):
    #   $do_not_align        - true if the group will not be aligned
    #   $group_leader_length - leading space count of the first group line
    #
    # Returns: always 0, because this feature is DEACTIVATED.
    #
    # The idea was to move all continuation lines closer to the next field
    # (unless it is a comment).  Without it, we occasionally get something
    # like this (Complex.pm):
    #
    # use overload '+' => \&plus,
    #   '-'            => \&minus,
    #   '*'            => \&multiply,
    #   ...
    #   'tan'          => \&tan,
    #   'atan2'        => \&atan2,
    #
    # Whereas with it, we could shift variables over to get this:
    #
    # use overload '+' => \&plus,
    #          '-'     => \&minus,
    #          '*'     => \&multiply,
    #          ...
    #          'tan'   => \&tan,
    #          'atan2' => \&atan2,
    #
    # The deactivated method was: for a group with more than two fields
    # which is being aligned, take the minimum over all lines having more
    # leading space than $group_leader_length of the gap
    #
    #   column(0) - leading_space_count - length(field 0)
    #
    # and use 0 if no such line exists.
    #
    # The trouble with this patch is that it may, for example, move in some
    # 'or's or ':'s, and leave some out, so that the left edge alignment
    # suffers.  The code which followed the unconditional return could never
    # execute and has been removed.
    return 0;
}
20522
sub write_vertically_aligned_line {

    my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
        $extra_leading_spaces )
      = @_;

    # Join the fields of one line object into a single string, inserting
    # the padding needed to reach the alignment columns, and pass the
    # result on to write_leader_and_string.
    #
    #   $line                 - the line object to write
    #   $min_ci_gap           - extra spaces for lines indented more than
    #                           the group leader
    #   $do_not_align         - true if alignment padding is to be skipped
    #   $group_leader_length  - leading space count of the first group line
    #   $extra_leading_spaces - additional leading spaces for the output

    my $rfields             = $line->get_rfields();
    my $leading_space_count = $line->get_leading_space_count();
    my $outdent_long_lines  = $line->get_outdent_long_lines();
    my $jmax                = $line->get_jmax();
    my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();

    # add any extra spaces
    $leading_space_count += $min_ci_gap
      if ( $leading_space_count > $group_leader_length );

    # start with the first field, then append the others with any padding
    my $str = $rfields->[0];

    foreach my $j ( 1 .. $jmax ) {

        my $field           = $rfields->[$j];
        my $is_side_comment = ( $j == $jmax );

        # skip zero-length side comments
        last if ( $is_side_comment && !( defined($field) && length($field) ) );

        # compute spaces of padding before this field
        my $col = $line->get_column( $j - 1 );
        my $pad = $col - ( length($str) + $leading_space_count );

        # without alignment, fields are simply joined, except that the side
        # comment still gets its minimum separation
        if ($do_not_align) {
            $pad = $is_side_comment ? $rOpts_minimum_space_to_comment - 1 : 0;
        }

        # if the -fpsc flag is set, move the side comment to the selected
        # column if and only if it is possible, ignoring constraints on
        # line length and minimum space to comment
        if ( $rOpts_fixed_position_side_comment && $is_side_comment ) {
            my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
            $pad = $newpad if ( $newpad >= 0 );
        }

        if ( !defined $field ) {
            write_diagnostics("UNDEFined field at j=$j\n");
        }

        # add this field; padding is only added when we have a finite field,
        # which avoids extra terminal spaces if we have empty fields
        if ( length($field) > 0 ) {
            $str .= ' ' x $pad if ( $pad > 0 );
            $str .= $field;
        }

        # update side comment history buffer
        if ($is_side_comment) {
            my $lineno = $file_writer_object->get_output_line_number();
            shift @side_comment_history;
            push @side_comment_history, [ $lineno, $col ];
        }
    }

    my $side_comment_length = ( length( $rfields->[$jmax] ) );

    # ship this line off
    write_leader_and_string( $leading_space_count + $extra_leading_spaces,
        $str, $side_comment_length, $outdent_long_lines,
        $rvertical_tightness_flags );
}
20606
sub get_extra_leading_spaces {

    #----------------------------------------------------------
    # Define any extra indentation space (for the -lp option).
    # Here is why:
    # If a list has side comments, sub scan_list must dump the
    # list before it sees everything.  When this happens, it sets
    # the indentation to the standard scheme, but notes how
    # many spaces it would have liked to use.  We may be able
    # to recover that space here in the event that all of the
    # lines of a list are back together again.
    #
    # Returns the number of extra leading spaces to use, or 0.
    #----------------------------------------------------------

    return 0 unless ($extra_indent_ok);

    # only an indentation object (a reference) can hold recoverable spaces
    my $indentation = $group_lines[0]->get_indentation();
    return 0 unless ( ref($indentation) );

    my $spaces_wanted = get_RECOVERABLE_SPACES($indentation);

    # all indentation objects must be the same
    foreach my $i ( 1 .. $maximum_line_index ) {
        if ( $indentation != $group_lines[$i]->get_indentation() ) {
            $spaces_wanted = 0;
            last;
        }
    }
    return 0 unless ($spaces_wanted);

    # use what is wanted, up to the maximum space available without
    # exceeding the line length
    my $avail = $group_lines[0]->get_available_space_on_right();
    my $extra_leading_spaces =
      ( $avail > $spaces_wanted ) ? $spaces_wanted : $avail;

    # update the indentation object because with -icp the terminal
    # ');' will use the same adjustment.
    $indentation->permanently_decrease_AVAILABLE_SPACES(
        -$extra_leading_spaces );

    return $extra_leading_spaces;
}
20654
sub combine_fields {

    # Combine all fields except for the comment field  ( sidecmt.t ), so
    # that each line of the group is left with just two fields: the code
    # and the side comment.
    # Uses global variables:
    #  @group_lines
    #  $maximum_line_index
    my $old_jmax = $group_lines[0]->get_jmax();

    # Pass 1: merge the fields of every line and zero the two columns
    foreach my $i ( 0 .. $maximum_line_index ) {
        my $line    = $group_lines[$i];
        my $rfields = $line->get_rfields();

        # append the interior fields to the first field
        foreach my $k ( 1 .. $old_jmax - 1 ) {
            $rfields->[0] .= $rfields->[$k];
        }

        # and move the side comment into the second field
        $rfields->[1] = $rfields->[$old_jmax];

        $line->set_jmax(1);
        $line->set_column( 0, 0 );
        $line->set_column( 1, 0 );
    }

    # Pass 2: widen the two fields as needed to hold the text of every
    # line.  This is done only after all columns have been zeroed in pass 1.
    foreach my $i ( 0 .. $maximum_line_index ) {
        my $line    = $group_lines[$i];
        my $rfields = $line->get_rfields();
        foreach my $k ( 0 .. 1 ) {
            my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);

            # the first field also has to hold the leading whitespace
            $pad += $line->get_leading_space_count() if ( $k == 0 );

            $line->increase_field_width( $k, $pad ) if ( $pad > 0 );
        }
    }
}
20692
sub get_output_line_number {

    # The output line number reported to a caller is the number of items
    # written plus the number of items in the buffer.
    my $self = shift;

    # $maximum_line_index is the index of the last buffered line (it is
    # negative when the buffer is empty), so the buffer count is one more
    my $buffered = 1 + $maximum_line_index;
    my $written  = $file_writer_object->get_output_line_number();
    return $buffered + $written;
}
20700
sub write_leader_and_string {

    my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
        $rvertical_tightness_flags )
      = @_;

    # Write one line, consisting of leading whitespace plus text, or hold
    # it in the cache if it might be combined with the next line.  A
    # previously cached line is either joined with this line or written
    # out first.
    #
    #   $leading_space_count       - number of leading spaces for $str
    #   $str                       - the text of the line
    #   $side_comment_length       - length of any side comment within $str
    #   $outdent_long_lines        - true if a long line may be outdented
    #   $rvertical_tightness_flags - reference to recombination data, or
    #                                a false value if there is none

    # handle outdenting of long lines; the side comment is not counted
    if ($outdent_long_lines) {
        my $length_without_comment =
          length($str) - $side_comment_length + $leading_space_count;
        if ( $length_without_comment - $rOpts_maximum_line_length > 0 ) {
            $leading_space_count = 0;
            $last_outdented_line_at =
              $file_writer_object->get_output_line_number();
            $first_outdented_line_at = $last_outdented_line_at
              unless ($outdented_line_count);
            $outdented_line_count++;
        }
    }

    # Make preliminary leading whitespace.  It could get changed
    # later by entabbing, so we have to keep track of any changes
    # to the leading_space_count from here on.
    my $leading_string = "";
    if ( $leading_space_count > 0 ) {
        $leading_string = ' ' x $leading_space_count;
    }

    # Unpack any recombination data; it was packed by
    # sub send_lines_to_vertical_aligner. Contents:
    #
    #   [0] type: 1=opening  2=closing  3=opening block brace
    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
    #             if closing: spaces of padding to use
    #   [2] sequence number of container
    #   [3] valid flag: do not append if this flag is false
    #   [4],[5] are unpacked as $seqno_beg and $seqno_end
    #
    my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
        $seqno_end )
      = $rvertical_tightness_flags ? @{$rvertical_tightness_flags} : ();

    $seqno_string = $seqno_end;

    # handle any cached line ..
    # either append this line to it or write it out
    if ( length($cached_line_text) ) {

        # this becomes true if this line is joined with the cached line
        my $combined = 0;

        if ($cached_line_valid) {

            # handle cached line with opening container token
            if ( $cached_line_type == 1 || $cached_line_type == 3 ) {

                my $gap = $leading_space_count - length($cached_line_text);

                # handle option of just one tight opening per line:
                if (   $cached_line_flag == 1
                    && defined($open_or_close)
                    && $open_or_close == 1 )
                {
                    $gap = -1;
                }

                # this line can follow the cached text if that text does
                # not extend past the start of this line
                if ( $gap >= 0 ) {
                    $leading_string      = $cached_line_text . ' ' x $gap;
                    $leading_space_count = $cached_line_leading_space_count;
                    $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
                    $combined     = 1;
                }
            }

            # handle cached line to place before this closing container
            # token
            else {
                my $test_line =
                  $cached_line_text . ' ' x $cached_line_flag . $str;

                if ( length($test_line) <= $rOpts_maximum_line_length ) {

                    $seqno_string = $cached_seqno_string . ':' . $seqno_beg;

                    # Patch to outdent closing tokens ending # in ');'
                    # If we are joining a line like ');' to a previous
                    # stacked set of closing tokens, then decide if we may
                    # outdent the combined stack to the indentation of the
                    # ');'.  Since we should not normally outdent any of
                    # the other tokens more than the indentation of the
                    # lines that contained them, we will only do this if
                    # all of the corresponding opening tokens were on the
                    # same line.  This can happen with -sot and -sct.  For
                    # example, it is ok here:
                    #   __PACKAGE__->load_components( qw(
                    #         PK::Auto
                    #         Core
                    #   ));
                    #
                    #   But, for example, we do not outdent in this example
                    #   because that would put the closing sub brace out
                    #   farther than the opening sub brace:
                    #
                    #   perltidy -sot -sct
                    #   $c->Tk::bind(
                    #       '<Control-f>' => sub {
                    #           my ($c) = @_;
                    #           my $e = $c->XEvent;
                    #           itemsUnderArea $c;
                    #       } );
                    #
                    if (   $str =~ /^\);/
                        && $cached_line_text =~ /^[\)\}\]\s]*$/ )
                    {

                        # The way to tell this is if the stacked sequence
                        # numbers of this output line are the reverse of
                        # the stacked sequence numbers of the previous
                        # non-blank line of sequence numbers.  So we can
                        # join if the previous nonblank string of tokens is
                        # the mirror image.  For example if stack )}] is
                        # 13:8:6 then we are looking for a leading stack
                        # like [{( which is 6:8:13 We only need to check
                        # the two ends, because the intermediate tokens
                        # must fall in order.  Note on speed: having to
                        # split on colons and eliminate multiple colons
                        # might appear to be slow, but it's not an issue
                        # because we almost never come through here.  In a
                        # typical file we don't.
                        $seqno_string               =~ s/^:+//;
                        $last_nonblank_seqno_string =~ s/^:+//;
                        $seqno_string               =~ s/:+/:/g;
                        $last_nonblank_seqno_string =~ s/:+/:/g;

                        # how many spaces can we outdent?
                        my $diff =
                          $cached_line_leading_space_count -
                          $leading_space_count;

                        if (   $diff > 0
                            && length($seqno_string)
                            && length($last_nonblank_seqno_string) ==
                            length($seqno_string) )
                        {
                            my @seqno_last =
                              ( split ':', $last_nonblank_seqno_string );
                            my @seqno_now = ( split ':', $seqno_string );

                            if (   $seqno_now[-1] == $seqno_last[0]
                                && $seqno_now[0] == $seqno_last[-1] )
                            {

                                # OK to outdent ..
                                # for absolute safety, be sure we only
                                # remove whitespace.  (A failure of this
                                # test shouldn't happen, but is not
                                # critical; the indentation is then simply
                                # left unchanged.)
                                my $ws = substr( $test_line, 0, $diff );
                                if ( length($ws) == $diff && $ws =~ /^\s+$/ )
                                {
                                    $test_line = substr( $test_line, $diff );
                                    $cached_line_leading_space_count -= $diff;
                                }
                            }
                        }
                    }

                    # the combined text already includes its leading
                    # whitespace
                    $str                 = $test_line;
                    $leading_string      = "";
                    $leading_space_count = $cached_line_leading_space_count;
                    $combined            = 1;
                }
            }
        }

        # a cached line which could not be combined goes out by itself
        if ( !$combined ) {
            entab_and_output( $cached_line_text,
                $cached_line_leading_space_count,
                $last_group_level_written );
        }
    }
    $cached_line_type = 0;
    $cached_line_text = "";

    # make the line to be written
    my $line = $leading_string . $str;

    # Write or cache this line.  It is cached only if it has an opening or
    # closing type and no side comment.
    if ( $open_or_close && !( $side_comment_length > 0 ) ) {
        $cached_line_text                = $line;
        $cached_line_type                = $open_or_close;
        $cached_line_flag                = $tightness_flag;
        $cached_seqno                    = $seqno;
        $cached_line_valid               = $valid;
        $cached_line_leading_space_count = $leading_space_count;
        $cached_seqno_string             = $seqno_string;
    }
    else {
        entab_and_output( $line, $leading_space_count, $group_level );
    }

    $last_group_level_written = $group_level;
    $last_side_comment_length = $side_comment_length;
    $extra_indent_ok          = 0;
}
20907
sub entab_and_output {
    my ( $line, $leading_space_count, $level ) = @_;

    # Convert the leading spaces of a line to tabs, if requested, and send
    # the line (plus a newline) to the file writer.
    #
    #   $line                - the complete line, leading spaces included
    #   $leading_space_count - the number of leading spaces in $line
    #   $level               - indentation level of the line; used only for
    #                          the one-tab-per-level option
    #
    # Also updates $last_nonblank_seqno_string from $seqno_string when the
    # latter is true.

    # The line is currently correct if there is no tabbing (recommended!)
    # We may have to lop off some leading spaces and replace with tabs.
    if (   $leading_space_count > 0
        && ( $rOpts_tabs || $rOpts_entab_leading_whitespace )
        && $rOpts_indent_columns > 0 )
    {
        my $leading_string;

        # Handle entab option: one tab for each full group of
        # $rOpts_entab_leading_whitespace spaces
        if ($rOpts_entab_leading_whitespace) {
            my $space_count =
              $leading_space_count % $rOpts_entab_leading_whitespace;
            my $tab_count =
              int( $leading_space_count / $rOpts_entab_leading_whitespace );
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }

        # Handle option of one tab per level
        else {
            my $space_count =
              $leading_space_count - $level * $rOpts_indent_columns;

            # shouldn't happen: fewer spaces than the level requires.
            # Keep the spaces, and report the level which was actually used
            # in the calculation (the $level argument).
            if ( $space_count < 0 ) {
                warning(
"Error entabbing in entab_and_output: for level=$level count=$leading_space_count\n"
                );
                $leading_string = ( ' ' x $leading_space_count );
            }
            else {
                $leading_string = ( "\t" x $level ) . ( ' ' x $space_count );
            }
        }

        # Only replace text which really is the expected whitespace
        if ( $line =~ /^\s{$leading_space_count}/ ) {
            substr( $line, 0, $leading_space_count ) = $leading_string;
        }
        else {

            # shouldn't happen - program error counting whitespace
            # we'll skip entabbing
            warning(
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
            );
        }
    }

    $file_writer_object->write_code_line( $line . "\n" );
    if ($seqno_string) {
        $last_nonblank_seqno_string = $seqno_string;
    }
}
20977
{    # begin get_leading_string

    # Cache of leading strings already built.  Each key records everything
    # the result depends on (the tab mode, its parameters, and the whitespace
    # count), so a cached string is reused only when rebuilding it would
    # give the same answer.
    my %leading_string_cache;

    sub get_leading_string {

        # Define the leading whitespace string for this line.
        #
        # Given:   the number of columns of leading whitespace wanted
        # Returns: a string of tabs and/or spaces for that many columns, or
        #          the empty string if the count is zero or negative
        #
        # The form of the string is selected by $rOpts_tabs,
        # $rOpts_entab_leading_whitespace and $rOpts_indent_columns; with
        # one tab per level it also depends on the current $group_level.
        my $leading_whitespace_count = shift;

        # Handle case of zero whitespace, which includes multi-line quotes
        # (which may have a finite level; this prevents tab problems)
        if ( $leading_whitespace_count <= 0 ) {
            return "";
        }

        # Decide which form applies and build the matching cache key
        my ( $mode, $cache_key );
        if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
            || $rOpts_indent_columns <= 0 )
        {
            $mode      = 'spaces';
            $cache_key = "s:$leading_whitespace_count";
        }
        elsif ($rOpts_entab_leading_whitespace) {
            $mode = 'entab';
            $cache_key =
              "e:$rOpts_entab_leading_whitespace:$leading_whitespace_count";
        }
        else {
            $mode = 'tab_per_level';
            $cache_key =
"t:$group_level:$rOpts_indent_columns:$leading_whitespace_count";
        }

        # look for previous result
        my $leading_string = $leading_string_cache{$cache_key};
        return $leading_string if defined($leading_string);

        # must compute a string for this number of spaces

        # Handle simple case of no tabs
        if ( $mode eq 'spaces' ) {
            $leading_string = ( ' ' x $leading_whitespace_count );
        }

        # Handle entab option
        elsif ( $mode eq 'entab' ) {
            my $space_count =
              $leading_whitespace_count % $rOpts_entab_leading_whitespace;
            my $tab_count = int(
                $leading_whitespace_count / $rOpts_entab_leading_whitespace );
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }

        # Handle option of one tab per level
        else {
            $leading_string = ( "\t" x $group_level );
            my $space_count =
              $leading_whitespace_count - $group_level * $rOpts_indent_columns;

            # shouldn't happen:
            if ( $space_count < 0 ) {
                warning(
"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
                );
                $leading_string = ( ' ' x $leading_whitespace_count );
            }
            else {
                $leading_string .= ( ' ' x $space_count );
            }
        }
        $leading_string_cache{$cache_key} = $leading_string;
        return $leading_string;
    }
}    # end get_leading_string
21038
sub report_anything_unusual {

    # Write a short summary to the logfile if any long lines were outdented:
    # how many, where the first (and, if more than one, the last) occurred,
    # and which options control this.  Nothing is written otherwise.
    my $self = shift;
    if ( $outdented_line_count > 0 ) {
        my @entries = (
            "$outdented_line_count long lines were outdented:\n",
            "  First at output line $first_outdented_line_at\n",
        );

        # the 'Last' line is only informative if it differs from 'First'
        if ( $outdented_line_count > 1 ) {
            push @entries, "   Last at output line $last_outdented_line_at\n";
        }
        push @entries,
          "  use -noll to prevent outdenting, -l=n to increase line length\n",
          "\n";

        foreach my $entry (@entries) {
            write_logfile_entry($entry);
        }
    }
}
21057
21058 #####################################################################
21059 #
21060 # the Perl::Tidy::FileWriter class writes the output file
21061 #
21062 #####################################################################
21063
21064 package Perl::Tidy::FileWriter;
21065
21066 # Maximum number of little messages; probably need not be changed.
21067 use constant MAX_NAG_MESSAGES => 6;
21068
sub write_logfile_entry {

    # Forward the given message(s) to the logger object, if there is one;
    # otherwise do nothing.
    my $self   = shift;
    my $logger = $self->{_logger_object};
    $logger->write_logfile_entry(@_) if ($logger);
}
21076
sub new {

    # Create a FileWriter.
    #   $line_sink_object - object which receives the output lines
    #   $rOpts            - reference to the options hash
    #   $logger_object    - object for logfile entries (may be undefined)
    my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;

    # all of the line length statistics start at zero
    my @zeroed_keys = qw(
      _consecutive_blank_lines
      _consecutive_nonblank_lines
      _first_line_length_error
      _max_line_length_error
      _last_line_length_error
      _first_line_length_error_at
      _max_line_length_error_at
      _last_line_length_error_at
      _line_length_error_count
      _max_output_line_length
      _max_output_line_length_at
    );

    my $self = {
        _line_sink_object   => $line_sink_object,
        _logger_object      => $logger_object,
        _rOpts              => $rOpts,
        _output_line_number => 1,
        ( map { $_ => 0 } @zeroed_keys ),
    };
    return bless $self, $class;
}
21099
sub tee_on {

    # Pass a tee_on request along to the line sink object
    my ($self) = @_;
    my $sink = $self->{_line_sink_object};
    $sink->tee_on();
}
21104
sub tee_off {

    # Pass a tee_off request along to the line sink object
    my ($self) = @_;
    my $sink = $self->{_line_sink_object};
    $sink->tee_off();
}
21109
sub get_output_line_number {

    # Return the stored output line number
    my ($writer) = @_;
    return $writer->{_output_line_number};
}
21114
sub decrement_output_line_number {

    # Reduce the stored output line number by one
    my ($writer) = @_;
    $writer->{_output_line_number}--;
}
21119
sub get_consecutive_nonblank_lines {

    # Return the current count of consecutive nonblank lines
    my ($writer) = @_;
    return $writer->{_consecutive_nonblank_lines};
}
21124
sub reset_consecutive_blank_lines {

    # Set the count of consecutive blank lines back to zero
    my ($writer) = @_;
    $writer->{_consecutive_blank_lines} = 0;
}
21129
sub want_blank_line {

    # Ask for a blank code line, but only if the count of consecutive
    # blank lines is currently zero
    my ($self) = @_;
    $self->write_blank_code_line()
      unless ( $self->{_consecutive_blank_lines} );
}
21136
sub require_blank_code_lines {

    # Bring the number of consecutive blank lines up to $count.  The blank
    # lines are written in 'forced' mode, which ignores the value of -mbl,
    # unless -mbl=0.  This allows extra blank lines to be written for subs
    # and packages even with the default -mbl=1
    my ( $self, $count ) = @_;

    # number of blank lines still missing
    my $remaining = $count - $self->{_consecutive_blank_lines};

    my $forced = $self->{_rOpts}->{'maximum-consecutive-blank-lines'} > 0;

    while ( $remaining > 0 ) {
        $self->write_blank_code_line($forced);
        $remaining--;
    }
}
21151
sub write_blank_code_line {

    # Write one blank line.  Unless $forced is true, the line is dropped
    # when the limit on consecutive blank lines has already been reached.
    my ( $self, $forced ) = @_;

    unless ($forced) {
        my $limit = $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
        return if ( $self->{_consecutive_blank_lines} >= $limit );
    }

    $self->{_consecutive_nonblank_lines} = 0;
    $self->{_consecutive_blank_lines}++;
    $self->write_line("\n");
}
21164
sub write_code_line {

    # Write one line of code, keeping the counts of consecutive blank and
    # nonblank lines up to date.  A blank line is dropped if the limit on
    # consecutive blank lines has already been reached.
    my ( $self, $line ) = @_;

    my $is_blank = ( $line =~ /^\s*$/ );

    if ( !$is_blank ) {
        $self->{_consecutive_blank_lines} = 0;
        $self->{_consecutive_nonblank_lines}++;
    }
    else {
        my $limit = $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
        return if ( $self->{_consecutive_blank_lines} >= $limit );
        $self->{_consecutive_blank_lines}++;
        $self->{_consecutive_nonblank_lines} = 0;
    }
    $self->write_line($line);
}
21183
sub write_line {

    # Send one line to the line sink object and update the output line
    # number and the line length statistics stored in this object.
    my ( $self, $text ) = @_;

    # TODO: go through and see if the test is necessary here
    $self->{_output_line_number}++ if ( $text =~ /\n$/ );

    $self->{_line_sink_object}->write_line($text);

    my $rOpts       = $self->{_rOpts};
    my $text_length = length($text);

    # Excess length: the '- 1' leaves one character out of the count (the
    # line is expected to end in a newline).  Each leading tab is counted
    # as 'indent-columns' columns; any internal tabs are ignored.
    my $exceed = $text_length - $rOpts->{'maximum-line-length'} - 1;
    if ( $text =~ /^\t+/g ) {
        $exceed += pos($text) * ( $rOpts->{'indent-columns'} - 1 );
    }

    # The output line number was already advanced to its future value
    # above, so the line just written is one less
    my $this_line_number = $self->{_output_line_number} - 1;

    # remember the longest line seen so far
    if ( $text_length > 1 + $self->{_max_output_line_length} ) {
        $self->{_max_output_line_length}    = $text_length - 1;
        $self->{_max_output_line_length_at} = $this_line_number;
    }

    if ( $exceed > 0 ) {

        # this is the latest error ..
        $self->{_last_line_length_error}    = $exceed;
        $self->{_last_line_length_error_at} = $this_line_number;

        # .. and possibly the first ..
        if ( $self->{_line_length_error_count} == 0 ) {
            $self->{_first_line_length_error}    = $exceed;
            $self->{_first_line_length_error_at} = $this_line_number;
        }

        # .. and possibly the largest
        if ( $exceed > $self->{_max_line_length_error} ) {
            $self->{_max_line_length_error}    = $exceed;
            $self->{_max_line_length_error_at} = $this_line_number;
        }

        # only the first few errors get their own logfile entry
        if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
            $self->write_logfile_entry(
                "Line length exceeded by $exceed characters\n");
        }
        $self->{_line_length_error_count}++;
    }

}
21231
sub report_line_length_errors {

    # Write a summary of line length statistics to the logfile: either the
    # maximum output line length if no line was too long, or the number of
    # long lines with the location and size of the first (and, if more than
    # one, the maximum and last) excess.
    my $self        = shift;
    my $max_allowed = $self->{_rOpts}->{'maximum-line-length'};
    my $error_count = $self->{_line_length_error_count};

    if ( $error_count != 0 ) {

        my $several = ( $error_count > 1 );

        my $plural = $several ? "s" : "";
        $self->write_logfile_entry(
"$error_count output line$plural exceeded $max_allowed characters:\n"
        );

        my $label    = $several ? "First" : "";
        my $first    = $self->{_first_line_length_error};
        my $first_at = $self->{_first_line_length_error_at};
        $self->write_logfile_entry(
            " $label at line $first_at by $first characters\n");

        if ($several) {
            my $max     = $self->{_max_line_length_error};
            my $max_at  = $self->{_max_line_length_error_at};
            my $last    = $self->{_last_line_length_error};
            my $last_at = $self->{_last_line_length_error_at};
            $self->write_logfile_entry(
                " Maximum at line $max_at by $max characters\n");
            $self->write_logfile_entry(
                " Last at line $last_at by $last characters\n");
        }
    }
    else {
        $self->write_logfile_entry(
            "No lines exceeded $max_allowed characters\n");
        my $longest    = $self->{_max_output_line_length};
        my $longest_at = $self->{_max_output_line_length_at};
        $self->write_logfile_entry(
            "  Maximum output line length was $longest at line $longest_at\n"
        );
    }
}
21274
21275 #####################################################################
21276 #
21277 # The Perl::Tidy::Debugger class shows line tokenization
21278 #
21279 #####################################################################
21280
21281 package Perl::Tidy::Debugger;
21282
sub new {

    # Create a Debugger for the given debug file name.  The file itself is
    # not opened here.
    my $class    = shift;
    my $filename = shift;

    my $self = {
        _debug_file        => $filename,
        _debug_file_opened => 0,
        _fh                => undef,
    };
    return bless $self, $class;
}
21293
sub really_open_debug_file {

    # Open the debug file for writing and write a one-line header to it.
    #
    # The 'opened' flag is set whether or not the open succeeds.  If the
    # open fails, a warning is issued and the stored file handle is left
    # undefined.
    my $self       = shift;
    my $debug_file = $self->{_debug_file};

    # Pass the mode separately so that the file name is never parsed for
    # mode characters or stripped of leading or trailing whitespace.
    my $fh = IO::File->new( $debug_file, 'w' );
    unless ($fh) {
        warn("can't open $debug_file: $!\n");
    }
    $self->{_debug_file_opened} = 1;
    $self->{_fh}                = $fh;

    # nothing more can be done without a file handle
    return unless ($fh);

    print {$fh}
      "Use -dump-token-types (-dtt) to get a list of token type codes\n";
}
21307
sub close_debug_file {

    # Close the debug file if it has been marked as opened.  The close is
    # wrapped in an eval so that a failure here is never fatal.
    my ($self) = @_;
    my $fh = $self->{_fh};
    eval { $fh->close() } if ( $self->{_debug_file_opened} );
}
21317
sub write_debug_entry {

    # This is a debug dump routine which may be modified as necessary
    # to dump tokens on a line-by-line basis.  The output will be written
    # to the .DEBUG file when the -D flag is entered.
    #
    # Given a reference to a 'line_of_tokens' hash, two lines are written:
    # the text rebuilt from the tokens, and below it a string in which each
    # single-character token type is repeated to the length of its token.
    # Both are prefixed with the input line number.
    #
    # Nothing is written if no debug file handle is available.
    my $self           = shift;
    my $line_of_tokens = shift;

    # Several of these are only needed by the optional output statements
    # which are commented out below
    my $input_line        = $line_of_tokens->{_line_text};
    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
    my $rtokens           = $line_of_tokens->{_rtokens};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $rslevels          = $line_of_tokens->{_rslevels};
    my $rblock_type       = $line_of_tokens->{_rblock_type};
    my $input_line_number = $line_of_tokens->{_line_number};
    my $line_type         = $line_of_tokens->{_line_type};

    my ( $j, $num );

    my $token_str              = "$input_line_number: ";
    my $reconstructed_original = "$input_line_number: ";
    my $block_str              = "$input_line_number: ";

    #$token_str .= "$line_type: ";
    #$reconstructed_original .= "$line_type: ";

    my $pattern   = "";
    my @next_char = ( '"', '"' );
    my $i_next    = 0;
    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
    my $fh = $self->{_fh};

    # The handle is undefined if the debug file could not be opened;
    # printing to it would be a fatal error, so just skip the dump.
    return unless ($fh);

    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {

        # testing patterns
        if ( $$rtoken_type[$j] eq 'k' ) {
            $pattern .= $$rtokens[$j];
        }
        else {
            $pattern .= $$rtoken_type[$j];
        }
        $reconstructed_original .= $$rtokens[$j];
        $block_str .= "($$rblock_type[$j])";
        $num = length( $$rtokens[$j] );
        my $type_str = $$rtoken_type[$j];

        # be sure there are no blank tokens (shouldn't happen)
        # This can only happen if a programming error has been made
        # because all valid tokens are non-blank
        if ( $type_str eq ' ' ) {
            print {$fh} "BLANK TOKEN on the next line\n";
            $type_str = $next_char[$i_next];
            $i_next   = 1 - $i_next;
        }

        # stretch a one-character type to line up under its token
        if ( length($type_str) == 1 ) {
            $type_str = $type_str x $num;
        }
        $token_str .= $type_str;
    }

    # Write what you want here ...
    # print $fh "$input_line\n";
    # print $fh "$pattern\n";
    print {$fh} "$reconstructed_original\n";
    print {$fh} "$token_str\n";

    #print $fh "$block_str\n";
}
21387
21388 #####################################################################
21389 #
21390 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
21391 # method for returning the next line to be parsed, as well as a
21392 # 'peek_ahead()' method
21393 #
21394 # The input parameter is an object with a 'get_line()' method
21395 # which returns the next line to be parsed
21396 #
21397 #####################################################################
21398
21399 package Perl::Tidy::LineBuffer;
21400
sub new {

    # Create a LineBuffer which reads from the given source object (any
    # object with a get_line() method) and starts with an empty lookahead
    # buffer.
    my ( $class, $line_source_object ) = @_;

    my $self = {
        _line_source_object => $line_source_object,
        _rlookahead_buffer  => [],
    };
    return bless $self, $class;
}
21411
sub peek_ahead {

    # Return the line which is $buffer_index lines ahead of the current
    # read position without consuming it; index 0 is the line which the
    # next call to get_line() will return.  Returns undef if the source
    # runs out of lines before the requested line is reached.
    #
    # Lines read from the source are kept in the lookahead buffer so that
    # get_line() can return them later in their original order.
    my ( $self, $buffer_index ) = @_;
    my $line_source_object = $self->{_line_source_object};
    my $rlookahead_buffer  = $self->{_rlookahead_buffer};

    # Read ahead until the requested line is in the buffer.  Reading just
    # one line would return the wrong line whenever the index is more than
    # one past the end of the buffer.
    while ( $buffer_index >= scalar(@$rlookahead_buffer) ) {
        my $line = $line_source_object->get_line();
        push( @$rlookahead_buffer, $line );

        # an undefined line marks the end of the source
        last unless ( defined($line) );
    }
    return $$rlookahead_buffer[$buffer_index];
}
21427
sub get_line {

    # Return the next line: the oldest line in the lookahead buffer if it
    # is not empty, otherwise a new line from the source object.
    my ($self) = @_;
    my $buffered = $self->{_rlookahead_buffer};

    return shift @$buffered if ( scalar(@$buffered) );

    my $line = $self->{_line_source_object}->get_line();
    return $line;
}
21442
21443 ########################################################################
21444 #
21445 # the Perl::Tidy::Tokenizer package is essentially a filter which
21446 # reads lines of perl source code from a source object and provides
21447 # corresponding tokenized lines through its get_line() method.  Lines
21448 # flow from the source_object to the caller like this:
21449 #
21450 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
21451 #   get_line()         get_line()           get_line()     line_of_tokens
21452 #
21453 # The source object can be any object with a get_line() method which
# supplies one line (a character string) per call.
21455 # The LineBuffer object is created by the Tokenizer.
21456 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
21457 # containing one tokenized line for each call to its get_line() method.
21458 #
# WARNING: This is not a real class yet.  Only one tokenizer may be used.
21460 #
21461 ########################################################################
21462
21463 package Perl::Tidy::Tokenizer;
21464
BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts

    use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
    use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
    use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
    use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
    use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;

    # Print a reminder for each debug flag which is switched on
    my @flag_table = (
        [ 'EXPECT',   TOKENIZER_DEBUG_FLAG_EXPECT ],
        [ 'NSCAN',    TOKENIZER_DEBUG_FLAG_NSCAN ],
        [ 'QUOTE',    TOKENIZER_DEBUG_FLAG_QUOTE ],
        [ 'SCAN_ID',  TOKENIZER_DEBUG_FLAG_SCAN_ID ],
        [ 'TOKENIZE', TOKENIZER_DEBUG_FLAG_TOKENIZE ],
    );
    foreach my $entry (@flag_table) {
        my ( $key, $is_on ) = @$entry;
        if ($is_on) {
            print "TOKENIZER_DEBUGGING with key $key\n";
        }
    }

}
21487
21488 use Carp;
21489
# PACKAGE VARIABLES for processing an entire FILE.
21491 use vars qw{
21492   $tokenizer_self
21493
21494   $last_nonblank_token
21495   $last_nonblank_type
21496   $last_nonblank_block_type
21497   $statement_type
21498   $in_attribute_list
21499   $current_package
21500   $context
21501
21502   %is_constant
21503   %is_user_function
21504   %user_function_prototype
21505   %is_block_function
21506   %is_block_list_function
21507   %saw_function_definition
21508
21509   $brace_depth
21510   $paren_depth
21511   $square_bracket_depth
21512
21513   @current_depth
21514   @total_depth
21515   $total_depth
21516   @nesting_sequence_number
21517   @current_sequence_number
21518   @paren_type
21519   @paren_semicolon_count
21520   @paren_structural_type
21521   @brace_type
21522   @brace_structural_type
21523   @brace_context
21524   @brace_package
21525   @square_bracket_type
21526   @square_bracket_structural_type
21527   @depth_array
21528   @nested_ternary_flag
21529   @nested_statement_type
21530   @starting_line_of_current_depth
21531 };
21532
21533 # GLOBAL CONSTANTS for routines in this package
21534 use vars qw{
21535   %is_indirect_object_taker
21536   %is_block_operator
21537   %expecting_operator_token
21538   %expecting_operator_types
21539   %expecting_term_types
21540   %expecting_term_token
21541   %is_digraph
21542   %is_file_test_operator
21543   %is_trigraph
21544   %is_valid_token_type
21545   %is_keyword
21546   %is_code_block_token
21547   %really_want_term
21548   @opening_brace_names
21549   @closing_brace_names
21550   %is_keyword_taking_list
21551   %is_q_qq_qw_qx_qr_s_y_tr_m
21552 };
21553
21554 # possible values of operator_expected()
21555 use constant TERM     => -1;
21556 use constant UNKNOWN  => 0;
21557 use constant OPERATOR => 1;
21558
21559 # possible values of context
21560 use constant SCALAR_CONTEXT  => -1;
21561 use constant UNKNOWN_CONTEXT => 0;
21562 use constant LIST_CONTEXT    => 1;
21563
21564 # Maximum number of little messages; probably need not be changed.
21565 use constant MAX_NAG_MESSAGES => 6;
21566
{

    # methods to count instances; the counter is private to these subs
    my $instance_count = 0;

    sub get_count {
        return $instance_count;
    }

    sub _increment_count {
        $instance_count += 1;
        return $instance_count;
    }

    sub _decrement_count {
        $instance_count -= 1;
        return $instance_count;
    }
}
21575
sub DESTROY {

    # Lower the instance count when an object goes away
    my $self = $_[0];
    $self->_decrement_count();
}
21579
sub new {

    # Create the tokenizer object.
    #
    # Arguments are passed as a list of key => value pairs following the
    # class name; any key which is not given takes its value from
    # %defaults below.  The new object is stored in the package global
    # $tokenizer_self as well as being returned.
    #
    # Only one tokenizer may exist at a time: confess() is called if the
    # instance count exceeds 1 after this object has been counted.
    my $class = shift;

    # Note: 'tabs' and 'indent_columns' are temporary and should be
    # removed asap
    my %defaults = (
        source_object        => undef,
        debugger_object      => undef,
        diagnostics_object   => undef,
        logger_object        => undef,
        starting_level       => undef,
        indent_columns       => 4,
        tabs                 => 0,
        entab_leading_space  => undef,
        look_for_hash_bang   => 0,
        trim_qw              => 1,
        look_for_autoloader  => 1,
        look_for_selfloader  => 1,
        starting_line_number => 1,
    );

    # caller's values override the defaults
    my %args = ( %defaults, @_ );

    # we are given an object with a get_line() method to supply source lines
    my $source_object = $args{source_object};

    # we create another object with a get_line() and peek_ahead() method
    my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);

    # Tokenizer state data is as follows:
    # _rhere_target_list    reference to list of here-doc targets
    # _here_doc_target      the target string for a here document
    # _here_quote_character the type of here-doc quoting (" ' ` or none)
    #                       to determine if interpolation is done
    # _quote_target         character we seek if chasing a quote
    # _line_start_quote     line where we started looking for a long quote
    # _in_here_doc          flag indicating if we are in a here-doc
    # _in_pod               flag set if we are in pod documentation
    # _in_error             flag set if we saw severe error (binary in script)
    # _in_data              flag set if we are in __DATA__ section
    # _in_end               flag set if we are in __END__ section
    # _in_format            flag set if we are in a format description
    # _in_attribute_list    flag telling if we are looking for attributes
    # _in_quote             flag telling if we are chasing a quote
    # _starting_level       indentation level of first line
    # _input_tabstr         string denoting one indentation level of input file
    # _know_input_tabstr    flag indicating if we know _input_tabstr
    # _line_buffer_object   object with get_line() method to supply source code
    # _diagnostics_object   place to write debugging information
    # _unexpected_error_count  error count used to limit output
    # _lower_case_labels_at  line numbers where lower case labels seen

    # NOTE(review): this assignment replaces whatever $tokenizer_self held
    # before.  If that was the last reference to an earlier tokenizer, the
    # earlier object is presumably destroyed (and the instance count
    # lowered by DESTROY) at this point, before the count is checked below
    # -- confirm before changing the order of these steps.
    $tokenizer_self = {
        _rhere_target_list                  => [],
        _in_here_doc                        => 0,
        _here_doc_target                    => "",
        _here_quote_character               => "",
        _in_data                            => 0,
        _in_end                             => 0,
        _in_format                          => 0,
        _in_error                           => 0,
        _in_pod                             => 0,
        _in_attribute_list                  => 0,
        _in_quote                           => 0,
        _quote_target                       => "",
        _line_start_quote                   => -1,
        _starting_level                     => $args{starting_level},
        _know_starting_level                => defined( $args{starting_level} ),
        _tabs                               => $args{tabs},
        _entab_leading_space                => $args{entab_leading_space},
        _indent_columns                     => $args{indent_columns},
        _look_for_hash_bang                 => $args{look_for_hash_bang},
        _trim_qw                            => $args{trim_qw},
        _input_tabstr                       => "",
        _know_input_tabstr                  => -1,
        _last_line_number                   => $args{starting_line_number} - 1,
        _saw_perl_dash_P                    => 0,
        _saw_perl_dash_w                    => 0,
        _saw_use_strict                     => 0,
        _saw_v_string                       => 0,
        _look_for_autoloader                => $args{look_for_autoloader},
        _look_for_selfloader                => $args{look_for_selfloader},
        _saw_autoloader                     => 0,
        _saw_selfloader                     => 0,
        _saw_hash_bang                      => 0,
        _saw_end                            => 0,
        _saw_data                           => 0,
        _saw_negative_indentation           => 0,
        _started_tokenizing                 => 0,
        _line_buffer_object                 => $line_buffer_object,
        _debugger_object                    => $args{debugger_object},
        _diagnostics_object                 => $args{diagnostics_object},
        _logger_object                      => $args{logger_object},
        _unexpected_error_count             => 0,
        _started_looking_for_here_target_at => 0,
        _nearly_matched_here_target_at      => undef,
        _line_text                          => "",
        _rlower_case_labels_at              => undef,
    };

    # These are called without arguments after $tokenizer_self has been
    # set; presumably they work through that global -- they are defined
    # elsewhere in this file.
    prepare_for_a_new_file();
    find_starting_indentation_level();

    bless $tokenizer_self, $class;

    # This is not a full class yet, so die if an attempt is made to
    # create more than one object.

    if ( _increment_count() > 1 ) {
        confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
    }

    return $tokenizer_self;

}
21695
21696 # interface to Perl::Tidy::Logger routines
sub warning {

    # Pass a warning message to the logger object, if there is one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->warning(@_) if ($logger);
}
21703
sub complain {

    # Pass a complaint to the logger object, if there is one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->complain(@_) if ($logger);
}
21710
sub write_logfile_entry {

    # Pass a logfile entry to the logger object, if there is one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->write_logfile_entry(@_) if ($logger);
}
21717
sub interrupt_logfile {

    # Forward an interrupt_logfile request to the logger object, if any
    my $logger = $tokenizer_self->{_logger_object};
    $logger->interrupt_logfile() if ($logger);
}
21724
sub resume_logfile {

    # Forward a resume_logfile request to the logger object, if any
    my $logger = $tokenizer_self->{_logger_object};
    $logger->resume_logfile() if ($logger);
}
21731
sub increment_brace_error {

    # Forward an increment_brace_error request to the logger object, if any
    my $logger = $tokenizer_self->{_logger_object};
    $logger->increment_brace_error() if ($logger);
}
21738
sub report_definite_bug {

    # Forward a report_definite_bug request to the logger object, if any
    my $logger = $tokenizer_self->{_logger_object};
    $logger->report_definite_bug() if ($logger);
}
21745
sub brace_warning {

    # Pass a brace warning message to the logger object, if there is one
    my $logger = $tokenizer_self->{_logger_object};
    $logger->brace_warning(@_) if ($logger);
}
21752
sub get_saw_brace_error {

    # Return the logger object's brace error indicator, or 0 if there is
    # no logger object
    my $logger = $tokenizer_self->{_logger_object};
    return $logger ? $logger->get_saw_brace_error() : 0;
}
21762
21763 # interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {

    # Pass a message to the diagnostics object, if there is one
    my $diagnostics = $tokenizer_self->{_diagnostics_object};
    $diagnostics->write_diagnostics(@_) if ($diagnostics);
}
21769
sub report_tokenization_errors {

    # Report on the state of the tokenizer: warn about anything which was
    # still open (indentation, format, pod, here-doc, quote), and write
    # some suggestions to the logfile.
    my $self = shift;

    my $final_level = get_indentation_level();
    warning("final indentation level: $final_level\n")
      if ( $final_level != $tokenizer_self->{_starting_level} );

    check_final_nesting_depths();

    # a hash-bang line was requested but never found
    if ( $tokenizer_self->{_look_for_hash_bang}
        && !$tokenizer_self->{_saw_hash_bang} )
    {
        warning(
            "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
    }

    warning("hit EOF while in format description\n")
      if ( $tokenizer_self->{_in_format} );

    if ( $tokenizer_self->{_in_pod} ) {

        # Just write log entry if this is after __END__ or __DATA__
        # because this happens too often, and it is not likely to be
        # a parsing error.
        my $after_end_or_data =
          ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} );

        if ( !$after_end_or_data ) {
            complain(
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
            );
        }
        else {
            write_logfile_entry(
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
            );
        }

    }

    if ( $tokenizer_self->{_in_here_doc} ) {
        my $target     = $tokenizer_self->{_here_doc_target};
        my $started_at = $tokenizer_self->{_started_looking_for_here_target_at};
        if ( !$target ) {
            warning(
"hit EOF in here document starting at line $started_at with empty target string\n"
            );
        }
        else {
            warning(
"hit EOF in here document starting at line $started_at with target: $target\n"
            );
        }

        # mention a line which would have matched except for whitespace
        my $near_miss_at = $tokenizer_self->{_nearly_matched_here_target_at};
        if ($near_miss_at) {
            warning(
"NOTE: almost matched at input line $near_miss_at except for whitespace\n"
            );
        }
    }

    if ( $tokenizer_self->{_in_quote} ) {
        my $started_at = $tokenizer_self->{_line_start_quote};
        my $seeking    = $tokenizer_self->{_quote_target};
        my $what       = "quote/pattern";
        if ( $tokenizer_self->{_in_attribute_list} ) {
            $what = "attribute list";
        }
        warning(
"hit EOF seeking end of $what starting at line $started_at ending in $seeking\n"
        );
    }

    # the wording of this suggestion depends on the version of perl
    # which is running
    if ( !$tokenizer_self->{_saw_perl_dash_w} ) {
        my $suggestion =
          ( $] < 5.006 )
          ? "Suggest including '-w parameter'\n"
          : "Suggest including 'use warnings;'\n";
        write_logfile_entry($suggestion);
    }

    write_logfile_entry("Use of -P parameter for defines is discouraged\n")
      if ( $tokenizer_self->{_saw_perl_dash_P} );

    write_logfile_entry("Suggest including 'use strict;'\n")
      if ( !$tokenizer_self->{_saw_use_strict} );

    # it is suggested that labels have at least one upper case character
    # for legibility and to avoid code breakage as new keywords are introduced
    my $rlower_case_labels_at = $tokenizer_self->{_rlower_case_labels_at};
    if ($rlower_case_labels_at) {
        my @line_numbers = @{$rlower_case_labels_at};
        write_logfile_entry(
            "Suggest using upper case characters in label(s)\n");

        # the list separator makes the line numbers appear as (n1)(n2)...
        local $" = ')(';
        write_logfile_entry("  defined at line(s): (@line_numbers)\n");
    }
}
21874
21875 sub report_v_string {
21876
21877     # Remember the line number of the first v-string seen, and warn if
21878     # this version of perl can't handle v-strings
21879     # USES GLOBAL VARIABLES: $tokenizer_self
21880     my ($tok) = @_;
21881     $tokenizer_self->{_saw_v_string} ||= $tokenizer_self->{_last_line_number};
21882     if ( $] < 5.006 ) {
21883         warning(
21884 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
21885         );
21886     }
21887 }
21888
21889 sub get_input_line_number {    # number of the last line read by get_line
21890     return $$tokenizer_self{_last_line_number};
21891 }
21892
21893 # Returns a hash ref describing the next input line, or undef at end of input
21894 sub get_line {
21895
21896     my $self = shift;    # not used below; state is kept in $tokenizer_self
21897
21898     # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
21899     # $square_bracket_depth, $paren_depth
21900
21901     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
21902     $tokenizer_self->{_line_text} = $input_line;
21903     # end of input: test definedness so that a last line of "0" is not lost
21904     return undef unless ( defined $input_line );
21905
21906     my $input_line_number = ++$tokenizer_self->{_last_line_number};
21907
21908     # Remove whatever characters terminate this line: first the input
21909     # record separator, then any run of trailing carriage returns or
21910     # \035 / \032 control characters.  The removed text is not used again
21911     # in this routine, so it is not saved.
21912     chomp($input_line);
21913
21914     # TODO: what other characters should be included here?
21915     $input_line =~ s/(?:\r|\035|\032)+$//;
21916
21917     # for backwards compatibility we keep the line text terminated with
21918     # a newline character, and store the cleaned line so that the saved
21919     # _line_text agrees with what is returned
21920     $input_line .= "\n";
21921     $tokenizer_self->{_line_text} = $input_line;    # update
21922
21923     # create a data structure describing this line which will be
21924     # returned to the caller.
21925
21926     # _line_type codes are:
21927     #   SYSTEM         - system-specific code before hash-bang line
21928     #   CODE           - line of perl code (including comments)
21929     #   POD_START      - line starting pod, such as '=head'
21930     #   POD            - pod documentation text
21931     #   POD_END        - last line of pod section, '=cut'
21932     #   HERE           - text of here-document
21933     #   HERE_END       - last line of here-doc (target word)
21934     #   FORMAT         - format section
21935     #   FORMAT_END     - last line of format section, '.'
21936     #   DATA_START     - __DATA__ line
21937     #   DATA           - unidentified text following __DATA__
21938     #   END_START      - __END__ line
21939     #   END            - unidentified text following __END__
21940     #   ERROR          - we are in big trouble, probably not a perl script
21941
21942     # Other variables:
21943     #   _curly_brace_depth     - depth of curly braces at start of line
21944     #   _square_bracket_depth  - depth of square brackets at start of line
21945     #   _paren_depth           - depth of parens at start of line
21946     #   _starting_in_quote     - this line continues a multi-line quote
21947     #                            (so don't trim leading blanks!)
21948     #   _ending_in_quote       - this line ends in a multi-line quote
21949     #                            (so don't trim trailing blanks!)
21950     my $line_of_tokens = {
21951         _line_type                => 'EOF',
21952         _line_text                => $input_line,
21953         _line_number              => $input_line_number,
21954         _rtoken_type              => undef,
21955         _rtokens                  => undef,
21956         _rlevels                  => undef,
21957         _rslevels                 => undef,
21958         _rblock_type              => undef,
21959         _rcontainer_type          => undef,
21960         _rcontainer_environment   => undef,
21961         _rtype_sequence           => undef,
21962         _rnesting_tokens          => undef,
21963         _rci_levels               => undef,
21964         _rnesting_blocks          => undef,
21965         _python_indentation_level => -1,                   ## 0,
21966         _starting_in_quote    => 0,                    # to be set by subroutine
21967         _ending_in_quote      => 0,
21968         _curly_brace_depth    => $brace_depth,
21969         _square_bracket_depth => $square_bracket_depth,
21970         _paren_depth          => $paren_depth,
21971         _quote_character      => '',
21972     };
21973
21974     # must print line unchanged if we are in a here document
21975     if ( $tokenizer_self->{_in_here_doc} ) {
21976
21977         $line_of_tokens->{_line_type} = 'HERE';
21978         my $here_doc_target      = $tokenizer_self->{_here_doc_target};
21979         my $here_quote_character = $tokenizer_self->{_here_quote_character};
21980         my $candidate_target     = $input_line;
21981         chomp $candidate_target;
21982         if ( $candidate_target eq $here_doc_target ) {
21983             $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21984             $line_of_tokens->{_line_type}                     = 'HERE_END';
21985             write_logfile_entry("Exiting HERE document $here_doc_target\n");
21986
21987             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21988             if (@$rhere_target_list) {    # there can be multiple here targets
21989                 ( $here_doc_target, $here_quote_character ) =
21990                   @{ shift @$rhere_target_list };
21991                 $tokenizer_self->{_here_doc_target} = $here_doc_target;
21992                 $tokenizer_self->{_here_quote_character} =
21993                   $here_quote_character;
21994                 write_logfile_entry(
21995                     "Entering HERE document $here_doc_target\n");
21996                 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21997                 $tokenizer_self->{_started_looking_for_here_target_at} =
21998                   $input_line_number;
21999             }
22000             else {
22001                 $tokenizer_self->{_in_here_doc}          = 0;
22002                 $tokenizer_self->{_here_doc_target}      = "";
22003                 $tokenizer_self->{_here_quote_character} = "";
22004             }
22005         }
22006
22007         # check for error of extra whitespace
22008         # note for PERL6: leading whitespace is allowed
22009         else {
22010             $candidate_target =~ s/\s*$//;
22011             $candidate_target =~ s/^\s*//;
22012             if ( $candidate_target eq $here_doc_target ) {
22013                 $tokenizer_self->{_nearly_matched_here_target_at} =
22014                   $input_line_number;
22015             }
22016         }
22017         return $line_of_tokens;
22018     }
22019
22020     # must print line unchanged if we are in a format section
22021     elsif ( $tokenizer_self->{_in_format} ) {
22022
22023         if ( $input_line =~ /^\.[\s#]*$/ ) {
22024             write_logfile_entry("Exiting format section\n");
22025             $tokenizer_self->{_in_format} = 0;
22026             $line_of_tokens->{_line_type} = 'FORMAT_END';
22027         }
22028         else {
22029             $line_of_tokens->{_line_type} = 'FORMAT';
22030         }
22031         return $line_of_tokens;
22032     }
22033
22034     # must print line unchanged if we are in pod documentation
22035     elsif ( $tokenizer_self->{_in_pod} ) {
22036
22037         $line_of_tokens->{_line_type} = 'POD';
22038         if ( $input_line =~ /^=cut/ ) {
22039             $line_of_tokens->{_line_type} = 'POD_END';
22040             write_logfile_entry("Exiting POD section\n");
22041             $tokenizer_self->{_in_pod} = 0;
22042         }
22043         if ( $input_line =~ /^\#\!.*perl\b/ ) {
22044             warning(
22045                 "Hash-bang in pod can cause older versions of perl to fail! \n"
22046             );
22047         }
22048
22049         return $line_of_tokens;
22050     }
22051
22052     # must print line unchanged if we have seen a severe error (i.e., we
22053     # are seeing illegal tokens and cannot continue.  Syntax errors do
22054     # not pass this route).  Calling routine can decide what to do, but
22055     # the default can be to just pass all lines as if they were after __END__
22056     elsif ( $tokenizer_self->{_in_error} ) {
22057         $line_of_tokens->{_line_type} = 'ERROR';
22058         return $line_of_tokens;
22059     }
22060
22061     # print line unchanged if we are __DATA__ section
22062     elsif ( $tokenizer_self->{_in_data} ) {
22063
22064         # ...but look for POD
22065         # Note that the _in_data and _in_end flags remain set
22066         # so that we return to that state after seeing the
22067         # end of a pod section
22068         if ( $input_line =~ /^=(?!cut)/ ) {
22069             $line_of_tokens->{_line_type} = 'POD_START';
22070             write_logfile_entry("Entering POD section\n");
22071             $tokenizer_self->{_in_pod} = 1;
22072             return $line_of_tokens;
22073         }
22074         else {
22075             $line_of_tokens->{_line_type} = 'DATA';
22076             return $line_of_tokens;
22077         }
22078     }
22079
22080     # print line unchanged if we are in __END__ section
22081     elsif ( $tokenizer_self->{_in_end} ) {
22082
22083         # ...but look for POD
22084         # Note that the _in_data and _in_end flags remain set
22085         # so that we return to that state after seeing the
22086         # end of a pod section
22087         if ( $input_line =~ /^=(?!cut)/ ) {
22088             $line_of_tokens->{_line_type} = 'POD_START';
22089             write_logfile_entry("Entering POD section\n");
22090             $tokenizer_self->{_in_pod} = 1;
22091             return $line_of_tokens;
22092         }
22093         else {
22094             $line_of_tokens->{_line_type} = 'END';
22095             return $line_of_tokens;
22096         }
22097     }
22098
22099     # check for a hash-bang line if we haven't seen one
22100     if ( !$tokenizer_self->{_saw_hash_bang} ) {
22101         if ( $input_line =~ /^\#\!.*perl\b/ ) {
22102             $tokenizer_self->{_saw_hash_bang} = $input_line_number;
22103
22104             # check for -w and -P flags
22105             if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
22106                 $tokenizer_self->{_saw_perl_dash_P} = 1;
22107             }
22108
22109             if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
22110                 $tokenizer_self->{_saw_perl_dash_w} = 1;
22111             }
22112
22113             if (   ( $input_line_number > 1 )
22114                 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
22115             {
22116
22117                 # this is helpful for VMS systems; we may have accidentally
22118                 # tokenized some DCL commands
22119                 if ( $tokenizer_self->{_started_tokenizing} ) {
22120                     warning(
22121 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
22122                     );
22123                 }
22124                 else {
22125                     complain("Useless hash-bang after line 1\n");
22126                 }
22127             }
22128
22129             # Report the leading hash-bang as a system line
22130             # This will prevent -dac from deleting it
22131             else {
22132                 $line_of_tokens->{_line_type} = 'SYSTEM';
22133                 return $line_of_tokens;
22134             }
22135         }
22136     }
22137
22138     # wait for a hash-bang before parsing if the user invoked us with -x
22139     if ( $tokenizer_self->{_look_for_hash_bang}
22140         && !$tokenizer_self->{_saw_hash_bang} )
22141     {
22142         $line_of_tokens->{_line_type} = 'SYSTEM';
22143         return $line_of_tokens;
22144     }
22145
22146     # a first line of the form ': #' will be marked as SYSTEM
22147     # since lines of this form may be used by tcsh
22148     if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
22149         $line_of_tokens->{_line_type} = 'SYSTEM';
22150         return $line_of_tokens;
22151     }
22152
22153     # now we know that it is ok to tokenize the line...
22154     # the line tokenizer will modify any of these private variables:
22155     #        _rhere_target_list
22156     #        _in_data
22157     #        _in_end
22158     #        _in_format
22159     #        _in_error
22160     #        _in_pod
22161     #        _in_quote
22162
22163     tokenize_this_line($line_of_tokens);
22164
22165     # Now finish defining the return structure and return it
22166     $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
22167
22168     # handle severe error (binary data in script)
22169     if ( $tokenizer_self->{_in_error} ) {
22170         $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
22171         warning("Giving up after error\n");
22172         $line_of_tokens->{_line_type} = 'ERROR';
22173         reset_indentation_level(0);          # avoid error messages
22174         return $line_of_tokens;
22175     }
22176
22177     # handle start of pod documentation
22178     if ( $tokenizer_self->{_in_pod} ) {
22179
22180         # This gets tricky..above a __DATA__ or __END__ section, perl
22181         # accepts '=cut' as the start of pod section. But afterwards,
22182         # only pod utilities see it and they may ignore an =cut without
22183         # leading =head.  In any case, this isn't good.
22184         if ( $input_line =~ /^=cut\b/ ) {
22185             if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
22186                 complain("=cut while not in pod ignored\n");
22187                 $tokenizer_self->{_in_pod}    = 0;
22188                 $line_of_tokens->{_line_type} = 'POD_END';
22189             }
22190             else {
22191                 $line_of_tokens->{_line_type} = 'POD_START';
22192                 complain(
22193 "=cut starts a pod section .. this can fool pod utilities.\n"
22194                 );
22195                 write_logfile_entry("Entering POD section\n");
22196             }
22197         }
22198
22199         else {
22200             $line_of_tokens->{_line_type} = 'POD_START';
22201             write_logfile_entry("Entering POD section\n");
22202         }
22203
22204         return $line_of_tokens;
22205     }
22206
22207     # update indentation levels for log messages
22208     if ( $input_line !~ /^\s*$/ ) {
22209         my $rlevels                      = $line_of_tokens->{_rlevels};
22210         my $structural_indentation_level = $$rlevels[0];
22211         my ( $python_indentation_level, $msg ) =
22212           find_indentation_level( $input_line, $structural_indentation_level );
22213         if ($msg) { write_logfile_entry("$msg") }
22214         if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
22215             $line_of_tokens->{_python_indentation_level} =
22216               $python_indentation_level;
22217         }
22218     }
22219
22220     # see if this line contains here doc targets
22221     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
22222     if (@$rhere_target_list) {
22223
22224         my ( $here_doc_target, $here_quote_character ) =
22225           @{ shift @$rhere_target_list };
22226         $tokenizer_self->{_in_here_doc}          = 1;
22227         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
22228         $tokenizer_self->{_here_quote_character} = $here_quote_character;
22229         write_logfile_entry("Entering HERE document $here_doc_target\n");
22230         $tokenizer_self->{_started_looking_for_here_target_at} =
22231           $input_line_number;
22232     }
22233
22234     # NOTE: __END__ and __DATA__ statements are written unformatted
22235     # because they can theoretically contain additional characters
22236     # which are not tokenized (and cannot be read with <DATA> either!).
22237     if ( $tokenizer_self->{_in_data} ) {
22238         $line_of_tokens->{_line_type} = 'DATA_START';
22239         write_logfile_entry("Starting __DATA__ section\n");
22240         $tokenizer_self->{_saw_data} = 1;
22241
22242         # keep parsing after __DATA__ if use SelfLoader was seen
22243         if ( $tokenizer_self->{_saw_selfloader} ) {
22244             $tokenizer_self->{_in_data} = 0;
22245             write_logfile_entry(
22246                 "SelfLoader seen, continuing; -nlsl deactivates\n");
22247         }
22248
22249         return $line_of_tokens;
22250     }
22251
22252     elsif ( $tokenizer_self->{_in_end} ) {
22253         $line_of_tokens->{_line_type} = 'END_START';
22254         write_logfile_entry("Starting __END__ section\n");
22255         $tokenizer_self->{_saw_end} = 1;
22256
22257         # keep parsing after __END__ if use AutoLoader was seen
22258         if ( $tokenizer_self->{_saw_autoloader} ) {
22259             $tokenizer_self->{_in_end} = 0;
22260             write_logfile_entry(
22261                 "AutoLoader seen, continuing; -nlal deactivates\n");
22262         }
22263         return $line_of_tokens;
22264     }
22265
22266     # now, finally, we know that this line is type 'CODE'
22267     $line_of_tokens->{_line_type} = 'CODE';
22268
22269     # remember if we have seen any real code
22270     if (  !$tokenizer_self->{_started_tokenizing}
22271         && $input_line !~ /^\s*$/
22272         && $input_line !~ /^\s*#/ )
22273     {
22274         $tokenizer_self->{_started_tokenizing} = 1;
22275     }
22276
22277     if ( $tokenizer_self->{_debugger_object} ) {
22278         $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
22279     }
22280
22281     # Note: if keyword 'format' occurs in this line code, it is still CODE
22282     # (keyword 'format' need not start a line)
22283     if ( $tokenizer_self->{_in_format} ) {
22284         write_logfile_entry("Entering format section\n");
22285     }
22286
22287     if ( $tokenizer_self->{_in_quote}
22288         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
22289     {
22290
22291         #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
22292         if (
22293             ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
22294         {
22295             $tokenizer_self->{_line_start_quote} = $input_line_number;
22296             write_logfile_entry(
22297                 "Start multi-line quote or pattern ending in $quote_target\n");
22298         }
22299     }
22300     elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
22301         and !$tokenizer_self->{_in_quote} )
22302     {
22303         $tokenizer_self->{_line_start_quote} = -1;
22304         write_logfile_entry("End of multi-line quote or pattern\n");
22305     }
22306
22307     # we are returning a line of CODE
22308     return $line_of_tokens;
22309 }
22310
22311 sub find_starting_indentation_level {
22312     # Find the indentation level at which the input starts, save it in
22313     # $tokenizer_self->{_starting_level}, and pass it to
22314     # reset_indentation_level.  USES GLOBAL VARIABLES: $tokenizer_self
22315     my $starting_level = 0;
22316
22317     # use value if given as parameter
22318     if ( $tokenizer_self->{_know_starting_level} ) {
22319         $starting_level = $tokenizer_self->{_starting_level};
22320     }
22321
22322     # if we know there is a hash_bang line, the level must be zero
22323     elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
22324         $tokenizer_self->{_know_starting_level} = 1;
22325     }
22326
22327     # otherwise figure it out from the input file
22328     else {
22329         my $line;
22330         my $i                            = 0;
22331         my $structural_indentation_level = -1; # flag for find_indentation_level
22332
22333         # keep looking at lines until we find a hash bang or piece of code
22334         my $msg = "";
22335         while ( $line =
22336             $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
22337         {
22338
22339             # if first line is #! then assume starting level is zero
22340             if ( $i == 1 && $line =~ /^\#\!/ ) {
22341                 $starting_level = 0;
22342                 last;
22343             }
22344             next if ( $line =~ /^\s*#/ );    # skip past comments
22345             next if ( $line =~ /^\s*$/ );    # skip past blank lines
22346             ( $starting_level, $msg ) =
22347               find_indentation_level( $line, $structural_indentation_level );
22348             if ($msg) { write_logfile_entry("$msg") }
22349             last;
22350         }
22351         $msg = "Line $i implies starting-indentation-level = $starting_level\n";
22352
22353         if ( $starting_level > 0 ) {
22354
22355             my $input_tabstr = $tokenizer_self->{_input_tabstr};
22356             if ( $input_tabstr eq "\t" ) {
22357                 $msg .= "by guessing input tabbing uses 1 tab per level\n";
22358             }
22359             else {
22360                 my $cols = length($input_tabstr);
22361                 $msg .=
22362                   "by guessing input tabbing uses $cols blanks per level\n";
22363             }
22364         }
22365         write_logfile_entry("$msg");
22366     }
22367     $tokenizer_self->{_starting_level} = $starting_level;
22368     reset_indentation_level($starting_level);
22369 }
22370
22371 # Find indentation level given an input line.  At the same time, try to
22372 # figure out the input tabbing scheme.
22373 #
22374 # There are two types of calls:
22375 #
22376 # Type 1: $structural_indentation_level < 0
22377 #  In this case we have to guess $input_tabstr to figure out the level.
22378 #
22379 # Type 2: $structural_indentation_level >= 0
22380 #  In this case the level of this line is known, and this routine can
22381 #  update the tabbing string, if still unknown, to make the level correct.
22382 # Returns the list ( $level, $msg ); $msg is "" unless a guess was logged.
22383 sub find_indentation_level {
22384     my ( $line, $structural_indentation_level ) = @_;
22385
22386     # USES GLOBAL VARIABLES: $tokenizer_self
22387     my $level = 0;
22388     my $msg   = "";
22389
22390     my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
22391     my $input_tabstr      = $tokenizer_self->{_input_tabstr};
22392
22393     # find leading whitespace
22394     my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
22395
22396     # make first guess at input tabbing scheme if necessary
22397     if ( $know_input_tabstr < 0 ) {
22398
22399         $know_input_tabstr = 0;
22400
22401         # When -et=n is used for the output formatting, we will assume that
22402         # tabs in the input formatting were also produced with -et=n.  This may
22403         # not be true, but it is the best guess because it will keep leading
22404         # whitespace unchanged on repeated formatting on small pieces of code
22405         # when -et=n is used.  Thanks to Sam Kington for this patch.
22406         if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) {
22407             $leading_whitespace =~ s{^ (\t*) }
22408            { " " x (length($1) * $tabsize) }xe;
22409             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
22410         }
22411         elsif ( $tokenizer_self->{_tabs} ) {
22412             $input_tabstr = "\t";
22413             if ( length($leading_whitespace) > 0 ) {
22414                 if ( $leading_whitespace !~ /\t/ ) {
22415
22416                     my $cols = $tokenizer_self->{_indent_columns};
22417
22418                     if ( length($leading_whitespace) < $cols ) {
22419                         $cols = length($leading_whitespace);
22420                     }
22421                     $input_tabstr = " " x $cols;
22422                 }
22423             }
22424         }
22425         else {
22426             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
22427
22428             if ( length($leading_whitespace) > 0 ) {
22429                 if ( $leading_whitespace =~ /^\t/ ) {
22430                     $input_tabstr = "\t";
22431                 }
22432             }
22433         }
22434         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
22435         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
22436     }
22437
22438     # determine the input tabbing scheme if possible
22439     if (   ( $know_input_tabstr == 0 )
22440         && ( length($leading_whitespace) > 0 )
22441         && ( $structural_indentation_level > 0 ) )
22442     {
22443         my $saved_input_tabstr = $input_tabstr;
22444
22445         # check for common case of one tab per indentation level,
22446         # i.e. the leading whitespace is exactly one tab for each
22447         # structural level
22448         if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
22449             $input_tabstr = "\t";
22450             $msg          = "Guessing old indentation was tab character\n";
22451         }
22452
22453         else {
22454
22455             # detab any tabs based on 8 blanks per tab
22456             my $entabbed = "";
22457             if ( $leading_whitespace =~ s/^(\t+)/" " x ( 8 * length($1) )/e ) {
22458                 $entabbed = "entabbed";
22459             }
22460
22461             # now compute tabbing from number of spaces
22462             my $columns =
22463               length($leading_whitespace) / $structural_indentation_level;
22464             if ( $columns == int $columns ) {
22465                 $msg =
22466                   "Guessing old indentation was $columns $entabbed spaces\n";
22467             }
22468             else {
22469                 $columns = int $columns;
22470                 $msg =
22471 "old indentation is unclear, using $columns $entabbed spaces\n";
22472             }
22473             $input_tabstr = " " x $columns;
22474         }
22475         $know_input_tabstr                    = 1;
22476         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
22477         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
22478
22479         # see if mistakes were made
22480         if ( ( $tokenizer_self->{_starting_level} > 0 )
22481             && !$tokenizer_self->{_know_starting_level} )
22482         {
22483
22484             if ( $input_tabstr ne $saved_input_tabstr ) {
22485                 complain(
22486 "I made a bad starting level guess; rerun with a value for -sil \n"
22487                 );
22488             }
22489         }
22490     }
22491
22492     # use current guess at input tabbing to get input indentation level
22493     #
22494     # Patch to handle a common case of entabbed leading whitespace
22495     # If the leading whitespace equals 4 spaces and we also have
22496     # tabs, detab the input whitespace assuming 8 spaces per tab.
22497     if ( length($input_tabstr) == 4 ) {
22498         $leading_whitespace =~ s/^(\t+)/" " x ( 8 * length($1) )/e;
22499     }
22500
22501     if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
22502         my $pos = 0;
22503
22504         while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
22505         {
22506             $pos += $len_tab;
22507             $level++;
22508         }
22509     }
22510     return ( $level, $msg );
22511 }
22512
22513 # Debug aid (currently unused): list the known subs and constants by package
22514 sub dump_functions {
22515
22516     # Everything is printed to STDOUT.  For each package in
22517     # %is_user_function, list every sub followed by 'block' or 'block_list'
22518     # when so flagged; then list the entries of %is_constant by package.
22519     my $fh = *STDOUT;
22520
22521     for my $pkg ( keys %is_user_function ) {
22522         print {$fh} "\nnon-constant subs in package $pkg\n";
22523
22524         for my $sub ( keys %{ $is_user_function{$pkg} } ) {
22525
22526             # look up both flags; 'block' wins when both are set
22527             my $is_block_list = $is_block_list_function{$pkg}{$sub};
22528             my $is_block      = $is_block_function{$pkg}{$sub};
22529             my $msg = $is_block ? 'block' : $is_block_list ? 'block_list' : "";
22530             print {$fh} "$sub $msg\n";
22531         }
22532     }
22533
22534     for my $pkg ( keys %is_constant ) {
22535         print {$fh} "\nconstants and constant subs in package $pkg\n";
22536
22537         for my $sub ( keys %{ $is_constant{$pkg} } ) {
22538             print {$fh} "$sub\n";
22539         }
22540     }
22541 }
22542
22543 sub ones_count {
22544     # Return the number of '1' characters in a string of 1's and 0's;
22545     # for example, ones_count("010101010101") returns 6
22546     my ($bit_string) = @_;
22547     return ( $bit_string =~ tr/1// );
22548 }
22549
22550 sub prepare_for_a_new_file {
22551     # Reset the tokenizer's file-level state variables to their initial values
22552     # previous tokens needed to determine what to expect next
22553     $last_nonblank_token      = ';';    # the only possible starting state which
22554     $last_nonblank_type       = ';';    # will make a leading brace a code block
22555     $last_nonblank_block_type = '';
22556
22557     # scalars for remembering statement types across multiple lines
22558     $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
22559     $in_attribute_list = 0;
22560
22561     # scalars for remembering where we are in the file
22562     $current_package = "main";
22563     $context         = UNKNOWN_CONTEXT;
22564
22565     # hashes used to remember function information
22566     %is_constant             = ();      # user-defined constants
22567     %is_user_function        = ();      # user-defined functions
22568     %user_function_prototype = ();      # their prototypes
22569     %is_block_function       = ();
22570     %is_block_list_function  = ();
22571     %saw_function_definition = ();
22572
22573     # variables used to track depths of various containers
22574     # and report nesting errors; each stack is indexed by its own depth
22575     $paren_depth          = 0;
22576     $brace_depth          = 0;
22577     $square_bracket_depth = 0;
22578     @current_depth[ 0 .. $#closing_brace_names ] =
22579       (0) x scalar @closing_brace_names;
22580     $total_depth = 0;
22581     @total_depth = ();
22582     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
22583       ( 0 .. $#closing_brace_names );
22584     @current_sequence_number             = ();
22585     $paren_type[$paren_depth]            = '';
22586     $paren_semicolon_count[$paren_depth] = 0;
22587     $paren_structural_type[$paren_depth] = '';
22588     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
22589     $brace_structural_type[$brace_depth]                   = '';
22590     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
22591     $brace_package[$brace_depth]                           = $current_package;
22592     $square_bracket_type[$square_bracket_depth]            = '';
22593     $square_bracket_structural_type[$square_bracket_depth] = '';
22594
22595     initialize_tokenizer_state();
22596 }
22597
22598 {                                       # begin tokenize_this_line
22599
22600     use constant BRACE          => 0;
22601     use constant SQUARE_BRACKET => 1;
22602     use constant PAREN          => 2;
22603     use constant QUESTION_COLON => 3;
          # The four constants above are numeric codes for container types.
          # In this file PAREN is passed to increase_nesting_depth and
          # decrease_nesting_depth, and PAREN, BRACE and SQUARE_BRACKET are
          # used as indexes into @depth_array.
22604
          # The lexical variables below are shared by the subs defined in
          # this block.  They are arranged in groups TV1..TV6;
          # save_tokenizer_state and restore_tokenizer_state copy them
          # group by group, so the variable lists there must stay in step
          # with the declarations here.
22605     # TV1: scalars for processing one LINE.
22606     # Re-initialized on each entry to sub tokenize_this_line.
22607     my (
22608         $block_type,        $container_type,    $expecting,
22609         $i,                 $i_tok,             $input_line,
22610         $input_line_number, $last_nonblank_i,   $max_token_index,
22611         $next_tok,          $next_type,         $peeked_ahead,
22612         $prototype,         $rhere_target_list, $rtoken_map,
22613         $rtoken_type,       $rtokens,           $tok,
22614         $type,              $type_sequence,     $indent_flag,
22615     );
22616
22617     # TV2: refs to ARRAYS for processing one LINE
22618     # Re-initialized on each call.
22619     my $routput_token_list     = [];    # stack of output token indexes
22620     my $routput_token_type     = [];    # token types
22621     my $routput_block_type     = [];    # types of code block
22622     my $routput_container_type = [];    # paren types, such as if, elsif, ..
22623     my $routput_type_sequence  = [];    # nesting sequential number
22624     my $routput_indent_flag    = [];    # presumably the $indent_flag values (see TV1) - TODO confirm
22625
22626     # TV3: SCALARS for quote variables.  These are initialized with a
22627     # subroutine call and continually updated as lines are processed.
          # (The subroutine is initialize_tokenizer_state.)
22628     my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22629         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
22630
22631     # TV4: SCALARS for multi-line identifiers and
22632     # statements. These are initialized with a subroutine call
22633     # and continually updated as lines are processed.
22634     my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
22635
22636     # TV5: SCALARS for tracking indentation level.
22637     # Initialized once and continually updated as lines are
22638     # processed.
          # ($rslevel_stack holds an array reference; the rest are plain
          # strings, flags and counters.)
22639     my (
22640         $nesting_token_string,      $nesting_type_string,
22641         $nesting_block_string,      $nesting_block_flag,
22642         $nesting_list_string,       $nesting_list_flag,
22643         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22644         $in_statement_continuation, $level_in_tokenizer,
22645         $slevel_in_tokenizer,       $rslevel_stack,
22646     );
22647
22648     # TV6: SCALARS for remembering several previous
22649     # tokens. Initialized once and continually updated as
22650     # lines are processed.
22651     my (
22652         $last_nonblank_container_type,     $last_nonblank_type_sequence,
22653         $last_last_nonblank_token,         $last_last_nonblank_type,
22654         $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
22655         $last_last_nonblank_type_sequence, $last_nonblank_prototype,
22656     );
22657
22658     # ----------------------------------------------------------------
22659     # beginning of tokenizer variable access and manipulation routines
22660     # ----------------------------------------------------------------
22661
sub initialize_tokenizer_state {

    # Put the persistent lexical tokenizer state (groups TV3..TV6) back
    # to its starting values.  Groups TV1 and TV2 are not touched here
    # because they are initialized on each call of the tokenizer.

    # TV3: quote scanning state
    ( $in_quote, $quote_pos, $quote_depth ) = ( 0, 0, 0 );
    $quote_type = 'Q';
    (
        $quote_character, $quoted_string_1,
        $quoted_string_2, $allowed_quote_modifiers,
    ) = ( "", "", "", "" );

    # TV4: multi-line identifier / statement state
    ( $id_scan_state, $identifier, $want_paren ) = ( '', '', "" );
    $indented_if_level = 0;

    # TV5: indentation tracking
    ( $nesting_token_string, $nesting_type_string ) = ( "", "" );

    # initially in a block
    ( $nesting_block_string, $nesting_block_flag ) = ( '1', 1 );

    # initially not in a list
    ( $nesting_list_string, $nesting_list_flag ) = ( '0', 0 );

    ( $ci_string_in_tokenizer, $continuation_string_in_tokenizer ) =
      ( "", "0" );
    (
        $in_statement_continuation, $level_in_tokenizer,
        $slevel_in_tokenizer,
    ) = ( 0, 0, 0 );
    $rslevel_stack = [];

    # TV6: memory of previous tokens
    ( $last_last_nonblank_token, $last_last_nonblank_type ) = ( ';', ';' );
    (
        $last_nonblank_container_type,
        $last_nonblank_type_sequence,
        $last_last_nonblank_block_type,
        $last_last_nonblank_container_type,
        $last_last_nonblank_type_sequence,
    ) = ( '', '', '', '', '' );
    $last_nonblank_prototype = "";
}
22706
sub save_tokenizer_state {

    # Take a snapshot of all lexical tokenizer variables.
    # return:
    #  a reference to an array of six array references, one per
    #  variable group TV1..TV6, in that order.  Scalar values are
    #  copied; variables which hold references are stored as those
    #  same references.  The result is meant to be handed back to
    #  restore_tokenizer_state.
    return [

        # TV1
        [
            $block_type,        $container_type,    $expecting,
            $i,                 $i_tok,             $input_line,
            $input_line_number, $last_nonblank_i,   $max_token_index,
            $next_tok,          $next_type,         $peeked_ahead,
            $prototype,         $rhere_target_list, $rtoken_map,
            $rtoken_type,       $rtokens,           $tok,
            $type,              $type_sequence,     $indent_flag,
        ],

        # TV2
        [
            $routput_token_list,    $routput_token_type,
            $routput_block_type,    $routput_container_type,
            $routput_type_sequence, $routput_indent_flag,
        ],

        # TV3
        [
            $in_quote,        $quote_type,
            $quote_character, $quote_pos,
            $quote_depth,     $quoted_string_1,
            $quoted_string_2, $allowed_quote_modifiers,
        ],

        # TV4
        [ $id_scan_state, $identifier, $want_paren, $indented_if_level ],

        # TV5
        [
            $nesting_token_string,      $nesting_type_string,
            $nesting_block_string,      $nesting_block_flag,
            $nesting_list_string,       $nesting_list_flag,
            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
            $in_statement_continuation, $level_in_tokenizer,
            $slevel_in_tokenizer,       $rslevel_stack,
        ],

        # TV6
        [
            $last_nonblank_container_type,
            $last_nonblank_type_sequence,
            $last_last_nonblank_token,
            $last_last_nonblank_type,
            $last_last_nonblank_block_type,
            $last_last_nonblank_container_type,
            $last_last_nonblank_type_sequence,
            $last_nonblank_prototype,
        ],
    ];
}
22756
sub restore_tokenizer_state {

    # Restore all lexical tokenizer variables from a snapshot made by
    # save_tokenizer_state.
    # given:
    #  $rstate = reference to the array of six array references
    #            (groups TV1..TV6) returned by save_tokenizer_state
    #
    # Each list below must name the same variables, in the same order,
    # as the corresponding list in save_tokenizer_state.
    my ($rstate) = @_;
    my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
    (
        $block_type,        $container_type,    $expecting,
        $i,                 $i_tok,             $input_line,
        $input_line_number, $last_nonblank_i,   $max_token_index,
        $next_tok,          $next_type,         $peeked_ahead,
        $prototype,         $rhere_target_list, $rtoken_map,
        $rtoken_type,       $rtokens,           $tok,
        $type,              $type_sequence,     $indent_flag,
    ) = @{$rTV1};

    # The last slot of TV2 is $routput_indent_flag.  It was previously
    # written as a second $routput_type_sequence, which left
    # $routput_indent_flag unrestored and overwrote
    # $routput_type_sequence with the saved indent flag reference.
    (
        $routput_token_list,    $routput_token_type,
        $routput_block_type,    $routput_container_type,
        $routput_type_sequence, $routput_indent_flag,
    ) = @{$rTV2};

    (
        $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
    ) = @{$rTV3};

    ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
      @{$rTV4};

    (
        $nesting_token_string,      $nesting_type_string,
        $nesting_block_string,      $nesting_block_flag,
        $nesting_list_string,       $nesting_list_flag,
        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
        $in_statement_continuation, $level_in_tokenizer,
        $slevel_in_tokenizer,       $rslevel_stack,
    ) = @{$rTV5};

    (
        $last_nonblank_container_type,
        $last_nonblank_type_sequence,
        $last_last_nonblank_token,
        $last_last_nonblank_type,
        $last_last_nonblank_block_type,
        $last_last_nonblank_container_type,
        $last_last_nonblank_type_sequence,
        $last_nonblank_prototype,
    ) = @{$rTV6};
}
22804
sub get_indentation_level {

    # Return the tokenizer's current indentation level.
    # Patch: while $indented_if_level is set, report one level less, to
    # avoid reporting an error if an indented if is not terminated.
    return $indented_if_level
      ? $level_in_tokenizer - 1
      : $level_in_tokenizer;
}
22811
sub reset_indentation_level {

    # Set both the level and the slevel of the tokenizer to the given
    # value, and push that value onto the slevel stack.
    # given:
    #  the new indentation level (first argument)
    my ($new_level) = @_;
    $level_in_tokenizer = $slevel_in_tokenizer = $new_level;
    push @{$rslevel_stack}, $new_level;
}
22817
sub peeked_ahead {

    # Combined setter/getter for $peeked_ahead.
    # given:
    #  an optional new value; an undefined or missing argument leaves
    #  the current value unchanged
    # return:
    #  the (possibly updated) value of $peeked_ahead
    my ($new_value) = @_;
    if ( defined $new_value ) { $peeked_ahead = $new_value }
    return $peeked_ahead;
}
22821
22822     # ------------------------------------------------------------
22823     # end of tokenizer variable access and manipulation routines
22824     # ------------------------------------------------------------
22825
22826     # ------------------------------------------------------------
22827     # beginning of various scanner interface routines
22828     # ------------------------------------------------------------
22829     sub scan_replacement_text {
22830
22831         # check for here-docs in replacement text invoked by
22832         # a substitution operator with executable modifier 'e'.
22833         #
22834         # given:
22835         #  $replacement_text
22836         # return:
22837         #  $rht = reference to any here-doc targets
              #         (undef if the text contains no '<<' or if no here-doc
              #         is pending after the scan).  When defined, $rht is a
              #         reference to an array whose first element is a
              #         [ target, quote character ] pair, followed by any
              #         entries taken from the _rhere_target_list of the
              #         temporary tokenizer.
              #
              # The text is scanned with a temporary, nested tokenizer.
              # The state of the current tokenizer is set aside first
              # (package variables with 'local', lexical variables with
              # save_tokenizer_state) and is put back before returning.
22838         my ($replacement_text) = @_;
22839
22840         # quick check
22841         return undef unless ( $replacement_text =~ /<</ );
22842
22843         write_logfile_entry("scanning replacement text for here-doc targets\n");
22844
22845         # save the logger object for error messages
              # (this must be done before $tokenizer_self is localized below)
22846         my $logger_object = $tokenizer_self->{_logger_object};
22847
22848         # localize all package variables
              # ('local' saves the current values and restores them
              # automatically when this sub returns)
22849         local (
22850             $tokenizer_self,                 $last_nonblank_token,
22851             $last_nonblank_type,             $last_nonblank_block_type,
22852             $statement_type,                 $in_attribute_list,
22853             $current_package,                $context,
22854             %is_constant,                    %is_user_function,
22855             %user_function_prototype,        %is_block_function,
22856             %is_block_list_function,         %saw_function_definition,
22857             $brace_depth,                    $paren_depth,
22858             $square_bracket_depth,           @current_depth,
22859             @total_depth,                    $total_depth,
22860             @nesting_sequence_number,        @current_sequence_number,
22861             @paren_type,                     @paren_semicolon_count,
22862             @paren_structural_type,          @brace_type,
22863             @brace_structural_type,          @brace_context,
22864             @brace_package,                  @square_bracket_type,
22865             @square_bracket_structural_type, @depth_array,
22866             @starting_line_of_current_depth, @nested_ternary_flag,
22867             @nested_statement_type,
22868         );
22869
22870         # save all lexical variables
22871         my $rstate = save_tokenizer_state();
22872         _decrement_count();    # avoid error check for multiple tokenizers
22873
22874         # make a new tokenizer
              # It reads from the replacement text, reports to the saved
              # logger, and is given the current $input_line_number as its
              # starting line number.
              # NOTE(review): the code below reads $tokenizer_self, so new()
              # presumably points it at the new tokenizer - confirm.
22875         my $rOpts = {};
22876         my $rpending_logfile_message;
22877         my $source_object =
22878           Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
22879             $rpending_logfile_message );
22880         my $tokenizer = Perl::Tidy::Tokenizer->new(
22881             source_object        => $source_object,
22882             logger_object        => $logger_object,
22883             starting_line_number => $input_line_number,
22884         );
22885
22886         # scan the replacement text
              # (loop until get_line returns a false value)
22887         1 while ( $tokenizer->get_line() );
22888
22889         # remove any here doc targets
              # If the scan ended while a here-doc was still pending, collect
              # its target and clear the here-doc flags.
22890         my $rht = undef;
22891         if ( $tokenizer_self->{_in_here_doc} ) {
22892             $rht = [];
22893             push @{$rht},
22894               [
22895                 $tokenizer_self->{_here_doc_target},
22896                 $tokenizer_self->{_here_quote_character}
22897               ];
22898             if ( $tokenizer_self->{_rhere_target_list} ) {
22899                 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
22900                 $tokenizer_self->{_rhere_target_list} = undef;
22901             }
22902             $tokenizer_self->{_in_here_doc} = undef;
22903         }
22904
22905         # now it's safe to report errors
22906         $tokenizer->report_tokenization_errors();
22907
22908         # restore all tokenizer lexical variables
22909         restore_tokenizer_state($rstate);
22910
22911         # return the here doc targets
22912         return $rht;
22913     }
22914
sub scan_bare_identifier {

    # Interface to scan_bare_identifier_do: pass it the current line
    # state and store what it returns back into the shared tokenizer
    # variables $i, $tok, $type and $prototype.
    my @updated =
      scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
        $rtoken_map, $max_token_index );
    ( $i, $tok, $type, $prototype ) = @updated;
}
22920
sub scan_identifier {

    # Interface to scan_identifier_do: pass it the current scan state
    # and store what it returns back into the shared tokenizer
    # variables $i, $tok, $type, $id_scan_state and $identifier.
    my @updated =
      scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
        $max_token_index, $expecting );
    ( $i, $tok, $type, $id_scan_state, $identifier ) = @updated;
}
22926
sub scan_id {

    # Interface to scan_id_do: pass it the current line state and store
    # what it returns back into the shared tokenizer variables $i, $tok,
    # $type and $id_scan_state.
    my @updated =
      scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
        $id_scan_state, $max_token_index );
    ( $i, $tok, $type, $id_scan_state ) = @updated;
}
22932
sub scan_number {

    # Interface to scan_number_do: updates the shared tokenizer
    # variables $i and $type from its first two return values.
    # return:
    #  the third value returned by scan_number_do (the number)
    my ( $new_i, $new_type, $number ) =
      scan_number_do( $input_line, $i, $rtoken_map, $type,
        $max_token_index );
    $i    = $new_i;
    $type = $new_type;
    return $number;
}
22940
22941     # a sub to warn if token found where term expected
sub error_if_expecting_TERM {

    # Report the current token as unexpected if a term is expected
    # here and the previous nonblank token type is one which really
    # wants a term.
    # return:
    #  1 if the error was reported, otherwise a false value
    if ( $expecting == TERM && $really_want_term{$last_nonblank_type} ) {
        unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
            $rtoken_type, $input_line );
        1;
    }
}
22951
22952     # a sub to warn if token found where operator expected
sub error_if_expecting_OPERATOR {

    # Report an unexpected token if an operator is expected here.
    # given:
    #  an optional description of the thing found; the current token
    #  is used if it is not defined
    # return:
    #  1 if the error was reported, otherwise a false value
    my ($thing) = @_;
    if ( $expecting == OPERATOR ) {
        $thing = $tok unless defined $thing;
        unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
            $rtoken_map, $rtoken_type, $input_line );

        # an unexpected token at the very start of a line suggests
        # that the previous line was not terminated
        if ( $i_tok == 0 ) {
            interrupt_logfile();
            warning("Missing ';' above?\n");
            resume_logfile();
        }
        1;
    }
}
22966
22967     # ------------------------------------------------------------
22968     # end scanner interfaces
22969     # ------------------------------------------------------------
22970
# keyword lookup tables: each hash maps a keyword to 1

# the two spellings of the 'for' loop keyword
my %is_for_foreach;
@_ = qw(for foreach);
$is_for_foreach{$_} = 1 foreach @_;

# lexical / package variable declarators
my %is_my_our;
@_ = qw(my our);
$is_my_our{$_} = 1 foreach @_;

# These keywords may introduce blocks after parenthesized expressions,
# in the form:
# keyword ( .... ) { BLOCK }
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
my %is_blocktype_with_paren;
@_ = qw(if elsif unless while until for foreach switch case given when);
$is_blocktype_with_paren{$_} = 1 foreach @_;
22986
22987     # ------------------------------------------------------------
22988     # begin hash of code for handling most token types
22989     # ------------------------------------------------------------
22990     my $tokenization_code = {
22991
22992         # no special code for these types yet, but syntax checks
22993         # could be added
22994
22995 ##      '!'   => undef,
22996 ##      '!='  => undef,
22997 ##      '!~'  => undef,
22998 ##      '%='  => undef,
22999 ##      '&&=' => undef,
23000 ##      '&='  => undef,
23001 ##      '+='  => undef,
23002 ##      '-='  => undef,
23003 ##      '..'  => undef,
23004 ##      '..'  => undef,
23005 ##      '...' => undef,
23006 ##      '.='  => undef,
23007 ##      '<<=' => undef,
23008 ##      '<='  => undef,
23009 ##      '<=>' => undef,
23010 ##      '<>'  => undef,
23011 ##      '='   => undef,
23012 ##      '=='  => undef,
23013 ##      '=~'  => undef,
23014 ##      '>='  => undef,
23015 ##      '>>'  => undef,
23016 ##      '>>=' => undef,
23017 ##      '\\'  => undef,
23018 ##      '^='  => undef,
23019 ##      '|='  => undef,
23020 ##      '||=' => undef,
23021 ##      '//=' => undef,
23022 ##      '~'   => undef,
23023 ##      '~~'  => undef,
23024 ##      '!~~'  => undef,
23025
'>' => sub {

    # a '>' found where a term is expected may be an error
    if ( $expecting == TERM ) { error_if_expecting_TERM() }
},
'|' => sub {

    # a '|' found where a term is expected may be an error
    if ( $expecting == TERM ) { error_if_expecting_TERM() }
},
'$' => sub {

    # A '$' starts a scalar: complain if an operator was expected,
    # then scan the identifier.
    if ( $expecting == OPERATOR ) {
        error_if_expecting_OPERATOR("Scalar");
    }
    scan_identifier();

    # remember if the warning variable was seen
    $tokenizer_self->{_saw_perl_dash_w} = 1 if ( $identifier eq '$^W' );

    # Check for identifier in indirect object slot
    # (vorboard.pl, sort.t).  Something like:
    #   /^(print|printf|sort|exec|system)$/
    my $in_indirect_object_slot =
         $is_indirect_object_taker{$last_nonblank_token}
      || ( $last_nonblank_token eq '('
        && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
      || $last_nonblank_type =~ /^[Uw]$/;    # possible object

    $type = 'Z' if ($in_indirect_object_slot);
},
23059         '(' => sub {
23060
                  # Handle an opening paren.  In order, this code:
                  #  - enters a new paren depth and clears its ';' count
                  #  - sets $container_type from $want_paren if one is
                  #    pending, otherwise from the previous nonblank token
                  #  - in the latter case, checks for a '(' where an
                  #    operator was expected
                  #  - records the nesting depth change
                  #  - sets the token $type and stores it for use at the
                  #    closing paren
23061             ++$paren_depth;
23062             $paren_semicolon_count[$paren_depth] = 0;
23063             if ($want_paren) {
23064                 $container_type = $want_paren;
23065                 $want_paren     = "";
23066             }
23067             else {
23068                 $container_type = $last_nonblank_token;
23069
23070                 # We can check for a syntax error here of unexpected '(',
23071                 # but this is going to get messy...
23072                 if (
23073                     $expecting == OPERATOR
23074
23075                     # be sure this is not a method call of the form
23076                     # &method(...), $method->(..), &{method}(...),
23077                     # $ref[2](list) is ok & short for $ref[2]->(list)
23078                     # NOTE: at present, braces in something like &{ xxx }
23079                     # are not marked as a block, we might have a method call
23080                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
23081
23082                   )
23083                 {
23084
23085                     # ref: camel 3 p 703.
23086                     if ( $last_last_nonblank_token eq 'do' ) {
23087                         complain(
23088 "do SUBROUTINE is deprecated; consider & or -> notation\n"
23089                         );
23090                     }
23091                     else {
23092
23093                         # if this is an empty list, (), then it is not an
23094                         # error; for example, we might have a constant pi and
23095                         # invoke it with pi() or just pi;
23096                         my ( $next_nonblank_token, $i_next ) =
23097                           find_next_nonblank_token( $i, $rtokens,
23098                             $max_token_index );
23099                         if ( $next_nonblank_token ne ')' ) {
23100                             my $hint;
23101                             error_if_expecting_OPERATOR('(');
23102
                                  # add a hint to the warning for two
                                  # cases: a type 'C' token, or a type
                                  # 'i' token starting with '$' which is
                                  # not the first token of the line
23103                             if ( $last_nonblank_type eq 'C' ) {
23104                                 $hint =
23105                                   "$last_nonblank_token has a void prototype\n";
23106                             }
23107                             elsif ( $last_nonblank_type eq 'i' ) {
23108                                 if (   $i_tok > 0
23109                                     && $last_nonblank_token =~ /^\$/ )
23110                                 {
23111                                     $hint =
23112 "Do you mean '$last_nonblank_token->(' ?\n";
23113                                 }
23114                             }
23115                             if ($hint) {
23116                                 interrupt_logfile();
23117                                 warning($hint);
23118                                 resume_logfile();
23119                             }
23120                         } ## end if ( $next_nonblank_token...
23121                     } ## end else [ if ( $last_last_nonblank_token...
23122                 } ## end if ( $expecting == OPERATOR...
23123             }
                  # remember the container type at this depth and record
                  # the new nesting level
23124             $paren_type[$paren_depth] = $container_type;
23125             ( $type_sequence, $indent_flag ) =
23126               increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
23127
23128             # propagate types down through nested parens
23129             # for example: the second paren in 'if ((' would be structural
23130             # since the first is.
23131
23132             if ( $last_nonblank_token eq '(' ) {
23133                 $type = $last_nonblank_type;
23134             }
23135
                  # (the 'else' branch of the test above follows this
                  # long comment; it marks every other paren as type '{')
23136             #     We exclude parens as structural after a ',' because it
23137             #     causes subtle problems with continuation indentation for
23138             #     something like this, where the first 'or' will not get
23139             #     indented.
23140             #
23141             #         assert(
23142             #             __LINE__,
23143             #             ( not defined $check )
23144             #               or ref $check
23145             #               or $check eq "new"
23146             #               or $check eq "old",
23147             #         );
23148             #
23149             #     Likewise, we exclude parens where a statement can start
23150             #     because of problems with continuation indentation, like
23151             #     these:
23152             #
23153             #         ($firstline =~ /^#\!.*perl/)
23154             #         and (print $File::Find::name, "\n")
23155             #           and (return 1);
23156             #
23157             #         (ref($usage_fref) =~ /CODE/)
23158             #         ? &$usage_fref
23159             #           : (&blast_usage, &blast_params, &blast_general_params);
23160
23161             else {
23162                 $type = '{';
23163             }
23164
23165             if ( $last_nonblank_type eq ')' ) {
23166                 warning(
23167                     "Syntax error? found token '$last_nonblank_type' then '('\n"
23168                 );
23169             }
                  # save the type; it is examined again when the matching
                  # closing paren is processed
23170             $paren_structural_type[$paren_depth] = $type;
23171
23172         },
')' => sub {

    # Handle a closing paren: record the nesting depth change, set the
    # token type and container type from what was saved for this paren
    # depth, check the ';' count of a for loop, and leave the depth.
    ( $type_sequence, $indent_flag ) =
      decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );

    # this ')' gets type '}' if type '{' was saved at this depth
    $type = '}' if ( $paren_structural_type[$paren_depth] eq '{' );

    my $paren_kind = $paren_type[$paren_depth];
    $container_type = $paren_kind;

    #    /^(for|foreach)$/
    if ( $is_for_foreach{$paren_kind} ) {
        my $num_sc = $paren_semicolon_count[$paren_depth];
        warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n")
          if ( $num_sc > 0 && $num_sc != 2 );
    }

    # never let the depth go negative
    $paren_depth-- if ( $paren_depth > 0 );
},
',' => sub {

    # two commas in a row deserve a complaint
    complain("Repeated ','s \n") if ( $last_nonblank_type eq ',' );

    # patch for operator_expected: note if we are in the list (use.t)
    $statement_type = '_use' if ( $statement_type eq 'use' );

##                FIXME: need to move this elsewhere, perhaps check after a '('
##                elsif ($last_nonblank_token eq '(') {
##                    warning("Leading ','s illegal in some versions of perl\n");
##                }
},
';' => sub {

    # a semicolon ends the statement: forget its context and type
    $context        = UNKNOWN_CONTEXT;
    $statement_type = '';

    # Mark a ';' inside the parens of a for loop (/^(for|foreach)$/)
    # as type 'f' and count it.
    #
    # Be careful: we do not want a semicolon such as the
    # following to be included:
    #
    #    for (sort {strcoll($a,$b);} keys %investments) {
    #
    # so the brace and square bracket depths must both equal the
    # values stored in @depth_array for this paren depth.
    if (   $is_for_foreach{ $paren_type[$paren_depth] }
        && $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
        && $square_bracket_depth ==
        $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
    {
        $type = 'f';
        $paren_semicolon_count[$paren_depth]++;
    }
},
'"' => sub {

    # start of a double-quoted string; no quote modifiers are allowed
    if ( $expecting == OPERATOR ) {
        error_if_expecting_OPERATOR("String");
    }
    ( $in_quote, $type ) = ( 1, 'Q' );
    $allowed_quote_modifiers = "";
},
"'" => sub {

    # start of a single-quoted string; no quote modifiers are allowed
    if ( $expecting == OPERATOR ) {
        error_if_expecting_OPERATOR("String");
    }
    ( $in_quote, $type ) = ( 1, 'Q' );
    $allowed_quote_modifiers = "";
},
'`' => sub {

    # start of a backtick-quoted string; no quote modifiers are allowed
    if ( $expecting == OPERATOR ) {
        error_if_expecting_OPERATOR("String");
    }
    ( $in_quote, $type ) = ( 1, 'Q' );
    $allowed_quote_modifiers = "";
},
'/' => sub {

    # Decide whether a '/' starts a pattern or is a divide operator.
    my $is_pattern;

    if ( $expecting != UNKNOWN ) {

        # the syntax tells us: it is a pattern where a term is expected
        $is_pattern = ( $expecting == TERM );
    }
    else {

        # indeterminate, must guess..
        my $msg;
        ( $is_pattern, $msg ) =
          guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
            $max_token_index );

        if ($msg) {
            write_diagnostics("DIVIDE:$msg\n");
            write_logfile_entry($msg);
        }
    }

    if ($is_pattern) {
        ( $in_quote, $type ) = ( 1, 'Q' );
        $allowed_quote_modifiers = '[msixpodualgc]';
    }

    # not a pattern; check for a /= token
    elsif ( $rtokens->[ $i + 1 ] eq '=' ) {    # form token /=
        $i++;
        $tok  = '/=';
        $type = $tok;
    }

    #DEBUG - collecting info on what tokens follow a divide
    # for development of guessing algorithm
    #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
    #    #write_diagnostics( "DIVIDE? $input_line\n" );
    #}
},
23286         '{' => sub {
                  # Handler for an opening curly brace.  It first adjusts
                  # $last_nonblank_token for a few special preceding contexts,
                  # then classifies the brace as a hash index ('L'), an
                  # anonymous hash, or a code block, and finally records the
                  # result on the brace stacks for the matching '}' handler.
23287
23288             # if we just saw a ')', we will label this block with
23289             # its type.  We need to do this to allow sub
23290             # code_block_type to determine if this brace starts a
23291             # code block or anonymous hash.  (The type of a paren
23292             # pair is the preceding token, such as 'if', 'else',
23293             # etc).
23294             $container_type = "";
23295
23296             # ATTRS: for a '{' following an attribute list, reset
23297             # things to look like we just saw the sub name
23298             if ( $statement_type =~ /^sub/ ) {
23299                 $last_nonblank_token = $statement_type;
23300                 $last_nonblank_type  = 'i';
23301                 $statement_type      = "";
23302             }
23303
23304             # patch for SWITCH/CASE: hide these keywords from an immediately
23305             # following opening brace
23306             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
23307                 && $statement_type eq $last_nonblank_token )
23308             {
23309                 $last_nonblank_token = ";";
23310             }
23311
23312             elsif ( $last_nonblank_token eq ')' ) {
                      # NOTE(review): the index is depth + 1, presumably because
                      # the ')' handler has already decremented $paren_depth --
                      # confirm against that handler
23313                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
23314
23315                 # defensive move in case of a nesting error (pbug.t)
23316                 # in which this ')' had no previous '('
23317                 # this nesting error will have been caught
23318                 if ( !defined($last_nonblank_token) ) {
23319                     $last_nonblank_token = 'if';
23320                 }
23321
23322                 # check for syntax error here;
23323                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
23324                     my $list = join( ' ', sort keys %is_blocktype_with_paren );
23325                     warning(
23326                         "syntax error at ') {', didn't see one of: $list\n");
23327                 }
23328             }
23329
23330             # patch for paren-less for/foreach glitch, part 2.
23331             # see note below under 'qw'
23332             elsif ($last_nonblank_token eq 'qw'
23333                 && $is_for_foreach{$want_paren} )
23334             {
23335                 $last_nonblank_token = $want_paren;
23336                 if ( $last_last_nonblank_token eq $want_paren ) {
23337                     warning(
23338 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
23339                     );
23340
23341                 }
23342                 $want_paren = "";
23343             }
23344
23345             # now identify which of the three possible types of
23346             # curly braces we have: hash index container, anonymous
23347             # hash reference, or code block.
23348
23349             # non-structural (hash index) curly brace pair
23350             # get marked 'L' and 'R'
23351             if ( is_non_structural_brace() ) {
23352                 $type = 'L';
23353
23354                 # patch for SWITCH/CASE:
23355                 # allow paren-less identifier after 'when'
23356                 # if the brace is preceded by a space
23357                 if (   $statement_type eq 'when'
23358                     && $last_nonblank_type      eq 'i'
23359                     && $last_last_nonblank_type eq 'k'
23360                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
23361                 {
23362                     $type       = '{';
23363                     $block_type = $statement_type;
23364                 }
23365             }
23366
23367             # code and anonymous hash have the same type, '{', but are
23368             # distinguished by 'block_type',
23369             # which will be blank for an anonymous hash
23370             else {
23371
23372                 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
23373                     $max_token_index );
23374
23375                 # patch to promote bareword type to function taking block
                      # (the already-emitted token type 'w' is rewritten to 'G')
23376                 if (   $block_type
23377                     && $last_nonblank_type eq 'w'
23378                     && $last_nonblank_i >= 0 )
23379                 {
23380                     if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
23381                         $routput_token_type->[$last_nonblank_i] = 'G';
23382                     }
23383                 }
23384
23385                 # patch for SWITCH/CASE: if we find a stray opening block brace
23386                 # where we might accept a 'case' or 'when' block, then take it
23387                 if (   $statement_type eq 'case'
23388                     || $statement_type eq 'when' )
23389                 {
23390                     if ( !$block_type || $block_type eq '}' ) {
23391                         $block_type = $statement_type;
23392                     }
23393                 }
23394             }

                  # push this brace's properties at the new depth; the '}'
                  # handler reads them back from the same index
23395             $brace_type[ ++$brace_depth ]        = $block_type;
23396             $brace_package[$brace_depth]         = $current_package;
23397             $brace_structural_type[$brace_depth] = $type;
23398             $brace_context[$brace_depth]         = $context;
23399             ( $type_sequence, $indent_flag ) =
23400               increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
23401         },
23402         '}' => sub {
                  # Handler for a closing curly brace.  Restores the block type,
                  # package and context that the '{' handler saved at this
                  # depth, then pops the brace stack.
23403             $block_type = $brace_type[$brace_depth];

                  # closing a named block type also clears the statement type
23404             if ($block_type) { $statement_type = '' }
23405             if ( defined( $brace_package[$brace_depth] ) ) {
23406                 $current_package = $brace_package[$brace_depth];
23407             }
23408
23409             # can happen on brace error (caught elsewhere)
23410             else {
23411             }
23412             ( $type_sequence, $indent_flag ) =
23413               decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
23414
                  # a brace that was opened as a hash index ('L') closes as 'R'
23415             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
23416                 $type = 'R';
23417             }
23418
23419             # propagate type information for 'do' and 'eval' blocks.
23420             # This is necessary to enable us to know if an operator
23421             # or term is expected next
23422             if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
23423                 $tok = $brace_type[$brace_depth];
23424             }
23425
23426             $context = $brace_context[$brace_depth];

                  # pop the brace stack, but never below depth zero
23427             if ( $brace_depth > 0 ) { $brace_depth--; }
23428         },
23429         '&' => sub {    # maybe sub call? start looking
23430
23431             # We have to check for sub call unless we are sure we
23432             # are expecting an operator.  This example from s2p
23433             # got mistaken as a q operator in an early version:
23434             #   print BODY &q(<<'EOT');
23435             if ( $expecting != OPERATOR ) {
23436
23437                 # But only look for a sub call if we are expecting a term or
23438                 # if there is no existing space after the &.
23439                 # For example we probably don't want & as sub call here:
23440                 #    Fcntl::S_IRUSR & $mode;
23441                 if ( $expecting == TERM || $next_type ne 'b' ) {
23442                     scan_identifier();
23443                 }
23444             }

                  # an operator is expected: the '&' token is left unchanged
23445             else {
23446             }
23447         },
23448         '<' => sub {    # angle operator or less than?
23449
                  # Unless an operator is definitely expected, try to scan this
                  # as an angle operator; the scan returns an updated token
                  # index and token type.
23450             if ( $expecting != OPERATOR ) {
23451                 ( $i, $type ) =
23452                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
23453                     $expecting, $max_token_index );
23454
                      # the type is still '<' although a term was required
23455                 if ( $type eq '<' && $expecting == TERM ) {
23456                     error_if_expecting_TERM();
23457                     interrupt_logfile();
23458                     warning("Unterminated <> operator?\n");
23459                     resume_logfile();
23460                 }
23461             }

                  # an operator is expected: the '<' token is left unchanged
23462             else {
23463             }
23464         },
23465         '?' => sub {    # ?: conditional or starting pattern?
23466
23467             my $is_pattern;
23468
                  # when the context is ambiguous, ask the guessing routine and
                  # log any message it returns
23469             if ( $expecting == UNKNOWN ) {
23470
23471                 my $msg;
23472                 ( $is_pattern, $msg ) =
23473                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
23474                     $max_token_index );
23475
23476                 if ($msg) { write_logfile_entry($msg) }
23477             }

                  # otherwise it is a pattern exactly when a term is expected
23478             else { $is_pattern = ( $expecting == TERM ) }
23479
                  # a pattern: switch to quote scanning with type 'Q' and set
                  # the modifier letters allowed after the closing delimiter
23480             if ($is_pattern) {
23481                 $in_quote                = 1;
23482                 $type                    = 'Q';
23483                 $allowed_quote_modifiers = '[msixpodualgc]';
23484             }

                  # a conditional: open a QUESTION_COLON nesting level, which
                  # the ':' handler closes
23485             else {
23486                 ( $type_sequence, $indent_flag ) =
23487                   increase_nesting_depth( QUESTION_COLON,
23488                     $$rtoken_map[$i_tok] );
23489             }
23490         },
23491         '*' => sub {    # typeglob, or multiply?
23492
                  # where a term is expected, scan the '*' as an identifier
23493             if ( $expecting == TERM ) {
23494                 scan_identifier();
23495             }

                  # otherwise merge following pre-tokens to form '*=', '**' or
                  # '**='; $i is advanced once for each pre-token absorbed
23496             else {
23497
23498                 if ( $$rtokens[ $i + 1 ] eq '=' ) {
23499                     $tok  = '*=';
23500                     $type = $tok;
23501                     $i++;
23502                 }
23503                 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
23504                     $tok  = '**';
23505                     $type = $tok;
23506                     $i++;
23507                     if ( $$rtokens[ $i + 1 ] eq '=' ) {
23508                         $tok  = '**=';
23509                         $type = $tok;
23510                         $i++;
23511                     }
23512                 }
23513             }
23514         },
23515         '.' => sub {    # what kind of . ?
23516
                  # Unless an operator is definitely expected, try to scan a
                  # number here (presumably one with a leading decimal point
                  # -- see scan_number).
23517             if ( $expecting != OPERATOR ) {
23518                 scan_number();

                      # the type is still '.' after the scan, so report an
                      # error if a term was required
23519                 if ( $type eq '.' ) {
23520                     error_if_expecting_TERM()
23521                       if ( $expecting == TERM );
23522                 }
23523             }

                  # an operator is expected: the '.' token is left unchanged
23524             else {
23525             }
23526         },
23527         ':' => sub {
                  # Handler for ':'.  The colon is classified as a label
                  # (type 'J'), the start of an attribute list (type 'A'), or
                  # the second half of a ?: conditional.
23528
23529             # if this is the first nonblank character, call it a label
23530             # since perl seems to just swallow it
23531             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
23532                 $type = 'J';
23533             }
23534
23535             # ATTRS: check for a ':' which introduces an attribute list
23536             # (this might eventually get its own token type)
23537             elsif ( $statement_type =~ /^sub/ ) {
23538                 $type              = 'A';
23539                 $in_attribute_list = 1;
23540             }
23541
23542             # check for scalar attribute, such as
23543             # my $foo : shared = 1;
             # (only when no ?: conditional is currently open)
23544             elsif ($is_my_our{$statement_type}
23545                 && $current_depth[QUESTION_COLON] == 0 )
23546             {
23547                 $type              = 'A';
23548                 $in_attribute_list = 1;
23549             }
23550
23551             # otherwise, it should be part of a ?/: operator
23552             else {
23553                 ( $type_sequence, $indent_flag ) =
23554                   decrease_nesting_depth( QUESTION_COLON,
23555                     $$rtoken_map[$i_tok] );

                      # a ':' directly after the '?' leaves no middle operand
23556                 if ( $last_nonblank_token eq '?' ) {
23557                     warning("Syntax error near ? :\n");
23558                 }
23559             }
23560         },
23561         '+' => sub {    # what kind of plus?
23562
                  # where a term is expected, try to scan a signed number
23563             if ( $expecting == TERM ) {
23564                 my $number = scan_number();
23565
23566                 # unary plus is safest assumption if not a number
23567                 if ( !defined($number) ) { $type = 'p'; }
23568             }

                  # an operator is expected: the '+' token is left unchanged
23569             elsif ( $expecting == OPERATOR ) {
23570             }

                  # ambiguous context: mark as unary plus ('p') when the next
                  # pre-token has type 'w'
23571             else {
23572                 if ( $next_type eq 'w' ) { $type = 'p' }
23573             }
23574         },
23575         '@' => sub {
                  # Handler for '@': report an error if an operator was
                  # expected here, then scan the token as an identifier
                  # regardless.
23576
23577             error_if_expecting_OPERATOR("Array")
23578               if ( $expecting == OPERATOR );
23579             scan_identifier();
23580         },
23581         '%' => sub {    # hash or modulo?
23582
23583             # first guess is hash if no following blank
23584             if ( $expecting == UNKNOWN ) {
23585                 if ( $next_type ne 'b' ) { $expecting = TERM }
23586             }

                  # scan as an identifier only when a term is expected (or was
                  # just guessed above); otherwise the token is left unchanged
23587             if ( $expecting == TERM ) {
23588                 scan_identifier();
23589             }
23590         },
23591         '[' => sub {
                  # Handler for '[': push the preceding nonblank token as this
                  # bracket's type and open a SQUARE_BRACKET nesting level.
23592             $square_bracket_type[ ++$square_bracket_depth ] =
23593               $last_nonblank_token;
23594             ( $type_sequence, $indent_flag ) =
23595               increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23596
23597             # It may seem odd, but structural square brackets have
23598             # type '{' and '}'.  This simplifies the indentation logic.
23599             if ( !is_non_structural_brace() ) {
23600                 $type = '{';
23601             }

                  # remember the chosen type so the ']' handler can match it
23602             $square_bracket_structural_type[$square_bracket_depth] = $type;
23603         },
23604         ']' => sub {
                  # Handler for ']': close the SQUARE_BRACKET nesting level and
                  # pop the square bracket stack.
23605             ( $type_sequence, $indent_flag ) =
23606               decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23607
                  # a bracket that was opened with type '{' closes with type '}'
23608             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
23609             {
23610                 $type = '}';
23611             }

                  # pop the stack, but never below depth zero
23612             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
23613         },
23614         '-' => sub {    # what kind of minus?
23615
                  # possible file test operator: an operator is not definitely
                  # expected and the next pre-token is a file test letter
23616             if ( ( $expecting != OPERATOR )
23617                 && $is_file_test_operator{$next_tok} )
23618             {
23619                 my ( $next_nonblank_token, $i_next ) =
23620                   find_next_nonblank_token( $i + 1, $rtokens,
23621                     $max_token_index );
23622
23623                 # check for a quoted word like "-w=>xx";
23624                 # it is sufficient to just check for a following '='
23625                 if ( $next_nonblank_token eq '=' ) {
23626                     $type = 'm';
23627                 }

                      # otherwise absorb the letter into this token and give it
                      # type 'F'
23628                 else {
23629                     $i++;
23630                     $tok .= $next_tok;
23631                     $type = 'F';
23632                 }
23633             }
23634             elsif ( $expecting == TERM ) {
23635                 my $number = scan_number();
23636
23637                 # maybe part of bareword token? unary is safest
23638                 if ( !defined($number) ) { $type = 'm'; }
23639
23640             }

                  # an operator is expected: the '-' token is left unchanged
23641             elsif ( $expecting == OPERATOR ) {
23642             }

                  # ambiguous context: mark as unary minus ('m') when the next
                  # pre-token has type 'w'
23643             else {
23644
23645                 if ( $next_type eq 'w' ) {
23646                     $type = 'm';
23647                 }
23648             }
23649         },
23650
23651         '^' => sub {
                  # Handler for '^'.  Nothing is changed unless a term is
                  # expected.
23652
23653             # check for special variables like ${^WARNING_BITS}
23654             if ( $expecting == TERM ) {
23655
23656                 # FIXME: this should work but will not catch errors
23657                 # because we also have to be sure that previous token is
23658                 # a type character ($,@,%).
23659                 if ( $last_nonblank_token eq '{'
23660                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
23661                 {
23662
                          # a following 'W' pre-token (as in ${^W}) is recorded
                          # in the _saw_perl_dash_w flag
23663                     if ( $next_tok eq 'W' ) {
23664                         $tokenizer_self->{_saw_perl_dash_w} = 1;
23665                     }

                          # join '^' and the following word into one token of
                          # type 'w'
23666                     $tok  = $tok . $next_tok;
23667                     $i    = $i + 1;
23668                     $type = 'w';
23669                 }
23670
23671                 else {
23672                     unless ( error_if_expecting_TERM() ) {
23673
23674                         # Something like this is valid but strange:
23675                         # undef ^I;
23676                         complain("The '^' seems unusual here\n");
23677                     }
23678                 }
23679             }
23680         },
23681
23682         '::' => sub {    # probably a sub call
                  # a leading '::' is always scanned as a bare identifier
23683             scan_bare_identifier();
23684         },
23685         '<<' => sub {    # maybe a here-doc?
23686             return
23687               unless ( $i < $max_token_index )
23688               ;          # here-doc not possible if end of line
23689
                  # Unless an operator is definitely expected, look for a
                  # here-doc target after the '<<'.  The search also returns an
                  # updated token index $i.
23690             if ( $expecting != OPERATOR ) {
23691                 my ( $found_target, $here_doc_target, $here_quote_character,
23692                     $saw_error );
23693                 (
23694                     $found_target, $here_doc_target, $here_quote_character, $i,
23695                     $saw_error
23696                   )
23697                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
23698                     $max_token_index );
23699
                      # queue the target and its quote character on the list of
                      # here-doc targets for this line and mark the token 'h'
23700                 if ($found_target) {
23701                     push @{$rhere_target_list},
23702                       [ $here_doc_target, $here_quote_character ];
23703                     $type = 'h';

                          # style complaints only: a very long target, or one
                          # that does not match the pattern below
23704                     if ( length($here_doc_target) > 80 ) {
23705                         my $truncated = substr( $here_doc_target, 0, 80 );
23706                         complain("Long here-target: '$truncated' ...\n");
23707                     }
23708                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
23709                         complain(
23710                             "Unconventional here-target: '$here_doc_target'\n"
23711                         );
23712                     }
23713                 }

                      # no target although a term was required, and
                      # find_here_doc did not report an error itself
23714                 elsif ( $expecting == TERM ) {
23715                     unless ($saw_error) {
23716
23717                         # shouldn't happen..
23718                         warning("Program bug; didn't find here doc target\n");
23719                         report_definite_bug();
23720                     }
23721                 }
23722             }

                  # an operator is expected: the '<<' token is left unchanged
23723             else {
23724             }
23725         },
23726         '->' => sub {
23727
23728             # if -> points to a bare word, we must scan for an identifier,
23729             # otherwise something like ->y would look like the y operator
             # (the scan is done unconditionally, whatever follows the arrow)
23730             scan_identifier();
23731         },
23732
23733         # type = 'pp' for pre-increment, '++' for post-increment
23734         '++' => sub {
                  # a term is expected next, so this must be a pre-increment
23735             if ( $expecting == TERM ) { $type = 'pp' }

                  # ambiguous context: call it a pre-increment if the next
                  # nonblank pre-token is a '$'; otherwise the type is unchanged
23736             elsif ( $expecting == UNKNOWN ) {
23737                 my ( $next_nonblank_token, $i_next ) =
23738                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23739                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
23740             }
23741         },
23742
23743         '=>' => sub {
                  # complain if the previous nonblank token was also a '=>'
23744             if ( $last_nonblank_type eq $tok ) {
23745                 complain("Repeated '=>'s \n");
23746             }
23747
23748             # patch for operator_expected: note if we are in the list (use.t)
23749             # TODO: make version numbers a new token type
             # (a 'use' statement type is renamed to '_use' once a '=>' is seen)
23750             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
23751         },
23752
23753         # type = 'mm' for pre-decrement, '--' for post-decrement
23754         '--' => sub {
23755
                  # a term is expected next, so this must be a pre-decrement
23756             if ( $expecting == TERM ) { $type = 'mm' }

                  # ambiguous context: call it a pre-decrement if the next
                  # nonblank pre-token is a '$'; otherwise the type is unchanged
23757             elsif ( $expecting == UNKNOWN ) {
23758                 my ( $next_nonblank_token, $i_next ) =
23759                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23760                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
23761             }
23762         },
23763
23764         '&&' => sub {
                  # report an error if '&&' appears where a term is expected
23765             error_if_expecting_TERM()
23766               if ( $expecting == TERM );
23767         },
23768
23769         '||' => sub {
                  # report an error if '||' appears where a term is expected
23770             error_if_expecting_TERM()
23771               if ( $expecting == TERM );
23772         },
23773
23774         '//' => sub {
                  # report an error if '//' appears where a term is expected
23775             error_if_expecting_TERM()
23776               if ( $expecting == TERM );
23777         },
23778     };
23779
23780     # ------------------------------------------------------------
23781     # end hash of code for handling individual token types
23782     # ------------------------------------------------------------
23783
23784     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
23785
          # Idiom used for the lookup tables below: the word list is placed in
          # @_ as a scratch array, then every listed key is set to 1 with a
          # hash slice.  Note that each assignment overwrites @_.

23786     # These block types terminate statements and do not need a trailing
23787     # semicolon
23788     # patched for SWITCH/CASE/
23789     my %is_zero_continuation_block_type;
23790     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
23791       if elsif else unless while until for foreach switch case given when);
          # (the ';' inside the qw list above is a list element, not a
          # statement terminator)
23792     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
23793
23794     my %is_not_zero_continuation_block_type;
23795     @_ = qw(sort grep map do eval);
23796     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
23797
23798     my %is_logical_container;
23799     @_ = qw(if elsif unless while and or err not && !  || for foreach);
23800     @is_logical_container{@_} = (1) x scalar(@_);
23801
23802     my %is_binary_type;
23803     @_ = qw(|| &&);
23804     @is_binary_type{@_} = (1) x scalar(@_);
23805
23806     my %is_binary_keyword;
23807     @_ = qw(and or err eq ne cmp);
23808     @is_binary_keyword{@_} = (1) x scalar(@_);
23809
23810     # 'L' is token for opening { at hash key
23811     my %is_opening_type;
23812     @_ = qw" L { ( [ ";
23813     @is_opening_type{@_} = (1) x scalar(@_);
23814
23815     # 'R' is token for closing } at hash key
23816     my %is_closing_type;
23817     @_ = qw" R } ) ] ";
23818     @is_closing_type{@_} = (1) x scalar(@_);
23819
23820     my %is_redo_last_next_goto;
23821     @_ = qw(redo last next goto);
23822     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
23823
23824     my %is_use_require;
23825     @_ = qw(use require);
23826     @is_use_require{@_} = (1) x scalar(@_);
23827
23828     my %is_sub_package;
23829     @_ = qw(sub package);
23830     @is_sub_package{@_} = (1) x scalar(@_);
23831
23832     # This hash holds the hash key in $tokenizer_self for these keywords:
23833     my %is_format_END_DATA = (
23834         'format'   => '_in_format',
23835         '__END__'  => '_in_end',
23836         '__DATA__' => '_in_data',
23837     );
23838
23839     # ref: camel 3 p 147,
23840     # but perl may accept undocumented flags
23841     # perl 5.10 adds 'p' (preserve)
23842     # Perl version 5.16, http://perldoc.perl.org/perlop.html,  has these:
23843     # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
23844     # s/PATTERN/REPLACEMENT/msixpodualgcer
23845     # y/SEARCHLIST/REPLACEMENTLIST/cdsr
23846     # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
23847     # qr/STRING/msixpodual
     # Each value is a character class of the modifier letters accepted after
     # that quote operator; an empty string means no modifiers are accepted.
23848     my %quote_modifiers = (
23849         's'  => '[msixpodualgcer]',
23850         'y'  => '[cdsr]',
23851         'tr' => '[cdsr]',
23852         'm'  => '[msixpodualgc]',
23853         'qr' => '[msixpodual]',
23854         'q'  => "",
23855         'qq' => "",
23856         'qw' => "",
23857         'qx' => "",
23858     );
23859
23860     # table showing how many quoted things to look for after quote operator..
23861     # s, y, tr have 2 (pattern and replacement)
23862     # others have 1 (pattern only)
23863     my %quote_items = (
23864         's'  => 2,
23865         'y'  => 2,
23866         'tr' => 2,
23867         'm'  => 1,
23868         'qr' => 1,
23869         'q'  => 1,
23870         'qq' => 1,
23871         'qw' => 1,
23872         'qx' => 1,
23873     );
23874
23875     sub tokenize_this_line {
23876
23877   # This routine breaks a line of perl code into tokens which are of use in
23878   # indentation and reformatting.  One of my goals has been to define tokens
23879   # such that a newline may be inserted between any pair of tokens without
23880   # changing or invalidating the program. This version comes close to this,
23881   # although there are necessarily a few exceptions which must be caught by
23882   # the formatter.  Many of these involve the treatment of bare words.
23883   #
23884   # The tokens and their types are returned in arrays.  See previous
23885   # routine for their names.
23886   #
23887   # See also the array "valid_token_types" in the BEGIN section for an
23888   # up-to-date list.
23889   #
23890   # To simplify things, token types are either a single character, or they
23891   # are identical to the tokens themselves.
23892   #
23893   # As a debugging aid, the -D flag creates a file containing a side-by-side
23894   # comparison of the input string and its tokenization for each line of a file.
23895   # This is an invaluable debugging aid.
23896   #
23897   # In addition to tokens, and some associated quantities, the tokenizer
23898   # also returns flags indicating any special line types.  These include
23899   # quotes, here_docs, formats.
23900   #
23901   # -----------------------------------------------------------------------
23902   #
23903   # How to add NEW_TOKENS:
23904   #
23905   # New token types will undoubtedly be needed in the future both to keep up
23906   # with changes in perl and to help adapt the tokenizer to other applications.
23907   #
23908   # Here are some notes on the minimal steps.  I wrote these notes while
23909   # adding the 'v' token type for v-strings, which are things like version
23910   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
23911   # can use your editor to search for the string "NEW_TOKENS" to find the
23912   # appropriate sections to change):
23913   #
23914   # *. Try to talk somebody else into doing it!  If not, ..
23915   #
23916   # *. Make a backup of your current version in case things don't work out!
23917   #
23918   # *. Think of a new, unused character for the token type, and add to
23919   # the array @valid_token_types in the BEGIN section of this package.
23920   # For example, I used 'v' for v-strings.
23921   #
23922   # *. Implement coding to recognize the $type of the token in this routine.
23923   # This is the hardest part, and is best done by imitating or modifying
23924   # some of the existing coding.  For example, to recognize v-strings, I
23925   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
23926   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
23927   #
23928   # *. Update sub operator_expected.  This update is critically important but
23929   # the coding is trivial.  Look at the comments in that routine for help.
23930   # For v-strings, which should behave like numbers, I just added 'v' to the
23931   # regex used to handle numbers and strings (types 'n' and 'Q').
23932   #
23933   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
23934   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
23935   # skip this step and take the default at first, then adjust later to get
23936   # desired results.  For adding type 'v', I looked at sub bond_strength and
23937   # saw that number type 'n' was using default strengths, so I didn't do
23938   # anything.  I may tune it up someday if I don't like the way line
23939   # breaks with v-strings look.
23940   #
23941   # *. Implement a 'whitespace' rule in sub set_white_space_flag in
23942   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
23943   # and saw that type 'n' used spaces on both sides, so I just added 'v'
23944   # to the array @spaces_both_sides.
23945   #
23946   # *. Update HtmlWriter package so that users can colorize the token as
23947   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
23948   # that package.  For v-strings, I initially chose to use a default color
23949   # equal to the default for numbers, but it might be nice to change that
23950   # eventually.
23951   #
23952   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
23953   #
23954   # *. Run lots and lots of debug tests.  Start with special files designed
23955   # to test the new token type.  Run with the -D flag to create a .DEBUG
23956   # file which shows the tokenization.  When these work ok, test as many old
23957   # scripts as possible.  Start with all of the '.t' files in the 'test'
23958   # directory of the distribution file.  Compare .tdy output with previous
23959   # version and updated version to see the differences.  Then include as
23960   # many more files as possible. My own technique has been to collect a huge
23961   # number of perl scripts (thousands!) into one directory and run perltidy
23962   # *, then run diff between the output of the previous version and the
23963   # current version.
23964   #
23965   # *. For another example, search for the smartmatch operator '~~'
23966   # with your editor to see where updates were made for it.
23967   #
23968   # -----------------------------------------------------------------------
23969
23970         my $line_of_tokens = shift;
23971         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
23972
23973         # patch while coding change is underway
23974         # make callers private data to allow access
23975         # $tokenizer_self = $caller_tokenizer_self;
23976
23977         # extract line number for use in error messages
23978         $input_line_number = $line_of_tokens->{_line_number};
23979
23980         # reinitialize for multi-line quote
23981         $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
23982
23983         # check for pod documentation
23984         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
23985
23986             # must not be in multi-line quote
23987             # and must not be in an eqn
23988             if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
23989             {
23990                 $tokenizer_self->{_in_pod} = 1;
23991                 return;
23992             }
23993         }
23994
23995         $input_line = $untrimmed_input_line;
23996
23997         chomp $input_line;
23998
23999         # trim start of this line unless we are continuing a quoted line
24000         # do not trim end because we might end in a quote (test: deken4.pl)
24001         # Perl::Tidy::Formatter will delete needless trailing blanks
24002         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
24003             $input_line =~ s/^\s*//;    # trim left end
24004         }
24005
24006         # update the copy of the line for use in error messages
24007         # This must be exactly what we give the pre_tokenizer
24008         $tokenizer_self->{_line_text} = $input_line;
24009
24010         # re-initialize for the main loop
24011         $routput_token_list     = [];    # stack of output token indexes
24012         $routput_token_type     = [];    # token types
24013         $routput_block_type     = [];    # types of code block
24014         $routput_container_type = [];    # paren types, such as if, elsif, ..
24015         $routput_type_sequence  = [];    # nesting sequential number
24016
24017         $rhere_target_list = [];
24018
24019         $tok             = $last_nonblank_token;
24020         $type            = $last_nonblank_type;
24021         $prototype       = $last_nonblank_prototype;
24022         $last_nonblank_i = -1;
24023         $block_type      = $last_nonblank_block_type;
24024         $container_type  = $last_nonblank_container_type;
24025         $type_sequence   = $last_nonblank_type_sequence;
24026         $indent_flag     = 0;
24027         $peeked_ahead    = 0;
24028
24029         # tokenization is done in two stages..
24030         # stage 1 is a very simple pre-tokenization
24031         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
24032
24033         # a little optimization for a full-line comment
24034         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
24035             $max_tokens_wanted = 1    # no use tokenizing a comment
24036         }
24037
24038         # start by breaking the line into pre-tokens
24039         ( $rtokens, $rtoken_map, $rtoken_type ) =
24040           pre_tokenize( $input_line, $max_tokens_wanted );
24041
24042         $max_token_index = scalar(@$rtokens) - 1;
24043         push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
24044         push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
24045         push( @$rtoken_type, 'b', 'b', 'b' );
24046
24047         # initialize for main loop
24048         for $i ( 0 .. $max_token_index + 3 ) {
24049             $routput_token_type->[$i]     = "";
24050             $routput_block_type->[$i]     = "";
24051             $routput_container_type->[$i] = "";
24052             $routput_type_sequence->[$i]  = "";
24053             $routput_indent_flag->[$i]    = 0;
24054         }
24055         $i     = -1;
24056         $i_tok = -1;
24057
24058         # ------------------------------------------------------------
24059         # begin main tokenization loop
24060         # ------------------------------------------------------------
24061
24062         # we are looking at each pre-token of one line and combining them
24063         # into tokens
24064         while ( ++$i <= $max_token_index ) {
24065
24066             if ($in_quote) {    # continue looking for end of a quote
24067                 $type = $quote_type;
24068
24069                 unless ( @{$routput_token_list} )
24070                 {               # initialize if continuation line
24071                     push( @{$routput_token_list}, $i );
24072                     $routput_token_type->[$i] = $type;
24073
24074                 }
24075                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
24076
24077                 # scan for the end of the quote or pattern
24078                 (
24079                     $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
24080                     $quoted_string_1, $quoted_string_2
24081                   )
24082                   = do_quote(
24083                     $i,               $in_quote,    $quote_character,
24084                     $quote_pos,       $quote_depth, $quoted_string_1,
24085                     $quoted_string_2, $rtokens,     $rtoken_map,
24086                     $max_token_index
24087                   );
24088
24089                 # all done if we didn't find it
24090                 last if ($in_quote);
24091
24092                 # save pattern and replacement text for rescanning
24093                 my $qs1 = $quoted_string_1;
24094                 my $qs2 = $quoted_string_2;
24095
24096                 # re-initialize for next search
24097                 $quote_character = '';
24098                 $quote_pos       = 0;
24099                 $quote_type      = 'Q';
24100                 $quoted_string_1 = "";
24101                 $quoted_string_2 = "";
24102                 last if ( ++$i > $max_token_index );
24103
24104                 # look for any modifiers
24105                 if ($allowed_quote_modifiers) {
24106
24107                     # check for exact quote modifiers
24108                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
24109                         my $str = $$rtokens[$i];
24110                         my $saw_modifier_e;
24111                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
24112                             my $pos = pos($str);
24113                             my $char = substr( $str, $pos - 1, 1 );
24114                             $saw_modifier_e ||= ( $char eq 'e' );
24115                         }
24116
24117                         # For an 'e' quote modifier we must scan the replacement
24118                         # text for here-doc targets.
24119                         if ($saw_modifier_e) {
24120
24121                             my $rht = scan_replacement_text($qs1);
24122
24123                             # Change type from 'Q' to 'h' for quotes with
24124                             # here-doc targets so that the formatter (see sub
24125                             # print_line_of_tokens) will not make any line
24126                             # breaks after this point.
24127                             if ($rht) {
24128                                 push @{$rhere_target_list}, @{$rht};
24129                                 $type = 'h';
24130                                 if ( $i_tok < 0 ) {
24131                                     my $ilast = $routput_token_list->[-1];
24132                                     $routput_token_type->[$ilast] = $type;
24133                                 }
24134                             }
24135                         }
24136
24137                         if ( defined( pos($str) ) ) {
24138
24139                             # matched
24140                             if ( pos($str) == length($str) ) {
24141                                 last if ( ++$i > $max_token_index );
24142                             }
24143
24144                             # Looks like a joined quote modifier
24145                             # and keyword, maybe something like
24146                             # s/xxx/yyy/gefor @k=...
24147                             # Example is "galgen.pl".  Would have to split
24148                             # the word and insert a new token in the
24149                             # pre-token list.  This is so rare that I haven't
24150                             # done it.  Will just issue a warning citation.
24151
24152                             # This error might also be triggered if my quote
24153                             # modifier characters are incomplete
24154                             else {
24155                                 warning(<<EOM);
24156
24157 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
24158 Please put a space between quote modifiers and trailing keywords.
24159 EOM
24160
24161                            # print "token $$rtokens[$i]\n";
24162                            # my $num = length($str) - pos($str);
24163                            # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
24164                            # print "continuing with new token $$rtokens[$i]\n";
24165
24166                                 # skipping past this token does least damage
24167                                 last if ( ++$i > $max_token_index );
24168                             }
24169                         }
24170                         else {
24171
24172                             # example file: rokicki4.pl
24173                             # This error might also be triggered if my quote
24174                             # modifier characters are incomplete
24175                             write_logfile_entry(
24176 "Note: found word $str at quote modifier location\n"
24177                             );
24178                         }
24179                     }
24180
24181                     # re-initialize
24182                     $allowed_quote_modifiers = "";
24183                 }
24184             }
24185
24186             unless ( $tok =~ /^\s*$/ ) {
24187
24188                 # try to catch some common errors
24189                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
24190
24191                     if ( $last_nonblank_token eq 'eq' ) {
24192                         complain("Should 'eq' be '==' here ?\n");
24193                     }
24194                     elsif ( $last_nonblank_token eq 'ne' ) {
24195                         complain("Should 'ne' be '!=' here ?\n");
24196                     }
24197                 }
24198
24199                 $last_last_nonblank_token      = $last_nonblank_token;
24200                 $last_last_nonblank_type       = $last_nonblank_type;
24201                 $last_last_nonblank_block_type = $last_nonblank_block_type;
24202                 $last_last_nonblank_container_type =
24203                   $last_nonblank_container_type;
24204                 $last_last_nonblank_type_sequence =
24205                   $last_nonblank_type_sequence;
24206                 $last_nonblank_token          = $tok;
24207                 $last_nonblank_type           = $type;
24208                 $last_nonblank_prototype      = $prototype;
24209                 $last_nonblank_block_type     = $block_type;
24210                 $last_nonblank_container_type = $container_type;
24211                 $last_nonblank_type_sequence  = $type_sequence;
24212                 $last_nonblank_i              = $i_tok;
24213             }
24214
24215             # store previous token type
24216             if ( $i_tok >= 0 ) {
24217                 $routput_token_type->[$i_tok]     = $type;
24218                 $routput_block_type->[$i_tok]     = $block_type;
24219                 $routput_container_type->[$i_tok] = $container_type;
24220                 $routput_type_sequence->[$i_tok]  = $type_sequence;
24221                 $routput_indent_flag->[$i_tok]    = $indent_flag;
24222             }
24223             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
24224             my $pre_type = $$rtoken_type[$i];    # and type
24225             $tok  = $pre_tok;
24226             $type = $pre_type;                   # to be modified as necessary
24227             $block_type = "";    # blank for all tokens except code block braces
24228             $container_type = "";    # blank for all tokens except some parens
24229             $type_sequence  = "";    # blank for all tokens except ?/:
24230             $indent_flag    = 0;
24231             $prototype = "";    # blank for all tokens except user defined subs
24232             $i_tok     = $i;
24233
24234             # this pre-token will start an output token
24235             push( @{$routput_token_list}, $i_tok );
24236
24237             # continue gathering identifier if necessary
24238             # but do not start on blanks and comments
24239             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
24240
24241                 if ( $id_scan_state =~ /^(sub|package)/ ) {
24242                     scan_id();
24243                 }
24244                 else {
24245                     scan_identifier();
24246                 }
24247
24248                 last if ($id_scan_state);
24249                 next if ( ( $i > 0 ) || $type );
24250
24251                 # didn't find any token; start over
24252                 $type = $pre_type;
24253                 $tok  = $pre_tok;
24254             }
24255
24256             # handle whitespace tokens..
24257             next if ( $type eq 'b' );
24258             my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
24259             my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
24260
24261             # Build larger tokens where possible, since we are not in a quote.
24262             #
24263             # First try to assemble digraphs.  The following tokens are
24264             # excluded and handled specially:
24265             # '/=' is excluded because the / might start a pattern.
24266             # 'x=' is excluded since it might be $x=, with $ on previous line
24267             # '**' and *= might be typeglobs of punctuation variables
24268             # I have allowed tokens starting with <, such as <=,
24269             # because I don't think these could be valid angle operators.
24270             # test file: storrs4.pl
24271             my $test_tok   = $tok . $$rtokens[ $i + 1 ];
24272             my $combine_ok = $is_digraph{$test_tok};
24273
24274             # check for special cases which cannot be combined
24275             if ($combine_ok) {
24276
24277                 # '//' must be defined_or operator if an operator is expected.
24278                 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
24279                 # could be migrated here for clarity
24280                 if ( $test_tok eq '//' ) {
24281                     my $next_type = $$rtokens[ $i + 1 ];
24282                     my $expecting =
24283                       operator_expected( $prev_type, $tok, $next_type );
24284                     $combine_ok = 0 unless ( $expecting == OPERATOR );
24285                 }
24286             }
24287
24288             if (
24289                 $combine_ok
24290                 && ( $test_tok ne '/=' )    # might be pattern
24291                 && ( $test_tok ne 'x=' )    # might be $x
24292                 && ( $test_tok ne '**' )    # typeglob?
24293                 && ( $test_tok ne '*=' )    # typeglob?
24294               )
24295             {
24296                 $tok = $test_tok;
24297                 $i++;
24298
24299                 # Now try to assemble trigraphs.  Note that all possible
24300                 # perl trigraphs can be constructed by appending a character
24301                 # to a digraph.
24302                 $test_tok = $tok . $$rtokens[ $i + 1 ];
24303
24304                 if ( $is_trigraph{$test_tok} ) {
24305                     $tok = $test_tok;
24306                     $i++;
24307                 }
24308             }
24309
24310             $type      = $tok;
24311             $next_tok  = $$rtokens[ $i + 1 ];
24312             $next_type = $$rtoken_type[ $i + 1 ];
24313
24314             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
24315                 local $" = ')(';
24316                 my @debug_list = (
24317                     $last_nonblank_token,      $tok,
24318                     $next_tok,                 $brace_depth,
24319                     $brace_type[$brace_depth], $paren_depth,
24320                     $paren_type[$paren_depth]
24321                 );
24322                 print "TOKENIZE:(@debug_list)\n";
24323             };
24324
24325             # turn off attribute list on first non-blank, non-bareword
24326             if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
24327
24328             ###############################################################
24329             # We have the next token, $tok.
24330             # Now we have to examine this token and decide what it is
24331             # and define its $type
24332             #
24333             # section 1: bare words
24334             ###############################################################
24335
24336             if ( $pre_type eq 'w' ) {
24337                 $expecting = operator_expected( $prev_type, $tok, $next_type );
24338                 my ( $next_nonblank_token, $i_next ) =
24339                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
24340
24341                 # ATTRS: handle sub and variable attributes
24342                 if ($in_attribute_list) {
24343
24344                     # treat bare word followed by open paren like qw(
24345                     if ( $next_nonblank_token eq '(' ) {
24346                         $in_quote                = $quote_items{'q'};
24347                         $allowed_quote_modifiers = $quote_modifiers{'q'};
24348                         $type                    = 'q';
24349                         $quote_type              = 'q';
24350                         next;
24351                     }
24352
24353                     # handle bareword not followed by open paren
24354                     else {
24355                         $type = 'w';
24356                         next;
24357                     }
24358                 }
24359
24360                 # quote a word followed by => operator
24361                 if ( $next_nonblank_token eq '=' ) {
24362
24363                     if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
24364                         if ( $is_constant{$current_package}{$tok} ) {
24365                             $type = 'C';
24366                         }
24367                         elsif ( $is_user_function{$current_package}{$tok} ) {
24368                             $type = 'U';
24369                             $prototype =
24370                               $user_function_prototype{$current_package}{$tok};
24371                         }
24372                         elsif ( $tok =~ /^v\d+$/ ) {
24373                             $type = 'v';
24374                             report_v_string($tok);
24375                         }
24376                         else { $type = 'w' }
24377
24378                         next;
24379                     }
24380                 }
24381
24382      # quote a bare word within braces..like xxx->{s}; note that we
24383      # must be sure this is not a structural brace, to avoid
24384      # mistaking {s} in the following for a quoted bare word:
24385      #     for(@[){s}bla}BLA}
24386      # Also treat q in something like var{-q} as a bare word, not quote operator
24387                 if (
24388                     $next_nonblank_token eq '}'
24389                     && (
24390                         $last_nonblank_type eq 'L'
24391                         || (   $last_nonblank_type eq 'm'
24392                             && $last_last_nonblank_type eq 'L' )
24393                     )
24394                   )
24395                 {
24396                     $type = 'w';
24397                     next;
24398                 }
24399
24400                 # a bare word immediately followed by :: is not a keyword;
24401                 # use $tok_kw when testing for keywords to avoid a mistake
24402                 my $tok_kw = $tok;
24403                 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
24404                 {
24405                     $tok_kw .= '::';
24406                 }
24407
24408                 # handle operator x (now we know it isn't $x=)
24409                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
24410                     if ( $tok eq 'x' ) {
24411
24412                         if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
24413                             $tok  = 'x=';
24414                             $type = $tok;
24415                             $i++;
24416                         }
24417                         else {
24418                             $type = 'x';
24419                         }
24420                     }
24421
24422                     # FIXME: Patch: mark something like x4 as an integer for now
24423                     # It gets fixed downstream.  This is easier than
24424                     # splitting the pretoken.
24425                     else {
24426                         $type = 'n';
24427                     }
24428                 }
24429
24430                 elsif ( ( $tok eq 'strict' )
24431                     and ( $last_nonblank_token eq 'use' ) )
24432                 {
24433                     $tokenizer_self->{_saw_use_strict} = 1;
24434                     scan_bare_identifier();
24435                 }
24436
24437                 elsif ( ( $tok eq 'warnings' )
24438                     and ( $last_nonblank_token eq 'use' ) )
24439                 {
24440                     $tokenizer_self->{_saw_perl_dash_w} = 1;
24441
24442                     # scan as identifier, so that we pick up something like:
24443                     # use warnings::register
24444                     scan_bare_identifier();
24445                 }
24446
24447                 elsif (
24448                        $tok eq 'AutoLoader'
24449                     && $tokenizer_self->{_look_for_autoloader}
24450                     && (
24451                         $last_nonblank_token eq 'use'
24452
24453                         # these regexes are from AutoSplit.pm, which we want
24454                         # to mimic
24455                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
24456                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
24457                     )
24458                   )
24459                 {
24460                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
24461                     $tokenizer_self->{_saw_autoloader}      = 1;
24462                     $tokenizer_self->{_look_for_autoloader} = 0;
24463                     scan_bare_identifier();
24464                 }
24465
24466                 elsif (
24467                        $tok eq 'SelfLoader'
24468                     && $tokenizer_self->{_look_for_selfloader}
24469                     && (   $last_nonblank_token eq 'use'
24470                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
24471                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
24472                   )
24473                 {
24474                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
24475                     $tokenizer_self->{_saw_selfloader}      = 1;
24476                     $tokenizer_self->{_look_for_selfloader} = 0;
24477                     scan_bare_identifier();
24478                 }
24479
24480                 elsif ( ( $tok eq 'constant' )
24481                     and ( $last_nonblank_token eq 'use' ) )
24482                 {
24483                     scan_bare_identifier();
24484                     my ( $next_nonblank_token, $i_next ) =
24485                       find_next_nonblank_token( $i, $rtokens,
24486                         $max_token_index );
24487
24488                     if ($next_nonblank_token) {
24489
24490                         if ( $is_keyword{$next_nonblank_token} ) {
24491
24492                             # Assume qw is used as a quote and okay, as in:
24493                             #  use constant qw{ DEBUG 0 };
24494                             # Not worth trying to parse for just a warning
24495                             if ( $next_nonblank_token ne 'qw' ) {
24496                                 warning(
24497 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
24498                                 );
24499                             }
24500                         }
24501
24502                         # FIXME: could check for error in which next token is
24503                         # not a word (number, punctuation, ..)
24504                         else {
24505                             $is_constant{$current_package}
24506                               {$next_nonblank_token} = 1;
24507                         }
24508                     }
24509                 }
24510
24511                 # various quote operators
24512                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
24513                     if ( $expecting == OPERATOR ) {
24514
24515                         # patch for paren-less for/foreach glitch, part 1
24516                         # perl will accept this construct as valid:
24517                         #
24518                         #    foreach my $key qw\Uno Due Tres Quadro\ {
24519                         #        print "Set $key\n";
24520                         #    }
24521                         unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
24522                         {
24523                             error_if_expecting_OPERATOR();
24524                         }
24525                     }
24526                     $in_quote                = $quote_items{$tok};
24527                     $allowed_quote_modifiers = $quote_modifiers{$tok};
24528
24529                    # All quote types are 'Q' except possibly qw quotes.
24530                    # qw quotes are special in that they may generally be trimmed
24531                    # of leading and trailing whitespace.  So they are given a
24532                    # separate type, 'q', unless requested otherwise.
24533                     $type =
24534                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
24535                       ? 'q'
24536                       : 'Q';
24537                     $quote_type = $type;
24538                 }
24539
24540                 # check for a statement label
24541                 elsif (
24542                        ( $next_nonblank_token eq ':' )
24543                     && ( $$rtokens[ $i_next + 1 ] ne ':' )
24544                     && ( $i_next <= $max_token_index )    # colon on same line
24545                     && label_ok()
24546                   )
24547                 {
24548                     if ( $tok !~ /[A-Z]/ ) {
24549                         push @{ $tokenizer_self->{_rlower_case_labels_at} },
24550                           $input_line_number;
24551                     }
24552                     $type = 'J';
24553                     $tok .= ':';
24554                     $i = $i_next;
24555                     next;
24556                 }
24557
24558                 #      'sub' || 'package'
24559                 elsif ( $is_sub_package{$tok_kw} ) {
24560                     error_if_expecting_OPERATOR()
24561                       if ( $expecting == OPERATOR );
24562                     scan_id();
24563                 }
24564
24565                 # Note on token types for format, __DATA__, __END__:
24566                 # It simplifies things to give these type ';', so that when we
24567                 # start rescanning we will be expecting a token of type TERM.
24568                 # We will switch to type 'k' before outputting the tokens.
24569                 elsif ( $is_format_END_DATA{$tok_kw} ) {
24570                     $type = ';';    # make tokenizer look for TERM next
24571                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
24572                     last;
24573                 }
24574
24575                 elsif ( $is_keyword{$tok_kw} ) {
24576                     $type = 'k';
24577
24578                     # Since for and foreach may not be followed immediately
24579                     # by an opening paren, we have to remember which keyword
24580                     # is associated with the next '('
24581                     if ( $is_for_foreach{$tok} ) {
24582                         if ( new_statement_ok() ) {
24583                             $want_paren = $tok;
24584                         }
24585                     }
24586
24587                     # recognize 'use' statements, which are special
24588                     elsif ( $is_use_require{$tok} ) {
24589                         $statement_type = $tok;
24590                         error_if_expecting_OPERATOR()
24591                           if ( $expecting == OPERATOR );
24592                     }
24593
24594                     # remember my and our to check for trailing ": shared"
24595                     elsif ( $is_my_our{$tok} ) {
24596                         $statement_type = $tok;
24597                     }
24598
24599                     # Check for misplaced 'elsif' and 'else', but allow isolated
24600                     # else or elsif blocks to be formatted.  This is indicated
24601                     # by a last nonblank token of ';'
24602                     elsif ( $tok eq 'elsif' ) {
24603                         if (   $last_nonblank_token ne ';'
24604                             && $last_nonblank_block_type !~
24605                             /^(if|elsif|unless)$/ )
24606                         {
24607                             warning(
24608 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
24609                             );
24610                         }
24611                     }
24612                     elsif ( $tok eq 'else' ) {
24613
24614                         # patched for SWITCH/CASE
24615                         if (   $last_nonblank_token ne ';'
24616                             && $last_nonblank_block_type !~
24617                             /^(if|elsif|unless|case|when)$/ )
24618                         {
24619                             warning(
24620 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
24621                             );
24622                         }
24623                     }
24624                     elsif ( $tok eq 'continue' ) {
24625                         if (   $last_nonblank_token ne ';'
24626                             && $last_nonblank_block_type !~
24627                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
24628                         {
24629
24630                             # note: ';' '{' and '}' in list above
24631                             # because continues can follow bare blocks;
24632                             # ':' is labeled block
24633                             #
24634                             ############################################
24635                             # NOTE: This check has been deactivated because
24636                             # continue has an alternative usage for given/when
24637                             # blocks in perl 5.10
24638                             ## warning("'$tok' should follow a block\n");
24639                             ############################################
24640                         }
24641                     }
24642
24643                     # patch for SWITCH/CASE if 'case' and 'when' are
24644                     # treated as keywords.
24645                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
24646                         $statement_type = $tok;    # next '{' is block
24647                     }
24648
24649                     #
24650                     # indent trailing if/unless/while/until
24651                     # outdenting will be handled by later indentation loop
24652 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
24653 ##$opt_o = 1
24654 ##  if !(
24655 ##             $opt_b
24656 ##          || $opt_c
24657 ##          || $opt_d
24658 ##          || $opt_f
24659 ##          || $opt_i
24660 ##          || $opt_l
24661 ##          || $opt_o
24662 ##          || $opt_x
24663 ##  );
24664 ##                    if (   $tok =~ /^(if|unless|while|until)$/
24665 ##                        && $next_nonblank_token ne '(' )
24666 ##                    {
24667 ##                        $indent_flag = 1;
24668 ##                    }
24669                 }
24670
24671                 # check for inline label following
24672                 #         /^(redo|last|next|goto)$/
24673                 elsif (( $last_nonblank_type eq 'k' )
24674                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
24675                 {
24676                     $type = 'j';
24677                     next;
24678                 }
24679
24680                 # something else --
24681                 else {
24682
24683                     scan_bare_identifier();
24684                     if ( $type eq 'w' ) {
24685
24686                         if ( $expecting == OPERATOR ) {
24687
24688                             # don't complain about possible indirect object
24689                             # notation.
24690                             # For example:
24691                             #   package main;
24692                             #   sub new($) { ... }
24693                             #   $b = new A::;  # calls A::new
24694                             #   $c = new A;    # same thing but suspicious
24695                             # This will call A::new but we have a 'new' in
24696                             # main:: which looks like a constant.
24697                             #
24698                             if ( $last_nonblank_type eq 'C' ) {
24699                                 if ( $tok !~ /::$/ ) {
24700                                     complain(<<EOM);
24701 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
24702        Maybe indirectet object notation?
24703 EOM
24704                                 }
24705                             }
24706                             else {
24707                                 error_if_expecting_OPERATOR("bareword");
24708                             }
24709                         }
24710
24711                         # mark bare words immediately followed by a paren as
24712                         # functions
24713                         $next_tok = $$rtokens[ $i + 1 ];
24714                         if ( $next_tok eq '(' ) {
24715                             $type = 'U';
24716                         }
24717
24718                         # underscore after file test operator is file handle
24719                         if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
24720                             $type = 'Z';
24721                         }
24722
24723                         # patch for SWITCH/CASE if 'case' and 'when' are
24724                         # not treated as keywords:
24725                         if (
24726                             (
24727                                    $tok eq 'case'
24728                                 && $brace_type[$brace_depth] eq 'switch'
24729                             )
24730                             || (   $tok eq 'when'
24731                                 && $brace_type[$brace_depth] eq 'given' )
24732                           )
24733                         {
24734                             $statement_type = $tok;    # next '{' is block
24735                             $type = 'k';    # for keyword syntax coloring
24736                         }
24737
24738                         # patch for SWITCH/CASE if switch and given not keywords
24739                         # Switch is not a perl 5 keyword, but we will gamble
24740                         # and mark switch followed by paren as a keyword.  This
24741                         # is only necessary to get html syntax coloring nice,
24742                         # and does not commit this as being a switch/case.
24743                         if ( $next_nonblank_token eq '('
24744                             && ( $tok eq 'switch' || $tok eq 'given' ) )
24745                         {
24746                             $type = 'k';    # for keyword syntax coloring
24747                         }
24748                     }
24749                 }
24750             }
24751
24752             ###############################################################
24753             # section 2: strings of digits
24754             ###############################################################
24755             elsif ( $pre_type eq 'd' ) {
24756                 $expecting = operator_expected( $prev_type, $tok, $next_type );
24757                 error_if_expecting_OPERATOR("Number")
24758                   if ( $expecting == OPERATOR );
24759                 my $number = scan_number();
24760                 if ( !defined($number) ) {
24761
24762                     # shouldn't happen - we should always get a number
24763                     warning("non-number beginning with digit--program bug\n");
24764                     report_definite_bug();
24765                 }
24766             }
24767
24768             ###############################################################
24769             # section 3: all other tokens
24770             ###############################################################
24771
24772             else {
24773                 last if ( $tok eq '#' );
24774                 my $code = $tokenization_code->{$tok};
24775                 if ($code) {
24776                     $expecting =
24777                       operator_expected( $prev_type, $tok, $next_type );
24778                     $code->();
24779                     redo if $in_quote;
24780                 }
24781             }
24782         }
24783
24784         # -----------------------------
24785         # end of main tokenization loop
24786         # -----------------------------
24787
24788         if ( $i_tok >= 0 ) {
24789             $routput_token_type->[$i_tok]     = $type;
24790             $routput_block_type->[$i_tok]     = $block_type;
24791             $routput_container_type->[$i_tok] = $container_type;
24792             $routput_type_sequence->[$i_tok]  = $type_sequence;
24793             $routput_indent_flag->[$i_tok]    = $indent_flag;
24794         }
24795
24796         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
24797             $last_last_nonblank_token          = $last_nonblank_token;
24798             $last_last_nonblank_type           = $last_nonblank_type;
24799             $last_last_nonblank_block_type     = $last_nonblank_block_type;
24800             $last_last_nonblank_container_type = $last_nonblank_container_type;
24801             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
24802             $last_nonblank_token               = $tok;
24803             $last_nonblank_type                = $type;
24804             $last_nonblank_block_type          = $block_type;
24805             $last_nonblank_container_type      = $container_type;
24806             $last_nonblank_type_sequence       = $type_sequence;
24807             $last_nonblank_prototype           = $prototype;
24808         }
24809
24810         # reset indentation level if necessary at a sub or package
24811         # in an attempt to recover from a nesting error
24812         if ( $level_in_tokenizer < 0 ) {
24813             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
24814                 reset_indentation_level(0);
24815                 brace_warning("resetting level to 0 at $1 $2\n");
24816             }
24817         }
24818
24819         # all done tokenizing this line ...
24820         # now prepare the final list of tokens and types
24821
24822         my @token_type     = ();   # stack of output token types
24823         my @block_type     = ();   # stack of output code block types
24824         my @container_type = ();   # stack of output code container types
24825         my @type_sequence  = ();   # stack of output type sequence numbers
24826         my @tokens         = ();   # output tokens
24827         my @levels         = ();   # structural brace levels of output tokens
24828         my @slevels        = ();   # secondary nesting levels of output tokens
24829         my @nesting_tokens = ();   # string of tokens leading to this depth
24830         my @nesting_types  = ();   # string of token types leading to this depth
24831         my @nesting_blocks = ();   # string of block types leading to this depth
24832         my @nesting_lists  = ();   # string of list types leading to this depth
24833         my @ci_string = ();  # string needed to compute continuation indentation
24834         my @container_environment = ();    # BLOCK or LIST
24835         my $container_environment = '';
24836         my $im                    = -1;    # previous $i value
24837         my $num;
24838         my $ci_string_sum = ones_count($ci_string_in_tokenizer);
24839
24840 # Computing Token Indentation
24841 #
24842 #     The final section of the tokenizer forms tokens and also computes
24843 #     parameters needed to find indentation.  It is much easier to do it
24844 #     in the tokenizer than elsewhere.  Here is a brief description of how
24845 #     indentation is computed.  Perl::Tidy computes indentation as the sum
24846 #     of 2 terms:
24847 #
24848 #     (1) structural indentation, such as if/else/elsif blocks
24849 #     (2) continuation indentation, such as long parameter call lists.
24850 #
24851 #     These are occasionally called primary and secondary indentation.
24852 #
24853 #     Structural indentation is introduced by tokens of type '{', although
24854 #     the actual tokens might be '{', '(', or '['.  Structural indentation
24855 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
24856 #     is 4 characters if the standard indentation scheme is used.
24857 #
24858 #     Continuation indentation is introduced whenever a line at BLOCK level
24859 #     is broken before its termination.  Default continuation indentation
24860 #     is 2 characters in the standard indentation scheme.
24861 #
24862 #     Both types of indentation may be nested arbitrarily deep and
24863 #     interlaced.  The distinction between the two is somewhat arbitrary.
24864 #
24865 #     For each token, we will define two variables which would apply if
24866 #     the current statement were broken just before that token, so that
24867 #     that token started a new line:
24868 #
24869 #     $level = the structural indentation level,
24870 #     $ci_level = the continuation indentation level
24871 #
24872 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
24873 #     assuming defaults.  However, in some special cases it is customary
24874 #     to modify $ci_level from this strict value.
24875 #
24876 #     The total structural indentation is easy to compute by adding and
24877 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
24878 #     running value of this variable is $level_in_tokenizer.
24879 #
24880 #     The total continuation is much more difficult to compute, and requires
24881 #     several variables.  These variables are:
24882 #
24883 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
24884 #       each indentation level, if there are intervening open secondary
24885 #       structures just prior to that level.
24886 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
24887 #       if the last token at that level is "continued", meaning that it
24888 #       is not the first token of an expression.
24889 #     $nesting_block_string = a string of 1's and 0's indicating, for each
24890 #       indentation level, if the level is of type BLOCK or not.
24891 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
24892 #     $nesting_list_string = a string of 1's and 0's indicating, for each
24893 #       indentation level, if it is appropriate for list formatting.
24894 #       If so, continuation indentation is used to indent long list items.
24895 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
24896 #     @{$rslevel_stack} = a stack of total nesting depths at each
24897 #       structural indentation level, where "total nesting depth" means
24898 #       the nesting depth that would occur if every nesting token -- '{', '[',
24899 #       and '(' -- , regardless of context, is used to compute a nesting
24900 #       depth.
24901
24902         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
24903         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
24904
24905         my ( $ci_string_i, $level_i, $nesting_block_string_i,
24906             $nesting_list_string_i, $nesting_token_string_i,
24907             $nesting_type_string_i, );
24908
24909         foreach $i ( @{$routput_token_list} )
24910         {    # scan the list of pre-tokens indexes
24911
24912             # self-checking for valid token types
24913             my $type                    = $routput_token_type->[$i];
24914             my $forced_indentation_flag = $routput_indent_flag->[$i];
24915
24916             # See if we should undo the $forced_indentation_flag.
24917             # Forced indentation after 'if', 'unless', 'while' and 'until'
24918             # expressions without trailing parens is optional and doesn't
24919             # always look good.  It is usually okay for a trailing logical
24920             # expression, but if the expression is a function call, code block,
24921             # or some kind of list it puts in an unwanted extra indentation
24922             # level which is hard to remove.
24923             #
24924             # Example where extra indentation looks ok:
24925             # return 1
24926             #   if $det_a < 0 and $det_b > 0
24927             #       or $det_a > 0 and $det_b < 0;
24928             #
24929             # Example where extra indentation is not needed because
24930             # the eval brace also provides indentation:
24931             # print "not " if defined eval {
24932             #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
24933             # };
24934             #
24935             # The following rule works fairly well:
24936             #   Undo the flag if the end of this line, or start of the next
24937             #   line, is an opening container token or a comma.
24938             # This almost always works, but if not after another pass it will
24939             # be stable.
24940             if ( $forced_indentation_flag && $type eq 'k' ) {
24941                 my $ixlast  = -1;
24942                 my $ilast   = $routput_token_list->[$ixlast];
24943                 my $toklast = $routput_token_type->[$ilast];
24944                 if ( $toklast eq '#' ) {
24945                     $ixlast--;
24946                     $ilast   = $routput_token_list->[$ixlast];
24947                     $toklast = $routput_token_type->[$ilast];
24948                 }
24949                 if ( $toklast eq 'b' ) {
24950                     $ixlast--;
24951                     $ilast   = $routput_token_list->[$ixlast];
24952                     $toklast = $routput_token_type->[$ilast];
24953                 }
24954                 if ( $toklast =~ /^[\{,]$/ ) {
24955                     $forced_indentation_flag = 0;
24956                 }
24957                 else {
24958                     ( $toklast, my $i_next ) =
24959                       find_next_nonblank_token( $max_token_index, $rtokens,
24960                         $max_token_index );
24961                     if ( $toklast =~ /^[\{,]$/ ) {
24962                         $forced_indentation_flag = 0;
24963                     }
24964                 }
24965             }
24966
24967             # if we are already in an indented if, see if we should outdent
24968             if ($indented_if_level) {
24969
24970                 # don't try to nest trailing if's - shouldn't happen
24971                 if ( $type eq 'k' ) {
24972                     $forced_indentation_flag = 0;
24973                 }
24974
24975                 # check for the normal case - outdenting at next ';'
24976                 elsif ( $type eq ';' ) {
24977                     if ( $level_in_tokenizer == $indented_if_level ) {
24978                         $forced_indentation_flag = -1;
24979                         $indented_if_level       = 0;
24980                     }
24981                 }
24982
24983                 # handle case of missing semicolon
24984                 elsif ( $type eq '}' ) {
24985                     if ( $level_in_tokenizer == $indented_if_level ) {
24986                         $indented_if_level = 0;
24987
24988                         # TBD: This could be a subroutine call
24989                         $level_in_tokenizer--;
24990                         if ( @{$rslevel_stack} > 1 ) {
24991                             pop( @{$rslevel_stack} );
24992                         }
24993                         if ( length($nesting_block_string) > 1 )
24994                         {    # true for valid script
24995                             chop $nesting_block_string;
24996                             chop $nesting_list_string;
24997                         }
24998
24999                     }
25000                 }
25001             }
25002
25003             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
25004             $level_i = $level_in_tokenizer;
25005
25006             # This can happen by running perltidy on non-scripts
25007             # although it could also be a bug introduced by a programming change.
25008             # Perl silently accepts a 032 (^Z) and takes it as the end
25009             if ( !$is_valid_token_type{$type} ) {
25010                 my $val = ord($type);
25011                 warning(
25012                     "unexpected character decimal $val ($type) in script\n");
25013                 $tokenizer_self->{_in_error} = 1;
25014             }
25015
25016             # ----------------------------------------------------------------
25017             # TOKEN TYPE PATCHES
25018             #  output __END__, __DATA__, and format as type 'k' instead of ';'
25019             # to make html colors correct, etc.
25020             my $fix_type = $type;
25021             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
25022
25023             # output anonymous 'sub' as keyword
25024             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
25025
25026             # -----------------------------------------------------------------
25027
25028             $nesting_token_string_i = $nesting_token_string;
25029             $nesting_type_string_i  = $nesting_type_string;
25030             $nesting_block_string_i = $nesting_block_string;
25031             $nesting_list_string_i  = $nesting_list_string;
25032
25033             # set primary indentation levels based on structural braces
25034             # Note: these are set so that the leading braces have a HIGHER
25035             # level than their CONTENTS, which is convenient for indentation
25036             # Also, define continuation indentation for each token.
25037             if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
25038             {
25039
25040                 # use environment before updating
25041                 $container_environment =
25042                     $nesting_block_flag ? 'BLOCK'
25043                   : $nesting_list_flag  ? 'LIST'
25044                   :                       "";
25045
25046                 # if the difference between total nesting levels is not 1,
25047                 # there are intervening non-structural nesting types between
25048                 # this '{' and the previous unclosed '{'
25049                 my $intervening_secondary_structure = 0;
25050                 if ( @{$rslevel_stack} ) {
25051                     $intervening_secondary_structure =
25052                       $slevel_in_tokenizer - $rslevel_stack->[-1];
25053                 }
25054
25055      # Continuation Indentation
25056      #
25057      # Having tried setting continuation indentation both in the formatter and
25058      # in the tokenizer, I can say that setting it in the tokenizer is much,
25059      # much easier.  The formatter already has too much to do, and can't
25060      # make decisions on line breaks without knowing what 'ci' will be at
25061      # arbitrary locations.
25062      #
25063      # But a problem with setting the continuation indentation (ci) here
25064      # in the tokenizer is that we do not know where line breaks will actually
25065      # be.  As a result, we don't know if we should propagate continuation
25066      # indentation to higher levels of structure.
25067      #
25068      # For nesting of only structural indentation, we never need to do this.
25069      # For example, in a long if statement, like this
25070      #
25071      #   if ( !$output_block_type[$i]
25072      #     && ($in_statement_continuation) )
25073      #   {           <--outdented
25074      #       do_something();
25075      #   }
25076      #
25077      # the second line has ci but we do not normally give the lines within the BLOCK
25078      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
25079      #
25080      # But consider something like this, where we have created a break after
25081      # an opening paren on line 1, and the paren is not (currently) a
25082      # structural indentation token:
25083      #
25084      # my $file = $menubar->Menubutton(
25085      #   qw/-text File -underline 0 -menuitems/ => [
25086      #       [
25087      #           Cascade    => '~View',
25088      #           -menuitems => [
25089      #           ...
25090      #
25091      # The second line has ci, so it would seem reasonable to propagate it
25092      # down, giving the third line 1 ci + 1 indentation.  This suggests the
25093      # following rule, which is currently used to propagate ci down: if there
25094      # are any non-structural opening parens (or brackets, or braces), before
25095      # an opening structural brace, then ci is propagated down, and otherwise
25096      # not.  The variable $intervening_secondary_structure contains this
25097      # information for the current token, and the string
25098      # "$ci_string_in_tokenizer" is a stack of previous values of this
25099      # variable.
25100
25101                 # save the current states
25102                 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
25103                 $level_in_tokenizer++;
25104
25105                 if ($forced_indentation_flag) {
25106
25107                     # break BEFORE '?' when there is forced indentation
25108                     if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
25109                     if ( $type eq 'k' ) {
25110                         $indented_if_level = $level_in_tokenizer;
25111                     }
25112
25113                     # do not change container environment here if we are not
25114                     # at a real list. Adding this check prevents "blinkers"
25115                     # often near 'unless' clauses, such as in the following
25116                     # code:
25117 ##          next
25118 ##            unless -e (
25119 ##                    $archive =
25120 ##                      File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
25121 ##            );
25122
25123                     $nesting_block_string .= "$nesting_block_flag";
25124                 }
25125                 else {
25126
25127                     if ( $routput_block_type->[$i] ) {
25128                         $nesting_block_flag = 1;
25129                         $nesting_block_string .= '1';
25130                     }
25131                     else {
25132                         $nesting_block_flag = 0;
25133                         $nesting_block_string .= '0';
25134                     }
25135                 }
25136
25137                 # we will use continuation indentation within containers
25138                 # which are not blocks and not logical expressions
25139                 my $bit = 0;
25140                 if ( !$routput_block_type->[$i] ) {
25141
25142                     # propagate flag down at nested open parens
25143                     if ( $routput_container_type->[$i] eq '(' ) {
25144                         $bit = 1 if $nesting_list_flag;
25145                     }
25146
25147                   # use list continuation if not a logical grouping
25148                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
25149                     else {
25150                         $bit = 1
25151                           unless
25152                           $is_logical_container{ $routput_container_type->[$i]
25153                           };
25154                     }
25155                 }
25156                 $nesting_list_string .= $bit;
25157                 $nesting_list_flag = $bit;
25158
25159                 $ci_string_in_tokenizer .=
25160                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
25161                 $ci_string_sum = ones_count($ci_string_in_tokenizer);
25162                 $continuation_string_in_tokenizer .=
25163                   ( $in_statement_continuation > 0 ) ? '1' : '0';
25164
25165    #  Sometimes we want to give an opening brace continuation indentation,
25166    #  and sometimes not.  For code blocks, we don't do it, so that the leading
25167    #  '{' gets outdented, like this:
25168    #
25169    #   if ( !$output_block_type[$i]
25170    #     && ($in_statement_continuation) )
25171    #   {           <--outdented
25172    #
25173    #  For other types, we will give them continuation indentation.  For example,
25174    #  here is how a list looks with the opening paren indented:
25175    #
25176    #     @LoL =
25177    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
25178    #         [ "homer", "marge", "bart" ], );
25179    #
25180    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
25181
25182                 my $total_ci = $ci_string_sum;
25183                 if (
25184                     !$routput_block_type->[$i]    # patch: skip for BLOCK
25185                     && ($in_statement_continuation)
25186                     && !( $forced_indentation_flag && $type eq ':' )
25187                   )
25188                 {
25189                     $total_ci += $in_statement_continuation
25190                       unless ( $ci_string_in_tokenizer =~ /1$/ );
25191                 }
25192
25193                 $ci_string_i               = $total_ci;
25194                 $in_statement_continuation = 0;
25195             }
25196
25197             elsif ($type eq '}'
25198                 || $type eq 'R'
25199                 || $forced_indentation_flag < 0 )
25200             {
25201
25202                 # only a nesting error in the script would prevent popping here
25203                 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
25204
25205                 $level_i = --$level_in_tokenizer;
25206
25207                 # restore previous level values
25208                 if ( length($nesting_block_string) > 1 )
25209                 {    # true for valid script
25210                     chop $nesting_block_string;
25211                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
25212                     chop $nesting_list_string;
25213                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
25214
25215                     chop $ci_string_in_tokenizer;
25216                     $ci_string_sum = ones_count($ci_string_in_tokenizer);
25217
25218                     $in_statement_continuation =
25219                       chop $continuation_string_in_tokenizer;
25220
25221                     # zero continuation flag at terminal BLOCK '}' which
25222                     # ends a statement.
25223                     if ( $routput_block_type->[$i] ) {
25224
25225                         # ...These include non-anonymous subs
25226                         # note: could be sub ::abc { or sub 'abc
25227                         if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
25228
25229                          # note: older versions of perl require the /gc modifier
25230                          # here or else the \G does not work.
25231                             if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
25232                             {
25233                                 $in_statement_continuation = 0;
25234                             }
25235                         }
25236
25237 # ...and include all block types except user subs with
25238 # block prototypes and these: (sort|grep|map|do|eval)
25239 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
25240                         elsif (
25241                             $is_zero_continuation_block_type{
25242                                 $routput_block_type->[$i]
25243                             } )
25244                         {
25245                             $in_statement_continuation = 0;
25246                         }
25247
25248                         # ..but these are not terminal types:
25249                         #     /^(sort|grep|map|do|eval)$/ )
25250                         elsif (
25251                             $is_not_zero_continuation_block_type{
25252                                 $routput_block_type->[$i]
25253                             } )
25254                         {
25255                         }
25256
25257                         # ..and a block introduced by a label
25258                         # /^\w+\s*:$/gc ) {
25259                         elsif ( $routput_block_type->[$i] =~ /:$/ ) {
25260                             $in_statement_continuation = 0;
25261                         }
25262
25263                         # user function with block prototype
25264                         else {
25265                             $in_statement_continuation = 0;
25266                         }
25267                     }
25268
25269                     # If we are in a list, then
25270                     # we must set continuation indentation at the closing
25271                     # paren of something like this (paren after $check):
25272                     #     assert(
25273                     #         __LINE__,
25274                     #         ( not defined $check )
25275                     #           or ref $check
25276                     #           or $check eq "new"
25277                     #           or $check eq "old",
25278                     #     );
25279                     elsif ( $tok eq ')' ) {
25280                         $in_statement_continuation = 1
25281                           if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
25282                     }
25283
25284                     elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
25285                 }
25286
25287                 # use environment after updating
25288                 $container_environment =
25289                     $nesting_block_flag ? 'BLOCK'
25290                   : $nesting_list_flag  ? 'LIST'
25291                   :                       "";
25292                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
25293                 $nesting_block_string_i = $nesting_block_string;
25294                 $nesting_list_string_i  = $nesting_list_string;
25295             }
25296
25297             # not a structural indentation type..
25298             else {
25299
25300                 $container_environment =
25301                     $nesting_block_flag ? 'BLOCK'
25302                   : $nesting_list_flag  ? 'LIST'
25303                   :                       "";
25304
25305                 # zero the continuation indentation at certain tokens so
25306                 # that they will be at the same level as their container.  For
25307                 # commas, this simplifies the -lp indentation logic, which
25308                 # counts commas.  For ?: it makes them stand out.
25309                 if ($nesting_list_flag) {
25310                     if ( $type =~ /^[,\?\:]$/ ) {
25311                         $in_statement_continuation = 0;
25312                     }
25313                 }
25314
25315                 # be sure binary operators get continuation indentation
25316                 if (
25317                     $container_environment
25318                     && (   $type eq 'k' && $is_binary_keyword{$tok}
25319                         || $is_binary_type{$type} )
25320                   )
25321                 {
25322                     $in_statement_continuation = 1;
25323                 }
25324
25325                 # continuation indentation is sum of any open ci from previous
25326                 # levels plus the current level
25327                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
25328
25329                 # update continuation flag ...
25330                 # if this isn't a blank or comment..
25331                 if ( $type ne 'b' && $type ne '#' ) {
25332
25333                     # and we are in a BLOCK
25334                     if ($nesting_block_flag) {
25335
25336                         # the next token after a ';' and label starts a new stmt
25337                         if ( $type eq ';' || $type eq 'J' ) {
25338                             $in_statement_continuation = 0;
25339                         }
25340
25341                         # otherwise, we are continuing the current statement
25342                         else {
25343                             $in_statement_continuation = 1;
25344                         }
25345                     }
25346
25347                     # if we are not in a BLOCK..
25348                     else {
25349
25350                         # do not use continuation indentation if not list
25351                         # environment (could be within if/elsif clause)
25352                         if ( !$nesting_list_flag ) {
25353                             $in_statement_continuation = 0;
25354                         }
25355
25356                        # otherwise, the next token after a ',' starts a new term
25357                         elsif ( $type eq ',' ) {
25358                             $in_statement_continuation = 0;
25359                         }
25360
25361                         # otherwise, we are continuing the current term
25362                         else {
25363                             $in_statement_continuation = 1;
25364                         }
25365                     }
25366                 }
25367             }
25368
25369             if ( $level_in_tokenizer < 0 ) {
25370                 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
25371                     $tokenizer_self->{_saw_negative_indentation} = 1;
25372                     warning("Starting negative indentation\n");
25373                 }
25374             }
25375
25376             # set secondary nesting levels based on all continment token types
25377             # Note: these are set so that the nesting depth is the depth
25378             # of the PREVIOUS TOKEN, which is convenient for setting
25379             # the stength of token bonds
25380             my $slevel_i = $slevel_in_tokenizer;
25381
25382             #    /^[L\{\(\[]$/
25383             if ( $is_opening_type{$type} ) {
25384                 $slevel_in_tokenizer++;
25385                 $nesting_token_string .= $tok;
25386                 $nesting_type_string  .= $type;
25387             }
25388
25389             #       /^[R\}\)\]]$/
25390             elsif ( $is_closing_type{$type} ) {
25391                 $slevel_in_tokenizer--;
25392                 my $char = chop $nesting_token_string;
25393
25394                 if ( $char ne $matching_start_token{$tok} ) {
25395                     $nesting_token_string .= $char . $tok;
25396                     $nesting_type_string  .= $type;
25397                 }
25398                 else {
25399                     chop $nesting_type_string;
25400                 }
25401             }
25402
25403             push( @block_type,            $routput_block_type->[$i] );
25404             push( @ci_string,             $ci_string_i );
25405             push( @container_environment, $container_environment );
25406             push( @container_type,        $routput_container_type->[$i] );
25407             push( @levels,                $level_i );
25408             push( @nesting_tokens,        $nesting_token_string_i );
25409             push( @nesting_types,         $nesting_type_string_i );
25410             push( @slevels,               $slevel_i );
25411             push( @token_type,            $fix_type );
25412             push( @type_sequence,         $routput_type_sequence->[$i] );
25413             push( @nesting_blocks,        $nesting_block_string );
25414             push( @nesting_lists,         $nesting_list_string );
25415
25416             # now form the previous token
25417             if ( $im >= 0 ) {
25418                 $num =
25419                   $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
25420
25421                 if ( $num > 0 ) {
25422                     push( @tokens,
25423                         substr( $input_line, $$rtoken_map[$im], $num ) );
25424                 }
25425             }
25426             $im = $i;
25427         }
25428
25429         $num = length($input_line) - $$rtoken_map[$im];    # make the last token
25430         if ( $num > 0 ) {
25431             push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
25432         }
25433
25434         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
25435         $tokenizer_self->{_in_quote}          = $in_quote;
25436         $tokenizer_self->{_quote_target} =
25437           $in_quote ? matching_end_token($quote_character) : "";
25438         $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
25439
25440         $line_of_tokens->{_rtoken_type}            = \@token_type;
25441         $line_of_tokens->{_rtokens}                = \@tokens;
25442         $line_of_tokens->{_rblock_type}            = \@block_type;
25443         $line_of_tokens->{_rcontainer_type}        = \@container_type;
25444         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
25445         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
25446         $line_of_tokens->{_rlevels}                = \@levels;
25447         $line_of_tokens->{_rslevels}               = \@slevels;
25448         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
25449         $line_of_tokens->{_rci_levels}             = \@ci_string;
25450         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
25451
25452         return;
25453     }
25454 }    # end tokenize_this_line
25455
25456 #########i#############################################################
25457 # Tokenizer routines which assist in identifying token types
25458 #######################################################################
25459
25460 sub operator_expected {
25461
25462     # Many perl symbols have two or more meanings.  For example, '<<'
25463     # can be a shift operator or a here-doc operator.  The
25464     # interpretation of these symbols depends on the current state of
25465     # the tokenizer, which may either be expecting a term or an
25466     # operator.  For this example, a << would be a shift if an operator
25467     # is expected, and a here-doc if a term is expected.  This routine
25468     # is called to make this decision for any current token.  It returns
25469     # one of three possible values:
25470     #
25471     #     OPERATOR - operator expected (or at least, not a term)
25472     #     UNKNOWN  - can't tell
25473     #     TERM     - a term is expected (or at least, not an operator)
25474     #
25475     # The decision is based on what has been seen so far.  This
25476     # information is stored in the "$last_nonblank_type" and
25477     # "$last_nonblank_token" variables.  For example, if the
25478     # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
25479     # if $last_nonblank_type is 'n' (numeric), we are expecting an
25480     # OPERATOR.
25481     #
25482     # If a UNKNOWN is returned, the calling routine must guess. A major
25483     # goal of this tokenizer is to minimize the possibility of returning
25484     # UNKNOWN, because a wrong guess can spoil the formatting of a
25485     # script.
25486     #
25487     # adding NEW_TOKENS: it is critically important that this routine be
25488     # updated to allow it to determine if an operator or term is to be
25489     # expected after the new token.  Doing this simply involves adding
25490     # the new token character to one of the regexes in this routine or
25491     # to one of the hash lists that it uses, which are initialized in
25492     # the BEGIN section.
25493     # ARGS: $prev_type, $tok (the current token), $next_type.
25494     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
25495     # $statement_type
25496     my ( $prev_type, $tok, $next_type ) = @_;
25497
25498     my $op_expected = UNKNOWN;
25499
25500 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
25501
25502 # Note: function prototype is available for token type 'U' for future
25503 # program development.  It contains the leading and trailing parens,
25504 # and no blanks.  It might be used to eliminate token type 'C', for
25505 # example (prototype = '()'). Thus:
25506 # if ($last_nonblank_type eq 'U') {
25507 #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
25508 # }
25509
25510     # A possible filehandle (or object) requires some care...
25511     if ( $last_nonblank_type eq 'Z' ) {
25512
25513         # angle.t
25514         if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
25515             $op_expected = UNKNOWN;
25516         }
25517
25518         # For possible file handle like "$a", Perl uses weird parsing rules.
25519         # For example:
25520         # print $a/2,"/hi";   - division
25521         # print $a / 2,"/hi"; - division
25522         # print $a/ 2,"/hi";  - division
25523         # print $a /2,"/hi";  - pattern (and error)!
25524         elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
25525             $op_expected = TERM;
25526         }
25527
25528         # Note when an operation is being done where a
25529         # filehandle might be expected, since a change in whitespace
25530         # could change the interpretation of the statement.
25531         else {
25532             if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
25533                 complain("operator in print statement not recommended\n");
25534                 $op_expected = OPERATOR;
25535             }
25536         }
25537     }
25538
25539     # handle something after 'do' and 'eval'
25540     elsif ( $is_block_operator{$last_nonblank_token} ) {
25541
25542         # something like $a = eval "expression";
25543         #                          ^
25544         if ( $last_nonblank_type eq 'k' ) {
25545             $op_expected = TERM;    # expression or list mode following keyword
25546         }
25547
25548         # something like $a = do { BLOCK } / 2;
25549         #                                  ^
25550         else {
25551             $op_expected = OPERATOR;    # block mode following }
25552         }
25553     }
25554
25555     # handle bare word..
25556     elsif ( $last_nonblank_type eq 'w' ) {
25557
25558         # unfortunately, we can't tell what type of token to expect next
25559         # after most bare words
25560         $op_expected = UNKNOWN;
25561     }
25562
25563     # operator, but not term possible after these types
25564     # Note: moved ')' from type to token because parens in list context
25565     # get marked as '{' '}' now.  This is a minor glitch in the following:
25566     #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
25567     #
25568     elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
25569         || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
25570     {
25571         $op_expected = OPERATOR;
25572
25573         # in a 'use' statement, numbers and v-strings are not true
25574         # numbers, so to avoid incorrect error messages, we will
25575         # mark them as unknown for now (use.t)
25576         # TODO: it would be much nicer to create a new token V for VERSION
25577         # number in a use statement.  Then this could be a check on type V
25578         # and related patches which change $statement_type for '=>'
25579         # and ',' could be removed.  Further, it would clean things up to
25580         # scan the 'use' statement with a separate subroutine.
25581         if (   ( $statement_type eq 'use' )
25582             && ( $last_nonblank_type =~ /^[nv]$/ ) )
25583         {
25584             $op_expected = UNKNOWN;
25585         }
25586     }
25587
25588     # no operator after many keywords, such as "die", "warn", etc
25589     elsif ( $expecting_term_token{$last_nonblank_token} ) {
25590
25591         # patch for dor.t (defined or).
25592         # perl functions which may be unary operators; the whole token
25593         # must match (the alternation is grouped inside the anchors).
25594         # TODO: This list is incomplete; these should be put into a hash.
25595         if (   $tok eq '/'
25596             && $next_type          eq '/'
25597             && $last_nonblank_type eq 'k'
25598             && $last_nonblank_token =~ /^(?:eof|undef|shift|pop)$/ )
25599         {
25600             $op_expected = OPERATOR;
25601         }
25602         else {
25603             $op_expected = TERM;
25604         }
25605     }
25606
25607     # no operator after things like + - **  (i.e., other operators)
25608     elsif ( $expecting_term_types{$last_nonblank_type} ) {
25609         $op_expected = TERM;
25610     }
25611
25612     # a few operators, like "time", have an empty prototype () and so
25613     # take no parameters but produce a value to operate on
25614     elsif ( $expecting_operator_token{$last_nonblank_token} ) {
25615         $op_expected = OPERATOR;
25616     }
25617
25618     # post-increment and decrement produce values to be operated on
25619     elsif ( $expecting_operator_types{$last_nonblank_type} ) {
25620         $op_expected = OPERATOR;
25621     }
25622
25623     # no value to operate on after sub block
25624     elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
25625
25626     # a right brace here indicates the end of a simple block.
25627     # all non-structural right braces have type 'R'
25628     # all braces associated with block operator keywords have been given those
25629     # keywords as "last_nonblank_token" and caught above.
25630     # (This statement is order dependent, and must come after checking
25631     # $last_nonblank_token).
25632     elsif ( $last_nonblank_type eq '}' ) {
25633
25634         # patch for dor.t (defined or).
25635         if (   $tok eq '/'
25636             && $next_type eq '/'
25637             && $last_nonblank_token eq ']' )
25638         {
25639             $op_expected = OPERATOR;
25640         }
25641         else {
25642             $op_expected = TERM;
25643         }
25644     }
25645
25646     # something else..what did I forget?
25647     else {
25648
25649         # collecting diagnostics on unknown operator types..see what was missed
25650         $op_expected = UNKNOWN;
25651         write_diagnostics(
25652 "OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
25653         );
25654     }
25655
25656     TOKENIZER_DEBUG_FLAG_EXPECT && do {
25657         print
25658 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
25659     };
25660     return $op_expected;
25661 }
25662
25663 sub new_statement_ok {
25664
25665     # True when a new statement may begin at the current token: either
25666     # a label would be acceptable here or the previous token was a label.
25667     # USES GLOBAL VARIABLES: $last_nonblank_type
25668     my $label_allowed = label_ok();
25669     return $label_allowed if $label_allowed;
25670     return $last_nonblank_type eq 'J';
25671
25672 }
25673
25674 sub label_ok {
25675
25676     # Report whether a bare word followed by a colon at this point
25677     # should be treated as a label.
25678     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25679     # $brace_depth, @brace_type
25680
25681     # After an opening or closing curly brace whose type equals its
25682     # token, it is a label if and only if the curly encloses a code block.
25683     if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
25684         && $last_nonblank_type eq $last_nonblank_token )
25685     {
25686         return $brace_type[$brace_depth];
25687     }
25688
25689     # Anywhere else, it is a label if and only if the previous nonblank
25690     # token was a ';' (real or fake).
25691     my $follows_semicolon = ( $last_nonblank_type eq ';' );
25692     return $follows_semicolon;
25693
25694 }
25695
25696 sub code_block_type {
25697
25698     # Decide whether the current '{' opens a block of code and, if so,
25699     # what type of block it is.
25700     # Must be called only when $type = $token = '{'
25701     # The difficulty is telling the start of a block of code apart from
25702     # the start of an anonymous hash reference.
25703     # Returns "" if not code block, otherwise returns 'last_nonblank_token'
25704     # to indicate the type of code block.  (For example, 'last_nonblank_token'
25705     # might be 'if' for an if block, 'else' for an else block, etc).
25706     #
25707     # The previous nonblank token is checked for these cases, in order:
25708     #   - a '{' whose type is also '{'
25709     #   - a ';'
25710     #   - a '}' whose type is also '}'
25711     #   - a label (type 'J')
25712     #   - a token found in %is_code_block_token
25713     #   - a sub or package name
25714     #   - a user-defined sub with block parameters (type 'G')
25715     #   - a bareword (type 'w')
25716     # and anything else is taken to be an anonymous hash reference.
25717     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25718     # $last_nonblank_block_type, $brace_depth, @brace_type
25719
25720 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
25721
25722     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25723
25724     # Each case below returns immediately, so the tests are made strictly
25725     # in the order written.
25726
25727     # handle case of multiple '{'s
25728     if (   $last_nonblank_token eq '{'
25729         && $last_nonblank_type eq $last_nonblank_token )
25730     {
25731
25732         # cannot start a code block within an anonymous hash
25733         if ( !$brace_type[$brace_depth] ) {
25734             return "";
25735         }
25736
25737         # opening brace where a statement may appear is probably
25738         # a code block but might be an anonymous hash reference
25739         return decide_if_code_block( $i, $rtokens, $rtoken_type,
25740             $max_token_index );
25741     }
25742
25743     # an opening brace after a ';' is where a statement may appear; it is
25744     # probably a code block but might be an anonymous hash reference
25745     if ( $last_nonblank_token eq ';' ) {
25746         return decide_if_code_block( $i, $rtokens, $rtoken_type,
25747             $max_token_index );
25748     }
25749
25750     # handle case of '}{'
25751     if (   $last_nonblank_token eq '}'
25752         && $last_nonblank_type eq $last_nonblank_token )
25753     {
25754
25755         # must be a block if it follows a closing hash reference
25756         if ( !$last_nonblank_block_type ) {
25757             return $last_nonblank_token;
25758         }
25759
25760         # a } { situation ...
25761         # could be hash reference after code block..(blktype1.t)
25762         return decide_if_code_block( $i, $rtokens, $rtoken_type,
25763             $max_token_index );
25764     }
25765
25766     # NOTE: braces after type characters start code blocks, but for
25767     # simplicity these are not identified as such.  See also
25768     # sub is_non_structural_brace.
25769     # if ( $last_nonblank_type eq 't' ) {
25770     #    return $last_nonblank_token;
25771     # }
25772
25773     # brace after label:
25774     return $last_nonblank_token if $last_nonblank_type eq 'J';
25775
25776 # otherwise, look at previous token.  This must be a code block if
25777 # it follows any of these:
25778 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
25779     if ( $is_code_block_token{$last_nonblank_token} ) {
25780
25781         # Bug Patch: Note that the opening brace after the 'if' in the following
25782         # snippet is an anonymous hash ref and not a code block!
25783         #   print 'hi' if { x => 1, }->{x};
25784         # We can identify this situation because the last nonblank type
25785         # will be a keyword (instead of a closing paren)
25786         my $follows_bare_keyword =
25787           (      $last_nonblank_token =~ /^(if|unless)$/
25788               && $last_nonblank_type eq 'k' );
25789         return $follows_bare_keyword ? "" : $last_nonblank_token;
25790     }
25791
25792     # or a sub definition
25793     if ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
25794         && $last_nonblank_token =~ /^(sub|package)\b/ )
25795     {
25796         return $last_nonblank_token;
25797     }
25798
25799     # user-defined subs with block parameters (like grep/map/eval)
25800     return $last_nonblank_token if $last_nonblank_type eq 'G';
25801
25802     # check bareword
25803     if ( $last_nonblank_type eq 'w' ) {
25804         return decide_if_code_block( $i, $rtokens, $rtoken_type,
25805             $max_token_index );
25806     }
25807
25808     # anything else must be anonymous hash reference
25809     return "";
25810 }
25811
25812 sub decide_if_code_block {
25813     # Guess whether a '{' opens a code block or an anonymous hash ref.
25814     # USES GLOBAL VARIABLES: $last_nonblank_token
25815     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25816     my ( $next_nonblank_token, $i_next ) =
25817       find_next_nonblank_token( $i, $rtokens, $max_token_index );
25818     # NOTE(review): $i_next is not referenced again in this sub.
25819     # we are at a '{' where a statement may appear.
25820     # We must decide if this brace starts an anonymous hash or a code
25821     # block.
25822     # return "" if anonymous hash, and $last_nonblank_token otherwise
25823
25824     # initialize to be code BLOCK
25825     my $code_block_type = $last_nonblank_token;
25826
25827     # Check for the common case of an empty anonymous hash reference:
25828     # Maybe something like sub { { } }
25829     if ( $next_nonblank_token eq '}' ) {
25830         $code_block_type = "";
25831     }
25832
25833     else {
25834
25835         # To guess if this '{' is an anonymous hash reference, look ahead
25836         # and test as follows:
25837         #
25838         # it is a hash reference if next come:
25839         #   - a string or digit followed by a comma or =>
25840         #   - bareword followed by =>
25841         # otherwise it is a code block
25842         #
25843         # Examples of anonymous hash ref:
25844         # {'aa',};
25845         # {1,2}
25846         #
25847         # Examples of code blocks:
25848         # {1; print "hello\n", 1;}
25849         # {$a,1};
25850
25851         # We are only going to look ahead one more (nonblank/comment) line.
25852         # Strange formatting could cause a bad guess, but that's unlikely.
25853         my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
25854         my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
25855         my ( $rpre_tokens, $rpre_types ) =
25856           peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
25857                                                        # generous, and prevents
25858                                                        # wasting lots of
25859                                                        # time in mangled files
25860         if ( defined($rpre_types) && @$rpre_types ) {
25861             push @pre_types,  @$rpre_types;
25862             push @pre_tokens, @$rpre_tokens;
25863         }
25864
25865         # put a sentinel token to simplify stopping the search
25866         push @pre_types, '}';
25867
25868         my $jbeg = 0;
25869         $jbeg = 1 if $pre_types[0] eq 'b';
25870         # first look for one of these
25871         #  - bareword
25872         #  - bareword with leading -
25873         #  - digit
25874         #  - quoted string
25875         # Note: the '-' test uses ++$j, so $j advances even if it fails.
25876         my $j = $jbeg;
25877         if ( $pre_types[$j] =~ /^[\'\"]/ ) {
25878
25879             # find the closing quote; don't worry about escapes
25880             my $quote_mark = $pre_types[$j];
25881             for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
25882                 if ( $pre_types[$k] eq $quote_mark ) {
25883                     $j = $k + 1;
25884                     my $next = $pre_types[$j];    # NOTE(review): never used
25885                     last;
25886                 }
25887             }
25888         }
25889         elsif ( $pre_types[$j] eq 'd' ) {
25890             $j++;
25891         }
25892         elsif ( $pre_types[$j] eq 'w' ) {
25893             unless ( $is_keyword{ $pre_tokens[$j] } ) {
25894                 $j++;
25895             }
25896         }
25897         elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
25898             $j++;
25899         }
25900         if ( $j > $jbeg ) {
25901
25902             $j++ if $pre_types[$j] eq 'b';
25903
25904             # it's a hash ref if a comma or => follow next
25905             if ( $pre_types[$j] eq ','
25906                 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
25907             {
25908                 $code_block_type = "";
25909             }
25910         }
25911     }
25912
25913     return $code_block_type;
25914 }
25915
25916 sub unexpected {
25917
25918     # Warn that token type $found was seen where $expecting was expected,
25919     # and show the input line with the position marked.
25920     # USES GLOBAL VARIABLES: $tokenizer_self
25921     my (
25922         $found,         $expecting,      $i_tok, $last_nonblank_i,
25923         $rpretoken_map, $rpretoken_type, $input_line
25924     ) = @_;
25925
25926     # every call is counted, but only the first MAX_NAG_MESSAGES are reported
25927     if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
25928         my $pos     = $rpretoken_map->[$i_tok];
25929         my $trailer = "";
25930         interrupt_logfile();
25931         my ( $offset, $numbered_line, $underline ) =
25932           make_numbered_line( $tokenizer_self->{_last_line_number},
25933             $input_line, $pos );
25934         $underline = write_on_underline( $underline, $pos - $offset, '^' );
25935
25936         # underline the previous token with dashes (at most 40 characters)
25937         if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
25938             my $pos_prev = $rpretoken_map->[$last_nonblank_i];
25939             my $pos_end =
25940               ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' )
25941               ? $rpretoken_map->[ $i_tok - 1 ]
25942               : $pos;
25943             my $num = $pos_end - $pos_prev;
25944             if ( $num > 40 ) { ( $num, $pos_prev ) = ( 40, $pos - 40 ) }
25945             $underline =
25946               write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
25947             $trailer = " (previous token underlined)";
25948         }
25949         warning("$numbered_line\n");
25950         warning("$underline\n");
25951         warning( "found $found where $expecting expected" . $trailer . "\n" );
25952         resume_logfile();
25953     }
25954 }
25955
25956 sub is_non_structural_brace {
25957     # Decide if a brace or bracket is structural or non-structural
25958     # by looking at the previous token and type.  The value of the final
25959     # expression is returned: true means non-structural.
25960     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
25961
25962     # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
25963     # Tentatively deactivated because it caused the wrong operator expectation
25964     # for this code:
25965     #      $user = @vars[1] / 100;
25966     # Must update sub operator_expected before re-implementing.
25967     # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
25968     #    return 0;
25969     # }
25970
25971     # NOTE: braces after type characters start code blocks, but for
25972     # simplicity these are not identified as such.  See also
25973     # sub code_block_type
25974     # if ($last_nonblank_type eq 't') {return 0}
25975
25976     # otherwise, it is non-structural if it is decorated
25977     # by type information.
25978     # For example, the '{' here is non-structural:   ${xxx}
25979     (
25980         $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
25981
25982           # or if we follow a hash or array closing curly brace or bracket
25983           # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
25984           # because the first '}' would have been given type 'R'
25985           || $last_nonblank_type =~ /^([R\]])$/
25986     );
25987 }
25988
25989 #########i#############################################################
25990 # Tokenizer routines for tracking container nesting depths
25991 #######################################################################
25992
25993 # The following routines keep track of nesting depths of the nesting
25994 # types, ( [ { and ?.  This is necessary for determining the indentation
25995 # level, and also for debugging programs.  Not only do they keep track of
25996 # nesting depths of the individual brace types, but they check that each
25997 # of the other brace types is balanced within matching pairs.  For
25998 # example, if the program sees this sequence:
25999 #
26000 #         {  ( ( ) }
26001 #
26002 # then it can determine that there is an extra left paren somewhere
26003 # between the { and the }.  And so on with every other possible
26004 # combination of outer and inner brace types.  For another
26005 # example:
26006 #
26007 #         ( [ ..... ]  ] )
26008 #
26009 # which has an extra ] within the parens.
26010 #
26011 # The brace types have indexes 0 .. 3 which are indexes into
26012 # the matrices.
26013 #
26014 # The pair ? : are treated as just another nesting type, with ? acting
26015 # as the opening brace and : acting as the closing brace.
26016 #
26017 # The matrix
26018 #
26019 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
26020 #
26021 # saves the nesting depth of brace type $b (where $b is either of the other
26022 # nesting types) when brace type $a enters a new depth.  When this depth
26023 # decreases, a check is made that the current depth of brace types $b is
26024 # unchanged, or otherwise there must have been an error.  This can
26025 # be very useful for localizing errors, particularly when perl runs to
26026 # the end of a large file (such as this one) and announces that there
26027 # is a problem somewhere.
26028 #
26029 # A numerical sequence number is maintained for every nesting type,
26030 # so that each matching pair can be uniquely identified in a simple
26031 # way.
26032
26033 sub increase_nesting_depth {
26034     my ( $aa, $pos ) = @_;
26035
26036     # Enter a new nesting depth for nesting type $aa.
26037     # Returns ( $seqno, $indent ): the new sequence number for type $aa,
26038     # and a flag which is set to 1 for indenting a nested ternary.
26039     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
26040     # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
26041     # $statement_type
26042     my $depth = ++$current_depth[$aa];
26043     $total_depth[$aa][$depth] = ++$total_depth;
26044
26045     # Sequence numbers increment by number of items.  This keeps
26046     # a unique set of numbers but still allows the relative location
26047     # of any type to be determined.
26048     my $seqno = $nesting_sequence_number[$aa] +=
26049       scalar(@closing_brace_names);
26050     $current_sequence_number[$aa][$depth] = $seqno;
26051
26052     # remember the line number, line text and $pos where this depth began
26053     $starting_line_of_current_depth[$aa][$depth] = [
26054         $tokenizer_self->{_last_line_number},
26055         $tokenizer_self->{_line_text}, $pos
26056     ];
26057
26058     # save the current depth of each of the other nesting types
26059     for my $bb ( grep { $_ != $aa } 0 .. $#closing_brace_names ) {
26060         $depth_array[$aa][$bb][$depth] = $current_depth[$bb];
26061     }
26062
26063     # set a flag for indenting a nested ternary statement
26064     my $indent = 0;
26065     if ( $aa == QUESTION_COLON ) {
26066         $nested_ternary_flag[$depth] = 0;
26067         if (   $depth > 1
26068             && $nested_ternary_flag[ $depth - 1 ] == 0
26069             && $total_depth[$aa][ $depth - 1 ] == $total_depth - 1 )
26070         {
26071             $indent = 1;
26072             $nested_ternary_flag[ $depth - 1 ] = -1;
26073         }
26074     }
26075     $nested_statement_type[$aa][$depth] = $statement_type;
26076     $statement_type = "";
26077     return ( $seqno, $indent );
26078 }
26079
sub decrease_nesting_depth {

    # Register the closing token of a container of type $aa found at
    # character position $pos of the current input line.
    #
    # Returns ( $seqno, $outdent ):
    #   $seqno   - sequence number stored for the container being closed,
    #              or 0 if there was no open container of this type
    #   $outdent - for type QUESTION_COLON, the nested ternary flag stored
    #              at the depth being closed; otherwise 0
    #
    # Also restores $statement_type from the value saved for this depth
    # and complains if other container types opened inside this one are
    # not balanced.
    #
    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    # $statement_type
    my ( $aa, $pos ) = @_;

    my $seqno             = 0;
    my $outdent           = 0;
    my $input_line_number = $tokenizer_self->{_last_line_number};
    my $input_line        = $tokenizer_self->{_line_text};

    $total_depth--;

    # A closing token with nothing open: report it and leave
    unless ( $current_depth[$aa] > 0 ) {
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
            my $msg = <<"EOM";
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
        }
        increment_brace_error();
        return ( $seqno, $outdent );
    }

    # depth of the container which is now being closed
    my $depth = $current_depth[$aa];

    $seqno = $current_sequence_number[$aa][$depth];

    # set a flag for un-indenting after seeing a nested ternary statement
    $outdent = $nested_ternary_flag[$depth] if ( $aa == QUESTION_COLON );

    $statement_type = $nested_statement_type[$aa][$depth];

    # check that any brace types $bb contained within are balanced
    foreach my $bb ( 0 .. $#closing_brace_names ) {
        next if ( $bb == $aa );
        next if ( $depth_array[$aa][$bb][$depth] == $current_depth[$bb] );

        # positive: extra openers inside; negative: extra closers inside
        my $diff = $current_depth[$bb] - $depth_array[$aa][$bb][$depth];

        # don't whine too many times
        my $saw_brace_error = get_saw_brace_error();

        # if too many closing types have occurred, we probably
        # already caught this error
        my $want_message = $saw_brace_error <= MAX_NAG_MESSAGES
          && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) );

        if ($want_message) {
            interrupt_logfile();

            # [line number, line text, position] of the opening token ...
            my $rsl = $starting_line_of_current_depth[$aa][$depth];
            my $sl  = $rsl->[0];

            # ... and of this closing token
            my $rel = [ $input_line_number, $input_line, $pos ];
            my $el  = $rel->[0];

            # plural suffix for the message
            my $ess = ( $diff == 1 || $diff == -1 ) ? '' : 's';

            my $bname =
              ( $diff > 0 )
              ? $opening_brace_names[$bb]
              : $closing_brace_names[$bb];

            write_error_indicator_pair( @$rsl, '^' );
            my $msg = <<"EOM";
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM

            # point at the innermost opener which is still unmatched
            if ( $diff > 0 ) {
                my $rml =
                  $starting_line_of_current_depth[$bb][ $current_depth[$bb] ];
                my $ml = $rml->[0];
                $msg .=
"    The most recent un-matched $bname is on line $ml\n";
                write_error_indicator_pair( @$rml, '^' );
            }
            write_error_indicator_pair( @$rel, '^' );
            warning($msg);
            resume_logfile();
        }
        increment_brace_error();
    }
    $current_depth[$aa]--;

    return ( $seqno, $outdent );
}
26179
sub check_final_nesting_depths {

    # Report every container type which still has a nonzero nesting depth,
    # pointing at the location stored for its most recent opening token,
    # and count one brace error for each such type.
    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth

    foreach my $aa ( 0 .. $#closing_brace_names ) {

        # a zero depth means this type is balanced
        next unless ( $current_depth[$aa] );

        my $rsl = $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
        my $sl  = $rsl->[0];
        my $msg = <<"EOM";
Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
        indicate_error( $msg, @$rsl, '^' );
        increment_brace_error();
    }
}
26200
26201 #########i#############################################################
26202 # Tokenizer routines for looking ahead in input stream
26203 #######################################################################
26204
sub peek_ahead_for_n_nonblank_pre_tokens {

    # Look ahead in the input stream for the next line which is neither
    # blank nor a full-line comment, and pre-tokenize it.
    #
    # input parameter:
    #   $max_pretokens - limit which is passed on to pre_tokenize
    # returns:
    #   ( $rpre_tokens, $rpre_types ) from pre_tokenize for that line;
    #   returns undef's if hits eof without seeing any pretokens
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $max_pretokens = shift;
    my $i             = 0;
    my ( $rpre_tokens, $rmap, $rpre_types );

    # End of input is signalled by an undefined line, so test with
    # 'defined': a final line holding just '0' with no newline is false
    # in boolean context but is still a line of code.
    while (
        defined(
            my $line =
              $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ )
        )
      )
    {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( length($line) <= 0 );    # skip blank
        next if ( $line =~ /^#/ );         # skip comment
        ( $rpre_tokens, $rmap, $rpre_types ) =
          pre_tokenize( $line, $max_pretokens );
        last;
    }
    return ( $rpre_tokens, $rpre_types );
}
26226
26227 # look ahead for next non-blank, non-comment line of code
sub peek_ahead_for_nonblank_token {

    # Look ahead for the next line which is neither blank nor a full-line
    # comment and copy its leading pre-tokens (at most 2 are requested)
    # onto the end of the caller's token list.
    #
    # input parameters:
    #   $rtokens         - reference to the token array to be extended
    #   $max_token_index - index of the last token currently in use
    # returns:
    #   $rtokens (the same reference; unchanged if eof is reached first)
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $rtokens, $max_token_index ) = @_;
    my $i = 0;

    # End of input is signalled by an undefined line, so test with
    # 'defined': a final line holding just '0' with no newline is false
    # in boolean context but is still a line of code.
    while (
        defined(
            my $line =
              $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ )
        )
      )
    {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( length($line) <= 0 );    # skip blank
        next if ( $line =~ /^#/ );         # skip comment
        my ( $rtok, $rmap, $rtype ) =
          pre_tokenize( $line, 2 );        # only need 2 pre-tokens

        # note that $j is pre-incremented below, so the first token
        # is stored at index $max_token_index + 2
        my $j = $max_token_index + 1;

        foreach my $tok (@$rtok) {
            last if ( $tok =~ "\n" );
            $$rtokens[ ++$j ] = $tok;
        }
        last;
    }
    return $rtokens;
}
26253
26254 #########i#############################################################
26255 # Tokenizer guessing routines for ambiguous situations
26256 #######################################################################
26257
sub guess_if_pattern_or_conditional {

    # We have found a '?' after an unknown bareword and have to guess
    # whether it begins a ?pattern? or is the '?' of a ternary.
    # input parameters:
    #   $i               - token index of the ? starting possible pattern
    #   $rtokens         - reference to the array of tokens of this line
    #   $rtoken_map      - (not used here)
    #   $max_token_index - index of the last token on the line
    # output parameters:
    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that ? after $last_nonblank_token starts a ";

    # a ? which ends the line cannot be closed on this line
    if ( $i >= $max_token_index ) {
        $msg .= "conditional (no end to pattern found on the line)\n";
        return ( $is_pattern, $msg );
    }

    # look for a possible ending ? on this line by scanning as if a
    # quote had just been opened at the ?
    my $in_quote        = 1;
    my $quote_depth     = 0;
    my $quote_character = '';
    my $quote_pos       = 0;
    my ( $i_end, $still_in_quote ) =
      follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
        $quote_pos, $quote_depth, $max_token_index );

    # we didn't find an ending ? on this line,
    # so we bias towards conditional
    if ($still_in_quote) {
        $msg .= "conditional (no ending ? on this line)\n";
    }

    # we found an ending ?, so we bias towards a pattern
    elsif ( pattern_expected( $i_end, $rtokens, $max_token_index ) >= 0 ) {
        $is_pattern = 1;
        $msg .= "pattern (found ending ? and pattern expected)\n";
    }

    # NOTE(review): the message says 'pattern' but $is_pattern stays 0
    # on this path -- confirm which of the two is intended
    else {
        $msg .= "pattern (uncertain, but found ending ?)\n";
    }
    return ( $is_pattern, $msg );
}
26315
sub guess_if_pattern_or_division {

    # this routine is called when we have encountered a / following an
    # unknown bareword, and we must decide if it starts a pattern or is a
    # division
    # input parameters:
    #   $i               - token index of the / starting possible pattern
    #   $rtokens         - reference to the array of tokens of this line
    #   $rtoken_map      - (not used here)
    #   $max_token_index - index of the last token on the line
    # output parameters:
    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
    #   msg = a warning or diagnostic message; it always begins with
    #         "guessing that / after ... starts a "
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that / after $last_nonblank_token starts a ";

    if ( $i >= $max_token_index ) {

        # the / is the last token on the line.  (This text used to be a
        # bare string in void context and never reached the message.)
        $msg .= "division (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;

        # >= 0 means that a division is possible at this location
        my $divide_expected =
          numerator_expected( $i, $rtokens, $max_token_index );

        # look for a possible ending / on this line by scanning as if a
        # quote had just been opened at the /
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        my $quoted_string;
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {

            # we didn't find an ending / on this line,
            # so we bias towards division
            if ( $divide_expected >= 0 ) {
                $is_pattern = 0;
                $msg .= "division (no ending / on this line)\n";
            }
            else {

                # append, like every other branch, so that the leading
                # "guessing that / after ..." text is not thrown away
                $msg .= "multi-line pattern (division not possible)\n";
                $is_pattern = 1;
            }

        }

        # we found an ending /, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {

                if ( $divide_expected >= 0 ) {

                    # both are possible; a closing / more than 60 tokens
                    # away is taken as evidence for division
                    if ( $i - $ibeg > 60 ) {
                        $msg .= "division (matching / too distant)\n";
                        $is_pattern = 0;
                    }
                    else {
                        $msg .= "pattern (but division possible too)\n";
                        $is_pattern = 1;
                    }
                }
                else {
                    $is_pattern = 1;
                    $msg .= "pattern (division not possible)\n";
                }
            }
            else {

                if ( $divide_expected >= 0 ) {
                    $is_pattern = 0;
                    $msg .= "division (pattern not possible)\n";
                }
                else {
                    $is_pattern = 1;
                    $msg .=
                      "pattern (uncertain, but division would not work here)\n";
                }
            }
        }
    }
    return ( $is_pattern, $msg );
}
26406
26407 # try to resolve here-doc vs. shift by looking ahead for
26408 # non-code or the end token (currently only looks for end token)
26409 # returns 1 if it is probably a here doc, 0 if not
sub guess_if_here_doc {

    # Guess whether '<<' followed by the bareword $next_token starts a
    # here-doc or is a left shift, by looking ahead for a line which
    # consists of exactly the target text.
    #
    # input parameter:
    #   $next_token - the possible here-doc target
    # returns $here_doc_expected:
    #    1 if the target was found within HERE_DOC_WINDOW lines, or if it
    #      was not found in that window and is not a known constant
    #   -1 if end of file was reached without seeing the target
    #    0 if not found in the window and $next_token is a known constant
    # A log entry describing the decision is written in every case.
    #
    # This is how many lines we will search for a target as part of the
    # guessing strategy.  It is a constant because there is probably
    # little reason to change it.
    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
    # %is_constant,
    use constant HERE_DOC_WINDOW => 40;

    my $next_token        = shift;
    my $here_doc_expected = 0;
    my $line;
    my $k   = 0;
    my $msg = "checking <<";

    # End of input is signalled by an undefined line (see the eof test
    # below), so test with 'defined'; a line holding just '0' with no
    # newline is false in boolean context but is not the end of file.
    while (
        defined(
            $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ )
        )
      )
    {
        chomp $line;

        # the target is literal text, so quote any regex metacharacters
        if ( $line =~ /^\Q$next_token\E$/ ) {
            $msg .= " -- found target $next_token ahead $k lines\n";
            $here_doc_expected = 1;    # got it
            last;
        }
        last if ( $k >= HERE_DOC_WINDOW );
    }

    unless ($here_doc_expected) {

        if ( !defined($line) ) {
            $here_doc_expected = -1;    # hit eof without seeing target
            $msg .= " -- must be shift; target $next_token not in file\n";

        }
        else {                          # still unsure..taking a wild guess

            if ( !$is_constant{$current_package}{$next_token} ) {
                $here_doc_expected = 1;
                $msg .=
                  " -- guessing it's a here-doc ($next_token not a constant)\n";
            }
            else {
                $msg .=
                  " -- guessing it's a shift ($next_token is a constant)\n";
            }
        }
    }
    write_logfile_entry($msg);
    return $here_doc_expected;
}
26460
26461 #########i#############################################################
26462 # Tokenizer Routines for scanning identifiers and related items
26463 #######################################################################
26464
sub scan_bare_identifier_do {

    # this routine is called to scan a token starting with an alphanumeric
    # variable or package separator, :: or '.
    #
    # input parameters:
    #   $input_line      - text of the line being tokenized
    #   $i               - index of the pretoken at which the word starts
    #   $tok, $type      - current token text and type (both are replaced
    #                      when the main pattern below matches)
    #   $prototype       - passed through; replaced only for type 'U'
    #   $rtoken_map      - ref to array giving the character position of
    #                      each pretoken in $input_line
    #   $max_token_index - index of the last pretoken on the line
    # returns ( $i, $tok, $type, $prototype ), where $i has been advanced
    # by inverse_pretoken_map to correspond to the end of the text scanned.
    # Token types assigned here:
    #   'w' bareword (default)         'k' keyword
    #   'v' v-string                   'C' known constant
    #   'G' block (list) function      'U' user function with prototype
    #   'Y' word in the indirect object slot, followed by a space
    #   'Z' word after 'sort', or word in the indirect object slot which
    #       is not followed by a space
    #
    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
    # $last_nonblank_type,@paren_type, $paren_depth

    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
        $max_token_index )
      = @_;
    my $i_begin = $i;
    my $package = undef;

    my $i_beg = $i;

    # we have to back up one pretoken at a :: since each : is one pretoken
    if ( $tok eq '::' ) { $i_beg-- }
    if ( $tok eq '->' ) { $i_beg-- }
    my $pos_beg = $$rtoken_map[$i_beg];

    # all of the \G patterns below continue from this position; they use
    # the /c flag so that a failed match leaves pos() where it was
    pos($input_line) = $pos_beg;

    #  Examples:
    #   A::B::C
    #   A::
    #   ::A
    #   A'B
    # NOTE(review): capture group 1 is inside a '*' quantifier, so after a
    # match $1 holds only the last qualifier piece (for example 'B::' for
    # A::B::C), not the complete package prefix -- confirm this is intended.
    # Capture group 2 is the final word, if there is one.
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {

        # the token is all of the text from the start to the end of match
        my $pos  = pos($input_line);
        my $numc = $pos - $pos_beg;
        $tok = substr( $input_line, $pos_beg, $numc );

        # type 'w' includes anything without leading type info
        # ($,%,@,*) including something like abc::def::ghi
        $type = 'w';

        my $sub_name = "";
        if ( defined($2) ) { $sub_name = $2; }
        if ( defined($1) ) {
            $package = $1;

            # patch: don't allow isolated package name which just ends
            # in the old style package separator (single quote).  Example:
            #   use CGI':all';
            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
                $pos--;
            }

            # normalize: ' becomes ::, a leading :: means package main,
            # and a trailing :: is removed
            $package =~ s/\'/::/g;
            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
            $package =~ s/::$//;
        }
        else {

            # no qualifier was seen: use the current package, and only in
            # this case can the word be a keyword
            $package = $current_package;

            if ( $is_keyword{$tok} ) {
                $type = 'k';
            }
        }

        # if it is a bareword..
        if ( $type eq 'w' ) {

            # check for v-string with leading 'v' type character
            # (This seems to have precedence over filehandle, type 'Y')
            if ( $tok =~ /^v\d[_\d]*$/ ) {

                # we only have the first part - something like 'v101' -
                # look for more
                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
                    $pos  = pos($input_line);
                    $numc = $pos - $pos_beg;
                    $tok  = substr( $input_line, $pos_beg, $numc );
                }
                $type = 'v';

                # warn if this version can't handle v-strings
                report_v_string($tok);
            }

            elsif ( $is_constant{$package}{$sub_name} ) {
                $type = 'C';
            }

            # bareword after sort has implied empty prototype; for example:
            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
            # This has priority over whatever the user has specified.
            elsif ($last_nonblank_token eq 'sort'
                && $last_nonblank_type eq 'k' )
            {
                $type = 'Z';
            }

            # Note: strangely, perl does not seem to really let you create
            # functions which act like eval and do, in the sense that eval
            # and do may have operators following the final }, but any operators
            # that you create with prototype (&) apparently do not allow
            # trailing operators, only terms.  This seems strange.
            # If this ever changes, here is the update
            # to make perltidy behave accordingly:

            # elsif ( $is_block_function{$package}{$tok} ) {
            #    $tok='eval'; # patch to do braces like eval  - doesn't work
            #    $type = 'k';
            #}
            # FIXME: This could become a separate type to allow for different
            # future behavior:
            elsif ( $is_block_function{$package}{$sub_name} ) {
                $type = 'G';
            }

            elsif ( $is_block_list_function{$package}{$sub_name} ) {
                $type = 'G';
            }
            elsif ( $is_user_function{$package}{$sub_name} ) {
                $type      = 'U';
                $prototype = $user_function_prototype{$package}{$sub_name};
            }

            # check for indirect object
            elsif (

                # added 2001-03-27: must not be followed immediately by '('
                # see fhandle.t
                ( $input_line !~ m/\G\(/gc )

                # and
                && (

                    # preceded by keyword like 'print', 'printf' and friends
                    $is_indirect_object_taker{$last_nonblank_token}

                    # or preceded by something like 'print(' or 'printf('
                    || (
                        ( $last_nonblank_token eq '(' )
                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
                        }

                    )
                )
              )
            {

                # may not be indirect object unless followed by a space
                # (a successful match here moves pos() past the spaces)
                if ( $input_line =~ m/\G\s+/gc ) {
                    $type = 'Y';

                    # Abandon Hope ...
                    # Perl's indirect object notation is a very bad
                    # thing and can cause subtle bugs, especially for
                    # beginning programmers.  And I haven't even been
                    # able to figure out a sane warning scheme which
                    # doesn't get in the way of good scripts.

                    # Complain if a filehandle has any lower case
                    # letters.  This is suggested good practice.
                    # Use 'sub_name' because something like
                    # main::MYHANDLE is ok for filehandle
                    if ( $sub_name =~ /[a-z]/ ) {

                        # could be bug caused by older perltidy if
                        # followed by '('
                        if ( $input_line =~ m/\G\s*\(/gc ) {
                            complain(
"Caution: unknown word '$tok' in indirect object slot\n"
                            );
                        }
                    }
                }

                # bareword not followed by a space -- may not be filehandle
                # (may be function call defined in a 'use' statement)
                else {
                    $type = 'Z';
                }
            }
        }

        # Now we must convert back from character position
        # to pre_token index.
        # I don't think an error flag can occur here ..but who knows
        # Note that the saved $pos is used, not the current pos(), so the
        # look-ahead matches above do not change the length of the token.
        my $error;
        ( $i, $error ) =
          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
        if ($error) {
            warning("scan_bare_identifier: Possibly invalid tokenization\n");
        }
    }

    # no match but line not blank - could be syntax error
    # perl will take '::' alone without complaint
    else {
        $type = 'w';

        # change this warning to log message if it becomes annoying
        warning("didn't find identifier after leading ::\n");
    }
    return ( $i, $tok, $type, $prototype );
}
26664
sub scan_id_do {

    # Scan the identifier which follows a type token.  This is the new
    # scanner and will eventually replace scan_identifier; at present only
    # the types 'sub' and 'package' are implemented, and the token types
    # $ * % @ & -> are not.
    #
    # How it is called depends on $id_scan_state:
    #   - On a starting call $id_scan_state is '' and $tok must be the
    #     token which defines the type.
    #   - If that type token is the last nonblank token on its line, the
    #     returned $id_scan_state is $tok, meaning that further calls are
    #     needed to get the identifier.  Otherwise the identifier is
    #     scanned and handled now and '' is returned.
    #
    # returns ( $i, $tok, $type, $id_scan_state )
    #
    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
    # $statement_type, $tokenizer_self

    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
        $max_token_index )
      = @_;

    #print "NSCAN:entering i=$i, tok=$tok, state=$id_scan_state\n";
    #my ($a,$b,$c) = caller;
    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";

    # on initial entry, start scanning just after type token;
    # on re-entry, start scanning at first token on the line
    my $is_initial_entry = !$id_scan_state;
    my $type             = $is_initial_entry ? 't' : '';
    my $i_beg            = $is_initial_entry ? $i + 1 : $i;
    $id_scan_state = $tok if ($is_initial_entry);

    # find $i_beg = index of next nonblank token,
    # and handle empty lines
    my $next_nonblank_token = $rtokens->[$i_beg];
    my $blank_line          = ( $i_beg > $max_token_index ) ? 1 : 0;
    if ( !$blank_line ) {

        # only a '#' immediately after a '$' is not a comment
        if ( $next_nonblank_token eq '#' && $tok ne '$' ) {
            $blank_line = 1;
        }

        # step over leading whitespace; the rest of the line may then
        # turn out to be empty or a comment
        if ( $next_nonblank_token =~ /^\s/ ) {
            ( $next_nonblank_token, $i_beg ) =
              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
                $max_token_index );
            $blank_line = 1 if ( $next_nonblank_token =~ /(^#|^\s*$)/ );
        }
    }

    # handle non-blank line; identifier, if any, must follow
    if ( !$blank_line ) {

        if ( $id_scan_state eq 'sub' ) {
            ( $i, $tok, $type, $id_scan_state ) =
              do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
                $rtoken_map, $id_scan_state, $max_token_index );
        }
        elsif ( $id_scan_state eq 'package' ) {
            ( $i, $tok, $type ) =
              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
                $rtoken_map, $max_token_index );
            $id_scan_state = '';
        }
        else {
            warning("invalid token in scan_id: $tok\n");
            $id_scan_state = '';
        }
    }

    # shouldn't happen: a scan is still pending but no type was set
    if ( $id_scan_state && !$type ) {
        warning(
"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
        );
        report_definite_bug();
    }

    if (TOKENIZER_DEBUG_FLAG_NSCAN) {
        print
          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
    }
    return ( $i, $tok, $type, $id_scan_state );
}
26772
sub check_prototype {

    # Record what is known about sub $subname of package $package, given
    # the text of its prototype $proto (undefined if it has none):
    #   no prototype        - marked as a user function
    #   empty prototype ()  - marked as a constant
    #   any other prototype - marked as a user function and the prototype
    #                         is saved; if it contains '&' the sub is also
    #                         marked as a block function (prototype ends
    #                         in '&') or a block list function (otherwise)
    # Nothing is recorded unless both $package and $subname are defined.
    my ( $proto, $package, $subname ) = @_;
    return unless ( defined($package) && defined($subname) );

    if ( !defined($proto) ) {
        $is_user_function{$package}{$subname} = 1;
    }
    else {

        # strip the enclosing parens and any spaces around them
        $proto =~ s/^\s*\(\s*//;
        $proto =~ s/\s*\)$//;

        if ( !$proto ) {
            $is_constant{$package}{$subname} = 1;
        }
        else {
            $is_user_function{$package}{$subname}        = 1;
            $user_function_prototype{$package}{$subname} = "($proto)";

            # prototypes containing '&' must be treated specially..
            if ( $proto =~ /\&/ ) {

                # right curly braces of prototypes ending in
                # '&' may be followed by an operator
                if ( $proto =~ /\&$/ ) {
                    $is_block_function{$package}{$subname} = 1;
                }

                # right curly braces of prototypes NOT ending in
                # '&' may NOT be followed by an operator
                else {
                    $is_block_list_function{$package}{$subname} = 1;
                }
            }
        }
    }
}
26807
sub do_scan_package {

    # Parse the package name which follows a 'package' token.
    # $i_beg must be the index of the first nonblank token after the
    # 'package' token.
    #
    # returns ( $i, $tok, $type ):
    #   if a name is found, $tok is 'package ' followed by the text which
    #   was scanned, $type is 'i', $i is the updated pretoken index, and
    #   $current_package is set to the normalized name;
    #   otherwise $i and $tok are returned unchanged with $type = 'k'
    #
    # USES GLOBAL VARIABLES: $current_package,

    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
        $max_token_index )
      = @_;
    my $pos_beg = $rtoken_map->[$i_beg];
    pos($input_line) = $pos_beg;

    # no match but line not blank --
    # could be a label with name package, like package:  , for example.
    unless ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
        $type = 'k';
        return ( $i, $tok, $type );
    }

    # we have a package name; a false name (such as '0') becomes 'main'
    my $package = $1 || 'main';
    my $pos     = pos($input_line);

    # normalize: ' becomes ::, a leading :: means package main,
    # and a trailing :: is removed
    $package =~ s/\'/::/g;
    if ( $package =~ /^\:/ ) { $package = 'main' . $package }
    $package =~ s/::$//;

    $tok  = 'package ' . substr( $input_line, $pos_beg, $pos - $pos_beg );
    $type = 'i';

    # Now we must convert back from character position
    # to pre_token index.
    # I don't think an error flag can occur here ..but ?
    my $error;
    ( $i, $error ) =
      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
    if ($error) { warning("Possibly invalid package\n") }
    $current_package = $package;

    # check for error: only ';', '{' or '}' should follow the name
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );
    if ( $next_nonblank_token !~ /^[;\{\}]$/ ) {
        warning(
            "Unexpected '$next_nonblank_token' after package name '$tok'\n"
        );
    }

    return ( $i, $tok, $type );
}
26861
26862 sub scan_identifier_do {
26863
26864     # This routine assembles tokens into identifiers.  It maintains a
26865     # scan state, id_scan_state.  It updates id_scan_state based upon
26866     # current id_scan_state and token, and returns an updated
26867     # id_scan_state and the next index after the identifier.
26868     # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
26869     # $last_nonblank_type
26870
26871     my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
26872         $expecting )
26873       = @_;
26874     my $i_begin   = $i;
26875     my $type      = '';
26876     my $tok_begin = $$rtokens[$i_begin];
26877     if ( $tok_begin eq ':' ) { $tok_begin = '::' }
26878     my $id_scan_state_begin = $id_scan_state;
26879     my $identifier_begin    = $identifier;
26880     my $tok                 = $tok_begin;
26881     my $message             = "";
26882
26883     # these flags will be used to help figure out the type:
26884     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
26885     my $saw_type;
26886
26887     # allow old package separator (') except in 'use' statement
26888     my $allow_tick = ( $last_nonblank_token ne 'use' );
26889
26890     # get started by defining a type and a state if necessary
26891     unless ($id_scan_state) {
26892         $context = UNKNOWN_CONTEXT;
26893
26894         # fixup for digraph
26895         if ( $tok eq '>' ) {
26896             $tok       = '->';
26897             $tok_begin = $tok;
26898         }
26899         $identifier = $tok;
26900
26901         if ( $tok eq '$' || $tok eq '*' ) {
26902             $id_scan_state = '$';
26903             $context       = SCALAR_CONTEXT;
26904         }
26905         elsif ( $tok eq '%' || $tok eq '@' ) {
26906             $id_scan_state = '$';
26907             $context       = LIST_CONTEXT;
26908         }
26909         elsif ( $tok eq '&' ) {
26910             $id_scan_state = '&';
26911         }
26912         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
26913             $saw_alpha     = 0;     # 'sub' is considered type info here
26914             $id_scan_state = '$';
26915             $identifier .= ' ';     # need a space to separate sub from sub name
26916         }
26917         elsif ( $tok eq '::' ) {
26918             $id_scan_state = 'A';
26919         }
26920         elsif ( $tok =~ /^[A-Za-z_]/ ) {
26921             $id_scan_state = ':';
26922         }
26923         elsif ( $tok eq '->' ) {
26924             $id_scan_state = '$';
26925         }
26926         else {
26927
26928             # shouldn't happen
26929             my ( $a, $b, $c ) = caller;
26930             warning("Program Bug: scan_identifier given bad token = $tok \n");
26931             warning("   called from sub $a  line: $c\n");
26932             report_definite_bug();
26933         }
26934         $saw_type = !$saw_alpha;
26935     }
26936     else {
26937         $i--;
26938         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
26939     }
26940
26941     # now loop to gather the identifier
26942     my $i_save = $i;
26943
26944     while ( $i < $max_token_index ) {
26945         $i_save = $i unless ( $tok =~ /^\s*$/ );
26946         $tok = $$rtokens[ ++$i ];
26947
26948         if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
26949             $tok = '::';
26950             $i++;
26951         }
26952
26953         if ( $id_scan_state eq '$' ) {    # starting variable name
26954
26955             if ( $tok eq '$' ) {
26956
26957                 $identifier .= $tok;
26958
26959                 # we've got a punctuation variable if end of line (punct.t)
26960                 if ( $i == $max_token_index ) {
26961                     $type          = 'i';
26962                     $id_scan_state = '';
26963                     last;
26964                 }
26965             }
26966             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
26967                 $saw_alpha     = 1;
26968                 $id_scan_state = ':';           # now need ::
26969                 $identifier .= $tok;
26970             }
26971             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
26972                 $saw_alpha     = 1;
26973                 $id_scan_state = ':';                 # now need ::
26974                 $identifier .= $tok;
26975
26976                 # Perl will accept leading digits in identifiers,
26977                 # although they may not always produce useful results.
26978                 # Something like $main::0 is ok.  But this also works:
26979                 #
26980                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
26981                 #  howdy::123::bubba();
26982                 #
26983             }
26984             elsif ( $tok =~ /^[0-9]/ ) {              # numeric
26985                 $saw_alpha     = 1;
26986                 $id_scan_state = ':';                 # now need ::
26987                 $identifier .= $tok;
26988             }
26989             elsif ( $tok eq '::' ) {
26990                 $id_scan_state = 'A';
26991                 $identifier .= $tok;
26992             }
26993             elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
26994                 $identifier .= $tok;    # keep same state, a $ could follow
26995             }
26996             elsif ( $tok eq '{' ) {
26997
26998                 # check for something like ${#} or ${©}
26999                 if (   $identifier eq '$'
27000                     && $i + 2 <= $max_token_index
27001                     && $$rtokens[ $i + 2 ] eq '}'
27002                     && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
27003                 {
27004                     my $next2 = $$rtokens[ $i + 2 ];
27005                     my $next1 = $$rtokens[ $i + 1 ];
27006                     $identifier .= $tok . $next1 . $next2;
27007                     $i += 2;
27008                     $id_scan_state = '';
27009                     last;
27010                 }
27011
27012                 # skip something like ${xxx} or ->{
27013                 $id_scan_state = '';
27014
27015                 # if this is the first token of a line, any tokens for this
27016                 # identifier have already been accumulated
27017                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
27018                 $i = $i_save;
27019                 last;
27020             }
27021
27022             # space ok after leading $ % * & @
27023             elsif ( $tok =~ /^\s*$/ ) {
27024
27025                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
27026
27027                     if ( length($identifier) > 1 ) {
27028                         $id_scan_state = '';
27029                         $i             = $i_save;
27030                         $type          = 'i';    # probably punctuation variable
27031                         last;
27032                     }
27033                     else {
27034
27035                         # spaces after $'s are common, and space after @
27036                         # is harmless, so only complain about space
27037                         # after other type characters. Space after $ and
27038                         # @ will be removed in formatting.  Report space
27039                         # after % and * because they might indicate a
27040                         # parsing error.  In other words '% ' might be a
27041                         # modulo operator.  Delete this warning if it
27042                         # gets annoying.
27043                         if ( $identifier !~ /^[\@\$]$/ ) {
27044                             $message =
27045                               "Space in identifier, following $identifier\n";
27046                         }
27047                     }
27048                 }
27049
27050                 # else:
27051                 # space after '->' is ok
27052             }
27053             elsif ( $tok eq '^' ) {
27054
27055                 # check for some special variables like $^W
27056                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
27057                     $identifier .= $tok;
27058                     $id_scan_state = 'A';
27059
27060                     # Perl accepts '$^]' or '@^]', but
27061                     # there must not be a space before the ']'.
27062                     my $next1 = $$rtokens[ $i + 1 ];
27063                     if ( $next1 eq ']' ) {
27064                         $i++;
27065                         $identifier .= $next1;
27066                         $id_scan_state = "";
27067                         last;
27068                     }
27069                 }
27070                 else {
27071                     $id_scan_state = '';
27072                 }
27073             }
27074             else {    # something else
27075
27076                 # check for various punctuation variables
27077                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
27078                     $identifier .= $tok;
27079                 }
27080
27081                 elsif ( $identifier eq '$#' ) {
27082
27083                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
27084
27085                     # perl seems to allow just these: $#: $#- $#+
27086                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
27087                         $type = 'i';
27088                         $identifier .= $tok;
27089                     }
27090                     else {
27091                         $i = $i_save;
27092                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
27093                     }
27094                 }
27095                 elsif ( $identifier eq '$$' ) {
27096
27097                     # perl does not allow references to punctuation
27098                     # variables without braces.  For example, this
27099                     # won't work:
27100                     #  $:=\4;
27101                     #  $a = $$:;
27102                     # You would have to use
27103                     #  $a = ${$:};
27104
27105                     $i = $i_save;
27106                     if   ( $tok eq '{' ) { $type = 't' }
27107                     else                 { $type = 'i' }
27108                 }
27109                 elsif ( $identifier eq '->' ) {
27110                     $i = $i_save;
27111                 }
27112                 else {
27113                     $i = $i_save;
27114                     if ( length($identifier) == 1 ) { $identifier = ''; }
27115                 }
27116                 $id_scan_state = '';
27117                 last;
27118             }
27119         }
27120         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
27121
27122             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
27123                 $id_scan_state = ':';          # now need ::
27124                 $saw_alpha     = 1;
27125                 $identifier .= $tok;
27126             }
27127             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
27128                 $id_scan_state = ':';                 # now need ::
27129                 $saw_alpha     = 1;
27130                 $identifier .= $tok;
27131             }
27132             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
27133                 $id_scan_state = ':';       # now need ::
27134                 $saw_alpha     = 1;
27135                 $identifier .= $tok;
27136             }
27137             elsif ( $tok =~ /^\s*$/ ) {     # allow space
27138             }
27139             elsif ( $tok eq '::' ) {        # leading ::
27140                 $id_scan_state = 'A';       # accept alpha next
27141                 $identifier .= $tok;
27142             }
27143             elsif ( $tok eq '{' ) {
27144                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
27145                 $i             = $i_save;
27146                 $id_scan_state = '';
27147                 last;
27148             }
27149             else {
27150
27151                 # punctuation variable?
27152                 # testfile: cunningham4.pl
27153                 #
27154                 # We have to be careful here.  If we are in an unknown state,
27155                 # we will reject the punctuation variable.  In the following
27156                 # example the '&' is a binary opeator but we are in an unknown
27157                 # state because there is no sigil on 'Prima', so we don't
27158                 # know what it is.  But it is a bad guess that
27159                 # '&~' is a punction variable.
27160                 # $self->{text}->{colorMap}->[
27161                 #   Prima::PodView::COLOR_CODE_FOREGROUND
27162                 #   & ~tb::COLOR_INDEX ] =
27163                 #   $sec->{ColorCode}
27164                 if ( $identifier eq '&' && $expecting ) {
27165                     $identifier .= $tok;
27166                 }
27167                 else {
27168                     $identifier = '';
27169                     $i          = $i_save;
27170                     $type       = '&';
27171                 }
27172                 $id_scan_state = '';
27173                 last;
27174             }
27175         }
27176         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
27177
27178             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
27179                 $identifier .= $tok;
27180                 $id_scan_state = ':';        # now need ::
27181                 $saw_alpha     = 1;
27182             }
27183             elsif ( $tok eq "'" && $allow_tick ) {
27184                 $identifier .= $tok;
27185                 $id_scan_state = ':';        # now need ::
27186                 $saw_alpha     = 1;
27187             }
27188             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
27189                 $identifier .= $tok;
27190                 $id_scan_state = ':';        # now need ::
27191                 $saw_alpha     = 1;
27192             }
27193             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
27194                 $id_scan_state = '(';
27195                 $identifier .= $tok;
27196             }
27197             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
27198                 $id_scan_state = ')';
27199                 $identifier .= $tok;
27200             }
27201             else {
27202                 $id_scan_state = '';
27203                 $i             = $i_save;
27204                 last;
27205             }
27206         }
27207         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
27208
27209             if ( $tok eq '::' ) {            # got it
27210                 $identifier .= $tok;
27211                 $id_scan_state = 'A';        # now require alpha
27212             }
27213             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
27214                 $identifier .= $tok;
27215                 $id_scan_state = ':';           # now need ::
27216                 $saw_alpha     = 1;
27217             }
27218             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
27219                 $identifier .= $tok;
27220                 $id_scan_state = ':';           # now need ::
27221                 $saw_alpha     = 1;
27222             }
27223             elsif ( $tok eq "'" && $allow_tick ) {    # tick
27224
27225                 if ( $is_keyword{$identifier} ) {
27226                     $id_scan_state = '';              # that's all
27227                     $i             = $i_save;
27228                 }
27229                 else {
27230                     $identifier .= $tok;
27231                 }
27232             }
27233             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
27234                 $id_scan_state = '(';
27235                 $identifier .= $tok;
27236             }
27237             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
27238                 $id_scan_state = ')';
27239                 $identifier .= $tok;
27240             }
27241             else {
27242                 $id_scan_state = '';        # that's all
27243                 $i             = $i_save;
27244                 last;
27245             }
27246         }
27247         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
27248
27249             if ( $tok eq '(' ) {             # got it
27250                 $identifier .= $tok;
27251                 $id_scan_state = ')';        # now find the end of it
27252             }
27253             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
27254                 $identifier .= $tok;
27255             }
27256             else {
27257                 $id_scan_state = '';         # that's all - no prototype
27258                 $i             = $i_save;
27259                 last;
27260             }
27261         }
27262         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
27263
27264             if ( $tok eq ')' ) {             # got it
27265                 $identifier .= $tok;
27266                 $id_scan_state = '';         # all done
27267                 last;
27268             }
27269             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
27270                 $identifier .= $tok;
27271             }
27272             else {    # probable error in script, but keep going
27273                 warning("Unexpected '$tok' while seeking end of prototype\n");
27274                 $identifier .= $tok;
27275             }
27276         }
27277         else {        # can get here due to error in initialization
27278             $id_scan_state = '';
27279             $i             = $i_save;
27280             last;
27281         }
27282     }
27283
27284     if ( $id_scan_state eq ')' ) {
27285         warning("Hit end of line while seeking ) to end prototype\n");
27286     }
27287
27288     # once we enter the actual identifier, it may not extend beyond
27289     # the end of the current line
27290     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
27291         $id_scan_state = '';
27292     }
27293     if ( $i < 0 ) { $i = 0 }
27294
27295     unless ($type) {
27296
27297         if ($saw_type) {
27298
27299             if ($saw_alpha) {
27300                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
27301                     $type = 'w';
27302                 }
27303                 else { $type = 'i' }
27304             }
27305             elsif ( $identifier eq '->' ) {
27306                 $type = '->';
27307             }
27308             elsif (
27309                 ( length($identifier) > 1 )
27310
27311                 # In something like '@$=' we have an identifier '@$'
27312                 # In something like '$${' we have type '$$' (and only
27313                 # part of an identifier)
27314                 && !( $identifier =~ /\$$/ && $tok eq '{' )
27315                 && ( $identifier !~ /^(sub |package )$/ )
27316               )
27317             {
27318                 $type = 'i';
27319             }
27320             else { $type = 't' }
27321         }
27322         elsif ($saw_alpha) {
27323
27324             # type 'w' includes anything without leading type info
27325             # ($,%,@,*) including something like abc::def::ghi
27326             $type = 'w';
27327         }
27328         else {
27329             $type = '';
27330         }    # this can happen on a restart
27331     }
27332
27333     if ($identifier) {
27334         $tok = $identifier;
27335         if ($message) { write_logfile_entry($message) }
27336     }
27337     else {
27338         $tok = $tok_begin;
27339         $i   = $i_begin;
27340     }
27341
27342     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
27343         my ( $a, $b, $c ) = caller;
27344         print
27345 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
27346         print
27347 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
27348     };
27349     return ( $i, $tok, $type, $id_scan_state, $identifier );
27350 }
27351
{

    # Package and sub name remembered between calls to do_scan_sub, for
    # the case where the '(' of a prototype is expected on a later line
    # than the sub name.
    my ( $package_saved, $subname_saved );

    sub do_scan_sub {

        # do_scan_sub parses a sub name, an optional prototype, and the
        # leading ':' of an optional attribute list.
        # It is called with $i_beg equal to the index of the first nonblank
        # token following a 'sub' token.
        #
        # Input parameters:
        #   $input_line      - text of the line being scanned
        #   $i               - current pre-token index; replaced by the
        #                      index found by inverse_pretoken_map when
        #                      something is matched
        #   $i_beg           - pre-token index at which scanning begins
        #   $tok, $type      - current token and type; returned unchanged
        #                      if nothing on the line matches
        #   $rtokens         - reference to the array of pre-tokens
        #   $rtoken_map      - reference to an array giving the character
        #                      position in $input_line of each pre-token
        #   $id_scan_state   - ignored; it is reset to "" on entry
        #   $max_token_index - upper limit on pre-token indexes; passed
        #                      through to the helper routines
        #
        # Returns ( $i, $tok, $type, $id_scan_state ), where $id_scan_state
        # is 'sub' if a '(' was seen where a prototype may follow on
        # another line (we must come back to get it), and "" otherwise.

        # TODO: add future error checks to be sure we have a valid
        # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
        # a name is given if and only if a non-anonymous sub is
        # appropriate.
        # USES GLOBAL VARS: $current_package, $last_nonblank_token,
        # $in_attribute_list, %saw_function_definition,
        # $statement_type, $tokenizer_self

        my (
            $input_line, $i,             $i_beg,
            $tok,        $type,          $rtokens,
            $rtoken_map, $id_scan_state, $max_token_index
        ) = @_;
        $id_scan_state = "";    # normally we get everything in one call
        my $subname = undef;
        my $package = undef;
        my $proto   = undef;
        my $attrs   = undef;
        my $match;

        # Begin the regex scan at the character position of token $i_beg.
        # Both patterns below use \G with /gc so that a failed first
        # attempt leaves this starting position in place for the second.
        my $pos_beg = $$rtoken_map[$i_beg];
        pos($input_line) = $pos_beg;

        # sub NAME PROTO ATTRS
        if (
            $input_line =~ m/\G\s*
        ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
        (\w+)               # NAME    - required
        (\s*\([^){]*\))?    # PROTO   - something in parens
        (\s*:)?             # ATTRS   - leading : of attribute list
        /gcx
          )
        {
            $match   = 1;
            $subname = $2;
            $proto   = $3;
            $attrs   = $4;

            # Normalize the package: old-style ' separators become ::,
            # a leading :: means package main, and the trailing :: goes.
            $package = ( defined($1) && $1 ) ? $1 : $current_package;
            $package =~ s/\'/::/g;
            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
            $package =~ s/::$//;

            # The returned token is 'sub ' plus all of the matched text
            my $pos  = pos($input_line);
            my $numc = $pos - $pos_beg;
            $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
            $type = 'i';
        }

        # Look for prototype/attributes not preceded on this line by subname;
        # This might be an anonymous sub with attributes,
        # or a prototype on a separate line from its sub name
        elsif (
            $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
            (\s*:)?                              # ATTRS leading ':'
            /gcx
            && ( $1 || $2 )
          )
        {
            $match = 1;
            $proto = $1;
            $attrs = $2;

            # Handle prototype on separate line from subname
            if ($subname_saved) {
                $package = $package_saved;
                $subname = $subname_saved;
                $tok     = $last_nonblank_token;
            }
            $type = 'i';
        }

        if ($match) {

            # An anonymous sub has no name.  Use an empty string in the
            # warning messages below so that interpolating the name does
            # not itself raise an 'uninitialized value' warning.
            my $subname_for_msg = defined($subname) ? $subname : '';

            # ATTRS: if there are attributes, back up and let the ':' be
            # found later by the scanner.
            my $pos = pos($input_line);
            if ($attrs) {
                $pos -= length($attrs);
            }

            my $next_nonblank_token = $tok;

            # catch case of line with leading ATTR ':' after anonymous sub
            if ( $pos == $pos_beg && $tok eq ':' ) {
                $type              = 'A';
                $in_attribute_list = 1;
            }

            # We must convert back from character position
            # to pre_token index.
            else {

                # I don't think an error flag can occur here ..but ?
                my $error;
                ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
                    $max_token_index );
                if ($error) { warning("Possibly invalid sub\n") }

                # get the token which follows what we matched; only the
                # token itself is needed, not its index
                ($next_nonblank_token) =
                  find_next_nonblank_token_on_this_line( $i, $rtokens,
                    $max_token_index );
            }

            if ( $next_nonblank_token =~ /^(\s*|#)$/ )
            {    # skip blank or side comment
                my ( $rpre_tokens, $rpre_types ) =
                  peek_ahead_for_n_nonblank_pre_tokens(1);
                if ( defined($rpre_tokens) && @$rpre_tokens ) {
                    $next_nonblank_token = $rpre_tokens->[0];
                }
                else {

                    # nothing could be peeked; proceed as if a closing
                    # brace follows, which produces no complaint below
                    $next_nonblank_token = '}';
                }
            }

            # Forget any saved names; they are set again below only if
            # we find that we must come back for a prototype.
            $package_saved = "";
            $subname_saved = "";
            if ( $next_nonblank_token eq '{' ) {
                if ($subname) {

                    # Check for multiple definitions of a sub, but
                    # it is ok to have multiple sub BEGIN, etc,
                    # so we do not complain if name is all caps
                    if (   $saw_function_definition{$package}{$subname}
                        && $subname !~ /^[A-Z]+$/ )
                    {
                        my $lno = $saw_function_definition{$package}{$subname};
                        warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
                        );
                    }
                    $saw_function_definition{$package}{$subname} =
                      $tokenizer_self->{_last_line_number};
                }
            }

            # a ';' or '}' may follow without complaint
            elsif ( $next_nonblank_token eq ';' ) {
            }
            elsif ( $next_nonblank_token eq '}' ) {
            }

            # ATTRS - if an attribute list follows, remember the name
            # of the sub so the next opening brace can be labeled.
            # Setting 'statement_type' causes any ':'s to introduce
            # attributes.
            elsif ( $next_nonblank_token eq ':' ) {
                $statement_type = $tok;
            }

            # see if PROTO follows on another line:
            elsif ( $next_nonblank_token eq '(' ) {
                if ( $attrs || $proto ) {
                    warning(
"unexpected '(' after definition or declaration of sub '$subname_for_msg'\n"
                    );
                }
                else {
                    $id_scan_state  = 'sub';    # we must come back to get proto
                    $statement_type = $tok;
                    $package_saved  = $package;
                    $subname_saved  = $subname;
                }
            }
            elsif ($next_nonblank_token) {      # EOF technically ok
                warning(
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname_for_msg' but saw '$next_nonblank_token'\n"
                );
            }
            check_prototype( $proto, $package, $subname );
        }

        # no match but line not blank
        else {
        }
        return ( $i, $tok, $type, $id_scan_state );
    }
}
27539
27540 #########i###############################################################
27541 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
27542 #########################################################################
27543
sub find_next_nonblank_token {

    # Return the token following index $i, stepping over at most one
    # blank token, together with the index of the token returned.
    #
    # Input parameters:
    #   $i               - pre-token index to start from
    #   $rtokens         - reference to the array of pre-tokens
    #   $max_token_index - upper limit on pre-token indexes for this line
    #
    # If $i is already at or beyond $max_token_index, and no peek ahead
    # has been recorded yet by peeked_ahead(), the token array reference
    # is replaced by the result of peek_ahead_for_nonblank_token
    # (presumably extended with tokens from following lines -- confirm
    # against that routine).
    #
    # Returns ( $next_nonblank_token, $i ).  The token is undef if the
    # index runs past the end of the token array.
    my ( $i, $rtokens, $max_token_index ) = @_;

    if ( $i >= $max_token_index ) {
        if ( !peeked_ahead() ) {
            peeked_ahead(1);
            $rtokens =
              peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
        }
    }
    my $next_nonblank_token = $$rtokens[ ++$i ];

    # An undefined token (index beyond the end of the array) is treated
    # like a blank.  This gives the same result as matching undef against
    # the blank pattern, but without an 'uninitialized value' warning.
    if ( !defined($next_nonblank_token) || $next_nonblank_token =~ /^\s*$/ )
    {
        $next_nonblank_token = $$rtokens[ ++$i ];
    }
    return ( $next_nonblank_token, $i );
}
27561
sub numerator_expected {

    # this is a filter for a possible numerator, in support of guessing
    # for the / pattern delimiter token.
    # returns -
    #   1 - yes
    #   0 - can't tell
    #  -1 - no
    # Note: I am using the convention that variables ending in
    # _expected have these 3 possible values.
    #
    # Input parameters:
    #   $i               - pre-token index of the token being examined
    #                      (presumably the '/'; confirm against caller)
    #   $rtokens         - reference to the array of pre-tokens
    #   $max_token_index - passed through to find_next_nonblank_token
    #
    # The decision is based only on the next nonblank token:
    #   1 if it contains '(', '$', '.', '@' or a word character,
    #   0 if it is blank, empty or undefined,
    #  -1 for anything else.
    my ( $i, $rtokens, $max_token_index ) = @_;

    # handle /= by looking at what follows the '='
    my $next_token = $$rtokens[ $i + 1 ];
    if ( defined($next_token) && $next_token eq '=' ) { $i++ }

    # only the token is needed here, not its index
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # an undefined token is treated like a blank: can't tell
    return 0 unless defined($next_nonblank_token);

    return 1 if ( $next_nonblank_token =~ /[\(\$\w\.\@]/ );
    return 0 if ( $next_nonblank_token =~ /^\s*$/ );
    return -1;
}
27591
sub pattern_expected {

    # This is the start of a filter for a possible pattern.
    # It looks at the token after a possible pattern and tries to
    # determine if that token could end a pattern.
    # returns -
    #   1 - yes
    #   0 - can't tell
    #  -1 - no
    #
    # Input parameters:
    #   $i               - pre-token index of the token being examined
    #                      (presumably the closing delimiter of the
    #                      possible pattern; confirm against caller)
    #   $rtokens         - reference to the array of pre-tokens
    #   $max_token_index - passed through to find_next_nonblank_token
    my ( $i, $rtokens, $max_token_index ) = @_;

    # skip possible modifier: a token which begins with one of the
    # pattern modifier letters
    my $next_token = $$rtokens[ $i + 1 ];
    if ( defined($next_token) && $next_token =~ /^[msixpodualgc]/ ) {
        $i++;
    }

    # only the token is needed here, not its index
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # an undefined token is treated like a blank: can't tell
    return 0 unless defined($next_nonblank_token);

    # list of tokens which may follow a pattern
    # (can probably be expanded)
    # NOTE(review): this match is not anchored, so any token which merely
    # contains one of these strings (for example a word containing 'or')
    # also counts as a possible follower.  Kept as-is to preserve the
    # existing guess.
    return 1
      if ( $next_nonblank_token =~
        /(?:\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ );

    return 0 if ( $next_nonblank_token =~ /^\s*$/ );
    return -1;
}
27623
sub find_next_nonblank_token_on_this_line {

    # Return the token following index $i on the current line, stepping
    # over one blank token when another token follows it, together with
    # the index of the token returned.  Unlike find_next_nonblank_token,
    # this never looks beyond $max_token_index.
    #
    # Returns ( $token, $i ); the token is "" (and $i is unchanged) if
    # $i is not below $max_token_index.
    my ( $i, $rtokens, $max_token_index ) = @_;

    # nothing follows on this line
    return ( "", $i ) if ( $i >= $max_token_index );

    my $candidate = $rtokens->[ ++$i ];

    # step over a blank, but only if it is not the last token of the line
    if ( $candidate =~ /^\s*$/ && $i < $max_token_index ) {
        $candidate = $rtokens->[ ++$i ];
    }
    return ( $candidate, $i );
}
27643
27644 sub find_angle_operator_termination {
27645
27646     # We are looking at a '<' and want to know if it is an angle operator.
27647     # We are to return:
27648     #   $i = pretoken index of ending '>' if found, current $i otherwise
27649     #   $type = 'Q' if found, '>' otherwise
27650     my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
27651     my $i    = $i_beg;
27652     my $type = '<';
27653     pos($input_line) = 1 + $$rtoken_map[$i];
27654
27655     my $filter;
27656
27657     # we just have to find the next '>' if a term is expected
27658     if ( $expecting == TERM ) { $filter = '[\>]' }
27659
27660     # we have to guess if we don't know what is expected
27661     elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
27662
27663     # shouldn't happen - we shouldn't be here if operator is expected
27664     else { warning("Program Bug in find_angle_operator_termination\n") }
27665
27666     # To illustrate what we might be looking at, in case we are
27667     # guessing, here are some examples of valid angle operators
27668     # (or file globs):
27669     #  <tmp_imp/*>
27670     #  <FH>
27671     #  <$fh>
27672     #  <*.c *.h>
27673     #  <_>
27674     #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
27675     #  <${PREFIX}*img*.$IMAGE_TYPE>
27676     #  <img*.$IMAGE_TYPE>
27677     #  <Timg*.$IMAGE_TYPE>
27678     #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
27679     #
27680     # Here are some examples of lines which do not have angle operators:
27681     #  return undef unless $self->[2]++ < $#{$self->[1]};
27682     #  < 2  || @$t >
27683     #
27684     # the following line from dlister.pl caused trouble:
27685     #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
27686     #
27687     # If the '<' starts an angle operator, it must end on this line and
27688     # it must not have certain characters like ';' and '=' in it.  I use
27689     # this to limit the testing.  This filter should be improved if
27690     # possible.
27691
27692     if ( $input_line =~ /($filter)/g ) {
27693
27694         if ( $1 eq '>' ) {
27695
27696             # We MAY have found an angle operator termination if we get
27697             # here, but we need to do more to be sure we haven't been
27698             # fooled.
27699             my $pos = pos($input_line);
27700
27701             my $pos_beg = $$rtoken_map[$i];
27702             my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
27703
27704             # Reject if the closing '>' follows a '-' as in:
27705             # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
27706             if ( $expecting eq UNKNOWN ) {
27707                 my $check = substr( $input_line, $pos - 2, 1 );
27708                 if ( $check eq '-' ) {
27709                     return ( $i, $type );
27710                 }
27711             }
27712
27713             ######################################debug#####
27714             #write_diagnostics( "ANGLE? :$str\n");
27715             #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
27716             ######################################debug#####
27717             $type = 'Q';
27718             my $error;
27719             ( $i, $error ) =
27720               inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27721
27722             # It may be possible that a quote ends midway in a pretoken.
27723             # If this happens, it may be necessary to split the pretoken.
27724             if ($error) {
27725                 warning(
27726                     "Possible tokinization error..please check this line\n");
27727                 report_possible_bug();
27728             }
27729
27730             # Now let's see where we stand....
27731             # OK if math op not possible
27732             if ( $expecting == TERM ) {
27733             }
27734
27735             # OK if there are no more than 2 pre-tokens inside
27736             # (not possible to write 2 token math between < and >)
27737             # This catches most common cases
27738             elsif ( $i <= $i_beg + 3 ) {
27739                 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
27740             }
27741
27742             # Not sure..
27743             else {
27744
27745                 # Let's try a Brace Test: any braces inside must balance
27746                 my $br = 0;
27747                 while ( $str =~ /\{/g ) { $br++ }
27748                 while ( $str =~ /\}/g ) { $br-- }
27749                 my $sb = 0;
27750                 while ( $str =~ /\[/g ) { $sb++ }
27751                 while ( $str =~ /\]/g ) { $sb-- }
27752                 my $pr = 0;
27753                 while ( $str =~ /\(/g ) { $pr++ }
27754                 while ( $str =~ /\)/g ) { $pr-- }
27755
27756                 # if braces do not balance - not angle operator
27757                 if ( $br || $sb || $pr ) {
27758                     $i    = $i_beg;
27759                     $type = '<';
27760                     write_diagnostics(
27761                         "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
27762                 }
27763
27764                 # we should keep doing more checks here...to be continued
27765                 # Tentatively accepting this as a valid angle operator.
27766                 # There are lots more things that can be checked.
27767                 else {
27768                     write_diagnostics(
27769                         "ANGLE-Guessing yes: $str expecting=$expecting\n");
27770                     write_logfile_entry("Guessing angle operator here: $str\n");
27771                 }
27772             }
27773         }
27774
27775         # didn't find ending >
27776         else {
27777             if ( $expecting == TERM ) {
27778                 warning("No ending > for angle operator\n");
27779             }
27780         }
27781     }
27782     return ( $i, $type );
27783 }
27784
sub scan_number_do {

    #  Scan a number in any of the formats that Perl accepts, beginning at
    #  a given pre_token.  Underbars (_) are allowed in numbers.
    #  input parameters -
    #      $input_line  - the string to scan
    #      $i           - pre_token index to start scanning
    #    $rtoken_map    - reference to the pre_token map giving starting
    #                    character position in $input_line of token $i
    #    $input_type    - token type handed back when no number is found
    #    $max_token_index - upper limit for the pre_token index; it is
    #                    passed through to inverse_pretoken_map
    #  output parameters -
    #    $i            - last pre_token index of the number just scanned
    #                    (unchanged if no number was found)
    #    $type         - 'n' for a number, 'v' for a v-string, otherwise
    #                    $input_type
    #    number        - the number (characters); or undef if not a number

    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
    my $pos_beg = $$rtoken_map[$i];
    my $pos;
    my $number = undef;
    my $type   = $input_type;

    my $first_char = substr( $input_line, $pos_beg, 1 );

    # Look for bad starting characters; Shouldn't happen..
    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
        warning("Program bug - scan_number given character $first_char\n");
        report_definite_bug();
        return ( $i, $type, $number );
    }

    # handle v-string without leading 'v' character ('Two Dot' rule)
    # (vstring.t)
    # TODO: v-strings may contain underscores
    pos($input_line) = $pos_beg;
    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
        $pos = pos($input_line);
        my $numc = $pos - $pos_beg;
        $number = substr( $input_line, $pos_beg, $numc );
        $type = 'v';
        report_v_string($number);
    }

    # handle octal, hex, binary
    # The radix letter is accepted in either case: perl itself treats
    # 0X1F and 0B101 as numeric literals, just like 0x1f and 0b101.
    # Without this, '0X1F' would be scanned as the number '0' followed
    # by the bareword 'X1F'.
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;
        if ( $input_line =~
            /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
        {
            $pos = pos($input_line);
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # handle decimal
    # (every part of this pattern is optional, so it always matches and
    # $number is always defined below)
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;

        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
            $pos = pos($input_line);

            # watch out for things like 0..40 which would give 0. by this;
            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
            {
                $pos--;
            }
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # filter out non-numbers like e + - . e2  .e3 +e6
    # the rule: at least one digit, and any 'e' must be preceded by a digit
    if (
        $number !~ /\d/    # no digits
        || (   $number =~ /^(.*)[eE]/
            && $1 !~ /\d/ )    # or no digits before the 'e'
      )
    {
        $number = undef;
        $type   = $input_type;
        return ( $i, $type, $number );
    }

    # Found a number; now we must convert back from character position
    # to pre_token index. An error here implies user syntax error.
    # An example would be an invalid octal number like '009'.
    my $error;
    ( $i, $error ) =
      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
    if ($error) { warning("Possibly invalid number\n") }

    return ( $i, $type, $number );
}
27880
sub inverse_pretoken_map {

    # Convert a character position back into a pre_token index.
    #
    # Beginning just after pre_token $i, step forward to the first
    # pre_token which starts at or beyond character position $pos, and
    # return the index of the pre_token just before it.
    #
    # Returns ( $i, $error ), where $error is 1 if $pos falls inside a
    # pre_token rather than on a pre_token boundary.  If no pre_token up
    # to $max_token_index starts at or beyond $pos, the returned index is
    # one past the last index examined and $error is 0.
    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
    my $error = 0;

    for ( $i++ ; $i <= $max_token_index ; $i++ ) {
        my $token_start = $rtoken_map->[$i];

        # keep going until we reach or pass the target position
        next if ( $pos > $token_start );

        # Let the calling routine handle errors in which we do not
        # land on a pre-token boundary.  It can happen by running
        # perltidy on some non-perl scripts, for example.
        $error = 1 if ( $pos < $token_start );

        # the token we want is the one just before this one
        $i--;
        last;
    }
    return ( $i, $error );
}
27902
sub find_here_doc {

    # Look for the target string of a here document following '<<'.
    # input parameters:
    #   $expecting - what the tokenizer expects here (compared against
    #     TERM and UNKNOWN)
    #   $i - token index of the second < of <<
    #   ($i must be less than the last token index if this is called)
    #   $rtokens, $rtoken_map, $max_token_index - the pre_tokens of the line
    # return values:
    #   $found_target = 0 didn't find target; =1 found target
    #   $here_doc_target - the target string (may be empty string)
    #   $here_quote_character - the quote character around the target, or ''
    #   $i - unchanged if not here doc,
    #    or index of the last token of the here target
    #   $saw_error - set if a quoted target was not closed on this line
    #    and a term was expected
    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;

    my $i_start = $i;
    my ( $found_target,    $saw_error )            = ( 0,  0 );
    my ( $here_doc_target, $here_quote_character ) = ( '', '' );

    # perl allows a backslash before the target string (heredoc.t);
    # if there is one, the candidate target is the token after it
    my $next_token = $rtokens->[ $i + 1 ];
    my $backslash = ( $next_token eq '\\' ) ? 1 : 0;
    if ($backslash) { $next_token = $rtokens->[ $i + 2 ] }

    my ( $next_nonblank_token, $i_next_nonblank ) =
      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );

    # Quoted target: <<"TARGET", <<'TARGET', or <<`TARGET`
    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {

        my $in_quote    = 1;
        my $quote_pos   = 0;
        my $quote_depth = 0;
        my $quoted_string;

        (
            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );

        # found the ending quote: the target is everything between the
        # quotes
        if ( !$in_quote ) {
            $found_target = 1;

            foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
                my $tokj = $rtokens->[$j];

                # we have to remove any backslash before the quote character
                # so that the here-doc-target exactly matches this string
                next
                  if ( $tokj eq "\\"
                    && $j < $i - 1
                    && $rtokens->[ $j + 1 ] eq $here_quote_character );
                $here_doc_target .= $tokj;
            }
        }

        # didn't find end of quote, so no target found
        else {
            $i = $i_start;
            if ( $expecting == TERM ) {
                warning(
"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
                );
                $saw_error = 1;
            }
        }
    }

    # Blank after the '<<' where a term is expected: empty target
    elsif ( $next_token =~ /^\s*$/ && $expecting == TERM ) {
        $found_target = 1;
        write_logfile_entry(
            "found blank here-target after <<; suggest using \"\"\n");
        $i = $i_start;
    }

    # Simple bareword or integer after <<
    elsif ( $next_token =~ /^\w/ ) {

        # if we do not know what to expect then we have to guess
        my $here_doc_expected =
          ( $expecting == UNKNOWN ) ? guess_if_here_doc($next_token) : 1;

        if ($here_doc_expected) {
            $found_target    = 1;
            $here_doc_target = $next_token;
            $i               = $i_start + 1;
        }
    }

    # Something else follows; a bare '<<' is a here-doc only if a term
    # is expected
    elsif ( $expecting == TERM ) {
        $found_target = 1;
        write_logfile_entry("Note: bare here-doc operator <<\n");
    }
    else {
        $i = $i_start;
    }

    # patch to neglect any prepended backslash
    $i++ if ( $found_target && $backslash );

    return ( $found_target, $here_doc_target, $here_quote_character, $i,
        $saw_error );
}
28016
sub do_quote {

    # Follow (or continue following) one or two quoted strings by
    # calling follow_quoted_string for each one still open.
    #
    # $in_quote on input and on return:
    #   0 - ok, found end
    #   1 - still must find end of quote whose target is $quote_character
    #   2 - still looking for end of first of two quotes
    #
    # The text which is found is appended to the strings passed in:
    #  $quoted_string_1 = quoted string seen while in_quote=1
    #  $quoted_string_2 = quoted string seen while in_quote=2
    # A newline is appended to a string whose quote is still open when
    # the end of the tokens is reached.
    my (
        $i,               $in_quote,    $quote_character,
        $quote_pos,       $quote_depth, $quoted_string_1,
        $quoted_string_2, $rtokens,     $rtoken_map,
        $max_token_index
    ) = @_;

    my $quoted_string;

    # two quotes to follow; look for the end of the first one
    if ( $in_quote == 2 ) {
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_2 .= $quoted_string;

        # first quote still open at the end of the tokens
        if ( $in_quote != 1 ) {
            $quoted_string_2 .= "\n";
        }

        # first quote is complete; step over a closing bracket-type
        # delimiter and clear the quote character so that the delimiter
        # of the second quote is looked up afresh
        else {
            $i++ if ( $quote_character =~ /[\{\[\<\(]/ );
            $quote_character = '';
        }
    }

    # one (more) quote to follow
    if ( $in_quote == 1 ) {
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_1 .= $quoted_string;
        $quoted_string_1 .= "\n" if ( $in_quote == 1 );
    }

    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2 );
}
28072
sub follow_quoted_string {

    # Scan forward through the pre_tokens looking for the token which
    # closes a quote, skipping escaped characters.  If the starting quote
    # character is blank then the first non-blank character is used.
    # input parameters:
    #   $i_beg = the token index of the first character to search
    #   $in_quote = number of quoted strings being followed
    #   $rtokens = reference to the array of tokens
    #   $beginning_tok = the starting quote character
    #   $quote_pos = index to check next for alphanumeric delimiter
    #   $quote_depth = current nesting depth of the delimiters
    #   $max_token_index = index of the last token which may be examined
    # return values:
    #   $i = the token index of the ending quote character
    #   $in_quote = decremented if found end, unchanged if not
    #   $beginning_tok = the starting quote character
    #   $quote_pos = index to check next for alphanumeric delimiter
    #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
    #   $quoted_string = the text of the quote (without quotation tokens)
    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
        $max_token_index )
      = @_;
    my $i             = $i_beg - 1;
    my $quoted_string = "";
    my ( $tok, $end_tok );

    if (TOKENIZER_DEBUG_FLAG_QUOTE) {
        print
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
    }

    # we already know the starting token; get the corresponding end token
    if ( $beginning_tok !~ /^\s*$/ ) {
        $end_tok = matching_end_token($beginning_tok);
    }

    # a blank token means we must find and use the first non-blank one
    else {
        my $allow_quote_comments = ( $i < 0 );    # i<0 means we saw a <cr>

        while ( $i < $max_token_index ) {
            $tok = $rtokens->[ ++$i ];

            # a comment is allowed once we have passed some whitespace
            if ( $tok =~ /^\s*$/ ) {
                $allow_quote_comments = 1;
                next;
            }

            # a comment takes the rest of the line; moving to the last
            # token ends this loop
            if ( $tok eq '#' && $allow_quote_comments ) {
                $i = $max_token_index;
                next;
            }

            # The delimiter is one character; within a longer token it
            # is the character at position $quote_pos (1-based).
            if ( length($tok) > 1 ) {
                $quote_pos = 1 if ( $quote_pos <= 0 );
                $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
            }
            else {
                $beginning_tok = $tok;
                $quote_pos     = 0;
            }
            $end_tok     = matching_end_token($beginning_tok);
            $quote_depth = 1;
            last;
        }
    }

    # There are two different loops which search for the ending quote
    # character.  In the rare case of an alphanumeric quote delimiter, we
    # have to look through alphanumeric tokens character-by-character, since
    # the pre-tokenization process combines multiple alphanumeric
    # characters, whereas for a non-alphanumeric delimiter, only tokens of
    # length 1 can match.

    ###################################################################
    # Case 1 (rare): loop for case of alphanumeric quote delimiter..
    # "quote_pos" is the position the current word to begin searching
    ###################################################################
    if ( $beginning_tok =~ /\w/ ) {

        # Note this because it is not recommended practice except
        # for obfuscated perl contests
        if ( $in_quote == 1 ) {
            write_logfile_entry(
                "Note: alphanumeric quote delimiter ($beginning_tok) \n");
        }

        while ( $i < $max_token_index ) {

            # move on to the next token unless we are still part way
            # through the current one
            if ( $quote_pos == 0 || $i < 0 ) {
                $tok = $rtokens->[ ++$i ];

                if ( $tok eq '\\' ) {

                    # retain backslash unless it hides the end token
                    if ( $rtokens->[ $i + 1 ] ne $end_tok ) {
                        $quoted_string .= $tok;
                    }
                    $quote_pos++;
                    last if ( $i >= $max_token_index );
                    $tok = $rtokens->[ ++$i ];
                }
            }

            # look for the end token within this token, starting at the
            # current search position
            my $search_from = $quote_pos;
            $quote_pos = 1 + index( $tok, $end_tok, $search_from );

            # not in this token: keep the rest of it and continue
            if ( $quote_pos <= 0 ) {
                $quoted_string .= substr( $tok, $search_from );
                next;
            }

            # found: keep the text in front of the end token
            $quoted_string .=
              substr( $tok, $search_from, $quote_pos - $search_from - 1 );

            if ( --$quote_depth == 0 ) {
                $in_quote--;
                last;
            }
        }
    }

    ########################################################################
    # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
    ########################################################################
    else {

        while ( $i < $max_token_index ) {
            $tok = $rtokens->[ ++$i ];

            if ( $tok eq $end_tok ) {
                if ( --$quote_depth == 0 ) {
                    $in_quote--;
                    last;
                }
            }
            elsif ( $tok eq $beginning_tok ) {
                $quote_depth++;
            }
            elsif ( $tok eq '\\' ) {

                # retain backslash unless it hides the beginning or end token
                $tok = $rtokens->[ ++$i ];
                if ( $tok ne $end_tok && $tok ne $beginning_tok ) {
                    $quoted_string .= '\\';
                }
            }
            $quoted_string .= $tok;
        }
    }

    # stepping over an escaped token can take us past the last token
    $i = $max_token_index if ( $i > $max_token_index );

    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
        $quoted_string );
}
28234
sub indicate_error {

    # Issue a warning message followed by the numbered input line and an
    # underline which marks the position of the error.  The logfile is
    # interrupted while this is written and resumed afterwards.
    #   $msg         = the warning message
    #   $line_number = number of the input line
    #   $input_line  = text of the input line
    #   $pos         = character position of the error in $input_line
    #   $carrat      = the marker string to write on the underline
    my $msg = shift;
    my ( $line_number, $input_line, $pos, $carrat ) = @_;

    interrupt_logfile();
    warning($msg);
    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
    resume_logfile();
}
28242
sub write_error_indicator_pair {

    # Write two warning lines: the numbered input line, and below it an
    # underline with the marker $carrat written at the position of
    # interest, $pos.
    my ( $line_number, $input_line, $pos, $carrat ) = @_;

    # $offset corrects $pos for the line number prefix and for any part
    # of the line which is not displayed
    my ( $offset, $numbered_line, $blank_underline ) =
      make_numbered_line( $line_number, $input_line, $pos );

    my $marked_underline =
      write_on_underline( $blank_underline, $pos - $offset, $carrat );

    # no need to write the blanks to the right of the marker
    $marked_underline =~ s/\s*$//;

    warning("$numbered_line\n");
    warning("$marked_underline\n");
}
28252
sub make_numbered_line {

    #  Make a short, numbered display version of an input line for use in
    #  an error message.  The result has the form
    #     $lineno: sub_string
    #  where sub_string is a limited window of $str which contains the
    #  position of interest.
    #
    #  If the line continues past the window then a trailing ' ...' is
    #  added:
    #
    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #
    #  and if the window does not start at the beginning of the line then
    #  a leading '... ' is added:
    #
    # 2: ... er of the World Wide Web Consortium's
    #
    #  input parameters are:
    #   $lineno = line number
    #   $str = the text of the line
    #   $pos = position of interest (the error) : 0 = first character
    #
    #   We return :
    #     - $offset = an offset which corrects the position in case we only
    #       display part of a line, such that $pos-$offset is the effective
    #       position from the start of the displayed line.
    #     - $numbered_line = the numbered line as above,
    #     - $underline = a blank 'underline' which is all spaces with the same
    #       number of characters as the numbered line.

    my ( $lineno, $str, $pos ) = @_;

    # the window starts at the beginning of the line unless the position
    # of interest is too far to the right
    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;

    # true if the text from $offset onward exceeds 68 characters
    my $is_too_long = ( length($str) - $offset - 68 ) > 0;

    if ( $offset != 0 ) {

        # the first 4 characters of the window are given up to the
        # leading '... '
        my $start = $offset + 4;
        my $shown =
            $is_too_long
          ? substr( $str, $start, 64 ) . " ..."
          : substr( $str, $start );
        $str = "... " . $shown;
    }
    elsif ($is_too_long) {
        $str = substr( $str, 0, 64 ) . " ...";
    }

    # the line number prefix shifts everything to the right
    my $prefix = sprintf( "%d: ", $lineno );
    $offset -= length($prefix);

    my $numbered_line = $prefix . $str;
    my $underline     = " " x length($numbered_line);
    return ( $offset, $numbered_line, $underline );
}
28311
sub write_on_underline {

    # Write a marker string onto an "underline" at a given position and
    # return the result.
    #
    # The underline is the string which shows where an error is; it starts
    # out as a string of blanks with the same length as the numbered line
    # of code above it.  In the example below, the marker '---^' has been
    # written just below the line of bad code:
    #
    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #                 ---^
    #
    # In the above example, there will be 2 calls to do this:
    # First call:  $pos=19, pos_chr=^
    # Second call: $pos=16, pos_chr=---
    #
    # input parameters:
    #   $underline = the current underline string
    #   $pos       = position at which to write (0 = first character)
    #   $pos_chr   = the string to write there
    #
    # The underline never changes length: a marker which would extend past
    # the end is clipped, and the underline is returned unchanged if $pos
    # is not within it.

    my ( $underline, $pos, $pos_chr ) = @_;

    # check for error..shouldn't happen
    return $underline if ( $pos < 0 || $pos > length($underline) );

    # clip the marker if it does not fit in the space which remains
    my $room = length($underline) - $pos;
    if ( length($pos_chr) > $room ) {
        $pos_chr = substr( $pos_chr, 0, $room );
    }

    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
    return $underline;
}
28345
sub pre_tokenize {

    # Split a string, $str, into a sequence of preliminary tokens, each
    # of which is one of:
    #   a word           (type = 'w'),     example: 'max_tokens_wanted'
    #   a run of digits  (type = 'd'),     example: '0755'
    #   a run of whitespace (type = 'b'),  example: '   '
    #   any other single character (i.e. punct; type = the character itself).
    # We cannot do better than this yet because we might be in a quoted
    # string or pattern.
    #
    # At most $max_tokens_wanted tokens are returned; the caller sets
    # $max_tokens_wanted to 0 to get all tokens.
    #
    # We return references to 3 arrays:
    #   the tokens themselves,
    #   the string position of the start of each token (this array has
    #     one more entry than the others, the position just after the
    #     last token),
    #   the type of each token.
    my ( $str, $max_tokens_wanted ) = @_;

    my @tokens;
    my @token_map = (0);
    my @type;

    while (1) {
        my ( $kind, $text );

        # whitespace
        if ( $str =~ /\G(\s+)/gc ) { ( $kind, $text ) = ( 'b', $1 ) }

        # numbers
        # note that this must come before words!
        elsif ( $str =~ /\G(\d+)/gc ) { ( $kind, $text ) = ( 'd', $1 ) }

        # words
        elsif ( $str =~ /\G(\w+)/gc ) { ( $kind, $text ) = ( 'w', $1 ) }

        # single-character punctuation
        elsif ( $str =~ /\G(\W)/gc ) { ( $kind, $text ) = ( $1, $1 ) }

        # that's all..
        else { last }

        push @type,      $kind;
        push @tokens,    $text;
        push @token_map, pos($str);

        # stop if we have the number wanted; a count of 0 never gets here
        last if ( --$max_tokens_wanted == 0 );
    }

    return ( \@tokens, \@token_map, \@type );
}
28391
sub show_tokens {

    # this is an old debug routine
    # It prints one line for each token, of the form
    #   index:length:map entry:token:
    # where the map entry is $rtoken_map->[index].
    my ( $rtokens, $rtoken_map ) = @_;

    my $i = 0;
    foreach my $token ( @{$rtokens} ) {
        print join( ':', $i, length($token), $rtoken_map->[$i], $token, '' )
          . "\n";
        $i++;
    }
}
28404
sub matching_end_token {

    # Return the closing character which goes with an opening quote or
    # pattern delimiter: one of the bracket-type characters '{', '[', '<'
    # and '(' is closed by its partner; anything else closes itself.
    my $beginning_token = shift;

    my %closing_token_for = (
        '{' => '}',
        '[' => ']',
        '<' => '>',
        '(' => ')',
    );

    return exists $closing_token_for{$beginning_token}
      ? $closing_token_for{$beginning_token}
      : $beginning_token;
}
28426
sub dump_token_types {

    # Print a reference list of the token types and the '_line_type'
    # codes to a filehandle.
    #   $class = the invocant (not used)
    #   $fh    = the filehandle to print to
    my $class = shift;
    my $fh    = shift;

    # This should be the latest list of token types in use
    # adding NEW_TOKENS: add a comment here
    # The operator lists include the smartmatch tokens '~~' and '!~~' so
    # that they agree with the digraphs and trigraphs which the tokenizer
    # registers as valid token types.
    print $fh <<'END_OF_LIST';

Here is a list of the token types currently used for lines of type 'CODE'.
For the following tokens, the "type" of a token is just the token itself.

.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
( ) <= >= == =~ !~ != ++ -- /= x= ~~
... **= <<= >>= &&= ||= //= <=> !~~
, + - / * | % ! x ~ = \ ? : . < > ^ &

The following additional token types are defined:

 type    meaning
    b    blank (white space)
    {    indent: opening structural curly brace or square bracket or paren
         (code block, anonymous hash reference, or anonymous array reference)
    }    outdent: right structural curly brace or square bracket or paren
    [    left non-structural square bracket (enclosing an array index)
    ]    right non-structural square bracket
    (    left non-structural paren (all but a list right of an =)
    )    right non-structural paren
    L    left non-structural curly brace (enclosing a key)
    R    right non-structural curly brace
    ;    terminal semicolon
    f    indicates a semicolon in a "for" statement
    h    here_doc operator <<
    #    a comment
    Q    indicates a quote or pattern
    q    indicates a qw quote block
    k    a perl keyword
    C    user-defined constant or constant function (with void prototype = ())
    U    user-defined function taking parameters
    G    user-defined function taking block parameter (like grep/map/eval)
    M    (unused, but reserved for subroutine definition name)
    P    (unused, but -html uses it to label pod text)
    t    type indicator such as %,$,@,*,&,sub
    w    bare word (perhaps a subroutine call)
    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
    n    a number
    v    a v-string
    F    a file test operator (like -e)
    Y    File handle
    Z    identifier in indirect object slot: may be file handle, object
    J    LABEL:  code block label
    j    LABEL after next, last, redo, goto
    p    unary +
    m    unary -
    pp   pre-increment operator ++
    mm   pre-decrement operator --
    A    : used as attribute separator

    Here are the '_line_type' codes used internally:
    SYSTEM         - system-specific code before hash-bang line
    CODE           - line of perl code (including comments)
    POD_START      - line starting pod, such as '=head'
    POD            - pod documentation text
    POD_END        - last line of pod section, '=cut'
    HERE           - text of here-document
    HERE_END       - last line of here-doc (target word)
    FORMAT         - format section
    FORMAT_END     - last line of format section, '.'
    DATA_START     - __DATA__ line
    DATA           - unidentified text following __DATA__
    END_START      - __END__ line
    END            - unidentified text following __END__
    ERROR          - we are in big trouble, probably not a perl script
END_OF_LIST
}
28501
BEGIN {

    # Compile-time initialization of the tokenizer's lookup tables.
    #
    # Every array and hash assigned below is declared outside this block;
    # this block only fills them in.  Each lookup table is a hash whose keys
    # are the listed words or token types and whose values are all 1.
    #
    # Short word lists are staged in the lexical scratch array @q.  The
    # argument array @_ is intentionally not used as scratch space.
    my @q;

    # These names are used in error messages
    @opening_brace_names = qw# '{' '[' '(' '?' #;
    @closing_brace_names = qw# '}' ']' ')' ':' #;

    # two-character tokens
    my @digraphs = qw(
      .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x= ~~
    );
    @is_digraph{@digraphs} = (1) x scalar(@digraphs);

    # three-character tokens
    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);

    # make a hash of all valid token types for self-checking the tokenizer
    # (adding NEW_TOKENS : select a new character and add to this list)
    my @valid_token_types = qw#
      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
      #;
    push( @valid_token_types, @digraphs );
    push( @valid_token_types, @trigraphs );

    # '#' and ',' cannot be written inside the qw# ... # list above
    push( @valid_token_types, '#' );
    push( @valid_token_types, ',' );
    @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);

    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
    my @file_test_operators =
      qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
    @is_file_test_operator{@file_test_operators} =
      (1) x scalar(@file_test_operators);

    # these functions have prototypes of the form (&), so when they are
    # followed by a block, that block MAY BE followed by an operator.
    @q = qw( do eval );
    @is_block_operator{@q} = (1) x scalar(@q);

    # these functions allow an identifier in the indirect object slot
    @q = qw( print printf sort exec system say);
    @is_indirect_object_taker{@q} = (1) x scalar(@q);

    # These tokens may precede a code block
    # patched for SWITCH/CASE
    @q =
      qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
      unless do while until eval for foreach map grep sort
      switch case given when);
    @is_code_block_token{@q} = (1) x scalar(@q);

    # I'll build the list of keywords incrementally
    my @Keywords = ();

    # keywords and tokens after which a value or pattern is expected,
    # but not an operator.  In other words, these should consume terms
    # to their right, or at least they are not expected to be followed
    # immediately by operators.
    my @value_requestor = qw(
      AUTOLOAD
      BEGIN
      CHECK
      DESTROY
      END
      EQ
      GE
      GT
      INIT
      LE
      LT
      NE
      UNITCHECK
      abs
      accept
      alarm
      and
      atan2
      bind
      binmode
      bless
      break
      caller
      chdir
      chmod
      chomp
      chop
      chown
      chr
      chroot
      close
      closedir
      cmp
      connect
      continue
      cos
      crypt
      dbmclose
      dbmopen
      defined
      delete
      die
      dump
      each
      else
      elsif
      eof
      eq
      exec
      exists
      exit
      exp
      fcntl
      fileno
      flock
      for
      foreach
      formline
      ge
      getc
      getgrgid
      getgrnam
      gethostbyaddr
      gethostbyname
      getnetbyaddr
      getnetbyname
      getpeername
      getpgrp
      getpriority
      getprotobyname
      getprotobynumber
      getpwnam
      getpwuid
      getservbyname
      getservbyport
      getsockname
      getsockopt
      glob
      gmtime
      goto
      grep
      gt
      hex
      if
      index
      int
      ioctl
      join
      keys
      kill
      last
      lc
      lcfirst
      le
      length
      link
      listen
      local
      localtime
      lock
      log
      lstat
      lt
      map
      mkdir
      msgctl
      msgget
      msgrcv
      msgsnd
      my
      ne
      next
      no
      not
      oct
      open
      opendir
      or
      ord
      our
      pack
      pipe
      pop
      pos
      print
      printf
      prototype
      push
      quotemeta
      rand
      read
      readdir
      readlink
      readline
      readpipe
      recv
      redo
      ref
      rename
      require
      reset
      return
      reverse
      rewinddir
      rindex
      rmdir
      scalar
      seek
      seekdir
      select
      semctl
      semget
      semop
      send
      sethostent
      setnetent
      setpgrp
      setpriority
      setprotoent
      setservent
      setsockopt
      shift
      shmctl
      shmget
      shmread
      shmwrite
      shutdown
      sin
      sleep
      socket
      socketpair
      sort
      splice
      split
      sprintf
      sqrt
      srand
      stat
      study
      substr
      symlink
      syscall
      sysopen
      sysread
      sysseek
      system
      syswrite
      tell
      telldir
      tie
      tied
      truncate
      uc
      ucfirst
      umask
      undef
      unless
      unlink
      unpack
      unshift
      untie
      until
      use
      utime
      values
      vec
      waitpid
      warn
      while
      write
      xor

      switch
      case
      given
      when
      err
      say
    );

    # patched above for SWITCH/CASE given/when err say
    # 'err' is a fairly safe addition.
    # TODO: 'default' still needed if appropriate
    # 'use feature' seen, but perltidy works ok without it.
    # Concerned that 'default' could break code.
    push( @Keywords, @value_requestor );

    # These are treated the same but are not keywords.  They are appended
    # only after @value_requestor has been copied into @Keywords, so they
    # end up in %expecting_term_token but not in %is_keyword.
    my @extra_vr = qw(
      constant
      vars
    );
    push( @value_requestor, @extra_vr );

    @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);

    # this list contains keywords which do not look for arguments,
    # so that they might be followed by an operator, or at least
    # not a term.
    my @operator_requestor = qw(
      endgrent
      endhostent
      endnetent
      endprotoent
      endpwent
      endservent
      fork
      getgrent
      gethostent
      getlogin
      getnetent
      getppid
      getprotoent
      getpwent
      getservent
      setgrent
      setpwent
      time
      times
      wait
      wantarray
    );

    push( @Keywords, @operator_requestor );

    # These are treated the same but are not considered keywords.  As with
    # @extra_vr, they are appended after the copy into @Keywords was made.
    my @extra_or = qw(
      STDERR
      STDIN
      STDOUT
    );

    push( @operator_requestor, @extra_or );

    @expecting_operator_token{@operator_requestor} =
      (1) x scalar(@operator_requestor);

    # these token TYPES expect trailing operator but not a term
    # note: ++ and -- are post-increment and decrement, 'C' = constant
    my @operator_requestor_types = qw( ++ -- C <> q );
    @expecting_operator_types{@operator_requestor_types} =
      (1) x scalar(@operator_requestor_types);

    # these token TYPES consume values (terms)
    # note: pp and mm are pre-increment and decrement
    # f=semicolon in for,  F=file test operator
    my @value_requestor_type = qw#
      L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
      <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
      f F pp mm Y p m U J G j >> << ^ t
      #;
    push( @value_requestor_type, ',' )
      ;    # (perl doesn't like a ',' in a qw block)
    @expecting_term_types{@value_requestor_type} =
      (1) x scalar(@value_requestor_type);

    # Note: the following valid token types are not assigned here to
    # hashes requesting to be followed by values or terms, but are
    # instead currently hard-coded into sub operator_expected:
    # ) -> :: Q R Z ] b h i k n v w } #

    # For simple syntax checking, it is nice to have a list of operators which
    # will really be unhappy if not followed by a term.  This includes most
    # of the above...
    %really_want_term = %expecting_term_types;

    # with these exceptions...
    delete $really_want_term{'U'}; # user sub, depends on prototype
    delete $really_want_term{'F'}; # file test works on $_ if no following term
    delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
                                   # let perl do it

    # the quote-like operator words
    @q = qw(q qq qw qx qr s y tr m);
    @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);

    # These keywords are handled specially in the tokenizer code:
    my @special_keywords = qw(
      do
      eval
      format
      m
      package
      q
      qq
      qr
      qw
      qx
      s
      sub
      tr
      y
    );
    push( @Keywords, @special_keywords );

    # Keywords after which list formatting may be used
    # WARNING: do not include |map|grep|eval or perl may die on
    # syntax errors (map1.t).
    my @keyword_taking_list = qw(
      and
      chmod
      chomp
      chop
      chown
      dbmopen
      die
      elsif
      exec
      fcntl
      for
      foreach
      formline
      getsockopt
      if
      index
      ioctl
      join
      kill
      local
      msgctl
      msgrcv
      msgsnd
      my
      open
      or
      our
      pack
      print
      printf
      push
      read
      readpipe
      recv
      return
      reverse
      rindex
      seek
      select
      semctl
      semget
      send
      setpriority
      setsockopt
      shmctl
      shmget
      shmread
      shmwrite
      socket
      socketpair
      sort
      splice
      split
      sprintf
      substr
      syscall
      sysopen
      sysread
      sysseek
      system
      syswrite
      tie
      unless
      unlink
      unpack
      unshift
      until
      vec
      warn
      while
      given
      when
    );
    @is_keyword_taking_list{@keyword_taking_list} =
      (1) x scalar(@keyword_taking_list);

    # These are not used in any way yet
    #    my @unused_keywords = qw(
    #      CORE
    #     __FILE__
    #     __LINE__
    #     __PACKAGE__
    #     );

    #  The list of keywords was originally extracted from function 'keyword' in
    #  perl file toke.c version 5.005.03, using this utility, plus a
    #  little editing: (file getkwd.pl):
    #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
    #  Add 'get' prefix where necessary, then split into the above lists.
    #  This list should be updated as necessary.
    #  The list should not contain these special variables:
    #  ARGV DATA ENV SIG STDERR STDIN STDOUT
    #  __DATA__ __END__

    # @Keywords now holds the value requestors, the operator requestors and
    # the special keywords (without the @extra_vr / @extra_or additions).
    @is_keyword{@Keywords} = (1) x scalar(@Keywords);
}
28995 1;
28996 __END__
28997
28998 =head1 NAME
28999
29000 Perl::Tidy - Parses and beautifies perl source
29001
29002 =head1 SYNOPSIS
29003
29004     use Perl::Tidy;
29005
29006     Perl::Tidy::perltidy(
29007         source            => $source,
29008         destination       => $destination,
29009         stderr            => $stderr,
29010         argv              => $argv,
29011         perltidyrc        => $perltidyrc,
29012         logfile           => $logfile,
29013         errorfile         => $errorfile,
29014         formatter         => $formatter,           # callback object (see below)
29015         dump_options      => $dump_options,
29016         dump_options_type => $dump_options_type,
29017         prefilter         => $prefilter_coderef,
29018         postfilter        => $postfilter_coderef,
29019     );
29020
29021 =head1 DESCRIPTION
29022
29023 This module makes the functionality of the perltidy utility available to perl
29024 scripts.  Any or all of the input parameters may be omitted, in which case the
29025 @ARGV array will be used to provide input parameters as described
29026 in the perltidy(1) man page.
29027
29028 For example, the perltidy script is basically just this:
29029
29030     use Perl::Tidy;
29031     Perl::Tidy::perltidy();
29032
29033 The module accepts input and output streams by a variety of methods.
29034 Each of the following parameters may be any one of these: a
29035 filename, an ARRAY reference, a SCALAR reference, or an object with
29036 either a B<getline> or B<print> method, as appropriate.
29037
29038         source            - the source of the script to be formatted
29039         destination       - the destination of the formatted output
29040         stderr            - standard error output
29041         perltidyrc        - the .perltidyrc file
29042         logfile           - the .LOG file stream, if any
29043         errorfile         - the .ERR file stream, if any
29044         dump_options      - ref to a hash to receive parameters (see below)
29045         dump_options_type - controls contents of dump_options
29046         dump_getopt_flags - ref to a hash to receive Getopt flags
29047         dump_options_category - ref to a hash giving category of options
29048         dump_abbreviations    - ref to a hash giving all abbreviations
29049
29050 The following chart illustrates the logic used to decide how to
29051 treat a parameter.
29052
29053    ref($param)  $param is assumed to be:
29054    -----------  ---------------------
29055    undef        a filename
29056    SCALAR       ref to string
29057    ARRAY        ref to array
29058    (other)      object with getline (if source) or print method
29059
29060 If the parameter is an object, and the object has a B<close> method, that
29061 close method will be called at the end of the stream.
29062
29063 =over 4
29064
29065 =item source
29066
29067 If the B<source> parameter is given, it defines the source of the input stream.
29068 If an input stream is defined with the B<source> parameter then no other source
29069 filenames may be specified in the @ARGV array or B<argv> parameter.
29070
29071 =item destination
29072
29073 If the B<destination> parameter is given, it will be used to define the
29074 file or memory location to receive output of perltidy.
29075
29076 =item stderr
29077
29078 The B<stderr> parameter allows the calling program to redirect to a file the
29079 output of what would otherwise go to the standard error output device.  Unlike
29080 many other parameters, $stderr must be a file or file handle; it may not be a
29081 reference to a SCALAR or ARRAY.
29082
29083 =item perltidyrc
29084
29085 If the B<perltidyrc> file is given, it will be used instead of any
29086 F<.perltidyrc> configuration file that would otherwise be used.
29087
29088 =item argv
29089
29090 If the B<argv> parameter is given, it will be used instead of the
29091 B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
29092 string, or a reference to an array.  If it is a string or reference to a
29093 string, it will be parsed into an array of items just as if it were a
29094 command line string.
29095
29096 =item dump_options
29097
29098 If the B<dump_options> parameter is given, it must be the reference to a hash.
29099 In this case, the parameters contained in any perltidyrc configuration file
29100 will be placed in this hash and perltidy will return immediately.  This is
29101 equivalent to running perltidy with --dump-options, except that the parameters
29102 are returned in a hash rather than dumped to standard output.  Also, by default
29103 only the parameters in the perltidyrc file are returned, but this can be
29104 changed (see the next parameter).  This parameter provides a convenient method
29105 for external programs to read a perltidyrc file.  An example program using
29106 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
29107
29108 Any combination of the B<dump_> parameters may be used together.
29109
29110 =item dump_options_type
29111
29112 This parameter is a string which can be used to control the parameters placed
29113 in the hash reference supplied by B<dump_options>.  The possible values are
29114 'perltidyrc' (default) and 'full'.  The 'full' parameter causes both the
29115 default options plus any options found in a perltidyrc file to be returned.
29116
29117 =item dump_getopt_flags
29118
29119 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
29120 hash.  This hash will receive all of the parameters that perltidy understands
29121 and flags that are passed to Getopt::Long.  This parameter may be
29122 used alone or with the B<dump_options> flag.  Perltidy will
29123 exit immediately after filling this hash.  See the demo program
29124 F<perltidyrc_dump.pl> for example usage.
29125
29126 =item dump_options_category
29127
29128 If the B<dump_options_category> parameter is given, it must be a reference to a
29129 hash.  This hash will be filled with keys equal to all long parameter names
29130 and values equal to the title of the corresponding section of the perltidy manual.
29131 See the demo program F<perltidyrc_dump.pl> for example usage.
29132
29133 =item dump_abbreviations
29134
29135 If the B<dump_abbreviations> parameter is given, it must be the reference to a
29136 hash.  This hash will receive all abbreviations used by Perl::Tidy.  See the
29137 demo program F<perltidyrc_dump.pl> for example usage.
29138
29139 =item prefilter
29140
29141 A code reference that will be applied to the source before tidying. It is
29142 expected to take the full content as a string in its input, and output the
29143 transformed content.
29144
29145 =item postfilter
29146
29147 A code reference that will be applied to the tidied result before outputting.
29148 It is expected to take the full content as a string in its input, and output
29149 the transformed content.
29150
29151 Note: A convenient way to check the function of your custom prefilter and
29152 postfilter code is to use the --notidy option, first with just the prefilter
29153 and then with both the prefilter and postfilter.  See also the file
29154 B<filter_example.pl> in the perltidy distribution.
29155
29156 =back
29157
29158 =head1 NOTES ON FORMATTING PARAMETERS
29159
29160 Parameters which control formatting may be passed in several ways: in a
29161 F<.perltidyrc> configuration file, in the B<perltidyrc> parameter, and in the
29162 B<argv> parameter.
29163
29164 The B<-syn> (B<--check-syntax>) flag may be used with all source and
29165 destination streams except for standard input and output.  However
29166 data streams which are not associated with a filename will
29167 be copied to a temporary file before being passed to Perl.  This
29168 use of temporary files can cause somewhat confusing output from Perl.
29169
29170 =head1 EXAMPLES
29171
29172 The perltidy script itself is a simple example, and several
29173 examples are given in the perltidy distribution.
29174
29175 The following example passes perltidy a snippet as a reference
29176 to a string and receives the result back in a reference to
29177 an array.
29178
29179  use Perl::Tidy;
29180
29181  # some messy source code to format
29182  my $source = <<'EOM';
29183  use strict;
29184  my @editors=('Emacs', 'Vi   '); my $rand = rand();
29185  print "A poll of 10 random programmers gave these results:\n";
29186  foreach(0..10) {
29187  my $i=int ($rand+rand());
29188  print " $editors[$i] users are from Venus" . ", " .
29189  "$editors[1-$i] users are from Mars" .
29190  "\n";
29191  }
29192  EOM
29193
29194  # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
29195  my @dest;
29196  perltidy( source => \$source, destination => \@dest );
29197  foreach (@dest) {print}
29198
29199 =head1 Using the B<formatter> Callback Object
29200
29201 The B<formatter> parameter is an optional callback object which allows
29202 the calling program to receive tokenized lines directly from perltidy for
29203 further specialized processing.  When this parameter is used, the two
29204 formatting options which are built into perltidy (beautification or
29205 html) are ignored.  The following diagram illustrates the logical flow:
29206
29207                     |-- (normal route)   -> code beautification
29208   caller->perltidy->|-- (-html flag )    -> create html
29209                     |-- (formatter given)-> callback to write_line
29210
29211 This can be useful for processing perl scripts in some way.  The
29212 parameter C<$formatter> in the perltidy call,
29213
29214         formatter   => $formatter,
29215
29216 is an object created by the caller with a C<write_line> method which
29217 will accept and process tokenized lines, one line per call.  Here is
29218 a simple example of a C<write_line> which merely prints the line number,
29219 the line type (as determined by perltidy), and the text of the line:
29220
29221  sub write_line {
29222
29223      # This is called from perltidy line-by-line
29224      my $self              = shift;
29225      my $line_of_tokens    = shift;
29226      my $line_type         = $line_of_tokens->{_line_type};
29227      my $input_line_number = $line_of_tokens->{_line_number};
29228      my $input_line        = $line_of_tokens->{_line_text};
29229      print "$input_line_number:$line_type:$input_line";
29230  }
29231
29232 The complete program, B<perllinetype>, is contained in the examples section of
29233 the source distribution.  As this example shows, the callback method
29234 receives a parameter B<$line_of_tokens>, which is a reference to a hash
29235 of other useful information.  This example uses these hash entries:
29236
29237  $line_of_tokens->{_line_number} - the line number (1,2,...)
29238  $line_of_tokens->{_line_text}   - the text of the line
29239  $line_of_tokens->{_line_type}   - the type of the line, one of:
29240
29241     SYSTEM         - system-specific code before hash-bang line
29242     CODE           - line of perl code (including comments)
29243     POD_START      - line starting pod, such as '=head'
29244     POD            - pod documentation text
29245     POD_END        - last line of pod section, '=cut'
29246     HERE           - text of here-document
29247     HERE_END       - last line of here-doc (target word)
29248     FORMAT         - format section
29249     FORMAT_END     - last line of format section, '.'
29250     DATA_START     - __DATA__ line
29251     DATA           - unidentified text following __DATA__
29252     END_START      - __END__ line
29253     END            - unidentified text following __END__
29254     ERROR          - we are in big trouble, probably not a perl script
29255
29256 Most applications will be only interested in lines of type B<CODE>.  For
29257 another example, let's write a program which checks for one of the
29258 so-called I<naughty matching variables> C<$`>, C<$&>, and C<$'>, which
29259 can slow down processing.  Here is a B<write_line>, from the example
29260 program B<find_naughty.pl>, which does that:
29261
29262  sub write_line {
29263
29264      # This is called back from perltidy line-by-line
29265      # We're looking for $`, $&, and $'
29266      my ( $self, $line_of_tokens ) = @_;
29267
29268      # pull out some stuff we might need
29269      my $line_type         = $line_of_tokens->{_line_type};
29270      my $input_line_number = $line_of_tokens->{_line_number};
29271      my $input_line        = $line_of_tokens->{_line_text};
29272      my $rtoken_type       = $line_of_tokens->{_rtoken_type};
29273      my $rtokens           = $line_of_tokens->{_rtokens};
29274      chomp $input_line;
29275
29276      # skip comments, pod, etc
29277      return if ( $line_type ne 'CODE' );
29278
29279      # loop over tokens looking for $`, $&, and $'
29280      for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
29281
29282          # we only want to examine token types 'i' (identifier)
29283          next unless $$rtoken_type[$j] eq 'i';
29284
29285          # pull out the actual token text
29286          my $token = $$rtokens[$j];
29287
29288          # and check it
29289          if ( $token =~ /^\$[\`\&\']$/ ) {
29290              print STDERR
29291                "$input_line_number: $token\n";
29292          }
29293      }
29294  }
29295
29296 This example pulls out these tokenization variables from the $line_of_tokens
29297 hash reference:
29298
29299      $rtoken_type = $line_of_tokens->{_rtoken_type};
29300      $rtokens     = $line_of_tokens->{_rtokens};
29301
29302 The variable C<$rtoken_type> is a reference to an array of token type codes,
29303 and C<$rtokens> is a reference to a corresponding array of token text.
29304 These are obviously only defined for lines of type B<CODE>.
29305 Perltidy classifies tokens into types, and has a brief code for each type.
29306 You can get a complete list at any time by running perltidy from the
29307 command line with
29308
29309      perltidy --dump-token-types
29310
29311 In the present example, we are only looking for tokens of type B<i>
29312 (identifiers), so the for loop skips past all other types.  When an
29313 identifier is found, its actual text is checked to see if it is one
29314 being sought.  If so, the above write_line prints the token and its
29315 line number.
29316
29317 The B<formatter> feature is relatively new in perltidy, and further
29318 documentation needs to be written to complete its description.  However,
29319 several example programs have been written and can be found in the
29320 B<examples> section of the source distribution.  Probably the best way
29321 to get started is to find one of the examples which most closely matches
29322 your application and start modifying it.
29323
29324 For help with perltidy's peculiar way of breaking lines into tokens, you
29325 might run, from the command line,
29326
29327  perltidy -D filename
29328
29329 where F<filename> is a short script of interest.  This will produce
29330 F<filename.DEBUG> with interleaved lines of text and their token types.
29331 The B<-D> flag has been in perltidy from the beginning for this purpose.
29332 If you want to see the code which creates this file, it is
29333 C<write_debug_entry> in Tidy.pm.
29334
29335 =head1 EXPORT
29336
29337   &perltidy
29338
29339 =head1 CREDITS
29340
29341 Thanks to Hugh Myers who developed the initial modular interface
29342 to perltidy.
29343
29344 =head1 VERSION
29345
29346 This man page documents Perl::Tidy version 20120701.
29347
29348 =head1 LICENSE
29349
29350 This package is free software; you can redistribute it and/or modify it
29351 under the terms of the "GNU General Public License".
29352
29353 Please refer to the file "COPYING" for details.
29354
29355 =head1 AUTHOR
29356
29357  Steve Hancock
29358  perltidy at users.sourceforge.net
29359
29360 =head1 SEE ALSO
29361
29362 The perltidy(1) man page describes all of the features of perltidy.  It
29363 can be found at http://perltidy.sourceforge.net.
29364
29365 =cut