lib/Perl/Tidy.pm

   1 #
   2 ############################################################
   3 #
   4 #    perltidy - a perl script indenter and formatter
   5 #
   6 #    Copyright (c) 2000-2009 by Steve Hancock
   7 #    Distributed under the GPL license agreement; see file COPYING
   8 #
   9 #    This program is free software; you can redistribute it and/or modify
  10 #    it under the terms of the GNU General Public License as published by
  11 #    the Free Software Foundation; either version 2 of the License, or
  12 #    (at your option) any later version.
  13 #
  14 #    This program is distributed in the hope that it will be useful,
  15 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 #    GNU General Public License for more details.
  18 #
  19 #    You should have received a copy of the GNU General Public License
  20 #    along with this program; if not, write to the Free Software
  21 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22 #
  23 #    For brief instructions instructions, try 'perltidy -h'.
  24 #    For more complete documentation, try 'man perltidy'
  25 #    or visit http://perltidy.sourceforge.net
  26 #
  27 #    This script is an example of the default style.  It was formatted with:
  28 #
  29 #      perltidy Tidy.pm
  30 #
  31 #    Code Contributions: See ChangeLog.html for a complete history.
  32 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  33 #        v-strings.
  34 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  35 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  36 #      Yves Orton supplied coding to help detect Windows versions.
  37 #      Axel Rose supplied a patch for MacPerl.
  38 #      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
  39 #      Dan Tyrell contributed a patch for binary I/O.
  40 #      Ueli Hugenschmidt contributed a patch for -fpsc
  41 #      Sam Kington supplied a patch to identify the initial indentation of
  42 #      entabbed code.
  43 #      jonathan swartz supplied patches for:
  44 #      * .../ pattern, which looks upwards from directory
  45 #      * --notidy, to be used in directories where we want to avoid
  46 #        accidentally tidying
  47 #      * prefilter and postfilter
  48 #      * iterations option
  49 #
  50 #      Many others have supplied key ideas, suggestions, and bug reports;
  51 #        see the CHANGES file.
  52 #
  53 ############################################################
  54
  55 package Perl::Tidy;
  56 use 5.004;    # need IO::File from 5.004 or later
  57 BEGIN { $^W = 1; }    # turn on warnings
  58
  59 use strict;
  60 use Exporter;
  61 use Carp;
  62 $|++;
  63
  64 use vars qw{
  65   $VERSION
  66   @ISA
  67   @EXPORT
  68   $missing_file_spec
  69 };
  70
  71 @ISA    = qw( Exporter );
  72 @EXPORT = qw( &perltidy );
  73
  74 use Cwd;
  75 use IO::File;
  76 use File::Basename;
  77
  78 BEGIN {
  79     ( $VERSION = q($Id: Tidy.pm,v 1.74 2010/12/17 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  80 }
  81
  82 sub streamhandle {
  83
  84     # given filename and mode (r or w), create an object which:
  85     #   has a 'getline' method if mode='r', and
  86     #   has a 'print' method if mode='w'.
  87     # The objects also need a 'close' method.
  88     #
  89     # How the object is made:
  90     #
  91     # if $filename is:     Make object using:
  92     # ----------------     -----------------
  93     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
  94     # string               IO::File
  95     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
  96     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
  97     # object               object
  98     #                      (check for 'print' method for 'w' mode)
  99     #                      (check for 'getline' method for 'r' mode)
 100     my $ref = ref( my $filename = shift );
 101     my $mode = shift;
 102     my $New;
 103     my $fh;
 104
 105     # handle a reference
 106     if ($ref) {
 107         if ( $ref eq 'ARRAY' ) {
 108             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
 109         }
 110         elsif ( $ref eq 'SCALAR' ) {
 111             $New = sub { Perl::Tidy::IOScalar->new(@_) };
 112         }
 113         else {
 114
 115             # Accept an object with a getline method for reading. Note:
 116             # IO::File is built-in and does not respond to the defined
 117             # operator.  If this causes trouble, the check can be
 118             # skipped and we can just let it crash if there is no
 119             # getline.
 120             if ( $mode =~ /[rR]/ ) {
 121                 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 122                     $New = sub { $filename };
 123                 }
 124                 else {
 125                     $New = sub { undef };
 126                     confess <<EOM;
 127 ------------------------------------------------------------------------
 128 No 'getline' method is defined for object of class $ref
 129 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 130 ------------------------------------------------------------------------
 131 EOM
 132                 }
 133             }
 134
 135             # Accept an object with a print method for writing.
 136             # See note above about IO::File
 137             if ( $mode =~ /[wW]/ ) {
 138                 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 139                     $New = sub { $filename };
 140                 }
 141                 else {
 142                     $New = sub { undef };
 143                     confess <<EOM;
 144 ------------------------------------------------------------------------
 145 No 'print' method is defined for object of class $ref
 146 Please check your call to Perl::Tidy::perltidy. Trace follows.
 147 ------------------------------------------------------------------------
 148 EOM
 149                 }
 150             }
 151         }
 152     }
 153
 154     # handle a string
 155     else {
 156         if ( $filename eq '-' ) {
 157             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 158         }
 159         else {
 160             $New = sub { IO::File->new(@_) };
 161         }
 162     }
 163     $fh = $New->( $filename, $mode )
 164       or warn "Couldn't open file:$filename in mode:$mode : $!\n";
 165     return $fh, ( $ref or $filename );
 166 }
 167
 168 sub find_input_line_ending {
 169
 170     # Peek at a file and return first line ending character.
 171     # Quietly return undef in case of any trouble.
 172     my ($input_file) = @_;
 173     my $ending;
 174
 175     # silently ignore input from object or stdin
 176     if ( ref($input_file) || $input_file eq '-' ) {
 177         return $ending;
 178     }
 179     open( INFILE, $input_file ) || return $ending;
 180
 181     binmode INFILE;
 182     my $buf;
 183     read( INFILE, $buf, 1024 );
 184     close INFILE;
 185     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 186         my $test = $1;
 187
 188         # dos
 189         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 190
 191         # mac
 192         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 193
 194         # unix
 195         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 196
 197         # unknown
 198         else { }
 199     }
 200
 201     # no ending seen
 202     else { }
 203
 204     return $ending;
 205 }
 206
 207 sub catfile {
 208
 209     # concatenate a path and file basename
 210     # returns undef in case of error
 211
 212     BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 213
 214     # use File::Spec if we can
 215     unless ($missing_file_spec) {
 216         return File::Spec->catfile(@_);
 217     }
 218
 219     # Perl 5.004 systems may not have File::Spec so we'll make
 220     # a simple try.  We assume File::Basename is available.
 221     # return undef if not successful.
 222     my $name      = pop @_;
 223     my $path      = join '/', @_;
 224     my $test_file = $path . $name;
 225     my ( $test_name, $test_path ) = fileparse($test_file);
 226     return $test_file if ( $test_name eq $name );
 227     return undef if ( $^O eq 'VMS' );
 228
 229     # this should work at least for Windows and Unix:
 230     $test_file = $path . '/' . $name;
 231     ( $test_name, $test_path ) = fileparse($test_file);
 232     return $test_file if ( $test_name eq $name );
 233     return undef;
 234 }
 235
 236 sub make_temporary_filename {
 237
 238     # Make a temporary filename.
 239     #
 240     # The POSIX tmpnam() function tends to be unreliable for non-unix
 241     # systems (at least for the win32 systems that I've tested), so use
 242     # a pre-defined name.  A slight disadvantage of this is that two
 243     # perltidy runs in the same working directory may conflict.
 244     # However, the chance of that is small and managable by the user.
 245     # An alternative would be to check for the file's existance and use,
 246     # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
 247     # keep it simple.
 248     my $name = "perltidy.TMP";
 249     if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
 250         return $name;
 251     }
 252     eval "use POSIX qw(tmpnam)";
 253     if ($@) { return $name }
 254     use IO::File;
 255
 256     # just make a couple of tries before giving up and using the default
 257     for ( 0 .. 1 ) {
 258         my $tmpname = tmpnam();
 259         my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
 260         if ($fh) {
 261             $fh->close();
 262             return ($tmpname);
 263             last;
 264         }
 265     }
 266     return ($name);
 267 }
 268
 269 # Here is a map of the flow of data from the input source to the output
 270 # line sink:
 271 #
 272 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 273 #       input                         groups                 output
 274 #       lines   tokens      lines       of          lines    lines
 275 #                                      lines
 276 #
 277 # The names correspond to the package names responsible for the unit processes.
 278 #
 279 # The overall process is controlled by the "main" package.
 280 #
 281 # LineSource is the stream of input lines
 282 #
 283 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 284 # if necessary.  A token is any section of the input line which should be
 285 # manipulated as a single entity during formatting.  For example, a single
 286 # ',' character is a token, and so is an entire side comment.  It handles
 287 # the complexities of Perl syntax, such as distinguishing between '<<' as
 288 # a shift operator and as a here-document, or distinguishing between '/'
 289 # as a divide symbol and as a pattern delimiter.
 290 #
 291 # Formatter inserts and deletes whitespace between tokens, and breaks
 292 # sequences of tokens at appropriate points as output lines.  It bases its
 293 # decisions on the default rules as modified by any command-line options.
 294 #
 295 # VerticalAligner collects groups of lines together and tries to line up
 296 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 297 #
 298 # FileWriter simply writes lines to the output stream.
 299 #
 300 # The Logger package, not shown, records significant events and warning
 301 # messages.  It writes a .LOG file, which may be saved with a
 302 # '-log' or a '-g' flag.
 303
 304 {
 305
 306     # variables needed by interrupt handler:
 307     my $tokenizer;
 308     my $input_file;
 309
 310     # this routine may be called to give a status report if interrupted.  If a
 311     # parameter is given, it will call exit with that parameter.  This is no
 312     # longer used because it works under Unix but not under Windows.
 313     sub interrupt_handler {
 314
 315         my $exit_flag = shift;
 316         print STDERR "perltidy interrupted";
 317         if ($tokenizer) {
 318             my $input_line_number =
 319               Perl::Tidy::Tokenizer::get_input_line_number();
 320             print STDERR " at line $input_line_number";
 321         }
 322         if ($input_file) {
 323
 324             if   ( ref $input_file ) { print STDERR " of reference to:" }
 325             else                     { print STDERR " of file:" }
 326             print STDERR " $input_file";
 327         }
 328         print STDERR "\n";
 329         exit $exit_flag if defined($exit_flag);
 330     }
 331
 332     sub perltidy {
 333
 334         my %defaults = (
 335             argv                  => undef,
 336             destination           => undef,
 337             formatter             => undef,
 338             logfile               => undef,
 339             errorfile             => undef,
 340             perltidyrc            => undef,
 341             source                => undef,
 342             stderr                => undef,
 343             dump_options          => undef,
 344             dump_options_type     => undef,
 345             dump_getopt_flags     => undef,
 346             dump_options_category => undef,
 347             dump_options_range    => undef,
 348             dump_abbreviations    => undef,
 349             prefilter             => undef,
 350             postfilter            => undef,
 351         );
 352
 353         # don't overwrite callers ARGV
 354         local @ARGV = @ARGV;
 355
 356         my %input_hash = @_;
 357
 358         if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 359             local $" = ')(';
 360             my @good_keys = sort keys %defaults;
 361             @bad_keys = sort @bad_keys;
 362             confess <<EOM;
 363 ------------------------------------------------------------------------
 364 Unknown perltidy parameter : (@bad_keys)
 365 perltidy only understands : (@good_keys)
 366 ------------------------------------------------------------------------
 367
 368 EOM
 369         }
 370
 371         my $get_hash_ref = sub {
 372             my ($key) = @_;
 373             my $hash_ref = $input_hash{$key};
 374             if ( defined($hash_ref) ) {
 375                 unless ( ref($hash_ref) eq 'HASH' ) {
 376                     my $what = ref($hash_ref);
 377                     my $but_is =
 378                       $what ? "but is ref to $what" : "but is not a reference";
 379                     croak <<EOM;
 380 ------------------------------------------------------------------------
 381 error in call to perltidy:
 382 -$key must be reference to HASH $but_is
 383 ------------------------------------------------------------------------
 384 EOM
 385                 }
 386             }
 387             return $hash_ref;
 388         };
 389
 390         %input_hash = ( %defaults, %input_hash );
 391         my $argv               = $input_hash{'argv'};
 392         my $destination_stream = $input_hash{'destination'};
 393         my $errorfile_stream   = $input_hash{'errorfile'};
 394         my $logfile_stream     = $input_hash{'logfile'};
 395         my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 396         my $source_stream      = $input_hash{'source'};
 397         my $stderr_stream      = $input_hash{'stderr'};
 398         my $user_formatter     = $input_hash{'formatter'};
 399         my $prefilter          = $input_hash{'prefilter'};
 400         my $postfilter         = $input_hash{'postfilter'};
 401
 402         # various dump parameters
 403         my $dump_options_type     = $input_hash{'dump_options_type'};
 404         my $dump_options          = $get_hash_ref->('dump_options');
 405         my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
 406         my $dump_options_category = $get_hash_ref->('dump_options_category');
 407         my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
 408         my $dump_options_range    = $get_hash_ref->('dump_options_range');
 409
 410         # validate dump_options_type
 411         if ( defined($dump_options) ) {
 412             unless ( defined($dump_options_type) ) {
 413                 $dump_options_type = 'perltidyrc';
 414             }
 415             unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
 416                 croak <<EOM;
 417 ------------------------------------------------------------------------
 418 Please check value of -dump_options_type in call to perltidy;
 419 saw: '$dump_options_type'
 420 expecting: 'perltidyrc' or 'full'
 421 ------------------------------------------------------------------------
 422 EOM
 423
 424             }
 425         }
 426         else {
 427             $dump_options_type = "";
 428         }
 429
 430         if ($user_formatter) {
 431
 432             # if the user defines a formatter, there is no output stream,
 433             # but we need a null stream to keep coding simple
 434             $destination_stream = Perl::Tidy::DevNull->new();
 435         }
 436
 437         # see if ARGV is overridden
 438         if ( defined($argv) ) {
 439
 440             my $rargv = ref $argv;
 441             if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
 442
 443             # ref to ARRAY
 444             if ($rargv) {
 445                 if ( $rargv eq 'ARRAY' ) {
 446                     @ARGV = @$argv;
 447                 }
 448                 else {
 449                     croak <<EOM;
 450 ------------------------------------------------------------------------
 451 Please check value of -argv in call to perltidy;
 452 it must be a string or ref to ARRAY but is: $rargv
 453 ------------------------------------------------------------------------
 454 EOM
 455                 }
 456             }
 457
 458             # string
 459             else {
 460                 my ( $rargv, $msg ) = parse_args($argv);
 461                 if ($msg) {
 462                     die <<EOM;
 463 Error parsing this string passed to to perltidy with 'argv':
 464 $msg
 465 EOM
 466                 }
 467                 @ARGV = @{$rargv};
 468             }
 469         }
 470
 471         # redirect STDERR if requested
 472         if ($stderr_stream) {
 473             my ( $fh_stderr, $stderr_file ) =
 474               Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 475             if ($fh_stderr) { *STDERR = $fh_stderr }
 476             else {
 477                 croak <<EOM;
 478 ------------------------------------------------------------------------
 479 Unable to redirect STDERR to $stderr_stream
 480 Please check value of -stderr in call to perltidy
 481 ------------------------------------------------------------------------
 482 EOM
 483             }
 484         }
 485
 486         my $rpending_complaint;
 487         $$rpending_complaint = "";
 488         my $rpending_logfile_message;
 489         $$rpending_logfile_message = "";
 490
 491         my ( $is_Windows, $Windows_type ) =
 492           look_for_Windows($rpending_complaint);
 493
 494         # VMS file names are restricted to a 40.40 format, so we append _tdy
 495         # instead of .tdy, etc. (but see also sub check_vms_filename)
 496         my $dot;
 497         my $dot_pattern;
 498         if ( $^O eq 'VMS' ) {
 499             $dot         = '_';
 500             $dot_pattern = '_';
 501         }
 502         else {
 503             $dot         = '.';
 504             $dot_pattern = '\.';    # must escape for use in regex
 505         }
 506
 507         # handle command line options
 508         my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
 509             $rexpansion, $roption_category, $roption_range )
 510           = process_command_line(
 511             $perltidyrc_stream,  $is_Windows, $Windows_type,
 512             $rpending_complaint, $dump_options_type,
 513           );
 514
 515         # return or exit immediately after all dumps
 516         my $quit_now = 0;
 517
 518         # Getopt parameters and their flags
 519         if ( defined($dump_getopt_flags) ) {
 520             $quit_now = 1;
 521             foreach my $op ( @{$roption_string} ) {
 522                 my $opt  = $op;
 523                 my $flag = "";
 524
 525                 # Examples:
 526                 #  some-option=s
 527                 #  some-option=i
 528                 #  some-option:i
 529                 #  some-option!
 530                 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
 531                     $opt  = $1;
 532                     $flag = $2;
 533                 }
 534                 $dump_getopt_flags->{$opt} = $flag;
 535             }
 536         }
 537
 538         if ( defined($dump_options_category) ) {
 539             $quit_now = 1;
 540             %{$dump_options_category} = %{$roption_category};
 541         }
 542
 543         if ( defined($dump_options_range) ) {
 544             $quit_now = 1;
 545             %{$dump_options_range} = %{$roption_range};
 546         }
 547
 548         if ( defined($dump_abbreviations) ) {
 549             $quit_now = 1;
 550             %{$dump_abbreviations} = %{$rexpansion};
 551         }
 552
 553         if ( defined($dump_options) ) {
 554             $quit_now = 1;
 555             %{$dump_options} = %{$rOpts};
 556         }
 557
 558         return if ($quit_now);
 559
 560         # make printable string of options for this run as possible diagnostic
 561         my $readable_options = readable_options( $rOpts, $roption_string );
 562
 563         # dump from command line
 564         if ( $rOpts->{'dump-options'} ) {
 565             print STDOUT $readable_options;
 566             exit 1;
 567         }
 568
 569         check_options( $rOpts, $is_Windows, $Windows_type,
 570             $rpending_complaint );
 571
 572         if ($user_formatter) {
 573             $rOpts->{'format'} = 'user';
 574         }
 575
 576         # there must be one entry here for every possible format
 577         my %default_file_extension = (
 578             tidy => 'tdy',
 579             html => 'html',
 580             user => '',
 581         );
 582
 583         # be sure we have a valid output format
 584         unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 585             my $formats = join ' ',
 586               sort map { "'" . $_ . "'" } keys %default_file_extension;
 587             my $fmt = $rOpts->{'format'};
 588             die "-format='$fmt' but must be one of: $formats\n";
 589         }
 590
 591         my $output_extension =
 592           make_extension( $rOpts->{'output-file-extension'},
 593             $default_file_extension{ $rOpts->{'format'} }, $dot );
 594
 595         my $backup_extension =
 596           make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 597
 598         my $html_toc_extension =
 599           make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 600
 601         my $html_src_extension =
 602           make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 603
 604         # check for -b option;
 605         my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 606           && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
 607           && @ARGV > 0;    # silently ignore if standard input;
 608                            # this allows -b to be in a .perltidyrc file
 609                            # without error messages when running from an editor
 610
 611         # turn off -b with warnings in case of conflicts with other options
 612         if ($in_place_modify) {
 613             if ( $rOpts->{'standard-output'} ) {
 614                 warn "Ignoring -b; you may not use -b and -st together\n";
 615                 $in_place_modify = 0;
 616             }
 617             if ($destination_stream) {
 618                 warn
 619 "Ignoring -b; you may not specify a destination array and -b together\n";
 620                 $in_place_modify = 0;
 621             }
 622             if ($source_stream) {
 623                 warn
 624 "Ignoring -b; you may not specify a source array and -b together\n";
 625                 $in_place_modify = 0;
 626             }
 627             if ( $rOpts->{'outfile'} ) {
 628                 warn "Ignoring -b; you may not use -b and -o together\n";
 629                 $in_place_modify = 0;
 630             }
 631             if ( defined( $rOpts->{'output-path'} ) ) {
 632                 warn "Ignoring -b; you may not use -b and -opath together\n";
 633                 $in_place_modify = 0;
 634             }
 635         }
 636
 637         Perl::Tidy::Formatter::check_options($rOpts);
 638         if ( $rOpts->{'format'} eq 'html' ) {
 639             Perl::Tidy::HtmlWriter->check_options($rOpts);
 640         }
 641
 642         # make the pattern of file extensions that we shouldn't touch
 643         my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 644         if ($output_extension) {
 645             my $ext = quotemeta($output_extension);
 646             $forbidden_file_extensions .= "|$ext";
 647         }
 648         if ( $in_place_modify && $backup_extension ) {
 649             my $ext = quotemeta($backup_extension);
 650             $forbidden_file_extensions .= "|$ext";
 651         }
 652         $forbidden_file_extensions .= ')$';
 653
 654         # Create a diagnostics object if requested;
 655         # This is only useful for code development
 656         my $diagnostics_object = undef;
 657         if ( $rOpts->{'DIAGNOSTICS'} ) {
 658             $diagnostics_object = Perl::Tidy::Diagnostics->new();
 659         }
 660
 661         # no filenames should be given if input is from an array
 662         if ($source_stream) {
 663             if ( @ARGV > 0 ) {
 664                 die
 665 "You may not specify any filenames when a source array is given\n";
 666             }
 667
 668             # we'll stuff the source array into ARGV
 669             unshift( @ARGV, $source_stream );
 670
 671             # No special treatment for source stream which is a filename.
 672             # This will enable checks for binary files and other bad stuff.
 673             $source_stream = undef unless ref($source_stream);
 674         }
 675
 676         # use stdin by default if no source array and no args
 677         else {
 678             unshift( @ARGV, '-' ) unless @ARGV;
 679         }
 680
 681         # loop to process all files in argument list
 682         my $number_of_files = @ARGV;
 683         my $formatter       = undef;
 684         $tokenizer = undef;
 685         while ( $input_file = shift @ARGV ) {
 686             my $fileroot;
 687             my $input_file_permissions;
 688
 689             #---------------------------------------------------------------
 690             # determine the input file name
 691             #---------------------------------------------------------------
 692             if ($source_stream) {
 693                 $fileroot = "perltidy";
 694             }
 695             elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 696                 $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
 697                 $in_place_modify = 0;
 698             }
 699             else {
 700                 $fileroot = $input_file;
 701                 unless ( -e $input_file ) {
 702
 703                     # file doesn't exist - check for a file glob
 704                     if ( $input_file =~ /([\?\*\[\{])/ ) {
 705
 706                         # Windows shell may not remove quotes, so do it
 707                         my $input_file = $input_file;
 708                         if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 709                         if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 710                         my $pattern = fileglob_to_re($input_file);
 711                         ##eval "/$pattern/";
 712                         if ( !$@ && opendir( DIR, './' ) ) {
 713                             my @files =
 714                               grep { /$pattern/ && !-d $_ } readdir(DIR);
 715                             closedir(DIR);
 716                             if (@files) {
 717                                 unshift @ARGV, @files;
 718                                 next;
 719                             }
 720                         }
 721                     }
 722                     print "skipping file: '$input_file': no matches found\n";
 723                     next;
 724                 }
 725
 726                 unless ( -f $input_file ) {
 727                     print "skipping file: $input_file: not a regular file\n";
 728                     next;
 729                 }
 730
 731                 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 732                     print
 733 "skipping file: $input_file: Non-text (override with -f)\n";
 734                     next;
 735                 }
 736
 737                 # we should have a valid filename now
 738                 $fileroot               = $input_file;
 739                 $input_file_permissions = ( stat $input_file )[2] & 07777;
 740
 741                 if ( $^O eq 'VMS' ) {
 742                     ( $fileroot, $dot ) = check_vms_filename($fileroot);
 743                 }
 744
 745                 # add option to change path here
 746                 if ( defined( $rOpts->{'output-path'} ) ) {
 747
 748                     my ( $base, $old_path ) = fileparse($fileroot);
 749                     my $new_path = $rOpts->{'output-path'};
 750                     unless ( -d $new_path ) {
 751                         unless ( mkdir $new_path, 0777 ) {
 752                             die "unable to create directory $new_path: $!\n";
 753                         }
 754                     }
 755                     my $path = $new_path;
 756                     $fileroot = catfile( $path, $base );
 757                     unless ($fileroot) {
 758                         die <<EOM;
 759 ------------------------------------------------------------------------
 760 Problem combining $new_path and $base to make a filename; check -opath
 761 ------------------------------------------------------------------------
 762 EOM
 763                     }
 764                 }
 765             }
 766
 767             # Skip files with same extension as the output files because
 768             # this can lead to a messy situation with files like
 769             # script.tdy.tdy.tdy ... or worse problems ...  when you
 770             # rerun perltidy over and over with wildcard input.
 771             if (
 772                 !$source_stream
 773                 && (   $input_file =~ /$forbidden_file_extensions/o
 774                     || $input_file eq 'DIAGNOSTICS' )
 775               )
 776             {
 777                 print "skipping file: $input_file: wrong extension\n";
 778                 next;
 779             }
 780
 781             # the 'source_object' supplies a method to read the input file
 782             my $source_object =
 783               Perl::Tidy::LineSource->new( $input_file, $rOpts,
 784                 $rpending_logfile_message );
 785             next unless ($source_object);
 786
 787             # Prefilters and postfilters: The prefilter is a code reference
 788             # that will be applied to the source before tidying, and the
 789             # postfilter is a code reference to the result before outputting.
 790             if ($prefilter) {
 791                 my $buf = '';
 792                 while ( my $line = $source_object->get_line() ) {
 793                     $buf .= $line;
 794                 }
 795                 $buf = $prefilter->($buf);
 796
 797                 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
 798                     $rpending_logfile_message );
 799             }
 800
 801             # register this file name with the Diagnostics package
 802             $diagnostics_object->set_input_file($input_file)
 803               if $diagnostics_object;
 804
 805             #---------------------------------------------------------------
 806             # determine the output file name
 807             #---------------------------------------------------------------
 808             my $output_file = undef;
 809             my $actual_output_extension;
 810
 811             if ( $rOpts->{'outfile'} ) {
 812
 813                 if ( $number_of_files <= 1 ) {
 814
 815                     if ( $rOpts->{'standard-output'} ) {
 816                         die "You may not use -o and -st together\n";
 817                     }
 818                     elsif ($destination_stream) {
 819                         die
 820 "You may not specify a destination array and -o together\n";
 821                     }
 822                     elsif ( defined( $rOpts->{'output-path'} ) ) {
 823                         die "You may not specify -o and -opath together\n";
 824                     }
 825                     elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 826                         die "You may not specify -o and -oext together\n";
 827                     }
 828                     $output_file = $rOpts->{outfile};
 829
 830                     # make sure user gives a file name after -o
 831                     if ( $output_file =~ /^-/ ) {
 832                         die "You must specify a valid filename after -o\n";
 833                     }
 834
 835                     # do not overwrite input file with -o
 836                     if ( defined($input_file_permissions)
 837                         && ( $output_file eq $input_file ) )
 838                     {
 839                         die
 840                           "Use 'perltidy -b $input_file' to modify in-place\n";
 841                     }
 842                 }
 843                 else {
 844                     die "You may not use -o with more than one input file\n";
 845                 }
 846             }
 847             elsif ( $rOpts->{'standard-output'} ) {
 848                 if ($destination_stream) {
 849                     die
 850 "You may not specify a destination array and -st together\n";
 851                 }
 852                 $output_file = '-';
 853
 854                 if ( $number_of_files <= 1 ) {
 855                 }
 856                 else {
 857                     die "You may not use -st with more than one input file\n";
 858                 }
 859             }
 860             elsif ($destination_stream) {
 861                 $output_file = $destination_stream;
 862             }
 863             elsif ($source_stream) {  # source but no destination goes to stdout
 864                 $output_file = '-';
 865             }
 866             elsif ( $input_file eq '-' ) {
 867                 $output_file = '-';
 868             }
 869             else {
 870                 if ($in_place_modify) {
 871                     $output_file = IO::File->new_tmpfile()
 872                       or die "cannot open temp file for -b option: $!\n";
 873                 }
 874                 else {
 875                     $actual_output_extension = $output_extension;
 876                     $output_file             = $fileroot . $output_extension;
 877                 }
 878             }
 879
 880             # the 'sink_object' knows how to write the output file
 881             my $tee_file = $fileroot . $dot . "TEE";
 882
 883             my $line_separator = $rOpts->{'output-line-ending'};
 884             if ( $rOpts->{'preserve-line-endings'} ) {
 885                 $line_separator = find_input_line_ending($input_file);
 886             }
 887
 888             # Eventually all I/O may be done with binmode, but for now it is
 889             # only done when a user requests a particular line separator
 890             # through the -ple or -ole flags
 891             my $binmode = 0;
 892             if   ( defined($line_separator) ) { $binmode        = 1 }
 893             else                              { $line_separator = "\n" }
 894
 895             my ( $sink_object, $postfilter_buffer );
 896             if ($postfilter) {
 897                 $sink_object =
 898                   Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
 899                     $line_separator, $rOpts, $rpending_logfile_message,
 900                     $binmode );
 901             }
 902             else {
 903                 $sink_object =
 904                   Perl::Tidy::LineSink->new( $output_file, $tee_file,
 905                     $line_separator, $rOpts, $rpending_logfile_message,
 906                     $binmode );
 907             }
 908
 909             #---------------------------------------------------------------
 910             # initialize the error logger
 911             #---------------------------------------------------------------
 912             my $warning_file = $fileroot . $dot . "ERR";
 913             if ($errorfile_stream) { $warning_file = $errorfile_stream }
 914             my $log_file = $fileroot . $dot . "LOG";
 915             if ($logfile_stream) { $log_file = $logfile_stream }
 916
 917             my $logger_object =
 918               Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 919                 $saw_extrude );
 920             write_logfile_header(
 921                 $rOpts,        $logger_object, $config_file,
 922                 $rraw_options, $Windows_type,  $readable_options,
 923             );
 924             if ($$rpending_logfile_message) {
 925                 $logger_object->write_logfile_entry($$rpending_logfile_message);
 926             }
 927             if ($$rpending_complaint) {
 928                 $logger_object->complain($$rpending_complaint);
 929             }
 930
 931             #---------------------------------------------------------------
 932             # initialize the debug object, if any
 933             #---------------------------------------------------------------
 934             my $debugger_object = undef;
 935             if ( $rOpts->{DEBUG} ) {
 936                 $debugger_object =
 937                   Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 938             }
 939
 940             # loop over iterations
 941             my $max_iterations    = $rOpts->{'iterations'};
 942             my $sink_object_final = $sink_object;
 943             for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
 944                 my $temp_buffer;
 945
 946                 # local copies of some debugging objects which get deleted
 947                 # after first iteration, but will reappear after this loop
 948                 my $debugger_object    = $debugger_object;
 949                 my $logger_object      = $logger_object;
 950                 my $diagnostics_object = $diagnostics_object;
 951
 952                 # output to temp buffer until last iteration
 953                 if ( $iter < $max_iterations ) {
 954                     $sink_object =
 955                       Perl::Tidy::LineSink->new( \$temp_buffer, $tee_file,
 956                         $line_separator, $rOpts, $rpending_logfile_message,
 957                         $binmode );
 958                 }
 959                 else {
 960                     $sink_object = $sink_object_final;
 961
 962                     # terminate some debugging output after first pass
 963                     # to avoid needless output.
 964                     $debugger_object    = undef;
 965                     $logger_object      = undef;
 966                     $diagnostics_object = undef;
 967                 }
 968
 969               #---------------------------------------------------------------
 970               # create a formatter for this file : html writer or pretty printer
 971               #---------------------------------------------------------------
 972
 973                 # we have to delete any old formatter because, for safety,
 974                 # the formatter will check to see that there is only one.
 975                 $formatter = undef;
 976
 977                 if ($user_formatter) {
 978                     $formatter = $user_formatter;
 979                 }
 980                 elsif ( $rOpts->{'format'} eq 'html' ) {
 981                     $formatter =
 982                       Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
 983                         $actual_output_extension, $html_toc_extension,
 984                         $html_src_extension );
 985                 }
 986                 elsif ( $rOpts->{'format'} eq 'tidy' ) {
 987                     $formatter = Perl::Tidy::Formatter->new(
 988                         logger_object      => $logger_object,
 989                         diagnostics_object => $diagnostics_object,
 990                         sink_object        => $sink_object,
 991                     );
 992                 }
 993                 else {
 994                     die "I don't know how to do -format=$rOpts->{'format'}\n";
 995                 }
 996
 997                 unless ($formatter) {
 998                     die
 999                       "Unable to continue with $rOpts->{'format'} formatting\n";
1000                 }
1001
1002                 #---------------------------------------------------------------
1003                 # create the tokenizer for this file
1004                 #---------------------------------------------------------------
1005                 $tokenizer = undef;    # must destroy old tokenizer
1006                 $tokenizer = Perl::Tidy::Tokenizer->new(
1007                     source_object      => $source_object,
1008                     logger_object      => $logger_object,
1009                     debugger_object    => $debugger_object,
1010                     diagnostics_object => $diagnostics_object,
1011                     starting_level => $rOpts->{'starting-indentation-level'},
1012                     tabs           => $rOpts->{'tabs'},
1013                     entab_leading_space => $rOpts->{'entab-leading-whitespace'},
1014                     indent_columns      => $rOpts->{'indent-columns'},
1015                     look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
1016                     look_for_autoloader => $rOpts->{'look-for-autoloader'},
1017                     look_for_selfloader => $rOpts->{'look-for-selfloader'},
1018                     trim_qw             => $rOpts->{'trim-qw'},
1019                 );
1020
1021                 #---------------------------------------------------------------
1022                 # now we can do it
1023                 #---------------------------------------------------------------
1024                 process_this_file( $tokenizer, $formatter );
1025
1026                 #---------------------------------------------------------------
1027                 # close the input source and report errors
1028                 #---------------------------------------------------------------
1029                 $source_object->close_input_file();
1030
1031                 # line source for next iteration (if any) comes from the current
1032                 # temporary buffer
1033                 if ( $iter < $max_iterations ) {
1034                     $source_object =
1035                       Perl::Tidy::LineSource->new( \$temp_buffer, $rOpts,
1036                         $rpending_logfile_message );
1037                 }
1038
1039             }    # end loop over iterations
1040
1041             # get file names to use for syntax check
1042             my $ifname = $source_object->get_input_file_copy_name();
1043             my $ofname = $sink_object->get_output_file_copy();
1044
1045             #---------------------------------------------------------------
1046             # handle the -b option (backup and modify in-place)
1047             #---------------------------------------------------------------
1048             if ($in_place_modify) {
1049                 unless ( -f $input_file ) {
1050
1051                     # oh, oh, no real file to backup ..
1052                     # shouldn't happen because of numerous preliminary checks
1053                     die print
1054 "problem with -b backing up input file '$input_file': not a file\n";
1055                 }
1056                 my $backup_name = $input_file . $backup_extension;
1057                 if ( -f $backup_name ) {
1058                     unlink($backup_name)
1059                       or die
1060 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1061                 }
1062                 rename( $input_file, $backup_name )
1063                   or die
1064 "problem renaming $input_file to $backup_name for -b option: $!\n";
1065                 $ifname = $backup_name;
1066
1067                 seek( $output_file, 0, 0 )
1068                   or die "unable to rewind tmp file for -b option: $!\n";
1069
1070                 my $fout = IO::File->new("> $input_file")
1071                   or die
1072 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
1073                 binmode $fout;
1074                 my $line;
1075                 while ( $line = $output_file->getline() ) {
1076                     $fout->print($line);
1077                 }
1078                 $fout->close();
1079                 $output_file = $input_file;
1080                 $ofname      = $input_file;
1081             }
1082
1083             #---------------------------------------------------------------
1084             # clean up and report errors
1085             #---------------------------------------------------------------
1086             $sink_object->close_output_file()    if $sink_object;
1087             $debugger_object->close_debug_file() if $debugger_object;
1088
1089             if ($postfilter) {
1090                 my $new_sink =
1091                   Perl::Tidy::LineSink->new( $output_file, $tee_file,
1092                     $line_separator, $rOpts, $rpending_logfile_message,
1093                     $binmode );
1094                 my $buf = $postfilter->($postfilter_buffer);
1095                 foreach my $line ( split( "\n", $buf ) ) {
1096                     $new_sink->write_line($line);
1097                 }
1098             }
1099
1100             my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1101             if ($output_file) {
1102
1103                 if ($input_file_permissions) {
1104
1105                     # give output script same permissions as input script, but
1106                     # make it user-writable or else we can't run perltidy again.
1107                     # Thus we retain whatever executable flags were set.
1108                     if ( $rOpts->{'format'} eq 'tidy' ) {
1109                         chmod( $input_file_permissions | 0600, $output_file );
1110                     }
1111
1112                     # else use default permissions for html and any other format
1113
1114                 }
1115                 if ( $logger_object && $rOpts->{'check-syntax'} ) {
1116                     $infile_syntax_ok =
1117                       check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1118                 }
1119             }
1120
1121             $logger_object->finish( $infile_syntax_ok, $formatter )
1122               if $logger_object;
1123         }    # end of loop to process all files
1124     }    # end of main program
1125 }
1126
1127 sub fileglob_to_re {
1128
1129     # modified (corrected) from version in find2perl
1130     my $x = shift;
1131     $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
1132     $x =~ s#\*#.*#g;               # '*' -> '.*'
1133     $x =~ s#\?#.#g;                # '?' -> '.'
1134     "^$x\\z";                      # match whole word
1135 }
1136
1137 sub make_extension {
1138
1139     # Make a file extension, including any leading '.' if necessary
1140     # The '.' may actually be an '_' under VMS
1141     my ( $extension, $default, $dot ) = @_;
1142
1143     # Use the default if none specified
1144     $extension = $default unless ($extension);
1145
1146     # Only extensions with these leading characters get a '.'
1147     # This rule gives the user some freedom
1148     if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1149         $extension = $dot . $extension;
1150     }
1151     return $extension;
1152 }
1153
1154 sub write_logfile_header {
1155     my (
1156         $rOpts,        $logger_object, $config_file,
1157         $rraw_options, $Windows_type,  $readable_options
1158     ) = @_;
1159     $logger_object->write_logfile_entry(
1160 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1161     );
1162     if ($Windows_type) {
1163         $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1164     }
1165     my $options_string = join( ' ', @$rraw_options );
1166
1167     if ($config_file) {
1168         $logger_object->write_logfile_entry(
1169             "Found Configuration File >>> $config_file \n");
1170     }
1171     $logger_object->write_logfile_entry(
1172         "Configuration and command line parameters for this run:\n");
1173     $logger_object->write_logfile_entry("$options_string\n");
1174
1175     if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1176         $rOpts->{'logfile'} = 1;    # force logfile to be saved
1177         $logger_object->write_logfile_entry(
1178             "Final parameter set for this run\n");
1179         $logger_object->write_logfile_entry(
1180             "------------------------------------\n");
1181
1182         $logger_object->write_logfile_entry($readable_options);
1183
1184         $logger_object->write_logfile_entry(
1185             "------------------------------------\n");
1186     }
1187     $logger_object->write_logfile_entry(
1188         "To find error messages search for 'WARNING' with your editor\n");
1189 }
1190
1191 sub generate_options {
1192
1193     ######################################################################
1194     # Generate and return references to:
1195     #  @option_string - the list of options to be passed to Getopt::Long
1196     #  @defaults - the list of default options
1197     #  %expansion - a hash showing how all abbreviations are expanded
1198     #  %category - a hash giving the general category of each option
1199     #  %option_range - a hash giving the valid ranges of certain options
1200
1201     # Note: a few options are not documented in the man page and usage
1202     # message. This is because these are experimental or debug options and
1203     # may or may not be retained in future versions.
1204     #
1205     # Here are the undocumented flags as far as I know.  Any of them
1206     # may disappear at any time.  They are mainly for fine-tuning
1207     # and debugging.
1208     #
1209     # fll --> fuzzy-line-length           # a trivial parameter which gets
1210     #                                       turned off for the extrude option
1211     #                                       which is mainly for debugging
1212     # chk --> check-multiline-quotes      # check for old bug; to be deleted
1213     # scl --> short-concatenation-item-length   # helps break at '.'
1214     # recombine                           # for debugging line breaks
1215     # valign                              # for debugging vertical alignment
1216     # I   --> DIAGNOSTICS                 # for debugging
1217     ######################################################################
1218
1219     # here is a summary of the Getopt codes:
1220     # <none> does not take an argument
1221     # =s takes a mandatory string
1222     # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1223     # =i takes a mandatory integer
1224     # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1225     # ! does not take an argument and may be negated
1226     #  i.e., -foo and -nofoo are allowed
1227     # a double dash signals the end of the options list
1228     #
1229     #---------------------------------------------------------------
1230     # Define the option string passed to GetOptions.
1231     #---------------------------------------------------------------
1232
1233     my @option_string   = ();
1234     my %expansion       = ();
1235     my %option_category = ();
1236     my %option_range    = ();
1237     my $rexpansion      = \%expansion;
1238
1239     # names of categories in manual
1240     # leading integers will allow sorting
1241     my @category_name = (
1242         '0. I/O control',
1243         '1. Basic formatting options',
1244         '2. Code indentation control',
1245         '3. Whitespace control',
1246         '4. Comment controls',
1247         '5. Linebreak controls',
1248         '6. Controlling list formatting',
1249         '7. Retaining or ignoring existing line breaks',
1250         '8. Blank line control',
1251         '9. Other controls',
1252         '10. HTML options',
1253         '11. pod2html options',
1254         '12. Controlling HTML properties',
1255         '13. Debugging',
1256     );
1257
1258     #  These options are parsed directly by perltidy:
1259     #    help h
1260     #    version v
1261     #  However, they are included in the option set so that they will
1262     #  be seen in the options dump.
1263
1264     # These long option names have no abbreviations or are treated specially
1265     @option_string = qw(
1266       html!
1267       noprofile
1268       no-profile
1269       npro
1270       recombine!
1271       valign!
1272       notidy
1273     );
1274
1275     my $category = 13;    # Debugging
1276     foreach (@option_string) {
1277         my $opt = $_;     # must avoid changing the actual flag
1278         $opt =~ s/!$//;
1279         $option_category{$opt} = $category_name[$category];
1280     }
1281
1282     $category = 11;                                       # HTML
1283     $option_category{html} = $category_name[$category];
1284
1285     # routine to install and check options
1286     my $add_option = sub {
1287         my ( $long_name, $short_name, $flag ) = @_;
1288         push @option_string, $long_name . $flag;
1289         $option_category{$long_name} = $category_name[$category];
1290         if ($short_name) {
1291             if ( $expansion{$short_name} ) {
1292                 my $existing_name = $expansion{$short_name}[0];
1293                 die
1294 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1295             }
1296             $expansion{$short_name} = [$long_name];
1297             if ( $flag eq '!' ) {
1298                 my $nshort_name = 'n' . $short_name;
1299                 my $nolong_name = 'no' . $long_name;
1300                 if ( $expansion{$nshort_name} ) {
1301                     my $existing_name = $expansion{$nshort_name}[0];
1302                     die
1303 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1304                 }
1305                 $expansion{$nshort_name} = [$nolong_name];
1306             }
1307         }
1308     };
1309
1310     # Install long option names which have a simple abbreviation.
1311     # Options with code '!' get standard negation ('no' for long names,
1312     # 'n' for abbreviations).  Categories follow the manual.
1313
1314     ###########################
1315     $category = 0;    # I/O_Control
1316     ###########################
1317     $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1318     $add_option->( 'backup-file-extension',      'bext',  '=s' );
1319     $add_option->( 'force-read-binary',          'f',     '!' );
1320     $add_option->( 'format',                     'fmt',   '=s' );
1321     $add_option->( 'iterations',                 'it',    '=i' );
1322     $add_option->( 'logfile',                    'log',   '!' );
1323     $add_option->( 'logfile-gap',                'g',     ':i' );
1324     $add_option->( 'outfile',                    'o',     '=s' );
1325     $add_option->( 'output-file-extension',      'oext',  '=s' );
1326     $add_option->( 'output-path',                'opath', '=s' );
1327     $add_option->( 'profile',                    'pro',   '=s' );
1328     $add_option->( 'quiet',                      'q',     '!' );
1329     $add_option->( 'standard-error-output',      'se',    '!' );
1330     $add_option->( 'standard-output',            'st',    '!' );
1331     $add_option->( 'warning-output',             'w',     '!' );
1332
1333     # options which are both toggle switches and values moved here
1334     # to hide from tidyview (which does not show category 0 flags):
1335     # -ole moved here from category 1
1336     # -sil moved here from category 2
1337     $add_option->( 'output-line-ending',         'ole', '=s' );
1338     $add_option->( 'starting-indentation-level', 'sil', '=i' );
1339
1340     ########################################
1341     $category = 1;    # Basic formatting options
1342     ########################################
1343     $add_option->( 'check-syntax',             'syn',  '!' );
1344     $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
1345     $add_option->( 'indent-columns',           'i',    '=i' );
1346     $add_option->( 'maximum-line-length',      'l',    '=i' );
1347     $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
1348     $add_option->( 'preserve-line-endings',    'ple',  '!' );
1349     $add_option->( 'tabs',                     't',    '!' );
1350
1351     ########################################
1352     $category = 2;    # Code indentation control
1353     ########################################
1354     $add_option->( 'continuation-indentation',           'ci',   '=i' );
1355     $add_option->( 'line-up-parentheses',                'lp',   '!' );
1356     $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1357     $add_option->( 'outdent-keywords',                   'okw',  '!' );
1358     $add_option->( 'outdent-labels',                     'ola',  '!' );
1359     $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1360     $add_option->( 'indent-closing-brace',               'icb',  '!' );
1361     $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1362     $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1363     $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1364     $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1365     $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1366     $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1367
1368     ########################################
1369     $category = 3;    # Whitespace control
1370     ########################################
1371     $add_option->( 'add-semicolons',                            'asc',   '!' );
1372     $add_option->( 'add-whitespace',                            'aws',   '!' );
1373     $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1374     $add_option->( 'brace-tightness',                           'bt',    '=i' );
1375     $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1376     $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1377     $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1378     $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1379     $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1380     $add_option->( 'paren-tightness',                           'pt',    '=i' );
1381     $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1382     $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1383     $add_option->( 'space-function-paren',                      'sfp',   '!' );
1384     $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1385     $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1386     $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1387     $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1388     $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1389     $add_option->( 'trim-qw',                                   'tqw',   '!' );
1390     $add_option->( 'want-left-space',                           'wls',   '=s' );
1391     $add_option->( 'want-right-space',                          'wrs',   '=s' );
1392
1393     ########################################
1394     $category = 4;    # Comment controls
1395     ########################################
1396     $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1397     $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1398     $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1399     $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1400     $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1401     $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1402     $add_option->( 'closing-side-comments',             'csc',  '!' );
1403     $add_option->( 'closing-side-comments-balanced',    'cscb', '!' );
1404     $add_option->( 'format-skipping',                   'fs',   '!' );
1405     $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1406     $add_option->( 'format-skipping-end',               'fse',  '=s' );
1407     $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1408     $add_option->( 'indent-block-comments',             'ibc',  '!' );
1409     $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1410     $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
1411     $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1412     $add_option->( 'outdent-long-comments',             'olc',  '!' );
1413     $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1414     $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1415     $add_option->( 'static-block-comments',             'sbc',  '!' );
1416     $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1417     $add_option->( 'static-side-comments',              'ssc',  '!' );
1418
1419     ########################################
1420     $category = 5;    # Linebreak controls
1421     ########################################
1422     $add_option->( 'add-newlines',                            'anl',   '!' );
1423     $add_option->( 'block-brace-vertical-tightness',          'bbvt',  '=i' );
1424     $add_option->( 'block-brace-vertical-tightness-list',     'bbvtl', '=s' );
1425     $add_option->( 'brace-vertical-tightness',                'bvt',   '=i' );
1426     $add_option->( 'brace-vertical-tightness-closing',        'bvtc',  '=i' );
1427     $add_option->( 'cuddled-else',                            'ce',    '!' );
1428     $add_option->( 'delete-old-newlines',                     'dnl',   '!' );
1429     $add_option->( 'opening-brace-always-on-right',           'bar',   '!' );
1430     $add_option->( 'opening-brace-on-new-line',               'bl',    '!' );
1431     $add_option->( 'opening-hash-brace-right',                'ohbr',  '!' );
1432     $add_option->( 'opening-paren-right',                     'opr',   '!' );
1433     $add_option->( 'opening-square-bracket-right',            'osbr',  '!' );
1434     $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl',  '!' );
1435     $add_option->( 'opening-sub-brace-on-new-line',           'sbl',   '!' );
1436     $add_option->( 'paren-vertical-tightness',                'pvt',   '=i' );
1437     $add_option->( 'paren-vertical-tightness-closing',        'pvtc',  '=i' );
1438     $add_option->( 'stack-closing-hash-brace',                'schb',  '!' );
1439     $add_option->( 'stack-closing-paren',                     'scp',   '!' );
1440     $add_option->( 'stack-closing-square-bracket',            'scsb',  '!' );
1441     $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
1442     $add_option->( 'stack-opening-paren',                     'sop',   '!' );
1443     $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
1444     $add_option->( 'vertical-tightness',                      'vt',    '=i' );
1445     $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
1446     $add_option->( 'want-break-after',                        'wba',   '=s' );
1447     $add_option->( 'want-break-before',                       'wbb',   '=s' );
1448     $add_option->( 'break-after-all-operators',               'baao',  '!' );
1449     $add_option->( 'break-before-all-operators',              'bbao',  '!' );
1450     $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
1451
1452     ########################################
1453     $category = 6;    # Controlling list formatting
1454     ########################################
1455     $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1456     $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1457     $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1458
1459     ########################################
1460     $category = 7;    # Retaining or ignoring existing line breaks
1461     ########################################
1462     $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1463     $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1464     $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1465     $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
1466
1467     ########################################
1468     $category = 8;    # Blank line control
1469     ########################################
1470     $add_option->( 'blanks-before-blocks',            'bbb', '!' );
1471     $add_option->( 'blanks-before-comments',          'bbc', '!' );
1472     $add_option->( 'blanks-before-subs',              'bbs', '!' );
1473     $add_option->( 'long-block-line-count',           'lbl', '=i' );
1474     $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1475     $add_option->( 'keep-old-blank-lines',            'kbl', '=i' );
1476
1477     ########################################
1478     $category = 9;    # Other controls
1479     ########################################
1480     $add_option->( 'delete-block-comments',        'dbc',  '!' );
1481     $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1482     $add_option->( 'delete-pod',                   'dp',   '!' );
1483     $add_option->( 'delete-side-comments',         'dsc',  '!' );
1484     $add_option->( 'tee-block-comments',           'tbc',  '!' );
1485     $add_option->( 'tee-pod',                      'tp',   '!' );
1486     $add_option->( 'tee-side-comments',            'tsc',  '!' );
1487     $add_option->( 'look-for-autoloader',          'lal',  '!' );
1488     $add_option->( 'look-for-hash-bang',           'x',    '!' );
1489     $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1490     $add_option->( 'pass-version-line',            'pvl',  '!' );
1491
1492     ########################################
1493     $category = 13;    # Debugging
1494     ########################################
1495     $add_option->( 'DEBUG',                           'D',    '!' );
1496     $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1497     $add_option->( 'check-multiline-quotes',          'chk',  '!' );
1498     $add_option->( 'dump-defaults',                   'ddf',  '!' );
1499     $add_option->( 'dump-long-names',                 'dln',  '!' );
1500     $add_option->( 'dump-options',                    'dop',  '!' );
1501     $add_option->( 'dump-profile',                    'dpro', '!' );
1502     $add_option->( 'dump-short-names',                'dsn',  '!' );
1503     $add_option->( 'dump-token-types',                'dtt',  '!' );
1504     $add_option->( 'dump-want-left-space',            'dwls', '!' );
1505     $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1506     $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1507     $add_option->( 'help',                            'h',    '' );
1508     $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1509     $add_option->( 'show-options',                    'opt',  '!' );
1510     $add_option->( 'version',                         'v',    '' );
1511
1512     #---------------------------------------------------------------------
1513
1514     # The Perl::Tidy::HtmlWriter will add its own options to the string
1515     Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1516
1517     ########################################
1518     # Set categories 10, 11, 12
1519     ########################################
1520     # Based on their known order
1521     $category = 12;    # HTML properties
1522     foreach my $opt (@option_string) {
1523         my $long_name = $opt;
1524         $long_name =~ s/(!|=.*|:.*)$//;
1525         unless ( defined( $option_category{$long_name} ) ) {
1526             if ( $long_name =~ /^html-linked/ ) {
1527                 $category = 10;    # HTML options
1528             }
1529             elsif ( $long_name =~ /^pod2html/ ) {
1530                 $category = 11;    # Pod2html
1531             }
1532             $option_category{$long_name} = $category_name[$category];
1533         }
1534     }
1535
1536     #---------------------------------------------------------------
1537     # Assign valid ranges to certain options
1538     #---------------------------------------------------------------
1539     # In the future, these may be used to make preliminary checks
1540     # hash keys are long names
1541     # If key or value is undefined:
1542     #   strings may have any value
1543     #   integer ranges are >=0
1544     # If value is defined:
1545     #   value is [qw(any valid words)] for strings
1546     #   value is [min, max] for integers
1547     #   if min is undefined, there is no lower limit
1548     #   if max is undefined, there is no upper limit
1549     # Parameters not listed here have defaults
1550     %option_range = (
1551         'format'             => [ 'tidy', 'html', 'user' ],
1552         'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
1553
1554         'block-brace-tightness'    => [ 0, 2 ],
1555         'brace-tightness'          => [ 0, 2 ],
1556         'paren-tightness'          => [ 0, 2 ],
1557         'square-bracket-tightness' => [ 0, 2 ],
1558
1559         'block-brace-vertical-tightness'            => [ 0, 2 ],
1560         'brace-vertical-tightness'                  => [ 0, 2 ],
1561         'brace-vertical-tightness-closing'          => [ 0, 2 ],
1562         'paren-vertical-tightness'                  => [ 0, 2 ],
1563         'paren-vertical-tightness-closing'          => [ 0, 2 ],
1564         'square-bracket-vertical-tightness'         => [ 0, 2 ],
1565         'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1566         'vertical-tightness'                        => [ 0, 2 ],
1567         'vertical-tightness-closing'                => [ 0, 2 ],
1568
1569         'closing-brace-indentation'          => [ 0, 3 ],
1570         'closing-paren-indentation'          => [ 0, 3 ],
1571         'closing-square-bracket-indentation' => [ 0, 3 ],
1572         'closing-token-indentation'          => [ 0, 3 ],
1573
1574         'closing-side-comment-else-flag' => [ 0, 2 ],
1575         'comma-arrow-breakpoints'        => [ 0, 3 ],
1576     );
1577
1578     # Note: we could actually allow negative ci if someone really wants it:
1579     # $option_range{'continuation-indentation'} = [ undef, undef ];
1580
1581     #---------------------------------------------------------------
1582     # Assign default values to the above options here, except
1583     # for 'outfile' and 'help'.
1584     # These settings should approximate the perlstyle(1) suggestions.
1585     #---------------------------------------------------------------
1586     my @defaults = qw(
1587       add-newlines
1588       add-semicolons
1589       add-whitespace
1590       blanks-before-blocks
1591       blanks-before-comments
1592       blanks-before-subs
1593       block-brace-tightness=0
1594       block-brace-vertical-tightness=0
1595       brace-tightness=1
1596       brace-vertical-tightness-closing=0
1597       brace-vertical-tightness=0
1598       break-at-old-logical-breakpoints
1599       break-at-old-ternary-breakpoints
1600       break-at-old-keyword-breakpoints
1601       comma-arrow-breakpoints=1
1602       nocheck-syntax
1603       closing-side-comment-interval=6
1604       closing-side-comment-maximum-text=20
1605       closing-side-comment-else-flag=0
1606       closing-side-comments-balanced
1607       closing-paren-indentation=0
1608       closing-brace-indentation=0
1609       closing-square-bracket-indentation=0
1610       continuation-indentation=2
1611       delete-old-newlines
1612       delete-semicolons
1613       fuzzy-line-length
1614       hanging-side-comments
1615       indent-block-comments
1616       indent-columns=4
1617       iterations=1
1618       keep-old-blank-lines=1
1619       long-block-line-count=8
1620       look-for-autoloader
1621       look-for-selfloader
1622       maximum-consecutive-blank-lines=1
1623       maximum-fields-per-table=0
1624       maximum-line-length=80
1625       minimum-space-to-comment=4
1626       nobrace-left-and-indent
1627       nocuddled-else
1628       nodelete-old-whitespace
1629       nohtml
1630       nologfile
1631       noquiet
1632       noshow-options
1633       nostatic-side-comments
1634       notabs
1635       nowarning-output
1636       outdent-labels
1637       outdent-long-quotes
1638       outdent-long-comments
1639       paren-tightness=1
1640       paren-vertical-tightness-closing=0
1641       paren-vertical-tightness=0
1642       pass-version-line
1643       recombine
1644       valign
1645       short-concatenation-item-length=8
1646       space-for-semicolon
1647       square-bracket-tightness=1
1648       square-bracket-vertical-tightness-closing=0
1649       square-bracket-vertical-tightness=0
1650       static-block-comments
1651       trim-qw
1652       format=tidy
1653       backup-file-extension=bak
1654       format-skipping
1655
1656       pod2html
1657       html-table-of-contents
1658       html-entities
1659     );
1660
1661     push @defaults, "perl-syntax-check-flags=-c -T";
1662
1663     #---------------------------------------------------------------
1664     # Define abbreviations which will be expanded into the above primitives.
1665     # These may be defined recursively.
1666     #---------------------------------------------------------------
1667     %expansion = (
1668         %expansion,
1669         'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
1670         'fnl'               => [qw(freeze-newlines)],
1671         'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1672         'fws'               => [qw(freeze-whitespace)],
1673         'freeze-blank-lines' =>
1674           [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1675         'fbl'                => [qw(freeze-blank-lines)],
1676         'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1677         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1678         'nooutdent-long-lines' =>
1679           [qw(nooutdent-long-quotes nooutdent-long-comments)],
1680         'noll' => [qw(nooutdent-long-lines)],
1681         'io'   => [qw(indent-only)],
1682         'delete-all-comments' =>
1683           [qw(delete-block-comments delete-side-comments delete-pod)],
1684         'nodelete-all-comments' =>
1685           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1686         'dac'  => [qw(delete-all-comments)],
1687         'ndac' => [qw(nodelete-all-comments)],
1688         'gnu'  => [qw(gnu-style)],
1689         'pbp'  => [qw(perl-best-practices)],
1690         'tee-all-comments' =>
1691           [qw(tee-block-comments tee-side-comments tee-pod)],
1692         'notee-all-comments' =>
1693           [qw(notee-block-comments notee-side-comments notee-pod)],
1694         'tac'   => [qw(tee-all-comments)],
1695         'ntac'  => [qw(notee-all-comments)],
1696         'html'  => [qw(format=html)],
1697         'nhtml' => [qw(format=tidy)],
1698         'tidy'  => [qw(format=tidy)],
1699
1700         'swallow-optional-blank-lines'   => [qw(kbl=0)],
1701         'noswallow-optional-blank-lines' => [qw(kbl=1)],
1702         'sob'                            => [qw(kbl=0)],
1703         'nsob'                           => [qw(kbl=1)],
1704
1705         'break-after-comma-arrows'   => [qw(cab=0)],
1706         'nobreak-after-comma-arrows' => [qw(cab=1)],
1707         'baa'                        => [qw(cab=0)],
1708         'nbaa'                       => [qw(cab=1)],
1709
1710         'break-at-old-trinary-breakpoints' => [qw(bot)],
1711
1712         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1713         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1714         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1715         'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1716         'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1717
1718         'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1719         'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1720         'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1721         'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1722         'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1723
1724         'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1725         'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1726         'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1727
1728         'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1729         'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1730         'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1731
1732         'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1733         'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1734         'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1735
1736         'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1737         'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1738         'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1739
1740         'otr'                   => [qw(opr ohbr osbr)],
1741         'opening-token-right'   => [qw(opr ohbr osbr)],
1742         'notr'                  => [qw(nopr nohbr nosbr)],
1743         'noopening-token-right' => [qw(nopr nohbr nosbr)],
1744
1745         'sot'                    => [qw(sop sohb sosb)],
1746         'nsot'                   => [qw(nsop nsohb nsosb)],
1747         'stack-opening-tokens'   => [qw(sop sohb sosb)],
1748         'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1749
1750         'sct'                    => [qw(scp schb scsb)],
1751         'stack-closing-tokens'   => => [qw(scp schb scsb)],
1752         'nsct'                   => [qw(nscp nschb nscsb)],
1753         'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1754
1755         # 'mangle' originally deleted pod and comments, but to keep it
1756         # reversible, it no longer does.  But if you really want to
1757         # delete them, just use:
1758         #   -mangle -dac
1759
1760         # An interesting use for 'mangle' is to do this:
1761         #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1762         # which will form as many one-line blocks as possible
1763
1764         'mangle' => [
1765             qw(
1766               check-syntax
1767               keep-old-blank-lines=0
1768               delete-old-newlines
1769               delete-old-whitespace
1770               delete-semicolons
1771               indent-columns=0
1772               maximum-consecutive-blank-lines=0
1773               maximum-line-length=100000
1774               noadd-newlines
1775               noadd-semicolons
1776               noadd-whitespace
1777               noblanks-before-blocks
1778               noblanks-before-subs
1779               notabs
1780               )
1781         ],
1782
1783         # 'extrude' originally deleted pod and comments, but to keep it
1784         # reversible, it no longer does.  But if you really want to
1785         # delete them, just use
1786         #   extrude -dac
1787         #
1788         # An interesting use for 'extrude' is to do this:
1789         #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1790         # which will break up all one-line blocks.
1791
1792         'extrude' => [
1793             qw(
1794               check-syntax
1795               ci=0
1796               delete-old-newlines
1797               delete-old-whitespace
1798               delete-semicolons
1799               indent-columns=0
1800               maximum-consecutive-blank-lines=0
1801               maximum-line-length=1
1802               noadd-semicolons
1803               noadd-whitespace
1804               noblanks-before-blocks
1805               noblanks-before-subs
1806               nofuzzy-line-length
1807               notabs
1808               norecombine
1809               )
1810         ],
1811
1812         # this style tries to follow the GNU Coding Standards (which do
1813         # not really apply to perl but which are followed by some perl
1814         # programmers).
1815         'gnu-style' => [
1816             qw(
1817               lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1818               )
1819         ],
1820
1821         # Style suggested in Damian Conway's Perl Best Practices
1822         'perl-best-practices' => [
1823             qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1824 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
1825         ],
1826
1827         # Additional styles can be added here
1828     );
1829
1830     Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1831
1832     # Uncomment next line to dump all expansions for debugging:
1833     # dump_short_names(\%expansion);
1834     return (
1835         \@option_string,   \@defaults, \%expansion,
1836         \%option_category, \%option_range
1837     );
1838
1839 }    # end of generate_options
1840
1841 sub process_command_line {
1842
1843     my (
1844         $perltidyrc_stream,  $is_Windows, $Windows_type,
1845         $rpending_complaint, $dump_options_type
1846     ) = @_;
1847
1848     use Getopt::Long;
1849
1850     my (
1851         $roption_string,   $rdefaults, $rexpansion,
1852         $roption_category, $roption_range
1853     ) = generate_options();
1854
1855     #---------------------------------------------------------------
1856     # set the defaults by passing the above list through GetOptions
1857     #---------------------------------------------------------------
1858     my %Opts = ();
1859     {
1860         local @ARGV;
1861         my $i;
1862
1863         # do not load the defaults if we are just dumping perltidyrc
1864         unless ( $dump_options_type eq 'perltidyrc' ) {
1865             for $i (@$rdefaults) { push @ARGV, "--" . $i }
1866         }
1867
1868         # Patch to save users Getopt::Long configuration
1869         # and set to Getopt::Long defaults.  Use eval to avoid
1870         # breaking old versions of Perl without these routines.
1871         my $glc;
1872         eval { $glc = Getopt::Long::Configure() };
1873         unless ($@) {
1874             eval { Getopt::Long::ConfigDefaults() };
1875         }
1876         else { $glc = undef }
1877
1878         if ( !GetOptions( \%Opts, @$roption_string ) ) {
1879             die "Programming Bug: error in setting default options";
1880         }
1881
1882         # Patch to put the previous Getopt::Long configuration back
1883         eval { Getopt::Long::Configure($glc) } if defined $glc;
1884     }
1885
1886     my $word;
1887     my @raw_options        = ();
1888     my $config_file        = "";
1889     my $saw_ignore_profile = 0;
1890     my $saw_extrude        = 0;
1891     my $saw_dump_profile   = 0;
1892     my $i;
1893
1894     #---------------------------------------------------------------
1895     # Take a first look at the command-line parameters.  Do as many
1896     # immediate dumps as possible, which can avoid confusion if the
1897     # perltidyrc file has an error.
1898     #---------------------------------------------------------------
1899     foreach $i (@ARGV) {
1900
1901         $i =~ s/^--/-/;
1902         if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1903             $saw_ignore_profile = 1;
1904         }
1905
1906         # note: this must come before -pro and -profile, below:
1907         elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1908             $saw_dump_profile = 1;
1909         }
1910         elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1911             if ($config_file) {
1912                 warn
1913 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1914             }
1915             $config_file = $2;
1916
1917             # resolve <dir>/.../<file>, meaning look upwards from directory
1918             if ( defined($config_file) ) {
1919                 if ( my ( $start_dir, $search_file ) =
1920                     ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
1921                 {
1922                     $start_dir = '.' if !$start_dir;
1923                     $start_dir = Cwd::realpath($start_dir);
1924                     if ( my $found_file =
1925                         find_file_upwards( $start_dir, $search_file ) )
1926                     {
1927                         $config_file = $found_file;
1928                     }
1929                 }
1930             }
1931             unless ( -e $config_file ) {
1932                 warn "cannot find file given with -pro=$config_file: $!\n";
1933                 $config_file = "";
1934             }
1935         }
1936         elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1937             die "usage: -pro=filename or --profile=filename, no spaces\n";
1938         }
1939         elsif ( $i =~ /^-extrude$/ ) {
1940             $saw_extrude = 1;
1941         }
1942         elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1943             usage();
1944             exit 1;
1945         }
1946         elsif ( $i =~ /^-(version|v)$/ ) {
1947             show_version();
1948             exit 1;
1949         }
1950         elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1951             dump_defaults(@$rdefaults);
1952             exit 1;
1953         }
1954         elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1955             dump_long_names(@$roption_string);
1956             exit 1;
1957         }
1958         elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1959             dump_short_names($rexpansion);
1960             exit 1;
1961         }
1962         elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1963             Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1964             exit 1;
1965         }
1966     }
1967
1968     if ( $saw_dump_profile && $saw_ignore_profile ) {
1969         warn "No profile to dump because of -npro\n";
1970         exit 1;
1971     }
1972
1973     #---------------------------------------------------------------
1974     # read any .perltidyrc configuration file
1975     #---------------------------------------------------------------
1976     unless ($saw_ignore_profile) {
1977
1978         # resolve possible conflict between $perltidyrc_stream passed
1979         # as call parameter to perltidy and -pro=filename on command
1980         # line.
1981         if ($perltidyrc_stream) {
1982             if ($config_file) {
1983                 warn <<EOM;
1984  Conflict: a perltidyrc configuration file was specified both as this
1985  perltidy call parameter: $perltidyrc_stream
1986  and with this -profile=$config_file.
1987  Using -profile=$config_file.
1988 EOM
1989             }
1990             else {
1991                 $config_file = $perltidyrc_stream;
1992             }
1993         }
1994
1995         # look for a config file if we don't have one yet
1996         my $rconfig_file_chatter;
1997         $$rconfig_file_chatter = "";
1998         $config_file =
1999           find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2000             $rpending_complaint )
2001           unless $config_file;
2002
2003         # open any config file
2004         my $fh_config;
2005         if ($config_file) {
2006             ( $fh_config, $config_file ) =
2007               Perl::Tidy::streamhandle( $config_file, 'r' );
2008             unless ($fh_config) {
2009                 $$rconfig_file_chatter .=
2010                   "# $config_file exists but cannot be opened\n";
2011             }
2012         }
2013
2014         if ($saw_dump_profile) {
2015             if ($saw_dump_profile) {
2016                 dump_config_file( $fh_config, $config_file,
2017                     $rconfig_file_chatter );
2018                 exit 1;
2019             }
2020         }
2021
2022         if ($fh_config) {
2023
2024             my ( $rconfig_list, $death_message ) =
2025               read_config_file( $fh_config, $config_file, $rexpansion );
2026             die $death_message if ($death_message);
2027
2028             # process any .perltidyrc parameters right now so we can
2029             # localize errors
2030             if (@$rconfig_list) {
2031                 local @ARGV = @$rconfig_list;
2032
2033                 expand_command_abbreviations( $rexpansion, \@raw_options,
2034                     $config_file );
2035
2036                 if ( !GetOptions( \%Opts, @$roption_string ) ) {
2037                     die
2038 "Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
2039                 }
2040
2041                 # Anything left in this local @ARGV is an error and must be
2042                 # invalid bare words from the configuration file.  We cannot
2043                 # check this earlier because bare words may have been valid
2044                 # values for parameters.  We had to wait for GetOptions to have
2045                 # a look at @ARGV.
2046                 if (@ARGV) {
2047                     my $count = @ARGV;
2048                     my $str   = "\'" . pop(@ARGV) . "\'";
2049                     while ( my $param = pop(@ARGV) ) {
2050                         if ( length($str) < 70 ) {
2051                             $str .= ", '$param'";
2052                         }
2053                         else {
2054                             $str .= ", ...";
2055                             last;
2056                         }
2057                     }
2058                     die <<EOM;
2059 There are $count unrecognized values in the configuration file '$config_file':
2060 $str
2061 Use leading dashes for parameters.  Use -npro to ignore this file.
2062 EOM
2063                 }
2064
2065                 # Undo any options which cause premature exit.  They are not
2066                 # appropriate for a config file, and it could be hard to
2067                 # diagnose the cause of the premature exit.
2068                 foreach (
2069                     qw{
2070                     dump-defaults
2071                     dump-long-names
2072                     dump-options
2073                     dump-profile
2074                     dump-short-names
2075                     dump-token-types
2076                     dump-want-left-space
2077                     dump-want-right-space
2078                     help
2079                     stylesheet
2080                     version
2081                     }
2082                   )
2083                 {
2084
2085                     if ( defined( $Opts{$_} ) ) {
2086                         delete $Opts{$_};
2087                         warn "ignoring --$_ in config file: $config_file\n";
2088                     }
2089                 }
2090             }
2091         }
2092     }
2093
2094     #---------------------------------------------------------------
2095     # now process the command line parameters
2096     #---------------------------------------------------------------
2097     expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2098
2099     if ( !GetOptions( \%Opts, @$roption_string ) ) {
2100         die "Error on command line; for help try 'perltidy -h'\n";
2101     }
2102
2103     return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
2104         $rexpansion, $roption_category, $roption_range );
2105 }    # end of process_command_line
2106
2107 sub check_options {
2108
2109     my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2110
2111     #---------------------------------------------------------------
2112     # check and handle any interactions among the basic options..
2113     #---------------------------------------------------------------
2114
2115     # Since -vt, -vtc, and -cti are abbreviations, but under
2116     # msdos, an unquoted input parameter like vtc=1 will be
2117     # seen as 2 parameters, vtc and 1, so the abbreviations
2118     # won't be seen.  Therefore, we will catch them here if
2119     # they get through.
2120
2121     if ( defined $rOpts->{'vertical-tightness'} ) {
2122         my $vt = $rOpts->{'vertical-tightness'};
2123         $rOpts->{'paren-vertical-tightness'}          = $vt;
2124         $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2125         $rOpts->{'brace-vertical-tightness'}          = $vt;
2126     }
2127
2128     if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2129         my $vtc = $rOpts->{'vertical-tightness-closing'};
2130         $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
2131         $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2132         $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
2133     }
2134
2135     if ( defined $rOpts->{'closing-token-indentation'} ) {
2136         my $cti = $rOpts->{'closing-token-indentation'};
2137         $rOpts->{'closing-square-bracket-indentation'} = $cti;
2138         $rOpts->{'closing-brace-indentation'}          = $cti;
2139         $rOpts->{'closing-paren-indentation'}          = $cti;
2140     }
2141
2142     # In quiet mode, there is no log file and hence no way to report
2143     # results of syntax check, so don't do it.
2144     if ( $rOpts->{'quiet'} ) {
2145         $rOpts->{'check-syntax'} = 0;
2146     }
2147
2148     # can't check syntax if no output
2149     if ( $rOpts->{'format'} ne 'tidy' ) {
2150         $rOpts->{'check-syntax'} = 0;
2151     }
2152
2153     # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2154     # wide variety of nasty problems on these systems, because they cannot
2155     # reliably run backticks.  Don't even think about changing this!
2156     if (   $rOpts->{'check-syntax'}
2157         && $is_Windows
2158         && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2159     {
2160         $rOpts->{'check-syntax'} = 0;
2161     }
2162
2163     # It's really a bad idea to check syntax as root unless you wrote
2164     # the script yourself.  FIXME: not sure if this works with VMS
2165     unless ($is_Windows) {
2166
2167         if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2168             $rOpts->{'check-syntax'} = 0;
2169             $$rpending_complaint .=
2170 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2171         }
2172     }
2173
2174     # check iteration count and quietly fix if necessary:
2175     # - iterations option only applies to code beautification mode
2176     # - it shouldn't be nessary to use more than about 2 iterations
2177     if ( $rOpts->{'format'} ne 'tidy' ) {
2178         $rOpts->{'iterations'} = 1;
2179     }
2180     elsif ( defined( $rOpts->{'iterations'} ) ) {
2181         if    ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2182         elsif ( $rOpts->{'iterations'} > 5 )  { $rOpts->{'iterations'} = 5 }
2183     }
2184     else {
2185         $rOpts->{'iterations'} = 1;
2186     }
2187
2188     # see if user set a non-negative logfile-gap
2189     if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2190
2191         # a zero gap will be taken as a 1
2192         if ( $rOpts->{'logfile-gap'} == 0 ) {
2193             $rOpts->{'logfile-gap'} = 1;
2194         }
2195
2196         # setting a non-negative logfile gap causes logfile to be saved
2197         $rOpts->{'logfile'} = 1;
2198     }
2199
2200     # not setting logfile gap, or setting it negative, causes default of 50
2201     else {
2202         $rOpts->{'logfile-gap'} = 50;
2203     }
2204
2205     # set short-cut flag when only indentation is to be done.
2206     # Note that the user may or may not have already set the
2207     # indent-only flag.
2208     if (   !$rOpts->{'add-whitespace'}
2209         && !$rOpts->{'delete-old-whitespace'}
2210         && !$rOpts->{'add-newlines'}
2211         && !$rOpts->{'delete-old-newlines'} )
2212     {
2213         $rOpts->{'indent-only'} = 1;
2214     }
2215
2216     # -isbc implies -ibc
2217     if ( $rOpts->{'indent-spaced-block-comments'} ) {
2218         $rOpts->{'indent-block-comments'} = 1;
2219     }
2220
2221     # -bli flag implies -bl
2222     if ( $rOpts->{'brace-left-and-indent'} ) {
2223         $rOpts->{'opening-brace-on-new-line'} = 1;
2224     }
2225
2226     if (   $rOpts->{'opening-brace-always-on-right'}
2227         && $rOpts->{'opening-brace-on-new-line'} )
2228     {
2229         warn <<EOM;
2230  Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2231   'opening-brace-on-new-line' (-bl).  Ignoring -bl.
2232 EOM
2233         $rOpts->{'opening-brace-on-new-line'} = 0;
2234     }
2235
2236     # it simplifies things if -bl is 0 rather than undefined
2237     if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2238         $rOpts->{'opening-brace-on-new-line'} = 0;
2239     }
2240
2241     # -sbl defaults to -bl if not defined
2242     if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2243         $rOpts->{'opening-sub-brace-on-new-line'} =
2244           $rOpts->{'opening-brace-on-new-line'};
2245     }
2246
2247     if ( $rOpts->{'entab-leading-whitespace'} ) {
2248         if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2249             warn "-et=n must use a positive integer; ignoring -et\n";
2250             $rOpts->{'entab-leading-whitespace'} = undef;
2251         }
2252
2253         # entab leading whitespace has priority over the older 'tabs' option
2254         if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2255     }
2256 }
2257
2258 sub find_file_upwards {
2259     my ( $search_dir, $search_file ) = @_;
2260
2261     $search_dir  =~ s{/+$}{};
2262     $search_file =~ s{^/+}{};
2263
2264     while (1) {
2265         my $try_path = "$search_dir/$search_file";
2266         if ( -f $try_path ) {
2267             return $try_path;
2268         }
2269         elsif ( $search_dir eq '/' ) {
2270             return undef;
2271         }
2272         else {
2273             $search_dir = dirname($search_dir);
2274         }
2275     }
2276 }
2277
2278 sub expand_command_abbreviations {
2279
2280     # go through @ARGV and expand any abbreviations
2281
2282     my ( $rexpansion, $rraw_options, $config_file ) = @_;
2283     my ($word);
2284
2285     # set a pass limit to prevent an infinite loop;
2286     # 10 should be plenty, but it may be increased to allow deeply
2287     # nested expansions.
2288     my $max_passes = 10;
2289     my @new_argv   = ();
2290
2291     # keep looping until all expansions have been converted into actual
2292     # dash parameters..
2293     for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2294         my @new_argv     = ();
2295         my $abbrev_count = 0;
2296
2297         # loop over each item in @ARGV..
2298         foreach $word (@ARGV) {
2299
2300             # convert any leading 'no-' to just 'no'
2301             if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2302
2303             # if it is a dash flag (instead of a file name)..
2304             if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2305
2306                 my $abr   = $1;
2307                 my $flags = $2;
2308
2309                 # save the raw input for debug output in case of circular refs
2310                 if ( $pass_count == 0 ) {
2311                     push( @$rraw_options, $word );
2312                 }
2313
2314                 # recombine abbreviation and flag, if necessary,
2315                 # to allow abbreviations with arguments such as '-vt=1'
2316                 if ( $rexpansion->{ $abr . $flags } ) {
2317                     $abr   = $abr . $flags;
2318                     $flags = "";
2319                 }
2320
2321                 # if we see this dash item in the expansion hash..
2322                 if ( $rexpansion->{$abr} ) {
2323                     $abbrev_count++;
2324
2325                     # stuff all of the words that it expands to into the
2326                     # new arg list for the next pass
2327                     foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2328                         next unless $abbrev;    # for safety; shouldn't happen
2329                         push( @new_argv, '--' . $abbrev . $flags );
2330                     }
2331                 }
2332
2333                 # not in expansion hash, must be actual long name
2334                 else {
2335                     push( @new_argv, $word );
2336                 }
2337             }
2338
2339             # not a dash item, so just save it for the next pass
2340             else {
2341                 push( @new_argv, $word );
2342             }
2343         }    # end of this pass
2344
2345         # update parameter list @ARGV to the new one
2346         @ARGV = @new_argv;
2347         last unless ( $abbrev_count > 0 );
2348
2349         # make sure we are not in an infinite loop
2350         if ( $pass_count == $max_passes ) {
2351             print STDERR
2352 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2353             print STDERR "Here are the raw options\n";
2354             local $" = ')(';
2355             print STDERR "(@$rraw_options)\n";
2356             my $num = @new_argv;
2357
2358             if ( $num < 50 ) {
2359                 print STDERR "After $max_passes passes here is ARGV\n";
2360                 print STDERR "(@new_argv)\n";
2361             }
2362             else {
2363                 print STDERR "After $max_passes passes ARGV has $num entries\n";
2364             }
2365
2366             if ($config_file) {
2367                 die <<"DIE";
2368 Please check your configuration file $config_file for circular-references.
2369 To deactivate it, use -npro.
2370 DIE
2371             }
2372             else {
2373                 die <<'DIE';
2374 Program bug - circular-references in the %expansion hash, probably due to
2375 a recent program change.
2376 DIE
2377             }
2378         }    # end of check for circular references
2379     }    # end of loop over all passes
2380 }
2381
2382 # Debug routine -- this will dump the expansion hash
2383 sub dump_short_names {
2384     my $rexpansion = shift;
2385     print STDOUT <<EOM;
2386 List of short names.  This list shows how all abbreviations are
2387 translated into other abbreviations and, eventually, into long names.
2388 New abbreviations may be defined in a .perltidyrc file.
2389 For a list of all long names, use perltidy --dump-long-names (-dln).
2390 --------------------------------------------------------------------------
2391 EOM
2392     foreach my $abbrev ( sort keys %$rexpansion ) {
2393         my @list = @{ $$rexpansion{$abbrev} };
2394         print STDOUT "$abbrev --> @list\n";
2395     }
2396 }
2397
2398 sub check_vms_filename {
2399
2400     # given a valid filename (the perltidy input file)
2401     # create a modified filename and separator character
2402     # suitable for VMS.
2403     #
2404     # Contributed by Michael Cartmell
2405     #
2406     my ( $base, $path ) = fileparse( $_[0] );
2407
2408     # remove explicit ; version
2409     $base =~ s/;-?\d*$//
2410
2411       # remove explicit . version ie two dots in filename NB ^ escapes a dot
2412       or $base =~ s/(          # begin capture $1
2413                   (?:^|[^^])\. # match a dot not preceded by a caret
2414                   (?:          # followed by nothing
2415                     |          # or
2416                     .*[^^]     # anything ending in a non caret
2417                   )
2418                 )              # end capture $1
2419                 \.-?\d*$       # match . version number
2420               /$1/x;
2421
2422     # normalise filename, if there are no unescaped dots then append one
2423     $base .= '.' unless $base =~ /(?:^|[^^])\./;
2424
2425     # if we don't already have an extension then we just append the extention
2426     my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2427     return ( $path . $base, $separator );
2428 }
2429
2430 sub Win_OS_Type {
2431
2432     # TODO: are these more standard names?
2433     # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2434
2435     # Returns a string that determines what MS OS we are on.
2436     # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2437     # Returns blank string if not an MS system.
2438     # Original code contributed by: Yves Orton
2439     # We need to know this to decide where to look for config files
2440
2441     my $rpending_complaint = shift;
2442     my $os                 = "";
2443     return $os unless $^O =~ /win32|dos/i;    # is it a MS box?
2444
2445     # Systems built from Perl source may not have Win32.pm
2446     # But probably have Win32::GetOSVersion() anyway so the
2447     # following line is not 'required':
2448     # return $os unless eval('require Win32');
2449
2450     # Use the standard API call to determine the version
2451     my ( $undef, $major, $minor, $build, $id );
2452     eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2453
2454     #
2455     #    NAME                   ID   MAJOR  MINOR
2456     #    Windows NT 4           2      4       0
2457     #    Windows 2000           2      5       0
2458     #    Windows XP             2      5       1
2459     #    Windows Server 2003    2      5       2
2460
2461     return "win32s" unless $id;    # If id==0 then its a win32s box.
2462     $os = {                        # Magic numbers from MSDN
2463                                    # documentation of GetOSVersion
2464         1 => {
2465             0  => "95",
2466             10 => "98",
2467             90 => "Me"
2468         },
2469         2 => {
2470             0  => "2000",          # or NT 4, see below
2471             1  => "XP/.Net",
2472             2  => "Win2003",
2473             51 => "NT3.51"
2474         }
2475     }->{$id}->{$minor};
2476
2477     # If $os is undefined, the above code is out of date.  Suggested updates
2478     # are welcome.
2479     unless ( defined $os ) {
2480         $os = "";
2481         $$rpending_complaint .= <<EOS;
2482 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2483 We won't be able to look for a system-wide config file.
2484 EOS
2485     }
2486
2487     # Unfortunately the logic used for the various versions isnt so clever..
2488     # so we have to handle an outside case.
2489     return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2490 }
2491
2492 sub is_unix {
2493     return
2494          ( $^O !~ /win32|dos/i )
2495       && ( $^O ne 'VMS' )
2496       && ( $^O ne 'OS2' )
2497       && ( $^O ne 'MacOS' );
2498 }
2499
2500 sub look_for_Windows {
2501
2502     # determine Windows sub-type and location of
2503     # system-wide configuration files
2504     my $rpending_complaint = shift;
2505     my $is_Windows         = ( $^O =~ /win32|dos/i );
2506     my $Windows_type       = Win_OS_Type($rpending_complaint) if $is_Windows;
2507     return ( $is_Windows, $Windows_type );
2508 }
2509
2510 sub find_config_file {
2511
2512     # look for a .perltidyrc configuration file
2513     # For Windows also look for a file named perltidy.ini
2514     my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2515         $rpending_complaint ) = @_;
2516
2517     $$rconfig_file_chatter .= "# Config file search...system reported as:";
2518     if ($is_Windows) {
2519         $$rconfig_file_chatter .= "Windows $Windows_type\n";
2520     }
2521     else {
2522         $$rconfig_file_chatter .= " $^O\n";
2523     }
2524
2525     # sub to check file existance and record all tests
2526     my $exists_config_file = sub {
2527         my $config_file = shift;
2528         return 0 unless $config_file;
2529         $$rconfig_file_chatter .= "# Testing: $config_file\n";
2530         return -f $config_file;
2531     };
2532
2533     my $config_file;
2534
2535     # look in current directory first
2536     $config_file = ".perltidyrc";
2537     return $config_file if $exists_config_file->($config_file);
2538     if ($is_Windows) {
2539         $config_file = "perltidy.ini";
2540         return $config_file if $exists_config_file->($config_file);
2541     }
2542
2543     # Default environment vars.
2544     my @envs = qw(PERLTIDY HOME);
2545
2546     # Check the NT/2k/XP locations, first a local machine def, then a
2547     # network def
2548     push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2549
2550     # Now go through the enviornment ...
2551     foreach my $var (@envs) {
2552         $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2553         if ( defined( $ENV{$var} ) ) {
2554             $$rconfig_file_chatter .= " = $ENV{$var}\n";
2555
2556             # test ENV{ PERLTIDY } as file:
2557             if ( $var eq 'PERLTIDY' ) {
2558                 $config_file = "$ENV{$var}";
2559                 return $config_file if $exists_config_file->($config_file);
2560             }
2561
2562             # test ENV as directory:
2563             $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2564             return $config_file if $exists_config_file->($config_file);
2565
2566             if ($is_Windows) {
2567                 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2568                 return $config_file if $exists_config_file->($config_file);
2569             }
2570         }
2571         else {
2572             $$rconfig_file_chatter .= "\n";
2573         }
2574     }
2575
2576     # then look for a system-wide definition
2577     # where to look varies with OS
2578     if ($is_Windows) {
2579
2580         if ($Windows_type) {
2581             my ( $os, $system, $allusers ) =
2582               Win_Config_Locs( $rpending_complaint, $Windows_type );
2583
2584             # Check All Users directory, if there is one.
2585             # i.e. C:\Documents and Settings\User\perltidy.ini
2586             if ($allusers) {
2587
2588                 $config_file = catfile( $allusers, ".perltidyrc" );
2589                 return $config_file if $exists_config_file->($config_file);
2590
2591                 $config_file = catfile( $allusers, "perltidy.ini" );
2592                 return $config_file if $exists_config_file->($config_file);
2593             }
2594
2595             # Check system directory.
2596             # retain old code in case someone has been able to create
2597             # a file with a leading period.
2598             $config_file = catfile( $system, ".perltidyrc" );
2599             return $config_file if $exists_config_file->($config_file);
2600
2601             $config_file = catfile( $system, "perltidy.ini" );
2602             return $config_file if $exists_config_file->($config_file);
2603         }
2604     }
2605
2606     # Place to add customization code for other systems
2607     elsif ( $^O eq 'OS2' ) {
2608     }
2609     elsif ( $^O eq 'MacOS' ) {
2610     }
2611     elsif ( $^O eq 'VMS' ) {
2612     }
2613
2614     # Assume some kind of Unix
2615     else {
2616
2617         $config_file = "/usr/local/etc/perltidyrc";
2618         return $config_file if $exists_config_file->($config_file);
2619
2620         $config_file = "/etc/perltidyrc";
2621         return $config_file if $exists_config_file->($config_file);
2622     }
2623
2624     # Couldn't find a config file
2625     return;
2626 }
2627
2628 sub Win_Config_Locs {
2629
2630     # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2631     # or undef if its not a win32 OS.  In list context returns OS, System
2632     # Directory, and All Users Directory.  All Users will be empty on a
2633     # 9x/Me box.  Contributed by: Yves Orton.
2634
2635     my $rpending_complaint = shift;
2636     my $os = (@_) ? shift : Win_OS_Type();
2637     return unless $os;
2638
2639     my $system   = "";
2640     my $allusers = "";
2641
2642     if ( $os =~ /9[58]|Me/ ) {
2643         $system = "C:/Windows";
2644     }
2645     elsif ( $os =~ /NT|XP|200?/ ) {
2646         $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2647         $allusers =
2648           ( $os =~ /NT/ )
2649           ? "C:/WinNT/profiles/All Users/"
2650           : "C:/Documents and Settings/All Users/";
2651     }
2652     else {
2653
2654         # This currently would only happen on a win32s computer.  I dont have
2655         # one to test, so I am unsure how to proceed.  Suggestions welcome!
2656         $$rpending_complaint .=
2657 "I dont know a sensible place to look for config files on an $os system.\n";
2658         return;
2659     }
2660     return wantarray ? ( $os, $system, $allusers ) : $os;
2661 }
2662
2663 sub dump_config_file {
2664     my $fh                   = shift;
2665     my $config_file          = shift;
2666     my $rconfig_file_chatter = shift;
2667     print STDOUT "$$rconfig_file_chatter";
2668     if ($fh) {
2669         print STDOUT "# Dump of file: '$config_file'\n";
2670         while ( my $line = $fh->getline() ) { print STDOUT $line }
2671         eval { $fh->close() };
2672     }
2673     else {
2674         print STDOUT "# ...no config file found\n";
2675     }
2676 }
2677
2678 sub read_config_file {
2679
2680     my ( $fh, $config_file, $rexpansion ) = @_;
2681     my @config_list = ();
2682
2683     # file is bad if non-empty $death_message is returned
2684     my $death_message = "";
2685
2686     my $name = undef;
2687     my $line_no;
2688     while ( my $line = $fh->getline() ) {
2689         $line_no++;
2690         chomp $line;
2691         next if $line =~ /^\s*#/;    # skip full-line comment
2692         ( $line, $death_message ) =
2693           strip_comment( $line, $config_file, $line_no );
2694         last if ($death_message);
2695         $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
2696         next unless $line;
2697
2698         # look for something of the general form
2699         #    newname { body }
2700         # or just
2701         #    body
2702
2703         if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2704             my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2705
2706             # handle a new alias definition
2707             if ($newname) {
2708                 if ($name) {
2709                     $death_message =
2710 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2711                     last;
2712                 }
2713                 $name = $newname;
2714
2715                 if ( ${$rexpansion}{$name} ) {
2716                     local $" = ')(';
2717                     my @names = sort keys %$rexpansion;
2718                     $death_message =
2719                         "Here is a list of all installed aliases\n(@names)\n"
2720                       . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2721                     last;
2722                 }
2723                 ${$rexpansion}{$name} = [];
2724             }
2725
2726             # now do the body
2727             if ($body) {
2728
2729                 my ( $rbody_parts, $msg ) = parse_args($body);
2730                 if ($msg) {
2731                     $death_message = <<EOM;
2732 Error reading file '$config_file' at line number $line_no.
2733 $msg
2734 Please fix this line or use -npro to avoid reading this file
2735 EOM
2736                     last;
2737                 }
2738
2739                 if ($name) {
2740
2741                     # remove leading dashes if this is an alias
2742                     foreach (@$rbody_parts) { s/^\-+//; }
2743                     push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2744                 }
2745                 else {
2746                     push( @config_list, @$rbody_parts );
2747                 }
2748             }
2749
2750             if ($curly) {
2751                 unless ($name) {
2752                     $death_message =
2753 "Unexpected '}' seen in config file $config_file line $.\n";
2754                     last;
2755                 }
2756                 $name = undef;
2757             }
2758         }
2759     }
2760     eval { $fh->close() };
2761     return ( \@config_list, $death_message );
2762 }
2763
2764 sub strip_comment {
2765
2766     my ( $instr, $config_file, $line_no ) = @_;
2767     my $msg = "";
2768
2769     # nothing to do if no comments
2770     if ( $instr !~ /#/ ) {
2771         return ( $instr, $msg );
2772     }
2773
2774     # use simple method of no quotes
2775     elsif ( $instr !~ /['"]/ ) {
2776         $instr =~ s/\s*\#.*$//;    # simple trim
2777         return ( $instr, $msg );
2778     }
2779
2780     # handle comments and quotes
2781     my $outstr     = "";
2782     my $quote_char = "";
2783     while (1) {
2784
2785         # looking for ending quote character
2786         if ($quote_char) {
2787             if ( $instr =~ /\G($quote_char)/gc ) {
2788                 $quote_char = "";
2789                 $outstr .= $1;
2790             }
2791             elsif ( $instr =~ /\G(.)/gc ) {
2792                 $outstr .= $1;
2793             }
2794
2795             # error..we reached the end without seeing the ending quote char
2796             else {
2797                 $msg = <<EOM;
2798 Error reading file $config_file at line number $line_no.
2799 Did not see ending quote character <$quote_char> in this text:
2800 $instr
2801 Please fix this line or use -npro to avoid reading this file
2802 EOM
2803                 last;
2804             }
2805         }
2806
2807         # accumulating characters and looking for start of a quoted string
2808         else {
2809             if ( $instr =~ /\G([\"\'])/gc ) {
2810                 $outstr .= $1;
2811                 $quote_char = $1;
2812             }
2813             elsif ( $instr =~ /\G#/gc ) {
2814                 last;
2815             }
2816             elsif ( $instr =~ /\G(.)/gc ) {
2817                 $outstr .= $1;
2818             }
2819             else {
2820                 last;
2821             }
2822         }
2823     }
2824     return ( $outstr, $msg );
2825 }
2826
2827 sub parse_args {
2828
2829     # Parse a command string containing multiple string with possible
2830     # quotes, into individual commands.  It might look like this, for example:
2831     #
2832     #    -wba=" + - "  -some-thing -wbb='. && ||'
2833     #
2834     # There is no need, at present, to handle escaped quote characters.
2835     # (They are not perltidy tokens, so needn't be in strings).
2836
2837     my ($body)     = @_;
2838     my @body_parts = ();
2839     my $quote_char = "";
2840     my $part       = "";
2841     my $msg        = "";
2842     while (1) {
2843
2844         # looking for ending quote character
2845         if ($quote_char) {
2846             if ( $body =~ /\G($quote_char)/gc ) {
2847                 $quote_char = "";
2848             }
2849             elsif ( $body =~ /\G(.)/gc ) {
2850                 $part .= $1;
2851             }
2852
2853             # error..we reached the end without seeing the ending quote char
2854             else {
2855                 if ( length($part) ) { push @body_parts, $part; }
2856                 $msg = <<EOM;
2857 Did not see ending quote character <$quote_char> in this text:
2858 $body
2859 EOM
2860                 last;
2861             }
2862         }
2863
2864         # accumulating characters and looking for start of a quoted string
2865         else {
2866             if ( $body =~ /\G([\"\'])/gc ) {
2867                 $quote_char = $1;
2868             }
2869             elsif ( $body =~ /\G(\s+)/gc ) {
2870                 if ( length($part) ) { push @body_parts, $part; }
2871                 $part = "";
2872             }
2873             elsif ( $body =~ /\G(.)/gc ) {
2874                 $part .= $1;
2875             }
2876             else {
2877                 if ( length($part) ) { push @body_parts, $part; }
2878                 last;
2879             }
2880         }
2881     }
2882     return ( \@body_parts, $msg );
2883 }
2884
2885 sub dump_long_names {
2886
2887     my @names = sort @_;
2888     print STDOUT <<EOM;
2889 # Command line long names (passed to GetOptions)
2890 #---------------------------------------------------------------
2891 # here is a summary of the Getopt codes:
2892 # <none> does not take an argument
2893 # =s takes a mandatory string
2894 # :s takes an optional string
2895 # =i takes a mandatory integer
2896 # :i takes an optional integer
2897 # ! does not take an argument and may be negated
2898 #  i.e., -foo and -nofoo are allowed
2899 # a double dash signals the end of the options list
2900 #
2901 #---------------------------------------------------------------
2902 EOM
2903
2904     foreach (@names) { print STDOUT "$_\n" }
2905 }
2906
2907 sub dump_defaults {
2908     my @defaults = sort @_;
2909     print STDOUT "Default command line options:\n";
2910     foreach (@_) { print STDOUT "$_\n" }
2911 }
2912
2913 sub readable_options {
2914
2915     # return options for this run as a string which could be
2916     # put in a perltidyrc file
2917     my ( $rOpts, $roption_string ) = @_;
2918     my %Getopt_flags;
2919     my $rGetopt_flags    = \%Getopt_flags;
2920     my $readable_options = "# Final parameter set for this run.\n";
2921     $readable_options .=
2922       "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
2923     foreach my $opt ( @{$roption_string} ) {
2924         my $flag = "";
2925         if ( $opt =~ /(.*)(!|=.*)$/ ) {
2926             $opt  = $1;
2927             $flag = $2;
2928         }
2929         if ( defined( $rOpts->{$opt} ) ) {
2930             $rGetopt_flags->{$opt} = $flag;
2931         }
2932     }
2933     foreach my $key ( sort keys %{$rOpts} ) {
2934         my $flag   = $rGetopt_flags->{$key};
2935         my $value  = $rOpts->{$key};
2936         my $prefix = '--';
2937         my $suffix = "";
2938         if ($flag) {
2939             if ( $flag =~ /^=/ ) {
2940                 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2941                 $suffix = "=" . $value;
2942             }
2943             elsif ( $flag =~ /^!/ ) {
2944                 $prefix .= "no" unless ($value);
2945             }
2946             else {
2947
2948                 # shouldn't happen
2949                 $readable_options .=
2950                   "# ERROR in dump_options: unrecognized flag $flag for $key\n";
2951             }
2952         }
2953         $readable_options .= $prefix . $key . $suffix . "\n";
2954     }
2955     return $readable_options;
2956 }
2957
2958 sub show_version {
2959     print <<"EOM";
2960 This is perltidy, v$VERSION
2961
2962 Copyright 2000-2010, Steve Hancock
2963
2964 Perltidy is free software and may be copied under the terms of the GNU
2965 General Public License, which is included in the distribution files.
2966
2967 Complete documentation for perltidy can be found using 'man perltidy'
2968 or on the internet at http://perltidy.sourceforge.net.
2969 EOM
2970 }
2971
2972 sub usage {
2973
2974     print STDOUT <<EOF;
2975 This is perltidy version $VERSION, a perl script indenter.  Usage:
2976
2977     perltidy [ options ] file1 file2 file3 ...
2978             (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2979     perltidy [ options ] file1 -o outfile
2980     perltidy [ options ] file1 -st >outfile
2981     perltidy [ options ] <infile >outfile
2982
2983 Options have short and long forms. Short forms are shown; see
2984 man pages for long forms.  Note: '=s' indicates a required string,
2985 and '=n' indicates a required integer.
2986
2987 I/O control
2988  -h      show this help
2989  -o=file name of the output file (only if single input file)
2990  -oext=s change output extension from 'tdy' to s
2991  -opath=path  change path to be 'path' for output files
2992  -b      backup original to .bak and modify file in-place
2993  -bext=s change default backup extension from 'bak' to s
2994  -q      deactivate error messages (for running under editor)
2995  -w      include non-critical warning messages in the .ERR error output
2996  -syn    run perl -c to check syntax (default under unix systems)
2997  -log    save .LOG file, which has useful diagnostics
2998  -f      force perltidy to read a binary file
2999  -g      like -log but writes more detailed .LOG file, for debugging scripts
3000  -opt    write the set of options actually used to a .LOG file
3001  -npro   ignore .perltidyrc configuration command file
3002  -pro=file   read configuration commands from file instead of .perltidyrc
3003  -st     send output to standard output, STDOUT
3004  -se     send error output to standard error output, STDERR
3005  -v      display version number to standard output and quit
3006
3007 Basic Options:
3008  -i=n    use n columns per indentation level (default n=4)
3009  -t      tabs: use one tab character per indentation level, not recommeded
3010  -nt     no tabs: use n spaces per indentation level (default)
3011  -et=n   entab leading whitespace n spaces per tab; not recommended
3012  -io     "indent only": just do indentation, no other formatting.
3013  -sil=n  set starting indentation level to n;  use if auto detection fails
3014  -ole=s  specify output line ending (s=dos or win, mac, unix)
3015  -ple    keep output line endings same as input (input must be filename)
3016
3017 Whitespace Control
3018  -fws    freeze whitespace; this disables all whitespace changes
3019            and disables the following switches:
3020  -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
3021  -bbt    same as -bt but for code block braces; same as -bt if not given
3022  -bbvt   block braces vertically tight; use with -bl or -bli
3023  -bbvtl=s  make -bbvt to apply to selected list of block types
3024  -pt=n   paren tightness (n=0, 1 or 2)
3025  -sbt=n  square bracket tightness (n=0, 1, or 2)
3026  -bvt=n  brace vertical tightness,
3027          n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3028  -pvt=n  paren vertical tightness (see -bvt for n)
3029  -sbvt=n square bracket vertical tightness (see -bvt for n)
3030  -bvtc=n closing brace vertical tightness:
3031          n=(0=open, 1=sometimes close, 2=always close)
3032  -pvtc=n closing paren vertical tightness, see -bvtc for n.
3033  -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3034  -ci=n   sets continuation indentation=n,  default is n=2 spaces
3035  -lp     line up parentheses, brackets, and non-BLOCK braces
3036  -sfs    add space before semicolon in for( ; ; )
3037  -aws    allow perltidy to add whitespace (default)
3038  -dws    delete all old non-essential whitespace
3039  -icb    indent closing brace of a code block
3040  -cti=n  closing indentation of paren, square bracket, or non-block brace:
3041          n=0 none, =1 align with opening, =2 one full indentation level
3042  -icp    equivalent to -cti=2
3043  -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
3044  -wrs=s  want space right of tokens in string;
3045  -sts    put space before terminal semicolon of a statement
3046  -sak=s  put space between keywords given in s and '(';
3047  -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3048
3049 Line Break Control
3050  -fnl    freeze newlines; this disables all line break changes
3051             and disables the following switches:
3052  -anl    add newlines;  ok to introduce new line breaks
3053  -bbs    add blank line before subs and packages
3054  -bbc    add blank line before block comments
3055  -bbb    add blank line between major blocks
3056  -kbl=n  keep old blank lines? 0=no, 1=some, 2=all
3057  -mbl=n  maximum consecutive blank lines to output (default=1)
3058  -ce     cuddled else; use this style: '} else {'
3059  -dnl    delete old newlines (default)
3060  -l=n    maximum line length;  default n=80
3061  -bl     opening brace on new line
3062  -sbl    opening sub brace on new line.  value of -bl is used if not given.
3063  -bli    opening brace on new line and indented
3064  -bar    opening brace always on right, even for long clauses
3065  -vt=n   vertical tightness (requires -lp); n controls break after opening
3066          token: 0=never  1=no break if next line balanced   2=no break
3067  -vtc=n  vertical tightness of closing container; n controls if closing
3068          token starts new line: 0=always  1=not unless list  1=never
3069  -wba=s  want break after tokens in string; i.e. wba=': .'
3070  -wbb=s  want break before tokens in string
3071
3072 Following Old Breakpoints
3073  -kis    keep interior semicolons.  Allows multiple statements per line.
3074  -boc    break at old comma breaks: turns off all automatic list formatting
3075  -bol    break at old logical breakpoints: or, and, ||, && (default)
3076  -bok    break at old list keyword breakpoints such as map, sort (default)
3077  -bot    break at old conditional (ternary ?:) operator breakpoints (default)
3078  -cab=n  break at commas after a comma-arrow (=>):
3079          n=0 break at all commas after =>
3080          n=1 stable: break unless this breaks an existing one-line container
3081          n=2 break only if a one-line container cannot be formed
3082          n=3 do not treat commas after => specially at all
3083
3084 Comment controls
3085  -ibc    indent block comments (default)
3086  -isbc   indent spaced block comments; may indent unless no leading space
3087  -msc=n  minimum desired spaces to side comment, default 4
3088  -fpsc=n fix position for side comments; default 0;
3089  -csc    add or update closing side comments after closing BLOCK brace
3090  -dcsc   delete closing side comments created by a -csc command
3091  -cscp=s change closing side comment prefix to be other than '## end'
3092  -cscl=s change closing side comment to apply to selected list of blocks
3093  -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3094  -csct=n maximum number of columns of appended text, default n=20
3095  -cscw   causes warning if old side comment is overwritten with -csc
3096
3097  -sbc    use 'static block comments' identified by leading '##' (default)
3098  -sbcp=s change static block comment identifier to be other than '##'
3099  -osbc   outdent static block comments
3100
3101  -ssc    use 'static side comments' identified by leading '##' (default)
3102  -sscp=s change static side comment identifier to be other than '##'
3103
3104 Delete selected text
3105  -dac    delete all comments AND pod
3106  -dbc    delete block comments
3107  -dsc    delete side comments
3108  -dp     delete pod
3109
3110 Send selected text to a '.TEE' file
3111  -tac    tee all comments AND pod
3112  -tbc    tee block comments
3113  -tsc    tee side comments
3114  -tp     tee pod
3115
3116 Outdenting
3117  -olq    outdent long quoted strings (default)
3118  -olc    outdent a long block comment line
3119  -ola    outdent statement labels
3120  -okw    outdent control keywords (redo, next, last, goto, return)
3121  -okwl=s specify alternative keywords for -okw command
3122
3123 Other controls
3124  -mft=n  maximum fields per table; default n=40
3125  -x      do not format lines before hash-bang line (i.e., for VMS)
3126  -asc    allows perltidy to add a ';' when missing (default)
3127  -dsm    allows perltidy to delete an unnecessary ';'  (default)
3128
3129 Combinations of other parameters
3130  -gnu     attempt to follow GNU Coding Standards as applied to perl
3131  -mangle  remove as many newlines as possible (but keep comments and pods)
3132  -extrude  insert as many newlines as possible
3133
3134 Dump and die, debugging
3135  -dop    dump options used in this run to standard output and quit
3136  -ddf    dump default options to standard output and quit
3137  -dsn    dump all option short names to standard output and quit
3138  -dln    dump option long names to standard output and quit
3139  -dpro   dump whatever configuration file is in effect to standard output
3140  -dtt    dump all token types to standard output and quit
3141
3142 HTML
3143  -html write an html file (see 'man perl2web' for many options)
3144        Note: when -html is used, no indentation or formatting are done.
3145        Hint: try perltidy -html -css=mystyle.css filename.pl
3146        and edit mystyle.css to change the appearance of filename.html.
3147        -nnn gives line numbers
3148        -pre only writes out <pre>..</pre> code section
3149        -toc places a table of contents to subs at the top (default)
3150        -pod passes pod text through pod2html (default)
3151        -frm write html as a frame (3 files)
3152        -text=s extra extension for table of contents if -frm, default='toc'
3153        -sext=s extra extension for file content if -frm, default='src'
3154
3155 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3156 negates the long forms.  For example, -nasc means don't add missing
3157 semicolons.
3158
3159 If you are unable to see this entire text, try "perltidy -h | more"
3160 For more detailed information, and additional options, try "man perltidy",
3161 or go to the perltidy home page at http://perltidy.sourceforge.net
3162 EOF
3163
3164 }
3165
3166 sub process_this_file {
3167
3168     my ( $truth, $beauty ) = @_;
3169
3170     # loop to process each line of this file
3171     while ( my $line_of_tokens = $truth->get_line() ) {
3172         $beauty->write_line($line_of_tokens);
3173     }
3174
3175     # finish up
3176     eval { $beauty->finish_formatting() };
3177     $truth->report_tokenization_errors();
3178 }
3179
3180 sub check_syntax {
3181
3182     # Use 'perl -c' to make sure that we did not create bad syntax
3183     # This is a very good independent check for programming errors
3184     #
3185     # Given names of the input and output files, ($ifname, $ofname),
3186     # we do the following:
3187     # - check syntax of the input file
3188     # - if bad, all done (could be an incomplete code snippet)
3189     # - if infile syntax ok, then check syntax of the output file;
3190     #   - if outfile syntax bad, issue warning; this implies a code bug!
3191     # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3192
3193     my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
3194     my $infile_syntax_ok = 0;
3195     my $line_of_dashes   = '-' x 42 . "\n";
3196
3197     my $flags = $rOpts->{'perl-syntax-check-flags'};
3198
3199     # be sure we invoke perl with -c
3200     # note: perl will accept repeated flags like '-c -c'.  It is safest
3201     # to append another -c than try to find an interior bundled c, as
3202     # in -Tc, because such a 'c' might be in a quoted string, for example.
3203     if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3204
3205     # be sure we invoke perl with -x if requested
3206     # same comments about repeated parameters applies
3207     if ( $rOpts->{'look-for-hash-bang'} ) {
3208         if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3209     }
3210
3211     # this shouldn't happen unless a termporary file couldn't be made
3212     if ( $ifname eq '-' ) {
3213         $logger_object->write_logfile_entry(
3214             "Cannot run perl -c on STDIN and STDOUT\n");
3215         return $infile_syntax_ok;
3216     }
3217
3218     $logger_object->write_logfile_entry(
3219         "checking input file syntax with perl $flags\n");
3220     $logger_object->write_logfile_entry($line_of_dashes);
3221
3222     # Not all operating systems/shells support redirection of the standard
3223     # error output.
3224     my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3225
3226     my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3227     $logger_object->write_logfile_entry("$perl_output\n");
3228
3229     if ( $perl_output =~ /syntax\s*OK/ ) {
3230         $infile_syntax_ok = 1;
3231         $logger_object->write_logfile_entry($line_of_dashes);
3232         $logger_object->write_logfile_entry(
3233             "checking output file syntax with perl $flags ...\n");
3234         $logger_object->write_logfile_entry($line_of_dashes);
3235
3236         my $perl_output =
3237           do_syntax_check( $ofname, $flags, $error_redirection );
3238         $logger_object->write_logfile_entry("$perl_output\n");
3239
3240         unless ( $perl_output =~ /syntax\s*OK/ ) {
3241             $logger_object->write_logfile_entry($line_of_dashes);
3242             $logger_object->warning(
3243 "The output file has a syntax error when tested with perl $flags $ofname !\n"
3244             );
3245             $logger_object->warning(
3246                 "This implies an error in perltidy; the file $ofname is bad\n");
3247             $logger_object->report_definite_bug();
3248
3249             # the perl version number will be helpful for diagnosing the problem
3250             $logger_object->write_logfile_entry(
3251                 qx/perl -v $error_redirection/ . "\n" );
3252         }
3253     }
3254     else {
3255
3256         # Only warn of perl -c syntax errors.  Other messages,
3257         # such as missing modules, are too common.  They can be
3258         # seen by running with perltidy -w
3259         $logger_object->complain("A syntax check using perl $flags gives: \n");
3260         $logger_object->complain($line_of_dashes);
3261         $logger_object->complain("$perl_output\n");
3262         $logger_object->complain($line_of_dashes);
3263         $infile_syntax_ok = -1;
3264         $logger_object->write_logfile_entry($line_of_dashes);
3265         $logger_object->write_logfile_entry(
3266 "The output file will not be checked because of input file problems\n"
3267         );
3268     }
3269     return $infile_syntax_ok;
3270 }
3271
3272 sub do_syntax_check {
3273     my ( $fname, $flags, $error_redirection ) = @_;
3274
3275     # We have to quote the filename in case it has unusual characters
3276     # or spaces.  Example: this filename #CM11.pm# gives trouble.
3277     $fname = '"' . $fname . '"';
3278
3279     # Under VMS something like -T will become -t (and an error) so we
3280     # will put quotes around the flags.  Double quotes seem to work on
3281     # Unix/Windows/VMS, but this may not work on all systems.  (Single
3282     # quotes do not work under Windows).  It could become necessary to
3283     # put double quotes around each flag, such as:  -"c"  -"T"
3284     # We may eventually need some system-dependent coding here.
3285     $flags = '"' . $flags . '"';
3286
3287     # now wish for luck...
3288     return qx/perl $flags $fname $error_redirection/;
3289 }
3290
3291 #####################################################################
3292 #
3293 # This is a stripped down version of IO::Scalar
3294 # Given a reference to a scalar, it supplies either:
3295 # a getline method which reads lines (mode='r'), or
3296 # a print method which reads lines (mode='w')
3297 #
3298 #####################################################################
3299 package Perl::Tidy::IOScalar;
3300 use Carp;
3301
3302 sub new {
3303     my ( $package, $rscalar, $mode ) = @_;
3304     my $ref = ref $rscalar;
3305     if ( $ref ne 'SCALAR' ) {
3306         confess <<EOM;
3307 ------------------------------------------------------------------------
3308 expecting ref to SCALAR but got ref to ($ref); trace follows:
3309 ------------------------------------------------------------------------
3310 EOM
3311
3312     }
3313     if ( $mode eq 'w' ) {
3314         $$rscalar = "";
3315         return bless [ $rscalar, $mode ], $package;
3316     }
3317     elsif ( $mode eq 'r' ) {
3318
3319         # Convert a scalar to an array.
3320         # This avoids looking for "\n" on each call to getline
3321         my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3322         my $i_next = 0;
3323         return bless [ \@array, $mode, $i_next ], $package;
3324     }
3325     else {
3326         confess <<EOM;
3327 ------------------------------------------------------------------------
3328 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3329 ------------------------------------------------------------------------
3330 EOM
3331     }
3332 }
3333
3334 sub getline {
3335     my $self = shift;
3336     my $mode = $self->[1];
3337     if ( $mode ne 'r' ) {
3338         confess <<EOM;
3339 ------------------------------------------------------------------------
3340 getline call requires mode = 'r' but mode = ($mode); trace follows:
3341 ------------------------------------------------------------------------
3342 EOM
3343     }
3344     my $i = $self->[2]++;
3345     ##my $line = $self->[0]->[$i];
3346     return $self->[0]->[$i];
3347 }
3348
3349 sub print {
3350     my $self = shift;
3351     my $mode = $self->[1];
3352     if ( $mode ne 'w' ) {
3353         confess <<EOM;
3354 ------------------------------------------------------------------------
3355 print call requires mode = 'w' but mode = ($mode); trace follows:
3356 ------------------------------------------------------------------------
3357 EOM
3358     }
3359     ${ $self->[0] } .= $_[0];
3360 }
3361 sub close { return }
3362
3363 #####################################################################
3364 #
3365 # This is a stripped down version of IO::ScalarArray
3366 # Given a reference to an array, it supplies either:
3367 # a getline method which reads lines (mode='r'), or
3368 # a print method which reads lines (mode='w')
3369 #
3370 # NOTE: this routine assumes that that there aren't any embedded
3371 # newlines within any of the array elements.  There are no checks
3372 # for that.
3373 #
3374 #####################################################################
3375 package Perl::Tidy::IOScalarArray;
3376 use Carp;
3377
3378 sub new {
3379     my ( $package, $rarray, $mode ) = @_;
3380     my $ref = ref $rarray;
3381     if ( $ref ne 'ARRAY' ) {
3382         confess <<EOM;
3383 ------------------------------------------------------------------------
3384 expecting ref to ARRAY but got ref to ($ref); trace follows:
3385 ------------------------------------------------------------------------
3386 EOM
3387
3388     }
3389     if ( $mode eq 'w' ) {
3390         @$rarray = ();
3391         return bless [ $rarray, $mode ], $package;
3392     }
3393     elsif ( $mode eq 'r' ) {
3394         my $i_next = 0;
3395         return bless [ $rarray, $mode, $i_next ], $package;
3396     }
3397     else {
3398         confess <<EOM;
3399 ------------------------------------------------------------------------
3400 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3401 ------------------------------------------------------------------------
3402 EOM
3403     }
3404 }
3405
3406 sub getline {
3407     my $self = shift;
3408     my $mode = $self->[1];
3409     if ( $mode ne 'r' ) {
3410         confess <<EOM;
3411 ------------------------------------------------------------------------
3412 getline requires mode = 'r' but mode = ($mode); trace follows:
3413 ------------------------------------------------------------------------
3414 EOM
3415     }
3416     my $i = $self->[2]++;
3417     return $self->[0]->[$i];
3418 }
3419
3420 sub print {
3421     my $self = shift;
3422     my $mode = $self->[1];
3423     if ( $mode ne 'w' ) {
3424         confess <<EOM;
3425 ------------------------------------------------------------------------
3426 print requires mode = 'w' but mode = ($mode); trace follows:
3427 ------------------------------------------------------------------------
3428 EOM
3429     }
3430     push @{ $self->[0] }, $_[0];
3431 }
3432 sub close { return }
3433
3434 #####################################################################
3435 #
3436 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3437 # which returns the next line to be parsed
3438 #
3439 #####################################################################
3440
3441 package Perl::Tidy::LineSource;
3442
3443 sub new {
3444
3445     my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3446     my $input_file_copy = undef;
3447     my $fh_copy;
3448
3449     my $input_line_ending;
3450     if ( $rOpts->{'preserve-line-endings'} ) {
3451         $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3452     }
3453
3454     ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3455     return undef unless $fh;
3456
3457     # in order to check output syntax when standard output is used,
3458     # or when it is an object, we have to make a copy of the file
3459     if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3460     {
3461
3462         # Turning off syntax check when input output is used.
3463         # The reason is that temporary files cause problems on
3464         # on many systems.
3465         $rOpts->{'check-syntax'} = 0;
3466         $input_file_copy = '-';
3467
3468         $$rpending_logfile_message .= <<EOM;
3469 Note: --syntax check will be skipped because standard input is used
3470 EOM
3471
3472     }
3473
3474     return bless {
3475         _fh                => $fh,
3476         _fh_copy           => $fh_copy,
3477         _filename          => $input_file,
3478         _input_file_copy   => $input_file_copy,
3479         _input_line_ending => $input_line_ending,
3480         _rinput_buffer     => [],
3481         _started           => 0,
3482     }, $class;
3483 }
3484
3485 sub get_input_file_copy_name {
3486     my $self   = shift;
3487     my $ifname = $self->{_input_file_copy};
3488     unless ($ifname) {
3489         $ifname = $self->{_filename};
3490     }
3491     return $ifname;
3492 }
3493
3494 sub close_input_file {
3495     my $self = shift;
3496     eval { $self->{_fh}->close() };
3497     eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3498 }
3499
3500 sub get_line {
3501     my $self          = shift;
3502     my $line          = undef;
3503     my $fh            = $self->{_fh};
3504     my $fh_copy       = $self->{_fh_copy};
3505     my $rinput_buffer = $self->{_rinput_buffer};
3506
3507     if ( scalar(@$rinput_buffer) ) {
3508         $line = shift @$rinput_buffer;
3509     }
3510     else {
3511         $line = $fh->getline();
3512
3513         # patch to read raw mac files under unix, dos
3514         # see if the first line has embedded \r's
3515         if ( $line && !$self->{_started} ) {
3516             if ( $line =~ /[\015][^\015\012]/ ) {
3517
3518                 # found one -- break the line up and store in a buffer
3519                 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3520                 my $count = @$rinput_buffer;
3521                 $line = shift @$rinput_buffer;
3522             }
3523             $self->{_started}++;
3524         }
3525     }
3526     if ( $line && $fh_copy ) { $fh_copy->print($line); }
3527     return $line;
3528 }
3529
3530 #####################################################################
3531 #
3532 # the Perl::Tidy::LineSink class supplies a write_line method for
3533 # actual file writing
3534 #
3535 #####################################################################
3536
3537 package Perl::Tidy::LineSink;
3538
3539 sub new {
3540
3541     my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3542         $rpending_logfile_message, $binmode )
3543       = @_;
3544     my $fh               = undef;
3545     my $fh_copy          = undef;
3546     my $fh_tee           = undef;
3547     my $output_file_copy = "";
3548     my $output_file_open = 0;
3549
3550     if ( $rOpts->{'format'} eq 'tidy' ) {
3551         ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3552         unless ($fh) { die "Cannot write to output stream\n"; }
3553         $output_file_open = 1;
3554         if ($binmode) {
3555             if ( ref($fh) eq 'IO::File' ) {
3556                 binmode $fh;
3557             }
3558             if ( $output_file eq '-' ) { binmode STDOUT }
3559         }
3560     }
3561
3562     # in order to check output syntax when standard output is used,
3563     # or when it is an object, we have to make a copy of the file
3564     if ( $output_file eq '-' || ref $output_file ) {
3565         if ( $rOpts->{'check-syntax'} ) {
3566
3567             # Turning off syntax check when standard output is used.
3568             # The reason is that temporary files cause problems on
3569             # on many systems.
3570             $rOpts->{'check-syntax'} = 0;
3571             $output_file_copy = '-';
3572             $$rpending_logfile_message .= <<EOM;
3573 Note: --syntax check will be skipped because standard output is used
3574 EOM
3575
3576         }
3577     }
3578
3579     bless {
3580         _fh               => $fh,
3581         _fh_copy          => $fh_copy,
3582         _fh_tee           => $fh_tee,
3583         _output_file      => $output_file,
3584         _output_file_open => $output_file_open,
3585         _output_file_copy => $output_file_copy,
3586         _tee_flag         => 0,
3587         _tee_file         => $tee_file,
3588         _tee_file_opened  => 0,
3589         _line_separator   => $line_separator,
3590         _binmode          => $binmode,
3591     }, $class;
3592 }
3593
3594 sub write_line {
3595
3596     my $self    = shift;
3597     my $fh      = $self->{_fh};
3598     my $fh_copy = $self->{_fh_copy};
3599
3600     my $output_file_open = $self->{_output_file_open};
3601     chomp $_[0];
3602     $_[0] .= $self->{_line_separator};
3603
3604     $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3605     print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3606
3607     if ( $self->{_tee_flag} ) {
3608         unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3609         my $fh_tee = $self->{_fh_tee};
3610         print $fh_tee $_[0];
3611     }
3612 }
3613
3614 sub get_output_file_copy {
3615     my $self   = shift;
3616     my $ofname = $self->{_output_file_copy};
3617     unless ($ofname) {
3618         $ofname = $self->{_output_file};
3619     }
3620     return $ofname;
3621 }
3622
3623 sub tee_on {
3624     my $self = shift;
3625     $self->{_tee_flag} = 1;
3626 }
3627
3628 sub tee_off {
3629     my $self = shift;
3630     $self->{_tee_flag} = 0;
3631 }
3632
3633 sub really_open_tee_file {
3634     my $self     = shift;
3635     my $tee_file = $self->{_tee_file};
3636     my $fh_tee;
3637     $fh_tee = IO::File->new(">$tee_file")
3638       or die("couldn't open TEE file $tee_file: $!\n");
3639     binmode $fh_tee if $self->{_binmode};
3640     $self->{_tee_file_opened} = 1;
3641     $self->{_fh_tee}          = $fh_tee;
3642 }
3643
3644 sub close_output_file {
3645     my $self = shift;
3646     eval { $self->{_fh}->close() }      if $self->{_output_file_open};
3647     eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3648     $self->close_tee_file();
3649 }
3650
3651 sub close_tee_file {
3652     my $self = shift;
3653
3654     if ( $self->{_tee_file_opened} ) {
3655         eval { $self->{_fh_tee}->close() };
3656         $self->{_tee_file_opened} = 0;
3657     }
3658 }
3659
3660 #####################################################################
3661 #
3662 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3663 # useful for program development.
3664 #
3665 # Only one such file is created regardless of the number of input
3666 # files processed.  This allows the results of processing many files
3667 # to be summarized in a single file.
3668 #
3669 #####################################################################
3670
3671 package Perl::Tidy::Diagnostics;
3672
3673 sub new {
3674
3675     my $class = shift;
3676     bless {
3677         _write_diagnostics_count => 0,
3678         _last_diagnostic_file    => "",
3679         _input_file              => "",
3680         _fh                      => undef,
3681     }, $class;
3682 }
3683
3684 sub set_input_file {
3685     my $self = shift;
3686     $self->{_input_file} = $_[0];
3687 }
3688
3689 # This is a diagnostic routine which is useful for program development.
3690 # Output from debug messages go to a file named DIAGNOSTICS, where
3691 # they are labeled by file and line.  This allows many files to be
3692 # scanned at once for some particular condition of interest.
3693 sub write_diagnostics {
3694     my $self = shift;
3695
3696     unless ( $self->{_write_diagnostics_count} ) {
3697         open DIAGNOSTICS, ">DIAGNOSTICS"
3698           or death("couldn't open DIAGNOSTICS: $!\n");
3699     }
3700
3701     my $last_diagnostic_file = $self->{_last_diagnostic_file};
3702     my $input_file           = $self->{_input_file};
3703     if ( $last_diagnostic_file ne $input_file ) {
3704         print DIAGNOSTICS "\nFILE:$input_file\n";
3705     }
3706     $self->{_last_diagnostic_file} = $input_file;
3707     my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3708     print DIAGNOSTICS "$input_line_number:\t@_";
3709     $self->{_write_diagnostics_count}++;
3710 }
3711
3712 #####################################################################
3713 #
3714 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3715 #
3716 #####################################################################
3717
3718 package Perl::Tidy::Logger;
3719
3720 sub new {
3721     my $class = shift;
3722     my $fh;
3723     my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3724
3725     # remove any old error output file
3726     unless ( ref($warning_file) ) {
3727         if ( -e $warning_file ) { unlink($warning_file) }
3728     }
3729
3730     bless {
3731         _log_file                      => $log_file,
3732         _fh_warnings                   => undef,
3733         _rOpts                         => $rOpts,
3734         _fh_warnings                   => undef,
3735         _last_input_line_written       => 0,
3736         _at_end_of_file                => 0,
3737         _use_prefix                    => 1,
3738         _block_log_output              => 0,
3739         _line_of_tokens                => undef,
3740         _output_line_number            => undef,
3741         _wrote_line_information_string => 0,
3742         _wrote_column_headings         => 0,
3743         _warning_file                  => $warning_file,
3744         _warning_count                 => 0,
3745         _complaint_count               => 0,
3746         _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
3747         _saw_brace_error => 0,
3748         _saw_extrude     => $saw_extrude,
3749         _output_array    => [],
3750     }, $class;
3751 }
3752
3753 sub close_log_file {
3754
3755     my $self = shift;
3756     if ( $self->{_fh_warnings} ) {
3757         eval { $self->{_fh_warnings}->close() };
3758         $self->{_fh_warnings} = undef;
3759     }
3760 }
3761
3762 sub get_warning_count {
3763     my $self = shift;
3764     return $self->{_warning_count};
3765 }
3766
3767 sub get_use_prefix {
3768     my $self = shift;
3769     return $self->{_use_prefix};
3770 }
3771
3772 sub block_log_output {
3773     my $self = shift;
3774     $self->{_block_log_output} = 1;
3775 }
3776
3777 sub unblock_log_output {
3778     my $self = shift;
3779     $self->{_block_log_output} = 0;
3780 }
3781
3782 sub interrupt_logfile {
3783     my $self = shift;
3784     $self->{_use_prefix} = 0;
3785     $self->warning("\n");
3786     $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
3787 }
3788
3789 sub resume_logfile {
3790     my $self = shift;
3791     $self->write_logfile_entry( '#' x 60 . "\n" );
3792     $self->{_use_prefix} = 1;
3793 }
3794
3795 sub we_are_at_the_last_line {
3796     my $self = shift;
3797     unless ( $self->{_wrote_line_information_string} ) {
3798         $self->write_logfile_entry("Last line\n\n");
3799     }
3800     $self->{_at_end_of_file} = 1;
3801 }
3802
3803 # record some stuff in case we go down in flames
3804 sub black_box {
3805     my $self = shift;
3806     my ( $line_of_tokens, $output_line_number ) = @_;
3807     my $input_line        = $line_of_tokens->{_line_text};
3808     my $input_line_number = $line_of_tokens->{_line_number};
3809
3810     # save line information in case we have to write a logfile message
3811     $self->{_line_of_tokens}                = $line_of_tokens;
3812     $self->{_output_line_number}            = $output_line_number;
3813     $self->{_wrote_line_information_string} = 0;
3814
3815     my $last_input_line_written = $self->{_last_input_line_written};
3816     my $rOpts                   = $self->{_rOpts};
3817     if (
3818         (
3819             ( $input_line_number - $last_input_line_written ) >=
3820             $rOpts->{'logfile-gap'}
3821         )
3822         || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3823       )
3824     {
3825         my $rlevels                      = $line_of_tokens->{_rlevels};
3826         my $structural_indentation_level = $$rlevels[0];
3827         $self->{_last_input_line_written} = $input_line_number;
3828         ( my $out_str = $input_line ) =~ s/^\s*//;
3829         chomp $out_str;
3830
3831         $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3832
3833         if ( length($out_str) > 35 ) {
3834             $out_str = substr( $out_str, 0, 35 ) . " ....";
3835         }
3836         $self->logfile_output( "", "$out_str\n" );
3837     }
3838 }
3839
3840 sub write_logfile_entry {
3841     my $self = shift;
3842
3843     # add leading >>> to avoid confusing error mesages and code
3844     $self->logfile_output( ">>>", "@_" );
3845 }
3846
3847 sub write_column_headings {
3848     my $self = shift;
3849
3850     $self->{_wrote_column_headings} = 1;
3851     my $routput_array = $self->{_output_array};
3852     push @{$routput_array}, <<EOM;
3853 The nesting depths in the table below are at the start of the lines.
3854 The indicated output line numbers are not always exact.
3855 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3856
3857 in:out indent c b  nesting   code + messages; (messages begin with >>>)
3858 lines  levels i k            (code begins with one '.' per indent level)
3859 ------  ----- - - --------   -------------------------------------------
3860 EOM
3861 }
3862
3863 sub make_line_information_string {
3864
3865     # make columns of information when a logfile message needs to go out
3866     my $self                    = shift;
3867     my $line_of_tokens          = $self->{_line_of_tokens};
3868     my $input_line_number       = $line_of_tokens->{_line_number};
3869     my $line_information_string = "";
3870     if ($input_line_number) {
3871
3872         my $output_line_number   = $self->{_output_line_number};
3873         my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
3874         my $paren_depth          = $line_of_tokens->{_paren_depth};
3875         my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3876         my $python_indentation_level =
3877           $line_of_tokens->{_python_indentation_level};
3878         my $rlevels         = $line_of_tokens->{_rlevels};
3879         my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3880         my $rci_levels      = $line_of_tokens->{_rci_levels};
3881         my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3882
3883         my $structural_indentation_level = $$rlevels[0];
3884
3885         $self->write_column_headings() unless $self->{_wrote_column_headings};
3886
3887         # keep logfile columns aligned for scripts up to 999 lines;
3888         # for longer scripts it doesn't really matter
3889         my $extra_space = "";
3890         $extra_space .=
3891             ( $input_line_number < 10 )  ? "  "
3892           : ( $input_line_number < 100 ) ? " "
3893           :                                "";
3894         $extra_space .=
3895             ( $output_line_number < 10 )  ? "  "
3896           : ( $output_line_number < 100 ) ? " "
3897           :                                 "";
3898
3899         # there are 2 possible nesting strings:
3900         # the original which looks like this:  (0 [1 {2
3901         # the new one, which looks like this:  {{[
3902         # the new one is easier to read, and shows the order, but
3903         # could be arbitrarily long, so we use it unless it is too long
3904         my $nesting_string =
3905           "($paren_depth [$square_bracket_depth {$brace_depth";
3906         my $nesting_string_new = $$rnesting_tokens[0];
3907
3908         my $ci_level = $$rci_levels[0];
3909         if ( $ci_level > 9 ) { $ci_level = '*' }
3910         my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3911
3912         if ( length($nesting_string_new) <= 8 ) {
3913             $nesting_string =
3914               $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3915         }
3916         if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3917         $line_information_string =
3918 "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3919     }
3920     return $line_information_string;
3921 }
3922
3923 sub logfile_output {
3924     my $self = shift;
3925     my ( $prompt, $msg ) = @_;
3926     return if ( $self->{_block_log_output} );
3927
3928     my $routput_array = $self->{_output_array};
3929     if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3930         push @{$routput_array}, "$msg";
3931     }
3932     else {
3933         my $line_information_string = $self->make_line_information_string();
3934         $self->{_wrote_line_information_string} = 1;
3935
3936         if ($line_information_string) {
3937             push @{$routput_array}, "$line_information_string   $prompt$msg";
3938         }
3939         else {
3940             push @{$routput_array}, "$msg";
3941         }
3942     }
3943 }
3944
3945 sub get_saw_brace_error {
3946     my $self = shift;
3947     return $self->{_saw_brace_error};
3948 }
3949
3950 sub increment_brace_error {
3951     my $self = shift;
3952     $self->{_saw_brace_error}++;
3953 }
3954
3955 sub brace_warning {
3956     my $self = shift;
3957     use constant BRACE_WARNING_LIMIT => 10;
3958     my $saw_brace_error = $self->{_saw_brace_error};
3959
3960     if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3961         $self->warning(@_);
3962     }
3963     $saw_brace_error++;
3964     $self->{_saw_brace_error} = $saw_brace_error;
3965
3966     if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3967         $self->warning("No further warnings of this type will be given\n");
3968     }
3969 }
3970
3971 sub complain {
3972
3973     # handle non-critical warning messages based on input flag
3974     my $self  = shift;
3975     my $rOpts = $self->{_rOpts};
3976
3977     # these appear in .ERR output only if -w flag is used
3978     if ( $rOpts->{'warning-output'} ) {
3979         $self->warning(@_);
3980     }
3981
3982     # otherwise, they go to the .LOG file
3983     else {
3984         $self->{_complaint_count}++;
3985         $self->write_logfile_entry(@_);
3986     }
3987 }
3988
3989 sub warning {
3990
3991     # report errors to .ERR file (or stdout)
3992     my $self = shift;
3993     use constant WARNING_LIMIT => 50;
3994
3995     my $rOpts = $self->{_rOpts};
3996     unless ( $rOpts->{'quiet'} ) {
3997
3998         my $warning_count = $self->{_warning_count};
3999         unless ($warning_count) {
4000             my $warning_file = $self->{_warning_file};
4001             my $fh_warnings;
4002             if ( $rOpts->{'standard-error-output'} ) {
4003                 $fh_warnings = *STDERR;
4004             }
4005             else {
4006                 ( $fh_warnings, my $filename ) =
4007                   Perl::Tidy::streamhandle( $warning_file, 'w' );
4008                 $fh_warnings or die("couldn't open $filename $!\n");
4009                 warn "## Please see file $filename\n";
4010             }
4011             $self->{_fh_warnings} = $fh_warnings;
4012         }
4013
4014         my $fh_warnings = $self->{_fh_warnings};
4015         if ( $warning_count < WARNING_LIMIT ) {
4016             if ( $self->get_use_prefix() > 0 ) {
4017                 my $input_line_number =
4018                   Perl::Tidy::Tokenizer::get_input_line_number();
4019                 $fh_warnings->print("$input_line_number:\t@_");
4020                 $self->write_logfile_entry("WARNING: @_");
4021             }
4022             else {
4023                 $fh_warnings->print(@_);
4024                 $self->write_logfile_entry(@_);
4025             }
4026         }
4027         $warning_count++;
4028         $self->{_warning_count} = $warning_count;
4029
4030         if ( $warning_count == WARNING_LIMIT ) {
4031             $fh_warnings->print("No further warnings will be given\n");
4032         }
4033     }
4034 }
4035
4036 # programming bug codes:
4037 #   -1 = no bug
4038 #    0 = maybe, not sure.
4039 #    1 = definitely
4040 sub report_possible_bug {
4041     my $self         = shift;
4042     my $saw_code_bug = $self->{_saw_code_bug};
4043     $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4044 }
4045
4046 sub report_definite_bug {
4047     my $self = shift;
4048     $self->{_saw_code_bug} = 1;
4049 }
4050
4051 sub ask_user_for_bug_report {
4052     my $self = shift;
4053
4054     my ( $infile_syntax_ok, $formatter ) = @_;
4055     my $saw_code_bug = $self->{_saw_code_bug};
4056     if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4057         $self->warning(<<EOM);
4058
4059 You may have encountered a code bug in perltidy.  If you think so, and
4060 the problem is not listed in the BUGS file at
4061 http://perltidy.sourceforge.net, please report it so that it can be
4062 corrected.  Include the smallest possible script which has the problem,
4063 along with the .LOG file. See the manual pages for contact information.
4064 Thank you!
4065 EOM
4066
4067     }
4068     elsif ( $saw_code_bug == 1 ) {
4069         if ( $self->{_saw_extrude} ) {
4070             $self->warning(<<EOM);
4071
4072 You may have encountered a bug in perltidy.  However, since you are using the
4073 -extrude option, the problem may be with perl or one of its modules, which have
4074 occasional problems with this type of file.  If you believe that the
4075 problem is with perltidy, and the problem is not listed in the BUGS file at
4076 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4077 Include the smallest possible script which has the problem, along with the .LOG
4078 file. See the manual pages for contact information.
4079 Thank you!
4080 EOM
4081         }
4082         else {
4083             $self->warning(<<EOM);
4084
4085 Oops, you seem to have encountered a bug in perltidy.  Please check the
4086 BUGS file at http://perltidy.sourceforge.net.  If the problem is not
4087 listed there, please report it so that it can be corrected.  Include the
4088 smallest possible script which produces this message, along with the
4089 .LOG file if appropriate.  See the manual pages for contact information.
4090 Your efforts are appreciated.
4091 Thank you!
4092 EOM
4093             my $added_semicolon_count = 0;
4094             eval {
4095                 $added_semicolon_count =
4096                   $formatter->get_added_semicolon_count();
4097             };
4098             if ( $added_semicolon_count > 0 ) {
4099                 $self->warning(<<EOM);
4100
4101 The log file shows that perltidy added $added_semicolon_count semicolons.
4102 Please rerun with -nasc to see if that is the cause of the syntax error.  Even
4103 if that is the problem, please report it so that it can be fixed.
4104 EOM
4105
4106             }
4107         }
4108     }
4109 }
4110
4111 sub finish {
4112
4113     # called after all formatting to summarize errors
4114     my $self = shift;
4115     my ( $infile_syntax_ok, $formatter ) = @_;
4116
4117     my $rOpts         = $self->{_rOpts};
4118     my $warning_count = $self->{_warning_count};
4119     my $saw_code_bug  = $self->{_saw_code_bug};
4120
4121     my $save_logfile =
4122          ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4123       || $saw_code_bug == 1
4124       || $rOpts->{'logfile'};
4125     my $log_file = $self->{_log_file};
4126     if ($warning_count) {
4127         if ($save_logfile) {
4128             $self->block_log_output();    # avoid echoing this to the logfile
4129             $self->warning(
4130                 "The logfile $log_file may contain useful information\n");
4131             $self->unblock_log_output();
4132         }
4133
4134         if ( $self->{_complaint_count} > 0 ) {
4135             $self->warning(
4136 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4137             );
4138         }
4139
4140         if ( $self->{_saw_brace_error}
4141             && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
4142         {
4143             $self->warning("To save a full .LOG file rerun with -g\n");
4144         }
4145     }
4146     $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4147
4148     if ($save_logfile) {
4149         my $log_file = $self->{_log_file};
4150         my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4151         if ($fh) {
4152             my $routput_array = $self->{_output_array};
4153             foreach ( @{$routput_array} ) { $fh->print($_) }
4154             eval { $fh->close() };
4155         }
4156     }
4157 }
4158
4159 #####################################################################
4160 #
4161 # The Perl::Tidy::DevNull class supplies a dummy print method
4162 #
4163 #####################################################################
4164
4165 package Perl::Tidy::DevNull;
4166 sub new { return bless {}, $_[0] }
4167 sub print { return }
4168 sub close { return }
4169
4170 #####################################################################
4171 #
4172 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4173 #
4174 #####################################################################
4175
4176 package Perl::Tidy::HtmlWriter;
4177
4178 use File::Basename;
4179
4180 # class variables
4181 use vars qw{
4182   %html_color
4183   %html_bold
4184   %html_italic
4185   %token_short_names
4186   %short_to_long_names
4187   $rOpts
4188   $css_filename
4189   $css_linkname
4190   $missing_html_entities
4191 };
4192
4193 # replace unsafe characters with HTML entity representation if HTML::Entities
4194 # is available
4195 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4196
4197 sub new {
4198
4199     my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4200         $html_src_extension )
4201       = @_;
4202
4203     my $html_file_opened = 0;
4204     my $html_fh;
4205     ( $html_fh, my $html_filename ) =
4206       Perl::Tidy::streamhandle( $html_file, 'w' );
4207     unless ($html_fh) {
4208         warn("can't open $html_file: $!\n");
4209         return undef;
4210     }
4211     $html_file_opened = 1;
4212
4213     if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4214         $input_file = "NONAME";
4215     }
4216
4217     # write the table of contents to a string
4218     my $toc_string;
4219     my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4220
4221     my $html_pre_fh;
4222     my @pre_string_stack;
4223     if ( $rOpts->{'html-pre-only'} ) {
4224
4225         # pre section goes directly to the output stream
4226         $html_pre_fh = $html_fh;
4227         $html_pre_fh->print( <<"PRE_END");
4228 <pre>
4229 PRE_END
4230     }
4231     else {
4232
4233         # pre section go out to a temporary string
4234         my $pre_string;
4235         $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4236         push @pre_string_stack, \$pre_string;
4237     }
4238
4239     # pod text gets diverted if the 'pod2html' is used
4240     my $html_pod_fh;
4241     my $pod_string;
4242     if ( $rOpts->{'pod2html'} ) {
4243         if ( $rOpts->{'html-pre-only'} ) {
4244             undef $rOpts->{'pod2html'};
4245         }
4246         else {
4247             eval "use Pod::Html";
4248             if ($@) {
4249                 warn
4250 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4251                 undef $rOpts->{'pod2html'};
4252             }
4253             else {
4254                 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4255             }
4256         }
4257     }
4258
4259     my $toc_filename;
4260     my $src_filename;
4261     if ( $rOpts->{'frames'} ) {
4262         unless ($extension) {
4263             warn
4264 "cannot use frames without a specified output extension; ignoring -frm\n";
4265             undef $rOpts->{'frames'};
4266         }
4267         else {
4268             $toc_filename = $input_file . $html_toc_extension . $extension;
4269             $src_filename = $input_file . $html_src_extension . $extension;
4270         }
4271     }
4272
4273     # ----------------------------------------------------------
4274     # Output is now directed as follows:
4275     # html_toc_fh <-- table of contents items
4276     # html_pre_fh <-- the <pre> section of formatted code, except:
4277     # html_pod_fh <-- pod goes here with the pod2html option
4278     # ----------------------------------------------------------
4279
4280     my $title = $rOpts->{'title'};
4281     unless ($title) {
4282         ( $title, my $path ) = fileparse($input_file);
4283     }
4284     my $toc_item_count = 0;
4285     my $in_toc_package = "";
4286     my $last_level     = 0;
4287     bless {
4288         _input_file        => $input_file,          # name of input file
4289         _title             => $title,               # title, unescaped
4290         _html_file         => $html_file,           # name of .html output file
4291         _toc_filename      => $toc_filename,        # for frames option
4292         _src_filename      => $src_filename,        # for frames option
4293         _html_file_opened  => $html_file_opened,    # a flag
4294         _html_fh           => $html_fh,             # the output stream
4295         _html_pre_fh       => $html_pre_fh,         # pre section goes here
4296         _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
4297         _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
4298         _rpod_string       => \$pod_string,         # string holding pod
4299         _pod_cut_count     => 0,                    # how many =cut's?
4300         _html_toc_fh       => $html_toc_fh,         # fh for table of contents
4301         _rtoc_string       => \$toc_string,         # string holding toc
4302         _rtoc_item_count   => \$toc_item_count,     # how many toc items
4303         _rin_toc_package   => \$in_toc_package,     # package name
4304         _rtoc_name_count   => {},                   # hash to track unique names
4305         _rpackage_stack    => [],                   # stack to check for package
4306                                                     # name changes
4307         _rlast_level       => \$last_level,         # brace indentation level
4308     }, $class;
4309 }
4310
4311 sub add_toc_item {
4312
4313     # Add an item to the html table of contents.
4314     # This is called even if no table of contents is written,
4315     # because we still want to put the anchors in the <pre> text.
4316     # We are given an anchor name and its type; types are:
4317     #      'package', 'sub', '__END__', '__DATA__', 'EOF'
4318     # There must be an 'EOF' call at the end to wrap things up.
4319     my $self = shift;
4320     my ( $name, $type ) = @_;
4321     my $html_toc_fh     = $self->{_html_toc_fh};
4322     my $html_pre_fh     = $self->{_html_pre_fh};
4323     my $rtoc_name_count = $self->{_rtoc_name_count};
4324     my $rtoc_item_count = $self->{_rtoc_item_count};
4325     my $rlast_level     = $self->{_rlast_level};
4326     my $rin_toc_package = $self->{_rin_toc_package};
4327     my $rpackage_stack  = $self->{_rpackage_stack};
4328
4329     # packages contain sublists of subs, so to avoid errors all package
4330     # items are written and finished with the following routines
4331     my $end_package_list = sub {
4332         if ($$rin_toc_package) {
4333             $html_toc_fh->print("</ul>\n</li>\n");
4334             $$rin_toc_package = "";
4335         }
4336     };
4337
4338     my $start_package_list = sub {
4339         my ( $unique_name, $package ) = @_;
4340         if ($$rin_toc_package) { $end_package_list->() }
4341         $html_toc_fh->print(<<EOM);
4342 <li><a href=\"#$unique_name\">package $package</a>
4343 <ul>
4344 EOM
4345         $$rin_toc_package = $package;
4346     };
4347
4348     # start the table of contents on the first item
4349     unless ($$rtoc_item_count) {
4350
4351         # but just quit if we hit EOF without any other entries
4352         # in this case, there will be no toc
4353         return if ( $type eq 'EOF' );
4354         $html_toc_fh->print( <<"TOC_END");
4355 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4356 <ul>
4357 TOC_END
4358     }
4359     $$rtoc_item_count++;
4360
4361     # make a unique anchor name for this location:
4362     #   - packages get a 'package-' prefix
4363     #   - subs use their names
4364     my $unique_name = $name;
4365     if ( $type eq 'package' ) { $unique_name = "package-$name" }
4366
4367     # append '-1', '-2', etc if necessary to make unique; this will
4368     # be unique because subs and packages cannot have a '-'
4369     if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4370         $unique_name .= "-$count";
4371     }
4372
4373     #   - all names get terminal '-' if pod2html is used, to avoid
4374     #     conflicts with anchor names created by pod2html
4375     if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4376
4377     # start/stop lists of subs
4378     if ( $type eq 'sub' ) {
4379         my $package = $rpackage_stack->[$$rlast_level];
4380         unless ($package) { $package = 'main' }
4381
4382         # if we're already in a package/sub list, be sure its the right
4383         # package or else close it
4384         if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4385             $end_package_list->();
4386         }
4387
4388         # start a package/sub list if necessary
4389         unless ($$rin_toc_package) {
4390             $start_package_list->( $unique_name, $package );
4391         }
4392     }
4393
4394     # now write an entry in the toc for this item
4395     if ( $type eq 'package' ) {
4396         $start_package_list->( $unique_name, $name );
4397     }
4398     elsif ( $type eq 'sub' ) {
4399         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4400     }
4401     else {
4402         $end_package_list->();
4403         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4404     }
4405
4406     # write the anchor in the <pre> section
4407     $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4408
4409     # end the table of contents, if any, on the end of file
4410     if ( $type eq 'EOF' ) {
4411         $html_toc_fh->print( <<"TOC_END");
4412 </ul>
4413 <!-- END CODE INDEX -->
4414 TOC_END
4415     }
4416 }
4417
4418 BEGIN {
4419
4420     # This is the official list of tokens which may be identified by the
4421     # user.  Long names are used as getopt keys.  Short names are
4422     # convenient short abbreviations for specifying input.  Short names
4423     # somewhat resemble token type characters, but are often different
4424     # because they may only be alphanumeric, to allow command line
4425     # input.  Also, note that because of case insensitivity of html,
4426     # this table must be in a single case only (I've chosen to use all
4427     # lower case).
4428     # When adding NEW_TOKENS: update this hash table
4429     # short names => long names
4430     %short_to_long_names = (
4431         'n'  => 'numeric',
4432         'p'  => 'paren',
4433         'q'  => 'quote',
4434         's'  => 'structure',
4435         'c'  => 'comment',
4436         'v'  => 'v-string',
4437         'cm' => 'comma',
4438         'w'  => 'bareword',
4439         'co' => 'colon',
4440         'pu' => 'punctuation',
4441         'i'  => 'identifier',
4442         'j'  => 'label',
4443         'h'  => 'here-doc-target',
4444         'hh' => 'here-doc-text',
4445         'k'  => 'keyword',
4446         'sc' => 'semicolon',
4447         'm'  => 'subroutine',
4448         'pd' => 'pod-text',
4449     );
4450
4451     # Now we have to map actual token types into one of the above short
4452     # names; any token types not mapped will get 'punctuation'
4453     # properties.
4454
4455     # The values of this hash table correspond to the keys of the
4456     # previous hash table.
4457     # The keys of this hash table are token types and can be seen
4458     # by running with --dump-token-types (-dtt).
4459
4460     # When adding NEW_TOKENS: update this hash table
4461     # $type => $short_name
4462     %token_short_names = (
4463         '#'  => 'c',
4464         'n'  => 'n',
4465         'v'  => 'v',
4466         'k'  => 'k',
4467         'F'  => 'k',
4468         'Q'  => 'q',
4469         'q'  => 'q',
4470         'J'  => 'j',
4471         'j'  => 'j',
4472         'h'  => 'h',
4473         'H'  => 'hh',
4474         'w'  => 'w',
4475         ','  => 'cm',
4476         '=>' => 'cm',
4477         ';'  => 'sc',
4478         ':'  => 'co',
4479         'f'  => 'sc',
4480         '('  => 'p',
4481         ')'  => 'p',
4482         'M'  => 'm',
4483         'P'  => 'pd',
4484         'A'  => 'co',
4485     );
4486
4487     # These token types will all be called identifiers for now
4488     # FIXME: need to separate user defined modules as separate type
4489     my @identifier = qw" i t U C Y Z G :: ";
4490     @token_short_names{@identifier} = ('i') x scalar(@identifier);
4491
4492     # These token types will be called 'structure'
4493     my @structure = qw" { } ";
4494     @token_short_names{@structure} = ('s') x scalar(@structure);
4495
4496     # OLD NOTES: save for reference
4497     # Any of these could be added later if it would be useful.
4498     # For now, they will by default become punctuation
4499     #    my @list = qw" L R [ ] ";
4500     #    @token_long_names{@list} = ('non-structure') x scalar(@list);
4501     #
4502     #    my @list = qw"
4503     #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4504     #      ";
4505     #    @token_long_names{@list} = ('math') x scalar(@list);
4506     #
4507     #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
4508     #    @token_long_names{@list} = ('bit') x scalar(@list);
4509     #
4510     #    my @list = qw" == != < > <= <=> ";
4511     #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4512     #
4513     #    my @list = qw" && || ! &&= ||= //= ";
4514     #    @token_long_names{@list} = ('logical') x scalar(@list);
4515     #
4516     #    my @list = qw" . .= =~ !~ x x= ";
4517     #    @token_long_names{@list} = ('string-operators') x scalar(@list);
4518     #
4519     #    # Incomplete..
4520     #    my @list = qw" .. -> <> ... \ ? ";
4521     #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
4522
4523 }
4524
4525 sub make_getopt_long_names {
4526     my $class = shift;
4527     my ($rgetopt_names) = @_;
4528     while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4529         push @$rgetopt_names, "html-color-$name=s";
4530         push @$rgetopt_names, "html-italic-$name!";
4531         push @$rgetopt_names, "html-bold-$name!";
4532     }
4533     push @$rgetopt_names, "html-color-background=s";
4534     push @$rgetopt_names, "html-linked-style-sheet=s";
4535     push @$rgetopt_names, "nohtml-style-sheets";
4536     push @$rgetopt_names, "html-pre-only";
4537     push @$rgetopt_names, "html-line-numbers";
4538     push @$rgetopt_names, "html-entities!";
4539     push @$rgetopt_names, "stylesheet";
4540     push @$rgetopt_names, "html-table-of-contents!";
4541     push @$rgetopt_names, "pod2html!";
4542     push @$rgetopt_names, "frames!";
4543     push @$rgetopt_names, "html-toc-extension=s";
4544     push @$rgetopt_names, "html-src-extension=s";
4545
4546     # Pod::Html parameters:
4547     push @$rgetopt_names, "backlink=s";
4548     push @$rgetopt_names, "cachedir=s";
4549     push @$rgetopt_names, "htmlroot=s";
4550     push @$rgetopt_names, "libpods=s";
4551     push @$rgetopt_names, "podpath=s";
4552     push @$rgetopt_names, "podroot=s";
4553     push @$rgetopt_names, "title=s";
4554
4555     # Pod::Html parameters with leading 'pod' which will be removed
4556     # before the call to Pod::Html
4557     push @$rgetopt_names, "podquiet!";
4558     push @$rgetopt_names, "podverbose!";
4559     push @$rgetopt_names, "podrecurse!";
4560     push @$rgetopt_names, "podflush";
4561     push @$rgetopt_names, "podheader!";
4562     push @$rgetopt_names, "podindex!";
4563 }
4564
4565 sub make_abbreviated_names {
4566
4567     # We're appending things like this to the expansion list:
4568     #      'hcc'    => [qw(html-color-comment)],
4569     #      'hck'    => [qw(html-color-keyword)],
4570     #  etc
4571     my $class = shift;
4572     my ($rexpansion) = @_;
4573
4574     # abbreviations for color/bold/italic properties
4575     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4576         ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
4577         ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
4578         ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
4579         ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4580         ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4581     }
4582
4583     # abbreviations for all other html options
4584     ${$rexpansion}{"hcbg"}  = ["html-color-background"];
4585     ${$rexpansion}{"pre"}   = ["html-pre-only"];
4586     ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
4587     ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
4588     ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
4589     ${$rexpansion}{"hent"}  = ["html-entities"];
4590     ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4591     ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
4592     ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
4593     ${$rexpansion}{"ss"}    = ["stylesheet"];
4594     ${$rexpansion}{"pod"}   = ["pod2html"];
4595     ${$rexpansion}{"npod"}  = ["nopod2html"];
4596     ${$rexpansion}{"frm"}   = ["frames"];
4597     ${$rexpansion}{"nfrm"}  = ["noframes"];
4598     ${$rexpansion}{"text"}  = ["html-toc-extension"];
4599     ${$rexpansion}{"sext"}  = ["html-src-extension"];
4600 }
4601
4602 sub check_options {
4603
4604     # This will be called once after options have been parsed
4605     my $class = shift;
4606     $rOpts = shift;
4607
4608     # X11 color names for default settings that seemed to look ok
4609     # (these color names are only used for programming clarity; the hex
4610     # numbers are actually written)
4611     use constant ForestGreen   => "#228B22";
4612     use constant SaddleBrown   => "#8B4513";
4613     use constant magenta4      => "#8B008B";
4614     use constant IndianRed3    => "#CD5555";
4615     use constant DeepSkyBlue4  => "#00688B";
4616     use constant MediumOrchid3 => "#B452CD";
4617     use constant black         => "#000000";
4618     use constant white         => "#FFFFFF";
4619     use constant red           => "#FF0000";
4620
4621     # set default color, bold, italic properties
4622     # anything not listed here will be given the default (punctuation) color --
4623     # these types currently not listed and get default: ws pu s sc cm co p
4624     # When adding NEW_TOKENS: add an entry here if you don't want defaults
4625
4626     # set_default_properties( $short_name, default_color, bold?, italic? );
4627     set_default_properties( 'c',  ForestGreen,   0, 0 );
4628     set_default_properties( 'pd', ForestGreen,   0, 1 );
4629     set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
4630     set_default_properties( 'q',  IndianRed3,    0, 0 );
4631     set_default_properties( 'hh', IndianRed3,    0, 1 );
4632     set_default_properties( 'h',  IndianRed3,    1, 0 );
4633     set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
4634     set_default_properties( 'w',  black,         0, 0 );
4635     set_default_properties( 'n',  MediumOrchid3, 0, 0 );
4636     set_default_properties( 'v',  MediumOrchid3, 0, 0 );
4637     set_default_properties( 'j',  IndianRed3,    1, 0 );
4638     set_default_properties( 'm',  red,           1, 0 );
4639
4640     set_default_color( 'html-color-background',  white );
4641     set_default_color( 'html-color-punctuation', black );
4642
4643     # setup property lookup tables for tokens based on their short names
4644     # every token type has a short name, and will use these tables
4645     # to do the html markup
4646     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4647         $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
4648         $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
4649         $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4650     }
4651
4652     # write style sheet to STDOUT and die if requested
4653     if ( defined( $rOpts->{'stylesheet'} ) ) {
4654         write_style_sheet_file('-');
4655         exit 1;
4656     }
4657
4658     # make sure user gives a file name after -css
4659     if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4660         $css_linkname = $rOpts->{'html-linked-style-sheet'};
4661         if ( $css_linkname =~ /^-/ ) {
4662             die "You must specify a valid filename after -css\n";
4663         }
4664     }
4665
4666     # check for conflict
4667     if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4668         $rOpts->{'nohtml-style-sheets'} = 0;
4669         warning("You can't specify both -css and -nss; -nss ignored\n");
4670     }
4671
4672     # write a style sheet file if necessary
4673     if ($css_linkname) {
4674
4675         # if the selected filename exists, don't write, because user may
4676         # have done some work by hand to create it; use backup name instead
4677         # Also, this will avoid a potential disaster in which the user
4678         # forgets to specify the style sheet, like this:
4679         #    perltidy -html -css myfile1.pl myfile2.pl
4680         # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4681         my $css_filename = $css_linkname;
4682         unless ( -e $css_filename ) {
4683             write_style_sheet_file($css_filename);
4684         }
4685     }
4686     $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4687 }
4688
4689 sub write_style_sheet_file {
4690
4691     my $css_filename = shift;
4692     my $fh;
4693     unless ( $fh = IO::File->new("> $css_filename") ) {
4694         die "can't open $css_filename: $!\n";
4695     }
4696     write_style_sheet_data($fh);
4697     eval { $fh->close };
4698 }
4699
4700 sub write_style_sheet_data {
4701
4702     # write the style sheet data to an open file handle
4703     my $fh = shift;
4704
4705     my $bg_color   = $rOpts->{'html-color-background'};
4706     my $text_color = $rOpts->{'html-color-punctuation'};
4707
4708     # pre-bgcolor is new, and may not be defined
4709     my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4710     $pre_bg_color = $bg_color unless $pre_bg_color;
4711
4712     $fh->print(<<"EOM");
4713 /* default style sheet generated by perltidy */
4714 body {background: $bg_color; color: $text_color}
4715 pre { color: $text_color;
4716       background: $pre_bg_color;
4717       font-family: courier;
4718     }
4719
4720 EOM
4721
4722     foreach my $short_name ( sort keys %short_to_long_names ) {
4723         my $long_name = $short_to_long_names{$short_name};
4724
4725         my $abbrev = '.' . $short_name;
4726         if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
4727         my $color = $html_color{$short_name};
4728         if ( !defined($color) ) { $color = $text_color }
4729         $fh->print("$abbrev \{ color: $color;");
4730
4731         if ( $html_bold{$short_name} ) {
4732             $fh->print(" font-weight:bold;");
4733         }
4734
4735         if ( $html_italic{$short_name} ) {
4736             $fh->print(" font-style:italic;");
4737         }
4738         $fh->print("} /* $long_name */\n");
4739     }
4740 }
4741
4742 sub set_default_color {
4743
4744     # make sure that options hash $rOpts->{$key} contains a valid color
4745     my ( $key, $color ) = @_;
4746     if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4747     $rOpts->{$key} = check_RGB($color);
4748 }
4749
4750 sub check_RGB {
4751
4752     # if color is a 6 digit hex RGB value, prepend a #, otherwise
4753     # assume that it is a valid ascii color name
4754     my ($color) = @_;
4755     if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4756     return $color;
4757 }
4758
4759 sub set_default_properties {
4760     my ( $short_name, $color, $bold, $italic ) = @_;
4761
4762     set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4763     my $key;
4764     $key = "html-bold-$short_to_long_names{$short_name}";
4765     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4766     $key = "html-italic-$short_to_long_names{$short_name}";
4767     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4768 }
4769
4770 sub pod_to_html {
4771
4772     # Use Pod::Html to process the pod and make the page
4773     # then merge the perltidy code sections into it.
4774     # return 1 if success, 0 otherwise
4775     my $self = shift;
4776     my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4777     my $input_file   = $self->{_input_file};
4778     my $title        = $self->{_title};
4779     my $success_flag = 0;
4780
4781     # don't try to use pod2html if no pod
4782     unless ($pod_string) {
4783         return $success_flag;
4784     }
4785
4786     # Pod::Html requires a real temporary filename
4787     # If we are making a frame, we have a name available
4788     # Otherwise, we have to fine one
4789     my $tmpfile;
4790     if ( $rOpts->{'frames'} ) {
4791         $tmpfile = $self->{_toc_filename};
4792     }
4793     else {
4794         $tmpfile = Perl::Tidy::make_temporary_filename();
4795     }
4796     my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4797     unless ($fh_tmp) {
4798         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4799         return $success_flag;
4800     }
4801
4802     #------------------------------------------------------------------
4803     # Warning: a temporary file is open; we have to clean up if
4804     # things go bad.  From here on all returns should be by going to
4805     # RETURN so that the temporary file gets unlinked.
4806     #------------------------------------------------------------------
4807
4808     # write the pod text to the temporary file
4809     $fh_tmp->print($pod_string);
4810     $fh_tmp->close();
4811
4812     # Hand off the pod to pod2html.
4813     # Note that we can use the same temporary filename for input and output
4814     # because of the way pod2html works.
4815     {
4816
4817         my @args;
4818         push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4819         my $kw;
4820
4821         # Flags with string args:
4822         # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4823         # "podpath=s", "podroot=s"
4824         # Note: -css=s is handled by perltidy itself
4825         foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4826             if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4827         }
4828
4829         # Toggle switches; these have extra leading 'pod'
4830         # "header!", "index!", "recurse!", "quiet!", "verbose!"
4831         foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4832             my $kwd = $kw;    # allows us to strip 'pod'
4833             if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4834             elsif ( defined( $rOpts->{$kw} ) ) {
4835                 $kwd =~ s/^pod//;
4836                 push @args, "--no$kwd";
4837             }
4838         }
4839
4840         # "flush",
4841         $kw = 'podflush';
4842         if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4843
4844         # Must clean up if pod2html dies (it can);
4845         # Be careful not to overwrite callers __DIE__ routine
4846         local $SIG{__DIE__} = sub {
4847             print $_[0];
4848             unlink $tmpfile if -e $tmpfile;
4849             exit 1;
4850         };
4851
4852         pod2html(@args);
4853     }
4854     $fh_tmp = IO::File->new( $tmpfile, 'r' );
4855     unless ($fh_tmp) {
4856
4857         # this error shouldn't happen ... we just used this filename
4858         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4859         goto RETURN;
4860     }
4861
4862     my $html_fh = $self->{_html_fh};
4863     my @toc;
4864     my $in_toc;
4865     my $no_print;
4866
4867     # This routine will write the html selectively and store the toc
4868     my $html_print = sub {
4869         foreach (@_) {
4870             $html_fh->print($_) unless ($no_print);
4871             if ($in_toc) { push @toc, $_ }
4872         }
4873     };
4874
4875     # loop over lines of html output from pod2html and merge in
4876     # the necessary perltidy html sections
4877     my ( $saw_body, $saw_index, $saw_body_end );
4878     while ( my $line = $fh_tmp->getline() ) {
4879
4880         if ( $line =~ /^\s*<html>\s*$/i ) {
4881             my $date = localtime;
4882             $html_print->("<!-- Generated by perltidy on $date -->\n");
4883             $html_print->($line);
4884         }
4885
4886         # Copy the perltidy css, if any, after <body> tag
4887         elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4888             $saw_body = 1;
4889             $html_print->($css_string) if $css_string;
4890             $html_print->($line);
4891
4892             # add a top anchor and heading
4893             $html_print->("<a name=\"-top-\"></a>\n");
4894             $title = escape_html($title);
4895             $html_print->("<h1>$title</h1>\n");
4896         }
4897         elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4898             $in_toc = 1;
4899
4900             # when frames are used, an extra table of contents in the
4901             # contents panel is confusing, so don't print it
4902             $no_print = $rOpts->{'frames'}
4903               || !$rOpts->{'html-table-of-contents'};
4904             $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4905             $html_print->($line);
4906         }
4907
4908         # Copy the perltidy toc, if any, after the Pod::Html toc
4909         elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4910             $saw_index = 1;
4911             $html_print->($line);
4912             if ($toc_string) {
4913                 $html_print->("<hr />\n") if $rOpts->{'frames'};
4914                 $html_print->("<h2>Code Index:</h2>\n");
4915                 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4916                 $html_print->(@toc);
4917             }
4918             $in_toc   = 0;
4919             $no_print = 0;
4920         }
4921
4922         # Copy one perltidy section after each marker
4923         elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4924             $line = $2;
4925             $html_print->($1) if $1;
4926
4927             # Intermingle code and pod sections if we saw multiple =cut's.
4928             if ( $self->{_pod_cut_count} > 1 ) {
4929                 my $rpre_string = shift(@$rpre_string_stack);
4930                 if ($$rpre_string) {
4931                     $html_print->('<pre>');
4932                     $html_print->($$rpre_string);
4933                     $html_print->('</pre>');
4934                 }
4935                 else {
4936
4937                     # shouldn't happen: we stored a string before writing
4938                     # each marker.
4939                     warn
4940 "Problem merging html stream with pod2html; order may be wrong\n";
4941                 }
4942                 $html_print->($line);
4943             }
4944
4945             # If didn't see multiple =cut lines, we'll put the pod out first
4946             # and then the code, because it's less confusing.
4947             else {
4948
4949                 # since we are not intermixing code and pod, we don't need
4950                 # or want any <hr> lines which separated pod and code
4951                 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4952             }
4953         }
4954
4955         # Copy any remaining code section before the </body> tag
4956         elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4957             $saw_body_end = 1;
4958             if (@$rpre_string_stack) {
4959                 unless ( $self->{_pod_cut_count} > 1 ) {
4960                     $html_print->('<hr />');
4961                 }
4962                 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4963                     $html_print->('<pre>');
4964                     $html_print->($$rpre_string);
4965                     $html_print->('</pre>');
4966                 }
4967             }
4968             $html_print->($line);
4969         }
4970         else {
4971             $html_print->($line);
4972         }
4973     }
4974
4975     $success_flag = 1;
4976     unless ($saw_body) {
4977         warn "Did not see <body> in pod2html output\n";
4978         $success_flag = 0;
4979     }
4980     unless ($saw_body_end) {
4981         warn "Did not see </body> in pod2html output\n";
4982         $success_flag = 0;
4983     }
4984     unless ($saw_index) {
4985         warn "Did not find INDEX END in pod2html output\n";
4986         $success_flag = 0;
4987     }
4988
4989   RETURN:
4990     eval { $html_fh->close() };
4991
4992     # note that we have to unlink tmpfile before making frames
4993     # because the tmpfile may be one of the names used for frames
4994     unlink $tmpfile if -e $tmpfile;
4995     if ( $success_flag && $rOpts->{'frames'} ) {
4996         $self->make_frame( \@toc );
4997     }
4998     return $success_flag;
4999 }
5000
5001 sub make_frame {
5002
5003     # Make a frame with table of contents in the left panel
5004     # and the text in the right panel.
5005     # On entry:
5006     #  $html_filename contains the no-frames html output
5007     #  $rtoc is a reference to an array with the table of contents
5008     my $self          = shift;
5009     my ($rtoc)        = @_;
5010     my $input_file    = $self->{_input_file};
5011     my $html_filename = $self->{_html_file};
5012     my $toc_filename  = $self->{_toc_filename};
5013     my $src_filename  = $self->{_src_filename};
5014     my $title         = $self->{_title};
5015     $title = escape_html($title);
5016
5017     # FUTURE input parameter:
5018     my $top_basename = "";
5019
5020     # We need to produce 3 html files:
5021     # 1. - the table of contents
5022     # 2. - the contents (source code) itself
5023     # 3. - the frame which contains them
5024
5025     # get basenames for relative links
5026     my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5027     my ( $src_basename, $src_path ) = fileparse($src_filename);
5028
5029     # 1. Make the table of contents panel, with appropriate changes
5030     # to the anchor names
5031     my $src_frame_name = 'SRC';
5032     my $first_anchor =
5033       write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5034         $src_frame_name );
5035
5036     # 2. The current .html filename is renamed to be the contents panel
5037     rename( $html_filename, $src_filename )
5038       or die "Cannot rename $html_filename to $src_filename:$!\n";
5039
5040     # 3. Then use the original html filename for the frame
5041     write_frame_html(
5042         $title,        $html_filename, $top_basename,
5043         $toc_basename, $src_basename,  $src_frame_name
5044     );
5045 }
5046
5047 sub write_toc_html {
5048
5049     # write a separate html table of contents file for frames
5050     my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5051     my $fh = IO::File->new( $toc_filename, 'w' )
5052       or die "Cannot open $toc_filename:$!\n";
5053     $fh->print(<<EOM);
5054 <html>
5055 <head>
5056 <title>$title</title>
5057 </head>
5058 <body>
5059 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5060 EOM
5061
5062     my $first_anchor =
5063       change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5064     $fh->print( join "", @$rtoc );
5065
5066     $fh->print(<<EOM);
5067 </body>
5068 </html>
5069 EOM
5070
5071 }
5072
5073 sub write_frame_html {
5074
5075     # write an html file to be the table of contents frame
5076     my (
5077         $title,        $frame_filename, $top_basename,
5078         $toc_basename, $src_basename,   $src_frame_name
5079     ) = @_;
5080
5081     my $fh = IO::File->new( $frame_filename, 'w' )
5082       or die "Cannot open $toc_basename:$!\n";
5083
5084     $fh->print(<<EOM);
5085 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5086     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5087 <?xml version="1.0" encoding="iso-8859-1" ?>
5088 <html xmlns="http://www.w3.org/1999/xhtml">
5089 <head>
5090 <title>$title</title>
5091 </head>
5092 EOM
5093
5094     # two left panels, one right, if master index file
5095     if ($top_basename) {
5096         $fh->print(<<EOM);
5097 <frameset cols="20%,80%">
5098 <frameset rows="30%,70%">
5099 <frame src = "$top_basename" />
5100 <frame src = "$toc_basename" />
5101 </frameset>
5102 EOM
5103     }
5104
5105     # one left panels, one right, if no master index file
5106     else {
5107         $fh->print(<<EOM);
5108 <frameset cols="20%,*">
5109 <frame src = "$toc_basename" />
5110 EOM
5111     }
5112     $fh->print(<<EOM);
5113 <frame src = "$src_basename" name = "$src_frame_name" />
5114 <noframes>
5115 <body>
5116 <p>If you see this message, you are using a non-frame-capable web client.</p>
5117 <p>This document contains:</p>
5118 <ul>
5119 <li><a href="$toc_basename">A table of contents</a></li>
5120 <li><a href="$src_basename">The source code</a></li>
5121 </ul>
5122 </body>
5123 </noframes>
5124 </frameset>
5125 </html>
5126 EOM
5127 }
5128
5129 sub change_anchor_names {
5130
5131     # add a filename and target to anchors
5132     # also return the first anchor
5133     my ( $rlines, $filename, $target ) = @_;
5134     my $first_anchor;
5135     foreach my $line (@$rlines) {
5136
5137         #  We're looking for lines like this:
5138         #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5139         #  ----  -       --------  -----------------
5140         #  $1              $4            $5
5141         if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5142             my $pre  = $1;
5143             my $name = $4;
5144             my $post = $5;
5145             my $href = "$filename#$name";
5146             $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5147             unless ($first_anchor) { $first_anchor = $href }
5148         }
5149     }
5150     return $first_anchor;
5151 }
5152
5153 sub close_html_file {
5154     my $self = shift;
5155     return unless $self->{_html_file_opened};
5156
5157     my $html_fh     = $self->{_html_fh};
5158     my $rtoc_string = $self->{_rtoc_string};
5159
5160     # There are 3 basic paths to html output...
5161
5162     # ---------------------------------
5163     # Path 1: finish up if in -pre mode
5164     # ---------------------------------
5165     if ( $rOpts->{'html-pre-only'} ) {
5166         $html_fh->print( <<"PRE_END");
5167 </pre>
5168 PRE_END
5169         eval { $html_fh->close() };
5170         return;
5171     }
5172
5173     # Finish the index
5174     $self->add_toc_item( 'EOF', 'EOF' );
5175
5176     my $rpre_string_stack = $self->{_rpre_string_stack};
5177
5178     # Patch to darken the <pre> background color in case of pod2html and
5179     # interleaved code/documentation.  Otherwise, the distinction
5180     # between code and documentation is blurred.
5181     if (   $rOpts->{pod2html}
5182         && $self->{_pod_cut_count} >= 1
5183         && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5184     {
5185         $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5186     }
5187
5188     # put the css or its link into a string, if used
5189     my $css_string;
5190     my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5191
5192     # use css linked to another file
5193     if ( $rOpts->{'html-linked-style-sheet'} ) {
5194         $fh_css->print(
5195             qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5196         );
5197     }
5198
5199     # use css embedded in this file
5200     elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5201         $fh_css->print( <<'ENDCSS');
5202 <style type="text/css">
5203 <!--
5204 ENDCSS
5205         write_style_sheet_data($fh_css);
5206         $fh_css->print( <<"ENDCSS");
5207 -->
5208 </style>
5209 ENDCSS
5210     }
5211
5212     # -----------------------------------------------------------
5213     # path 2: use pod2html if requested
5214     #         If we fail for some reason, continue on to path 3
5215     # -----------------------------------------------------------
5216     if ( $rOpts->{'pod2html'} ) {
5217         my $rpod_string = $self->{_rpod_string};
5218         $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5219             $rpre_string_stack )
5220           && return;
5221     }
5222
5223     # --------------------------------------------------
5224     # path 3: write code in html, with pod only in italics
5225     # --------------------------------------------------
5226     my $input_file = $self->{_input_file};
5227     my $title      = escape_html($input_file);
5228     my $date       = localtime;
5229     $html_fh->print( <<"HTML_START");
5230 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5231    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5232 <!-- Generated by perltidy on $date -->
5233 <html xmlns="http://www.w3.org/1999/xhtml">
5234 <head>
5235 <title>$title</title>
5236 HTML_START
5237
5238     # output the css, if used
5239     if ($css_string) {
5240         $html_fh->print($css_string);
5241         $html_fh->print( <<"ENDCSS");
5242 </head>
5243 <body>
5244 ENDCSS
5245     }
5246     else {
5247
5248         $html_fh->print( <<"HTML_START");
5249 </head>
5250 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5251 HTML_START
5252     }
5253
5254     $html_fh->print("<a name=\"-top-\"></a>\n");
5255     $html_fh->print( <<"EOM");
5256 <h1>$title</h1>
5257 EOM
5258
5259     # copy the table of contents
5260     if (   $$rtoc_string
5261         && !$rOpts->{'frames'}
5262         && $rOpts->{'html-table-of-contents'} )
5263     {
5264         $html_fh->print($$rtoc_string);
5265     }
5266
5267     # copy the pre section(s)
5268     my $fname_comment = $input_file;
5269     $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
5270     $html_fh->print( <<"END_PRE");
5271 <hr />
5272 <!-- contents of filename: $fname_comment -->
5273 <pre>
5274 END_PRE
5275
5276     foreach my $rpre_string (@$rpre_string_stack) {
5277         $html_fh->print($$rpre_string);
5278     }
5279
5280     # and finish the html page
5281     $html_fh->print( <<"HTML_END");
5282 </pre>
5283 </body>
5284 </html>
5285 HTML_END
5286     eval { $html_fh->close() };    # could be object without close method
5287
5288     if ( $rOpts->{'frames'} ) {
5289         my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5290         $self->make_frame( \@toc );
5291     }
5292 }
5293
5294 sub markup_tokens {
5295     my $self = shift;
5296     my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5297     my ( @colored_tokens, $j, $string, $type, $token, $level );
5298     my $rlast_level    = $self->{_rlast_level};
5299     my $rpackage_stack = $self->{_rpackage_stack};
5300
5301     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5302         $type  = $$rtoken_type[$j];
5303         $token = $$rtokens[$j];
5304         $level = $$rlevels[$j];
5305         $level = 0 if ( $level < 0 );
5306
5307         #-------------------------------------------------------
5308         # Update the package stack.  The package stack is needed to keep
5309         # the toc correct because some packages may be declared within
5310         # blocks and go out of scope when we leave the block.
5311         #-------------------------------------------------------
5312         if ( $level > $$rlast_level ) {
5313             unless ( $rpackage_stack->[ $level - 1 ] ) {
5314                 $rpackage_stack->[ $level - 1 ] = 'main';
5315             }
5316             $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5317         }
5318         elsif ( $level < $$rlast_level ) {
5319             my $package = $rpackage_stack->[$level];
5320             unless ($package) { $package = 'main' }
5321
5322             # if we change packages due to a nesting change, we
5323             # have to make an entry in the toc
5324             if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5325                 $self->add_toc_item( $package, 'package' );
5326             }
5327         }
5328         $$rlast_level = $level;
5329
5330         #-------------------------------------------------------
5331         # Intercept a sub name here; split it
5332         # into keyword 'sub' and sub name; and add an
5333         # entry in the toc
5334         #-------------------------------------------------------
5335         if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5336             $token = $self->markup_html_element( $1, 'k' );
5337             push @colored_tokens, $token;
5338             $token = $2;
5339             $type  = 'M';
5340
5341             # but don't include sub declarations in the toc;
5342             # these wlll have leading token types 'i;'
5343             my $signature = join "", @$rtoken_type;
5344             unless ( $signature =~ /^i;/ ) {
5345                 my $subname = $token;
5346                 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5347                 $self->add_toc_item( $subname, 'sub' );
5348             }
5349         }
5350
5351         #-------------------------------------------------------
5352         # Intercept a package name here; split it
5353         # into keyword 'package' and name; add to the toc,
5354         # and update the package stack
5355         #-------------------------------------------------------
5356         if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5357             $token = $self->markup_html_element( $1, 'k' );
5358             push @colored_tokens, $token;
5359             $token = $2;
5360             $type  = 'i';
5361             $self->add_toc_item( "$token", 'package' );
5362             $rpackage_stack->[$level] = $token;
5363         }
5364
5365         $token = $self->markup_html_element( $token, $type );
5366         push @colored_tokens, $token;
5367     }
5368     return ( \@colored_tokens );
5369 }
5370
5371 sub markup_html_element {
5372     my $self = shift;
5373     my ( $token, $type ) = @_;
5374
5375     return $token if ( $type eq 'b' );    # skip a blank token
5376     return $token if ( $token =~ /^\s*$/ );    # skip a blank line
5377     $token = escape_html($token);
5378
5379     # get the short abbreviation for this token type
5380     my $short_name = $token_short_names{$type};
5381     if ( !defined($short_name) ) {
5382         $short_name = "pu";                    # punctuation is default
5383     }
5384
5385     # handle style sheets..
5386     if ( !$rOpts->{'nohtml-style-sheets'} ) {
5387         if ( $short_name ne 'pu' ) {
5388             $token = qq(<span class="$short_name">) . $token . "</span>";
5389         }
5390     }
5391
5392     # handle no style sheets..
5393     else {
5394         my $color = $html_color{$short_name};
5395
5396         if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5397             $token = qq(<font color="$color">) . $token . "</font>";
5398         }
5399         if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5400         if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
5401     }
5402     return $token;
5403 }
5404
5405 sub escape_html {
5406
5407     my $token = shift;
5408     if ($missing_html_entities) {
5409         $token =~ s/\&/&amp;/g;
5410         $token =~ s/\</&lt;/g;
5411         $token =~ s/\>/&gt;/g;
5412         $token =~ s/\"/&quot;/g;
5413     }
5414     else {
5415         HTML::Entities::encode_entities($token);
5416     }
5417     return $token;
5418 }
5419
5420 sub finish_formatting {
5421
5422     # called after last line
5423     my $self = shift;
5424     $self->close_html_file();
5425     return;
5426 }
5427
5428 sub write_line {
5429
5430     my $self = shift;
5431     return unless $self->{_html_file_opened};
5432     my $html_pre_fh      = $self->{_html_pre_fh};
5433     my ($line_of_tokens) = @_;
5434     my $line_type        = $line_of_tokens->{_line_type};
5435     my $input_line       = $line_of_tokens->{_line_text};
5436     my $line_number      = $line_of_tokens->{_line_number};
5437     chomp $input_line;
5438
5439     # markup line of code..
5440     my $html_line;
5441     if ( $line_type eq 'CODE' ) {
5442         my $rtoken_type = $line_of_tokens->{_rtoken_type};
5443         my $rtokens     = $line_of_tokens->{_rtokens};
5444         my $rlevels     = $line_of_tokens->{_rlevels};
5445
5446         if ( $input_line =~ /(^\s*)/ ) {
5447             $html_line = $1;
5448         }
5449         else {
5450             $html_line = "";
5451         }
5452         my ($rcolored_tokens) =
5453           $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5454         $html_line .= join '', @$rcolored_tokens;
5455     }
5456
5457     # markup line of non-code..
5458     else {
5459         my $line_character;
5460         if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
5461         elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
5462         elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
5463         elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5464         elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
5465         elsif ( $line_type eq 'END_START' ) {
5466             $line_character = 'k';
5467             $self->add_toc_item( '__END__', '__END__' );
5468         }
5469         elsif ( $line_type eq 'DATA_START' ) {
5470             $line_character = 'k';
5471             $self->add_toc_item( '__DATA__', '__DATA__' );
5472         }
5473         elsif ( $line_type =~ /^POD/ ) {
5474             $line_character = 'P';
5475             if ( $rOpts->{'pod2html'} ) {
5476                 my $html_pod_fh = $self->{_html_pod_fh};
5477                 if ( $line_type eq 'POD_START' ) {
5478
5479                     my $rpre_string_stack = $self->{_rpre_string_stack};
5480                     my $rpre_string       = $rpre_string_stack->[-1];
5481
5482                     # if we have written any non-blank lines to the
5483                     # current pre section, start writing to a new output
5484                     # string
5485                     if ( $$rpre_string =~ /\S/ ) {
5486                         my $pre_string;
5487                         $html_pre_fh =
5488                           Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5489                         $self->{_html_pre_fh} = $html_pre_fh;
5490                         push @$rpre_string_stack, \$pre_string;
5491
5492                         # leave a marker in the pod stream so we know
5493                         # where to put the pre section we just
5494                         # finished.
5495                         my $for_html = '=for html';    # don't confuse pod utils
5496                         $html_pod_fh->print(<<EOM);
5497
5498 $for_html
5499 <!-- pERLTIDY sECTION -->
5500
5501 EOM
5502                     }
5503
5504                     # otherwise, just clear the current string and start
5505                     # over
5506                     else {
5507                         $$rpre_string = "";
5508                         $html_pod_fh->print("\n");
5509                     }
5510                 }
5511                 $html_pod_fh->print( $input_line . "\n" );
5512                 if ( $line_type eq 'POD_END' ) {
5513                     $self->{_pod_cut_count}++;
5514                     $html_pod_fh->print("\n");
5515                 }
5516                 return;
5517             }
5518         }
5519         else { $line_character = 'Q' }
5520         $html_line = $self->markup_html_element( $input_line, $line_character );
5521     }
5522
5523     # add the line number if requested
5524     if ( $rOpts->{'html-line-numbers'} ) {
5525         my $extra_space .=
5526             ( $line_number < 10 )   ? "   "
5527           : ( $line_number < 100 )  ? "  "
5528           : ( $line_number < 1000 ) ? " "
5529           :                           "";
5530         $html_line = $extra_space . $line_number . " " . $html_line;
5531     }
5532
5533     # write the line
5534     $html_pre_fh->print("$html_line\n");
5535 }
5536
5537 #####################################################################
5538 #
5539 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5540 # line breaks to the token stream
5541 #
5542 # WARNING: This is not a real class for speed reasons.  Only one
5543 # Formatter may be used.
5544 #
5545 #####################################################################
5546
5547 package Perl::Tidy::Formatter;
5548
5549 BEGIN {
5550
5551     # Caution: these debug flags produce a lot of output
5552     # They should all be 0 except when debugging small scripts
5553     use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
5554     use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
5555     use constant FORMATTER_DEBUG_FLAG_CI      => 0;
5556     use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
5557     use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
5558     use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
5559     use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5560     use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
5561     use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
5562     use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
5563     use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
5564     use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;
5565
5566     my $debug_warning = sub {
5567         print "FORMATTER_DEBUGGING with key $_[0]\n";
5568     };
5569
5570     FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
5571     FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
5572     FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
5573     FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
5574     FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
5575     FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
5576     FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5577     FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
5578     FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
5579     FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
5580     FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
5581     FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
5582 }
5583
5584 use Carp;
5585 use vars qw{
5586
5587   @gnu_stack
5588   $max_gnu_stack_index
5589   $gnu_position_predictor
5590   $line_start_index_to_go
5591   $last_indentation_written
5592   $last_unadjusted_indentation
5593   $last_leading_token
5594
5595   $saw_VERSION_in_this_file
5596   $saw_END_or_DATA_
5597
5598   @gnu_item_list
5599   $max_gnu_item_index
5600   $gnu_sequence_number
5601   $last_output_indentation
5602   %last_gnu_equals
5603   %gnu_comma_count
5604   %gnu_arrow_count
5605
5606   @block_type_to_go
5607   @type_sequence_to_go
5608   @container_environment_to_go
5609   @bond_strength_to_go
5610   @forced_breakpoint_to_go
5611   @lengths_to_go
5612   @levels_to_go
5613   @leading_spaces_to_go
5614   @reduced_spaces_to_go
5615   @matching_token_to_go
5616   @mate_index_to_go
5617   @nesting_blocks_to_go
5618   @ci_levels_to_go
5619   @nesting_depth_to_go
5620   @nobreak_to_go
5621   @old_breakpoint_to_go
5622   @tokens_to_go
5623   @types_to_go
5624
5625   %saved_opening_indentation
5626
5627   $max_index_to_go
5628   $comma_count_in_batch
5629   $old_line_count_in_batch
5630   $last_nonblank_index_to_go
5631   $last_nonblank_type_to_go
5632   $last_nonblank_token_to_go
5633   $last_last_nonblank_index_to_go
5634   $last_last_nonblank_type_to_go
5635   $last_last_nonblank_token_to_go
5636   @nonblank_lines_at_depth
5637   $starting_in_quote
5638   $ending_in_quote
5639
5640   $in_format_skipping_section
5641   $format_skipping_pattern_begin
5642   $format_skipping_pattern_end
5643
5644   $forced_breakpoint_count
5645   $forced_breakpoint_undo_count
5646   @forced_breakpoint_undo_stack
5647   %postponed_breakpoint
5648
5649   $tabbing
5650   $embedded_tab_count
5651   $first_embedded_tab_at
5652   $last_embedded_tab_at
5653   $deleted_semicolon_count
5654   $first_deleted_semicolon_at
5655   $last_deleted_semicolon_at
5656   $added_semicolon_count
5657   $first_added_semicolon_at
5658   $last_added_semicolon_at
5659   $first_tabbing_disagreement
5660   $last_tabbing_disagreement
5661   $in_tabbing_disagreement
5662   $tabbing_disagreement_count
5663   $input_line_tabbing
5664
5665   $last_line_type
5666   $last_line_leading_type
5667   $last_line_leading_level
5668   $last_last_line_leading_level
5669
5670   %block_leading_text
5671   %block_opening_line_number
5672   $csc_new_statement_ok
5673   $accumulating_text_for_block
5674   $leading_block_text
5675   $rleading_block_if_elsif_text
5676   $leading_block_text_level
5677   $leading_block_text_length_exceeded
5678   $leading_block_text_line_length
5679   $leading_block_text_line_number
5680   $closing_side_comment_prefix_pattern
5681   $closing_side_comment_list_pattern
5682
5683   $last_nonblank_token
5684   $last_nonblank_type
5685   $last_last_nonblank_token
5686   $last_last_nonblank_type
5687   $last_nonblank_block_type
5688   $last_output_level
5689   %is_do_follower
5690   %is_if_brace_follower
5691   %space_after_keyword
5692   $rbrace_follower
5693   $looking_for_else
5694   %is_last_next_redo_return
5695   %is_other_brace_follower
5696   %is_else_brace_follower
5697   %is_anon_sub_brace_follower
5698   %is_anon_sub_1_brace_follower
5699   %is_sort_map_grep
5700   %is_sort_map_grep_eval
5701   %is_sort_map_grep_eval_do
5702   %is_block_without_semicolon
5703   %is_if_unless
5704   %is_and_or
5705   %is_assignment
5706   %is_chain_operator
5707   %is_if_unless_and_or_last_next_redo_return
5708   %is_until_while_for_if_elsif_else
5709
5710   @has_broken_sublist
5711   @dont_align
5712   @want_comma_break
5713
5714   $is_static_block_comment
5715   $index_start_one_line_block
5716   $semicolons_before_block_self_destruct
5717   $index_max_forced_break
5718   $input_line_number
5719   $diagnostics_object
5720   $vertical_aligner_object
5721   $logger_object
5722   $file_writer_object
5723   $formatter_self
5724   @ci_stack
5725   $last_line_had_side_comment
5726   %want_break_before
5727   %outdent_keyword
5728   $static_block_comment_pattern
5729   $static_side_comment_pattern
5730   %opening_vertical_tightness
5731   %closing_vertical_tightness
5732   %closing_token_indentation
5733
5734   %opening_token_right
5735   %stack_opening_token
5736   %stack_closing_token
5737
5738   $block_brace_vertical_tightness_pattern
5739
5740   $rOpts_add_newlines
5741   $rOpts_add_whitespace
5742   $rOpts_block_brace_tightness
5743   $rOpts_block_brace_vertical_tightness
5744   $rOpts_brace_left_and_indent
5745   $rOpts_comma_arrow_breakpoints
5746   $rOpts_break_at_old_keyword_breakpoints
5747   $rOpts_break_at_old_comma_breakpoints
5748   $rOpts_break_at_old_logical_breakpoints
5749   $rOpts_break_at_old_ternary_breakpoints
5750   $rOpts_closing_side_comment_else_flag
5751   $rOpts_closing_side_comment_maximum_text
5752   $rOpts_continuation_indentation
5753   $rOpts_cuddled_else
5754   $rOpts_delete_old_whitespace
5755   $rOpts_fuzzy_line_length
5756   $rOpts_indent_columns
5757   $rOpts_line_up_parentheses
5758   $rOpts_maximum_fields_per_table
5759   $rOpts_maximum_line_length
5760   $rOpts_short_concatenation_item_length
5761   $rOpts_keep_old_blank_lines
5762   $rOpts_ignore_old_breakpoints
5763   $rOpts_format_skipping
5764   $rOpts_space_function_paren
5765   $rOpts_space_keyword_paren
5766   $rOpts_keep_interior_semicolons
5767
5768   $half_maximum_line_length
5769
5770   %is_opening_type
5771   %is_closing_type
5772   %is_keyword_returning_list
5773   %tightness
5774   %matching_token
5775   $rOpts
5776   %right_bond_strength
5777   %left_bond_strength
5778   %binary_ws_rules
5779   %want_left_space
5780   %want_right_space
5781   %is_digraph
5782   %is_trigraph
5783   $bli_pattern
5784   $bli_list_string
5785   %is_closing_type
5786   %is_opening_type
5787   %is_closing_token
5788   %is_opening_token
5789 };
5790
5791 BEGIN {
5792
5793     # default list of block types for which -bli would apply
5794     $bli_list_string = 'if else elsif unless while for foreach do : sub';
5795
5796     @_ = qw(
5797       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5798       <= >= == =~ !~ != ++ -- /= x=
5799     );
5800     @is_digraph{@_} = (1) x scalar(@_);
5801
5802     @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5803     @is_trigraph{@_} = (1) x scalar(@_);
5804
5805     @_ = qw(
5806       = **= += *= &= <<= &&=
5807       -= /= |= >>= ||= //=
5808       .= %= ^=
5809       x=
5810     );
5811     @is_assignment{@_} = (1) x scalar(@_);
5812
5813     @_ = qw(
5814       grep
5815       keys
5816       map
5817       reverse
5818       sort
5819       split
5820     );
5821     @is_keyword_returning_list{@_} = (1) x scalar(@_);
5822
5823     @_ = qw(is if unless and or err last next redo return);
5824     @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5825
5826     # always break after a closing curly of these block types:
5827     @_ = qw(until while for if elsif else);
5828     @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
5829
5830     @_ = qw(last next redo return);
5831     @is_last_next_redo_return{@_} = (1) x scalar(@_);
5832
5833     @_ = qw(sort map grep);
5834     @is_sort_map_grep{@_} = (1) x scalar(@_);
5835
5836     @_ = qw(sort map grep eval);
5837     @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5838
5839     @_ = qw(sort map grep eval do);
5840     @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5841
5842     @_ = qw(if unless);
5843     @is_if_unless{@_} = (1) x scalar(@_);
5844
5845     @_ = qw(and or err);
5846     @is_and_or{@_} = (1) x scalar(@_);
5847
5848     # Identify certain operators which often occur in chains.
5849     # Note: the minus (-) causes a side effect of padding of the first line in
5850     # something like this (by sub set_logical_padding):
5851     #    Checkbutton => 'Transmission checked',
5852     #   -variable    => \$TRANS
5853     # This usually improves appearance so it seems ok.
5854     @_ = qw(&& || and or : ? . + - * /);
5855     @is_chain_operator{@_} = (1) x scalar(@_);
5856
5857     # We can remove semicolons after blocks preceded by these keywords
5858     @_ =
5859       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
5860       unless while until for foreach);
5861     @is_block_without_semicolon{@_} = (1) x scalar(@_);
5862
5863     # 'L' is token for opening { at hash key
5864     @_ = qw" L { ( [ ";
5865     @is_opening_type{@_} = (1) x scalar(@_);
5866
5867     # 'R' is token for closing } at hash key
5868     @_ = qw" R } ) ] ";
5869     @is_closing_type{@_} = (1) x scalar(@_);
5870
5871     @_ = qw" { ( [ ";
5872     @is_opening_token{@_} = (1) x scalar(@_);
5873
5874     @_ = qw" } ) ] ";
5875     @is_closing_token{@_} = (1) x scalar(@_);
5876 }
5877
5878 # whitespace codes
5879 use constant WS_YES      => 1;
5880 use constant WS_OPTIONAL => 0;
5881 use constant WS_NO       => -1;
5882
5883 # Token bond strengths.
5884 use constant NO_BREAK    => 10000;
5885 use constant VERY_STRONG => 100;
5886 use constant STRONG      => 2.1;
5887 use constant NOMINAL     => 1.1;
5888 use constant WEAK        => 0.8;
5889 use constant VERY_WEAK   => 0.55;
5890
5891 # values for testing indexes in output array
5892 use constant UNDEFINED_INDEX => -1;
5893
5894 # Maximum number of little messages; probably need not be changed.
5895 use constant MAX_NAG_MESSAGES => 6;
5896
5897 # increment between sequence numbers for each type
5898 # For example, ?: pairs might have numbers 7,11,15,...
5899 use constant TYPE_SEQUENCE_INCREMENT => 4;
5900
5901 {
5902
5903     # methods to count instances
5904     my $_count = 0;
5905     sub get_count        { $_count; }
5906     sub _increment_count { ++$_count }
5907     sub _decrement_count { --$_count }
5908 }
5909
5910 sub trim {
5911
5912     # trim leading and trailing whitespace from a string
5913     $_[0] =~ s/\s+$//;
5914     $_[0] =~ s/^\s+//;
5915     return $_[0];
5916 }
5917
5918 sub split_words {
5919
5920     # given a string containing words separated by whitespace,
5921     # return the list of words
5922     my ($str) = @_;
5923     return unless $str;
5924     $str =~ s/\s+$//;
5925     $str =~ s/^\s+//;
5926     return split( /\s+/, $str );
5927 }
5928
5929 # interface to Perl::Tidy::Logger routines
5930 sub warning {
5931     if ($logger_object) {
5932         $logger_object->warning(@_);
5933     }
5934 }
5935
5936 sub complain {
5937     if ($logger_object) {
5938         $logger_object->complain(@_);
5939     }
5940 }
5941
5942 sub write_logfile_entry {
5943     if ($logger_object) {
5944         $logger_object->write_logfile_entry(@_);
5945     }
5946 }
5947
5948 sub black_box {
5949     if ($logger_object) {
5950         $logger_object->black_box(@_);
5951     }
5952 }
5953
5954 sub report_definite_bug {
5955     if ($logger_object) {
5956         $logger_object->report_definite_bug();
5957     }
5958 }
5959
5960 sub get_saw_brace_error {
5961     if ($logger_object) {
5962         $logger_object->get_saw_brace_error();
5963     }
5964 }
5965
5966 sub we_are_at_the_last_line {
5967     if ($logger_object) {
5968         $logger_object->we_are_at_the_last_line();
5969     }
5970 }
5971
5972 # interface to Perl::Tidy::Diagnostics routine
5973 sub write_diagnostics {
5974
5975     if ($diagnostics_object) {
5976         $diagnostics_object->write_diagnostics(@_);
5977     }
5978 }
5979
5980 sub get_added_semicolon_count {
5981     my $self = shift;
5982     return $added_semicolon_count;
5983 }
5984
5985 sub DESTROY {
5986     $_[0]->_decrement_count();
5987 }
5988
5989 sub new {
5990
5991     my $class = shift;
5992
5993     # we are given an object with a write_line() method to take lines
5994     my %defaults = (
5995         sink_object        => undef,
5996         diagnostics_object => undef,
5997         logger_object      => undef,
5998     );
5999     my %args = ( %defaults, @_ );
6000
6001     $logger_object      = $args{logger_object};
6002     $diagnostics_object = $args{diagnostics_object};
6003
6004     # we create another object with a get_line() and peek_ahead() method
6005     my $sink_object = $args{sink_object};
6006     $file_writer_object =
6007       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6008
6009     # initialize the leading whitespace stack to negative levels
6010     # so that we can never run off the end of the stack
6011     $gnu_position_predictor = 0;    # where the current token is predicted to be
6012     $max_gnu_stack_index    = 0;
6013     $max_gnu_item_index     = -1;
6014     $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6015     @gnu_item_list               = ();
6016     $last_output_indentation     = 0;
6017     $last_indentation_written    = 0;
6018     $last_unadjusted_indentation = 0;
6019     $last_leading_token          = "";
6020
6021     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6022     $saw_END_or_DATA_         = 0;
6023
6024     @block_type_to_go            = ();
6025     @type_sequence_to_go         = ();
6026     @container_environment_to_go = ();
6027     @bond_strength_to_go         = ();
6028     @forced_breakpoint_to_go     = ();
6029     @lengths_to_go               = ();    # line length to start of ith token
6030     @levels_to_go                = ();
6031     @matching_token_to_go        = ();
6032     @mate_index_to_go            = ();
6033     @nesting_blocks_to_go        = ();
6034     @ci_levels_to_go             = ();
6035     @nesting_depth_to_go         = (0);
6036     @nobreak_to_go               = ();
6037     @old_breakpoint_to_go        = ();
6038     @tokens_to_go                = ();
6039     @types_to_go                 = ();
6040     @leading_spaces_to_go        = ();
6041     @reduced_spaces_to_go        = ();
6042
6043     @dont_align         = ();
6044     @has_broken_sublist = ();
6045     @want_comma_break   = ();
6046
6047     @ci_stack                   = ("");
6048     $first_tabbing_disagreement = 0;
6049     $last_tabbing_disagreement  = 0;
6050     $tabbing_disagreement_count = 0;
6051     $in_tabbing_disagreement    = 0;
6052     $input_line_tabbing         = undef;
6053
6054     $last_line_type               = "";
6055     $last_last_line_leading_level = 0;
6056     $last_line_leading_level      = 0;
6057     $last_line_leading_type       = '#';
6058
6059     $last_nonblank_token        = ';';
6060     $last_nonblank_type         = ';';
6061     $last_last_nonblank_token   = ';';
6062     $last_last_nonblank_type    = ';';
6063     $last_nonblank_block_type   = "";
6064     $last_output_level          = 0;
6065     $looking_for_else           = 0;
6066     $embedded_tab_count         = 0;
6067     $first_embedded_tab_at      = 0;
6068     $last_embedded_tab_at       = 0;
6069     $deleted_semicolon_count    = 0;
6070     $first_deleted_semicolon_at = 0;
6071     $last_deleted_semicolon_at  = 0;
6072     $added_semicolon_count      = 0;
6073     $first_added_semicolon_at   = 0;
6074     $last_added_semicolon_at    = 0;
6075     $last_line_had_side_comment = 0;
6076     $is_static_block_comment    = 0;
6077     %postponed_breakpoint       = ();
6078
6079     # variables for adding side comments
6080     %block_leading_text        = ();
6081     %block_opening_line_number = ();
6082     $csc_new_statement_ok      = 1;
6083
6084     %saved_opening_indentation  = ();
6085     $in_format_skipping_section = 0;
6086
6087     reset_block_text_accumulator();
6088
6089     prepare_for_new_input_lines();
6090
6091     $vertical_aligner_object =
6092       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6093         $logger_object, $diagnostics_object );
6094
6095     if ( $rOpts->{'entab-leading-whitespace'} ) {
6096         write_logfile_entry(
6097 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6098         );
6099     }
6100     elsif ( $rOpts->{'tabs'} ) {
6101         write_logfile_entry("Indentation will be with a tab character\n");
6102     }
6103     else {
6104         write_logfile_entry(
6105             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6106     }
6107
6108     # This was the start of a formatter referent, but object-oriented
6109     # coding has turned out to be too slow here.
6110     $formatter_self = {};
6111
6112     bless $formatter_self, $class;
6113
6114     # Safety check..this is not a class yet
6115     if ( _increment_count() > 1 ) {
6116         confess
6117 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6118     }
6119     return $formatter_self;
6120 }
6121
6122 sub prepare_for_new_input_lines {
6123
6124     $gnu_sequence_number++;    # increment output batch counter
6125     %last_gnu_equals                = ();
6126     %gnu_comma_count                = ();
6127     %gnu_arrow_count                = ();
6128     $line_start_index_to_go         = 0;
6129     $max_gnu_item_index             = UNDEFINED_INDEX;
6130     $index_max_forced_break         = UNDEFINED_INDEX;
6131     $max_index_to_go                = UNDEFINED_INDEX;
6132     $last_nonblank_index_to_go      = UNDEFINED_INDEX;
6133     $last_nonblank_type_to_go       = '';
6134     $last_nonblank_token_to_go      = '';
6135     $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6136     $last_last_nonblank_type_to_go  = '';
6137     $last_last_nonblank_token_to_go = '';
6138     $forced_breakpoint_count        = 0;
6139     $forced_breakpoint_undo_count   = 0;
6140     $rbrace_follower                = undef;
6141     $lengths_to_go[0]               = 0;
6142     $old_line_count_in_batch        = 1;
6143     $comma_count_in_batch           = 0;
6144     $starting_in_quote              = 0;
6145
6146     destroy_one_line_block();
6147 }
6148
6149 sub write_line {
6150
6151     my $self = shift;
6152     my ($line_of_tokens) = @_;
6153
6154     my $line_type  = $line_of_tokens->{_line_type};
6155     my $input_line = $line_of_tokens->{_line_text};
6156
6157     if ( $rOpts->{notidy} ) {
6158         write_unindented_line($input_line);
6159         $last_line_type = $line_type;
6160         return;
6161     }
6162
6163     # _line_type codes are:
6164     #   SYSTEM         - system-specific code before hash-bang line
6165     #   CODE           - line of perl code (including comments)
6166     #   POD_START      - line starting pod, such as '=head'
6167     #   POD            - pod documentation text
6168     #   POD_END        - last line of pod section, '=cut'
6169     #   HERE           - text of here-document
6170     #   HERE_END       - last line of here-doc (target word)
6171     #   FORMAT         - format section
6172     #   FORMAT_END     - last line of format section, '.'
6173     #   DATA_START     - __DATA__ line
6174     #   DATA           - unidentified text following __DATA__
6175     #   END_START      - __END__ line
6176     #   END            - unidentified text following __END__
6177     #   ERROR          - we are in big trouble, probably not a perl script
6178
6179     # put a blank line after an =cut which comes before __END__ and __DATA__
6180     # (required by podchecker)
6181     if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6182         $file_writer_object->reset_consecutive_blank_lines();
6183         if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6184     }
6185
6186     # handle line of code..
6187     if ( $line_type eq 'CODE' ) {
6188
6189         # let logger see all non-blank lines of code
6190         if ( $input_line !~ /^\s*$/ ) {
6191             my $output_line_number =
6192               $vertical_aligner_object->get_output_line_number();
6193             black_box( $line_of_tokens, $output_line_number );
6194         }
6195         print_line_of_tokens($line_of_tokens);
6196     }
6197
6198     # handle line of non-code..
6199     else {
6200
6201         # set special flags
6202         my $skip_line = 0;
6203         my $tee_line  = 0;
6204         if ( $line_type =~ /^POD/ ) {
6205
6206             # Pod docs should have a preceding blank line.  But be
6207             # very careful in __END__ and __DATA__ sections, because:
6208             #   1. the user may be using this section for any purpose whatsoever
6209             #   2. the blank counters are not active there
6210             # It should be safe to request a blank line between an
6211             # __END__ or __DATA__ and an immediately following '=head'
6212             # type line, (types END_START and DATA_START), but not for
6213             # any other lines of type END or DATA.
6214             if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6215             if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
6216             if (  !$skip_line
6217                 && $line_type eq 'POD_START'
6218                   # If the previous line is a __DATA__ line (or data
6219                   # contents, it's not valid to change it at all, no
6220                   # matter what is in the data
6221                 && $last_line_type !~ /^(END|DATA(?:_START)?)$/ )
6222             {
6223                 want_blank_line();
6224             }
6225         }
6226
6227         # leave the blank counters in a predictable state
6228         # after __END__ or __DATA__
6229         elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6230             $file_writer_object->reset_consecutive_blank_lines();
6231             $saw_END_or_DATA_ = 1;
6232         }
6233
6234         # write unindented non-code line
6235         if ( !$skip_line ) {
6236             if ($tee_line) { $file_writer_object->tee_on() }
6237             write_unindented_line($input_line);
6238             if ($tee_line) { $file_writer_object->tee_off() }
6239         }
6240     }
6241     $last_line_type = $line_type;
6242 }
6243
6244 sub create_one_line_block {
6245     $index_start_one_line_block            = $_[0];
6246     $semicolons_before_block_self_destruct = $_[1];
6247 }
6248
6249 sub destroy_one_line_block {
6250     $index_start_one_line_block            = UNDEFINED_INDEX;
6251     $semicolons_before_block_self_destruct = 0;
6252 }
6253
6254 sub leading_spaces_to_go {
6255
6256     # return the number of indentation spaces for a token in the output stream;
6257     # these were previously stored by 'set_leading_whitespace'.
6258
6259     return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6260
6261 }
6262
6263 sub get_SPACES {
6264
6265     # return the number of leading spaces associated with an indentation
6266     # variable $indentation is either a constant number of spaces or an object
6267     # with a get_SPACES method.
6268     my $indentation = shift;
6269     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6270 }
6271
6272 sub get_RECOVERABLE_SPACES {
6273
6274     # return the number of spaces (+ means shift right, - means shift left)
6275     # that we would like to shift a group of lines with the same indentation
6276     # to get them to line up with their opening parens
6277     my $indentation = shift;
6278     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6279 }
6280
6281 sub get_AVAILABLE_SPACES_to_go {
6282
6283     my $item = $leading_spaces_to_go[ $_[0] ];
6284
6285     # return the number of available leading spaces associated with an
6286     # indentation variable.  $indentation is either a constant number of
6287     # spaces or an object with a get_AVAILABLE_SPACES method.
6288     return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6289 }
6290
6291 sub new_lp_indentation_item {
6292
6293     # this is an interface to the IndentationItem class
6294     my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6295
6296     # A negative level implies not to store the item in the item_list
6297     my $index = 0;
6298     if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6299
6300     my $item = Perl::Tidy::IndentationItem->new(
6301         $spaces,      $level,
6302         $ci_level,    $available_spaces,
6303         $index,       $gnu_sequence_number,
6304         $align_paren, $max_gnu_stack_index,
6305         $line_start_index_to_go,
6306     );
6307
6308     if ( $level >= 0 ) {
6309         $gnu_item_list[$max_gnu_item_index] = $item;
6310     }
6311
6312     return $item;
6313 }
6314
6315 sub set_leading_whitespace {
6316
6317     # This routine defines leading whitespace
6318     # given: the level and continuation_level of a token,
6319     # define: space count of leading string which would apply if it
6320     # were the first token of a new line.
6321
6322     my ( $level, $ci_level, $in_continued_quote ) = @_;
6323
6324     # modify for -bli, which adds one continuation indentation for
6325     # opening braces
6326     if (   $rOpts_brace_left_and_indent
6327         && $max_index_to_go == 0
6328         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6329     {
6330         $ci_level++;
6331     }
6332
6333     # patch to avoid trouble when input file has negative indentation.
6334     # other logic should catch this error.
6335     if ( $level < 0 ) { $level = 0 }
6336
6337     #-------------------------------------------
6338     # handle the standard indentation scheme
6339     #-------------------------------------------
6340     unless ($rOpts_line_up_parentheses) {
6341         my $space_count =
6342           $ci_level * $rOpts_continuation_indentation +
6343           $level * $rOpts_indent_columns;
6344         my $ci_spaces =
6345           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6346
6347         if ($in_continued_quote) {
6348             $space_count = 0;
6349             $ci_spaces   = 0;
6350         }
6351         $leading_spaces_to_go[$max_index_to_go] = $space_count;
6352         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6353         return;
6354     }
6355
6356     #-------------------------------------------------------------
6357     # handle case of -lp indentation..
6358     #-------------------------------------------------------------
6359
6360     # The continued_quote flag means that this is the first token of a
6361     # line, and it is the continuation of some kind of multi-line quote
6362     # or pattern.  It requires special treatment because it must have no
6363     # added leading whitespace. So we create a special indentation item
6364     # which is not in the stack.
6365     if ($in_continued_quote) {
6366         my $space_count     = 0;
6367         my $available_space = 0;
6368         $level = -1;    # flag to prevent storing in item_list
6369         $leading_spaces_to_go[$max_index_to_go] =
6370           $reduced_spaces_to_go[$max_index_to_go] =
6371           new_lp_indentation_item( $space_count, $level, $ci_level,
6372             $available_space, 0 );
6373         return;
6374     }
6375
6376     # get the top state from the stack
6377     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6378     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6379     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6380
6381     my $type        = $types_to_go[$max_index_to_go];
6382     my $token       = $tokens_to_go[$max_index_to_go];
6383     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6384
6385     if ( $type eq '{' || $type eq '(' ) {
6386
6387         $gnu_comma_count{ $total_depth + 1 } = 0;
6388         $gnu_arrow_count{ $total_depth + 1 } = 0;
6389
6390         # If we come to an opening token after an '=' token of some type,
6391         # see if it would be helpful to 'break' after the '=' to save space
6392         my $last_equals = $last_gnu_equals{$total_depth};
6393         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6394
6395             # find the position if we break at the '='
6396             my $i_test = $last_equals;
6397             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6398
6399             # TESTING
6400             ##my $too_close = ($i_test==$max_index_to_go-1);
6401
6402             my $test_position = total_line_length( $i_test, $max_index_to_go );
6403
6404             if (
6405
6406                 # the equals is not just before an open paren (testing)
6407                 ##!$too_close &&
6408
6409                 # if we are beyond the midpoint
6410                 $gnu_position_predictor > $half_maximum_line_length
6411
6412                 # or we are beyont the 1/4 point and there was an old
6413                 # break at the equals
6414                 || (
6415                     $gnu_position_predictor > $half_maximum_line_length / 2
6416                     && (
6417                         $old_breakpoint_to_go[$last_equals]
6418                         || (   $last_equals > 0
6419                             && $old_breakpoint_to_go[ $last_equals - 1 ] )
6420                         || (   $last_equals > 1
6421                             && $types_to_go[ $last_equals - 1 ] eq 'b'
6422                             && $old_breakpoint_to_go[ $last_equals - 2 ] )
6423                     )
6424                 )
6425               )
6426             {
6427
6428                 # then make the switch -- note that we do not set a real
6429                 # breakpoint here because we may not really need one; sub
6430                 # scan_list will do that if necessary
6431                 $line_start_index_to_go = $i_test + 1;
6432                 $gnu_position_predictor = $test_position;
6433             }
6434         }
6435     }
6436
6437     # Check for decreasing depth ..
6438     # Note that one token may have both decreasing and then increasing
6439     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
6440     # in this example we would first go back to (1,0) then up to (2,0)
6441     # in a single call.
6442     if ( $level < $current_level || $ci_level < $current_ci_level ) {
6443
6444         # loop to find the first entry at or completely below this level
6445         my ( $lev, $ci_lev );
6446         while (1) {
6447             if ($max_gnu_stack_index) {
6448
6449                 # save index of token which closes this level
6450                 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6451
6452                 # Undo any extra indentation if we saw no commas
6453                 my $available_spaces =
6454                   $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6455
6456                 my $comma_count = 0;
6457                 my $arrow_count = 0;
6458                 if ( $type eq '}' || $type eq ')' ) {
6459                     $comma_count = $gnu_comma_count{$total_depth};
6460                     $arrow_count = $gnu_arrow_count{$total_depth};
6461                     $comma_count = 0 unless $comma_count;
6462                     $arrow_count = 0 unless $arrow_count;
6463                 }
6464                 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6465                 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6466
6467                 if ( $available_spaces > 0 ) {
6468
6469                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
6470
6471                         my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6472                         my $seqno =
6473                           $gnu_stack[$max_gnu_stack_index]
6474                           ->get_SEQUENCE_NUMBER();
6475
6476                         # Be sure this item was created in this batch.  This
6477                         # should be true because we delete any available
6478                         # space from open items at the end of each batch.
6479                         if (   $gnu_sequence_number != $seqno
6480                             || $i > $max_gnu_item_index )
6481                         {
6482                             warning(
6483 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6484                             );
6485                             report_definite_bug();
6486                         }
6487
6488                         else {
6489                             if ( $arrow_count == 0 ) {
6490                                 $gnu_item_list[$i]
6491                                   ->permanently_decrease_AVAILABLE_SPACES(
6492                                     $available_spaces);
6493                             }
6494                             else {
6495                                 $gnu_item_list[$i]
6496                                   ->tentatively_decrease_AVAILABLE_SPACES(
6497                                     $available_spaces);
6498                             }
6499
6500                             my $j;
6501                             for (
6502                                 $j = $i + 1 ;
6503                                 $j <= $max_gnu_item_index ;
6504                                 $j++
6505                               )
6506                             {
6507                                 $gnu_item_list[$j]
6508                                   ->decrease_SPACES($available_spaces);
6509                             }
6510                         }
6511                     }
6512                 }
6513
6514                 # go down one level
6515                 --$max_gnu_stack_index;
6516                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6517                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6518
6519                 # stop when we reach a level at or below the current level
6520                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6521                     $space_count =
6522                       $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6523                     $current_level    = $lev;
6524                     $current_ci_level = $ci_lev;
6525                     last;
6526                 }
6527             }
6528
6529             # reached bottom of stack .. should never happen because
6530             # only negative levels can get here, and $level was forced
6531             # to be positive above.
6532             else {
6533                 warning(
6534 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6535                 );
6536                 report_definite_bug();
6537                 last;
6538             }
6539         }
6540     }
6541
6542     # handle increasing depth
6543     if ( $level > $current_level || $ci_level > $current_ci_level ) {
6544
6545         # Compute the standard incremental whitespace.  This will be
6546         # the minimum incremental whitespace that will be used.  This
6547         # choice results in a smooth transition between the gnu-style
6548         # and the standard style.
6549         my $standard_increment =
6550           ( $level - $current_level ) * $rOpts_indent_columns +
6551           ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6552
6553         # Now we have to define how much extra incremental space
6554         # ("$available_space") we want.  This extra space will be
6555         # reduced as necessary when long lines are encountered or when
6556         # it becomes clear that we do not have a good list.
6557         my $available_space = 0;
6558         my $align_paren     = 0;
6559         my $excess          = 0;
6560
6561         # initialization on empty stack..
6562         if ( $max_gnu_stack_index == 0 ) {
6563             $space_count = $level * $rOpts_indent_columns;
6564         }
6565
6566         # if this is a BLOCK, add the standard increment
6567         elsif ($last_nonblank_block_type) {
6568             $space_count += $standard_increment;
6569         }
6570
6571         # if last nonblank token was not structural indentation,
6572         # just use standard increment
6573         elsif ( $last_nonblank_type ne '{' ) {
6574             $space_count += $standard_increment;
6575         }
6576
6577         # otherwise use the space to the first non-blank level change token
6578         else {
6579
6580             $space_count = $gnu_position_predictor;
6581
6582             my $min_gnu_indentation =
6583               $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6584
6585             $available_space = $space_count - $min_gnu_indentation;
6586             if ( $available_space >= $standard_increment ) {
6587                 $min_gnu_indentation += $standard_increment;
6588             }
6589             elsif ( $available_space > 1 ) {
6590                 $min_gnu_indentation += $available_space + 1;
6591             }
6592             elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6593                 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6594                     $min_gnu_indentation += 2;
6595                 }
6596                 else {
6597                     $min_gnu_indentation += 1;
6598                 }
6599             }
6600             else {
6601                 $min_gnu_indentation += $standard_increment;
6602             }
6603             $available_space = $space_count - $min_gnu_indentation;
6604
6605             if ( $available_space < 0 ) {
6606                 $space_count     = $min_gnu_indentation;
6607                 $available_space = 0;
6608             }
6609             $align_paren = 1;
6610         }
6611
6612         # update state, but not on a blank token
6613         if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6614
6615             $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6616
6617             ++$max_gnu_stack_index;
6618             $gnu_stack[$max_gnu_stack_index] =
6619               new_lp_indentation_item( $space_count, $level, $ci_level,
6620                 $available_space, $align_paren );
6621
6622             # If the opening paren is beyond the half-line length, then
6623             # we will use the minimum (standard) indentation.  This will
6624             # help avoid problems associated with running out of space
6625             # near the end of a line.  As a result, in deeply nested
6626             # lists, there will be some indentations which are limited
6627             # to this minimum standard indentation. But the most deeply
6628             # nested container will still probably be able to shift its
6629             # parameters to the right for proper alignment, so in most
6630             # cases this will not be noticable.
6631             if (   $available_space > 0
6632                 && $space_count > $half_maximum_line_length )
6633             {
6634                 $gnu_stack[$max_gnu_stack_index]
6635                   ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6636             }
6637         }
6638     }
6639
6640     # Count commas and look for non-list characters.  Once we see a
6641     # non-list character, we give up and don't look for any more commas.
6642     if ( $type eq '=>' ) {
6643         $gnu_arrow_count{$total_depth}++;
6644
6645         # tentatively treating '=>' like '=' for estimating breaks
6646         # TODO: this could use some experimentation
6647         $last_gnu_equals{$total_depth} = $max_index_to_go;
6648     }
6649
6650     elsif ( $type eq ',' ) {
6651         $gnu_comma_count{$total_depth}++;
6652     }
6653
6654     elsif ( $is_assignment{$type} ) {
6655         $last_gnu_equals{$total_depth} = $max_index_to_go;
6656     }
6657
6658     # this token might start a new line
6659     # if this is a non-blank..
6660     if ( $type ne 'b' ) {
6661
6662         # and if ..
6663         if (
6664
6665             # this is the first nonblank token of the line
6666             $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6667
6668             # or previous character was one of these:
6669             || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6670
6671             # or previous character was opening and this does not close it
6672             || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6673             || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6674
6675             # or this token is one of these:
6676             || $type =~ /^([\.]|\|\||\&\&)$/
6677
6678             # or this is a closing structure
6679             || (   $last_nonblank_type_to_go eq '}'
6680                 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6681
6682             # or previous token was keyword 'return'
6683             || ( $last_nonblank_type_to_go eq 'k'
6684                 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6685
6686             # or starting a new line at certain keywords is fine
6687             || (   $type eq 'k'
6688                 && $is_if_unless_and_or_last_next_redo_return{$token} )
6689
6690             # or this is after an assignment after a closing structure
6691             || (
6692                 $is_assignment{$last_nonblank_type_to_go}
6693                 && (
6694                     $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6695
6696                     # and it is significantly to the right
6697                     || $gnu_position_predictor > $half_maximum_line_length
6698                 )
6699             )
6700           )
6701         {
6702             check_for_long_gnu_style_lines();
6703             $line_start_index_to_go = $max_index_to_go;
6704
6705             # back up 1 token if we want to break before that type
6706             # otherwise, we may strand tokens like '?' or ':' on a line
6707             if ( $line_start_index_to_go > 0 ) {
6708                 if ( $last_nonblank_type_to_go eq 'k' ) {
6709
6710                     if ( $want_break_before{$last_nonblank_token_to_go} ) {
6711                         $line_start_index_to_go--;
6712                     }
6713                 }
6714                 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6715                     $line_start_index_to_go--;
6716                 }
6717             }
6718         }
6719     }
6720
6721     # remember the predicted position of this token on the output line
6722     if ( $max_index_to_go > $line_start_index_to_go ) {
6723         $gnu_position_predictor =
6724           total_line_length( $line_start_index_to_go, $max_index_to_go );
6725     }
6726     else {
6727         $gnu_position_predictor = $space_count +
6728           token_sequence_length( $max_index_to_go, $max_index_to_go );
6729     }
6730
6731     # store the indentation object for this token
6732     # this allows us to manipulate the leading whitespace
6733     # (in case we have to reduce indentation to fit a line) without
6734     # having to change any token values
6735     $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6736     $reduced_spaces_to_go[$max_index_to_go] =
6737       ( $max_gnu_stack_index > 0 && $ci_level )
6738       ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6739       : $gnu_stack[$max_gnu_stack_index];
6740     return;
6741 }
6742
6743 sub check_for_long_gnu_style_lines {
6744
6745     # look at the current estimated maximum line length, and
6746     # remove some whitespace if it exceeds the desired maximum
6747
6748     # this is only for the '-lp' style
6749     return unless ($rOpts_line_up_parentheses);
6750
6751     # nothing can be done if no stack items defined for this line
6752     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6753
6754     # see if we have exceeded the maximum desired line length
6755     # keep 2 extra free because they are needed in some cases
6756     # (result of trial-and-error testing)
6757     my $spaces_needed =
6758       $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6759
6760     return if ( $spaces_needed <= 0 );
6761
6762     # We are over the limit, so try to remove a requested number of
6763     # spaces from leading whitespace.  We are only allowed to remove
6764     # from whitespace items created on this batch, since others have
6765     # already been used and cannot be undone.
6766     my @candidates = ();
6767     my $i;
6768
6769     # loop over all whitespace items created for the current batch
6770     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6771         my $item = $gnu_item_list[$i];
6772
6773         # item must still be open to be a candidate (otherwise it
6774         # cannot influence the current token)
6775         next if ( $item->get_CLOSED() >= 0 );
6776
6777         my $available_spaces = $item->get_AVAILABLE_SPACES();
6778
6779         if ( $available_spaces > 0 ) {
6780             push( @candidates, [ $i, $available_spaces ] );
6781         }
6782     }
6783
6784     return unless (@candidates);
6785
6786     # sort by available whitespace so that we can remove whitespace
6787     # from the maximum available first
6788     @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6789
6790     # keep removing whitespace until we are done or have no more
6791     my $candidate;
6792     foreach $candidate (@candidates) {
6793         my ( $i, $available_spaces ) = @{$candidate};
6794         my $deleted_spaces =
6795           ( $available_spaces > $spaces_needed )
6796           ? $spaces_needed
6797           : $available_spaces;
6798
6799         # remove the incremental space from this item
6800         $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6801
6802         my $i_debug = $i;
6803
6804         # update the leading whitespace of this item and all items
6805         # that came after it
6806         for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6807
6808             my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6809             if ( $old_spaces >= $deleted_spaces ) {
6810                 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6811             }
6812
6813             # shouldn't happen except for code bug:
6814             else {
6815                 my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
6816                 my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6817                 my $old_level    = $gnu_item_list[$i]->get_LEVEL();
6818                 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6819                 warning(
6820 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
6821                 );
6822                 report_definite_bug();
6823             }
6824         }
6825         $gnu_position_predictor -= $deleted_spaces;
6826         $spaces_needed          -= $deleted_spaces;
6827         last unless ( $spaces_needed > 0 );
6828     }
6829 }
6830
6831 sub finish_lp_batch {
6832
6833     # This routine is called once after each each output stream batch is
6834     # finished to undo indentation for all incomplete -lp
6835     # indentation levels.  It is too risky to leave a level open,
6836     # because then we can't backtrack in case of a long line to follow.
6837     # This means that comments and blank lines will disrupt this
6838     # indentation style.  But the vertical aligner may be able to
6839     # get the space back if there are side comments.
6840
6841     # this is only for the 'lp' style
6842     return unless ($rOpts_line_up_parentheses);
6843
6844     # nothing can be done if no stack items defined for this line
6845     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6846
6847     # loop over all whitespace items created for the current batch
6848     my $i;
6849     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6850         my $item = $gnu_item_list[$i];
6851
6852         # only look for open items
6853         next if ( $item->get_CLOSED() >= 0 );
6854
6855         # Tentatively remove all of the available space
6856         # (The vertical aligner will try to get it back later)
6857         my $available_spaces = $item->get_AVAILABLE_SPACES();
6858         if ( $available_spaces > 0 ) {
6859
6860             # delete incremental space for this item
6861             $gnu_item_list[$i]
6862               ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6863
6864             # Reduce the total indentation space of any nodes that follow
6865             # Note that any such nodes must necessarily be dependents
6866             # of this node.
6867             foreach ( $i + 1 .. $max_gnu_item_index ) {
6868                 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6869             }
6870         }
6871     }
6872     return;
6873 }
6874
6875 sub reduce_lp_indentation {
6876
6877     # reduce the leading whitespace at token $i if possible by $spaces_needed
6878     # (a large value of $spaces_needed will remove all excess space)
6879     # NOTE: to be called from scan_list only for a sequence of tokens
6880     # contained between opening and closing parens/braces/brackets
6881
6882     my ( $i, $spaces_wanted ) = @_;
6883     my $deleted_spaces = 0;
6884
6885     my $item             = $leading_spaces_to_go[$i];
6886     my $available_spaces = $item->get_AVAILABLE_SPACES();
6887
6888     if (
6889         $available_spaces > 0
6890         && ( ( $spaces_wanted <= $available_spaces )
6891             || !$item->get_HAVE_CHILD() )
6892       )
6893     {
6894
6895         # we'll remove these spaces, but mark them as recoverable
6896         $deleted_spaces =
6897           $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6898     }
6899
6900     return $deleted_spaces;
6901 }
6902
6903 sub token_sequence_length {
6904
6905     # return length of tokens ($ifirst .. $ilast) including first & last
6906     # returns 0 if $ifirst > $ilast
6907     my $ifirst = shift;
6908     my $ilast  = shift;
6909     return 0 if ( $ilast < 0 || $ifirst > $ilast );
6910     return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6911     return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6912 }
6913
6914 sub total_line_length {
6915
6916     # return length of a line of tokens ($ifirst .. $ilast)
6917     my $ifirst = shift;
6918     my $ilast  = shift;
6919     if ( $ifirst < 0 ) { $ifirst = 0 }
6920
6921     return leading_spaces_to_go($ifirst) +
6922       token_sequence_length( $ifirst, $ilast );
6923 }
6924
6925 sub excess_line_length {
6926
6927     # return number of characters by which a line of tokens ($ifirst..$ilast)
6928     # exceeds the allowable line length.
6929     my $ifirst = shift;
6930     my $ilast  = shift;
6931     if ( $ifirst < 0 ) { $ifirst = 0 }
6932     return leading_spaces_to_go($ifirst) +
6933       token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6934 }
6935
6936 sub finish_formatting {
6937
6938     # flush buffer and write any informative messages
6939     my $self = shift;
6940
6941     flush();
6942     $file_writer_object->decrement_output_line_number()
6943       ;    # fix up line number since it was incremented
6944     we_are_at_the_last_line();
6945     if ( $added_semicolon_count > 0 ) {
6946         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6947         my $what =
6948           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6949         write_logfile_entry("$added_semicolon_count $what added:\n");
6950         write_logfile_entry(
6951             "  $first at input line $first_added_semicolon_at\n");
6952
6953         if ( $added_semicolon_count > 1 ) {
6954             write_logfile_entry(
6955                 "   Last at input line $last_added_semicolon_at\n");
6956         }
6957         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
6958         write_logfile_entry("\n");
6959     }
6960
6961     if ( $deleted_semicolon_count > 0 ) {
6962         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6963         my $what =
6964           ( $deleted_semicolon_count > 1 )
6965           ? "semicolons were"
6966           : "semicolon was";
6967         write_logfile_entry(
6968             "$deleted_semicolon_count unnecessary $what deleted:\n");
6969         write_logfile_entry(
6970             "  $first at input line $first_deleted_semicolon_at\n");
6971
6972         if ( $deleted_semicolon_count > 1 ) {
6973             write_logfile_entry(
6974                 "   Last at input line $last_deleted_semicolon_at\n");
6975         }
6976         write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
6977         write_logfile_entry("\n");
6978     }
6979
6980     if ( $embedded_tab_count > 0 ) {
6981         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6982         my $what =
6983           ( $embedded_tab_count > 1 )
6984           ? "quotes or patterns"
6985           : "quote or pattern";
6986         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6987         write_logfile_entry(
6988 "This means the display of this script could vary with device or software\n"
6989         );
6990         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
6991
6992         if ( $embedded_tab_count > 1 ) {
6993             write_logfile_entry(
6994                 "   Last at input line $last_embedded_tab_at\n");
6995         }
6996         write_logfile_entry("\n");
6997     }
6998
6999     if ($first_tabbing_disagreement) {
7000         write_logfile_entry(
7001 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
7002         );
7003     }
7004
7005     if ($in_tabbing_disagreement) {
7006         write_logfile_entry(
7007 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7008         );
7009     }
7010     else {
7011
7012         if ($last_tabbing_disagreement) {
7013
7014             write_logfile_entry(
7015 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7016             );
7017         }
7018         else {
7019             write_logfile_entry("No indentation disagreement seen\n");
7020         }
7021     }
7022     write_logfile_entry("\n");
7023
7024     $vertical_aligner_object->report_anything_unusual();
7025
7026     $file_writer_object->report_line_length_errors();
7027 }
7028
7029 sub check_options {
7030
7031     # This routine is called to check the Opts hash after it is defined
7032
7033     ($rOpts) = @_;
7034     my ( $tabbing_string, $tab_msg );
7035
7036     make_static_block_comment_pattern();
7037     make_static_side_comment_pattern();
7038     make_closing_side_comment_prefix();
7039     make_closing_side_comment_list_pattern();
7040     $format_skipping_pattern_begin =
7041       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7042     $format_skipping_pattern_end =
7043       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7044
7045     # If closing side comments ARE selected, then we can safely
7046     # delete old closing side comments unless closing side comment
7047     # warnings are requested.  This is a good idea because it will
7048     # eliminate any old csc's which fall below the line count threshold.
7049     # We cannot do this if warnings are turned on, though, because we
7050     # might delete some text which has been added.  So that must
7051     # be handled when comments are created.
7052     if ( $rOpts->{'closing-side-comments'} ) {
7053         if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7054             $rOpts->{'delete-closing-side-comments'} = 1;
7055         }
7056     }
7057
7058     # If closing side comments ARE NOT selected, but warnings ARE
7059     # selected and we ARE DELETING csc's, then we will pretend to be
7060     # adding with a huge interval.  This will force the comments to be
7061     # generated for comparison with the old comments, but not added.
7062     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7063         if ( $rOpts->{'delete-closing-side-comments'} ) {
7064             $rOpts->{'delete-closing-side-comments'}  = 0;
7065             $rOpts->{'closing-side-comments'}         = 1;
7066             $rOpts->{'closing-side-comment-interval'} = 100000000;
7067         }
7068     }
7069
7070     make_bli_pattern();
7071     make_block_brace_vertical_tightness_pattern();
7072
7073     if ( $rOpts->{'line-up-parentheses'} ) {
7074
7075         if (   $rOpts->{'indent-only'}
7076             || !$rOpts->{'add-newlines'}
7077             || !$rOpts->{'delete-old-newlines'} )
7078         {
7079             warn <<EOM;
7080 -----------------------------------------------------------------------
7081 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7082
7083 The -lp indentation logic requires that perltidy be able to coordinate
7084 arbitrarily large numbers of line breakpoints.  This isn't possible
7085 with these flags. Sometimes an acceptable workaround is to use -wocb=3
7086 -----------------------------------------------------------------------
7087 EOM
7088             $rOpts->{'line-up-parentheses'} = 0;
7089         }
7090     }
7091
7092     # At present, tabs are not compatable with the line-up-parentheses style
7093     # (it would be possible to entab the total leading whitespace
7094     # just prior to writing the line, if desired).
7095     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7096         warn <<EOM;
7097 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
7098 EOM
7099         $rOpts->{'tabs'} = 0;
7100     }
7101
7102     # Likewise, tabs are not compatable with outdenting..
7103     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7104         warn <<EOM;
7105 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7106 EOM
7107         $rOpts->{'tabs'} = 0;
7108     }
7109
7110     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7111         warn <<EOM;
7112 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
7113 EOM
7114         $rOpts->{'tabs'} = 0;
7115     }
7116
7117     if ( !$rOpts->{'space-for-semicolon'} ) {
7118         $want_left_space{'f'} = -1;
7119     }
7120
7121     if ( $rOpts->{'space-terminal-semicolon'} ) {
7122         $want_left_space{';'} = 1;
7123     }
7124
7125     # implement outdenting preferences for keywords
7126     %outdent_keyword = ();
7127     unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7128         @_ = qw(next last redo goto return);    # defaults
7129     }
7130
7131     # FUTURE: if not a keyword, assume that it is an identifier
7132     foreach (@_) {
7133         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7134             $outdent_keyword{$_} = 1;
7135         }
7136         else {
7137             warn "ignoring '$_' in -okwl list; not a perl keyword";
7138         }
7139     }
7140
7141     # implement user whitespace preferences
7142     if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7143         @want_left_space{@_} = (1) x scalar(@_);
7144     }
7145
7146     if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7147         @want_right_space{@_} = (1) x scalar(@_);
7148     }
7149
7150     if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7151         @want_left_space{@_} = (-1) x scalar(@_);
7152     }
7153
7154     if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7155         @want_right_space{@_} = (-1) x scalar(@_);
7156     }
7157     if ( $rOpts->{'dump-want-left-space'} ) {
7158         dump_want_left_space(*STDOUT);
7159         exit 1;
7160     }
7161
7162     if ( $rOpts->{'dump-want-right-space'} ) {
7163         dump_want_right_space(*STDOUT);
7164         exit 1;
7165     }
7166
7167     # default keywords for which space is introduced before an opening paren
7168     # (at present, including them messes up vertical alignment)
7169     @_ = qw(my local our and or err eq ne if else elsif until
7170       unless while for foreach return switch case given when);
7171     @space_after_keyword{@_} = (1) x scalar(@_);
7172
7173     # allow user to modify these defaults
7174     if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7175         @space_after_keyword{@_} = (1) x scalar(@_);
7176     }
7177
7178     if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7179         @space_after_keyword{@_} = (0) x scalar(@_);
7180     }
7181
7182     # implement user break preferences
7183     my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7184       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7185       . : ? && || and or err xor
7186     );
7187
7188     my $break_after = sub {
7189         foreach my $tok (@_) {
7190             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
7191             my $lbs = $left_bond_strength{$tok};
7192             my $rbs = $right_bond_strength{$tok};
7193             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7194                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7195                   ( $lbs, $rbs );
7196             }
7197         }
7198     };
7199
7200     my $break_before = sub {
7201         foreach my $tok (@_) {
7202             my $lbs = $left_bond_strength{$tok};
7203             my $rbs = $right_bond_strength{$tok};
7204             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7205                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7206                   ( $lbs, $rbs );
7207             }
7208         }
7209     };
7210
7211     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7212     $break_before->(@all_operators)
7213       if ( $rOpts->{'break-before-all-operators'} );
7214
7215     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7216     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7217
7218     # make note if breaks are before certain key types
7219     %want_break_before = ();
7220     foreach my $tok ( @all_operators, ',' ) {
7221         $want_break_before{$tok} =
7222           $left_bond_strength{$tok} < $right_bond_strength{$tok};
7223     }
7224
7225     # Coordinate ?/: breaks, which must be similar
7226     if ( !$want_break_before{':'} ) {
7227         $want_break_before{'?'}   = $want_break_before{':'};
7228         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7229         $left_bond_strength{'?'}  = NO_BREAK;
7230     }
7231
7232     # Define here tokens which may follow the closing brace of a do statement
7233     # on the same line, as in:
7234     #   } while ( $something);
7235     @_ = qw(until while unless if ; : );
7236     push @_, ',';
7237     @is_do_follower{@_} = (1) x scalar(@_);
7238
7239     # These tokens may follow the closing brace of an if or elsif block.
7240     # In other words, for cuddled else we want code to look like:
7241     #   } elsif ( $something) {
7242     #   } else {
7243     if ( $rOpts->{'cuddled-else'} ) {
7244         @_ = qw(else elsif);
7245         @is_if_brace_follower{@_} = (1) x scalar(@_);
7246     }
7247     else {
7248         %is_if_brace_follower = ();
7249     }
7250
7251     # nothing can follow the closing curly of an else { } block:
7252     %is_else_brace_follower = ();
7253
7254     # what can follow a multi-line anonymous sub definition closing curly:
7255     @_ = qw# ; : => or and  && || ~~ !~~ ) #;
7256     push @_, ',';
7257     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7258
7259     # what can follow a one-line anonynomous sub closing curly:
7260     # one-line anonumous subs also have ']' here...
7261     # see tk3.t and PP.pm
7262     @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
7263     push @_, ',';
7264     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7265
7266     # What can follow a closing curly of a block
7267     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7268     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7269     @_ = qw#  ; : => or and  && || ) #;
7270     push @_, ',';
7271
7272     # allow cuddled continue if cuddled else is specified
7273     if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7274
7275     @is_other_brace_follower{@_} = (1) x scalar(@_);
7276
7277     $right_bond_strength{'{'} = WEAK;
7278     $left_bond_strength{'{'}  = VERY_STRONG;
7279
7280     # make -l=0  equal to -l=infinite
7281     if ( !$rOpts->{'maximum-line-length'} ) {
7282         $rOpts->{'maximum-line-length'} = 1000000;
7283     }
7284
7285     # make -lbl=0  equal to -lbl=infinite
7286     if ( !$rOpts->{'long-block-line-count'} ) {
7287         $rOpts->{'long-block-line-count'} = 1000000;
7288     }
7289
7290     my $ole = $rOpts->{'output-line-ending'};
7291     if ($ole) {
7292         my %endings = (
7293             dos  => "\015\012",
7294             win  => "\015\012",
7295             mac  => "\015",
7296             unix => "\012",
7297         );
7298         $ole = lc $ole;
7299         unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7300             my $str = join " ", keys %endings;
7301             die <<EOM;
7302 Unrecognized line ending '$ole'; expecting one of: $str
7303 EOM
7304         }
7305         if ( $rOpts->{'preserve-line-endings'} ) {
7306             warn "Ignoring -ple; conflicts with -ole\n";
7307             $rOpts->{'preserve-line-endings'} = undef;
7308         }
7309     }
7310
7311     # hashes used to simplify setting whitespace
7312     %tightness = (
7313         '{' => $rOpts->{'brace-tightness'},
7314         '}' => $rOpts->{'brace-tightness'},
7315         '(' => $rOpts->{'paren-tightness'},
7316         ')' => $rOpts->{'paren-tightness'},
7317         '[' => $rOpts->{'square-bracket-tightness'},
7318         ']' => $rOpts->{'square-bracket-tightness'},
7319     );
7320     %matching_token = (
7321         '{' => '}',
7322         '(' => ')',
7323         '[' => ']',
7324         '?' => ':',
7325     );
7326
7327     # frequently used parameters
7328     $rOpts_add_newlines          = $rOpts->{'add-newlines'};
7329     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
7330     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7331     $rOpts_block_brace_vertical_tightness =
7332       $rOpts->{'block-brace-vertical-tightness'};
7333     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
7334     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7335     $rOpts_break_at_old_ternary_breakpoints =
7336       $rOpts->{'break-at-old-ternary-breakpoints'};
7337     $rOpts_break_at_old_comma_breakpoints =
7338       $rOpts->{'break-at-old-comma-breakpoints'};
7339     $rOpts_break_at_old_keyword_breakpoints =
7340       $rOpts->{'break-at-old-keyword-breakpoints'};
7341     $rOpts_break_at_old_logical_breakpoints =
7342       $rOpts->{'break-at-old-logical-breakpoints'};
7343     $rOpts_closing_side_comment_else_flag =
7344       $rOpts->{'closing-side-comment-else-flag'};
7345     $rOpts_closing_side_comment_maximum_text =
7346       $rOpts->{'closing-side-comment-maximum-text'};
7347     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7348     $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
7349     $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
7350     $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
7351     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
7352     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
7353     $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7354     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
7355     $rOpts_short_concatenation_item_length =
7356       $rOpts->{'short-concatenation-item-length'};
7357     $rOpts_keep_old_blank_lines     = $rOpts->{'keep-old-blank-lines'};
7358     $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
7359     $rOpts_format_skipping          = $rOpts->{'format-skipping'};
7360     $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
7361     $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
7362     $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7363     $half_maximum_line_length       = $rOpts_maximum_line_length / 2;
7364
7365     # Note that both opening and closing tokens can access the opening
7366     # and closing flags of their container types.
7367     %opening_vertical_tightness = (
7368         '(' => $rOpts->{'paren-vertical-tightness'},
7369         '{' => $rOpts->{'brace-vertical-tightness'},
7370         '[' => $rOpts->{'square-bracket-vertical-tightness'},
7371         ')' => $rOpts->{'paren-vertical-tightness'},
7372         '}' => $rOpts->{'brace-vertical-tightness'},
7373         ']' => $rOpts->{'square-bracket-vertical-tightness'},
7374     );
7375
7376     %closing_vertical_tightness = (
7377         '(' => $rOpts->{'paren-vertical-tightness-closing'},
7378         '{' => $rOpts->{'brace-vertical-tightness-closing'},
7379         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7380         ')' => $rOpts->{'paren-vertical-tightness-closing'},
7381         '}' => $rOpts->{'brace-vertical-tightness-closing'},
7382         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7383     );
7384
7385     # assume flag for '>' same as ')' for closing qw quotes
7386     %closing_token_indentation = (
7387         ')' => $rOpts->{'closing-paren-indentation'},
7388         '}' => $rOpts->{'closing-brace-indentation'},
7389         ']' => $rOpts->{'closing-square-bracket-indentation'},
7390         '>' => $rOpts->{'closing-paren-indentation'},
7391     );
7392
7393     %opening_token_right = (
7394         '(' => $rOpts->{'opening-paren-right'},
7395         '{' => $rOpts->{'opening-hash-brace-right'},
7396         '[' => $rOpts->{'opening-square-bracket-right'},
7397     );
7398
7399     %stack_opening_token = (
7400         '(' => $rOpts->{'stack-opening-paren'},
7401         '{' => $rOpts->{'stack-opening-hash-brace'},
7402         '[' => $rOpts->{'stack-opening-square-bracket'},
7403     );
7404
7405     %stack_closing_token = (
7406         ')' => $rOpts->{'stack-closing-paren'},
7407         '}' => $rOpts->{'stack-closing-hash-brace'},
7408         ']' => $rOpts->{'stack-closing-square-bracket'},
7409     );
7410 }
7411
7412 sub make_static_block_comment_pattern {
7413
7414     # create the pattern used to identify static block comments
7415     $static_block_comment_pattern = '^\s*##';
7416
7417     # allow the user to change it
7418     if ( $rOpts->{'static-block-comment-prefix'} ) {
7419         my $prefix = $rOpts->{'static-block-comment-prefix'};
7420         $prefix =~ s/^\s*//;
7421         my $pattern = $prefix;
7422
7423         # user may give leading caret to force matching left comments only
7424         if ( $prefix !~ /^\^#/ ) {
7425             if ( $prefix !~ /^#/ ) {
7426                 die
7427 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7428             }
7429             $pattern = '^\s*' . $prefix;
7430         }
7431         eval "'##'=~/$pattern/";
7432         if ($@) {
7433             die
7434 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7435         }
7436         $static_block_comment_pattern = $pattern;
7437     }
7438 }
7439
7440 sub make_format_skipping_pattern {
7441     my ( $opt_name, $default ) = @_;
7442     my $param = $rOpts->{$opt_name};
7443     unless ($param) { $param = $default }
7444     $param =~ s/^\s*//;
7445     if ( $param !~ /^#/ ) {
7446         die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7447     }
7448     my $pattern = '^' . $param . '\s';
7449     eval "'#'=~/$pattern/";
7450     if ($@) {
7451         die
7452 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7453     }
7454     return $pattern;
7455 }
7456
7457 sub make_closing_side_comment_list_pattern {
7458
7459     # turn any input list into a regex for recognizing selected block types
7460     $closing_side_comment_list_pattern = '^\w+';
7461     if ( defined( $rOpts->{'closing-side-comment-list'} )
7462         && $rOpts->{'closing-side-comment-list'} )
7463     {
7464         $closing_side_comment_list_pattern =
7465           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7466     }
7467 }
7468
7469 sub make_bli_pattern {
7470
7471     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7472         && $rOpts->{'brace-left-and-indent-list'} )
7473     {
7474         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7475     }
7476
7477     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7478 }
7479
7480 sub make_block_brace_vertical_tightness_pattern {
7481
7482     # turn any input list into a regex for recognizing selected block types
7483     $block_brace_vertical_tightness_pattern =
7484       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7485
7486     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7487         && $rOpts->{'block-brace-vertical-tightness-list'} )
7488     {
7489         $block_brace_vertical_tightness_pattern =
7490           make_block_pattern( '-bbvtl',
7491             $rOpts->{'block-brace-vertical-tightness-list'} );
7492     }
7493 }
7494
7495 sub make_block_pattern {
7496
7497     #  given a string of block-type keywords, return a regex to match them
7498     #  The only tricky part is that labels are indicated with a single ':'
7499     #  and the 'sub' token text may have additional text after it (name of
7500     #  sub).
7501     #
7502     #  Example:
7503     #
7504     #   input string: "if else elsif unless while for foreach do : sub";
7505     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7506
7507     my ( $abbrev, $string ) = @_;
7508     my @list  = split_words($string);
7509     my @words = ();
7510     my %seen;
7511     for my $i (@list) {
7512         next if $seen{$i};
7513         $seen{$i} = 1;
7514         if ( $i eq 'sub' ) {
7515         }
7516         elsif ( $i eq ':' ) {
7517             push @words, '\w+:';
7518         }
7519         elsif ( $i =~ /^\w/ ) {
7520             push @words, $i;
7521         }
7522         else {
7523             warn "unrecognized block type $i after $abbrev, ignoring\n";
7524         }
7525     }
7526     my $pattern = '(' . join( '|', @words ) . ')$';
7527     if ( $seen{'sub'} ) {
7528         $pattern = '(' . $pattern . '|sub)';
7529     }
7530     $pattern = '^' . $pattern;
7531     return $pattern;
7532 }
7533
7534 sub make_static_side_comment_pattern {
7535
7536     # create the pattern used to identify static side comments
7537     $static_side_comment_pattern = '^##';
7538
7539     # allow the user to change it
7540     if ( $rOpts->{'static-side-comment-prefix'} ) {
7541         my $prefix = $rOpts->{'static-side-comment-prefix'};
7542         $prefix =~ s/^\s*//;
7543         my $pattern = '^' . $prefix;
7544         eval "'##'=~/$pattern/";
7545         if ($@) {
7546             die
7547 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7548         }
7549         $static_side_comment_pattern = $pattern;
7550     }
7551 }
7552
7553 sub make_closing_side_comment_prefix {
7554
7555     # Be sure we have a valid closing side comment prefix
7556     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
7557     my $csc_prefix_pattern;
7558     if ( !defined($csc_prefix) ) {
7559         $csc_prefix         = '## end';
7560         $csc_prefix_pattern = '^##\s+end';
7561     }
7562     else {
7563         my $test_csc_prefix = $csc_prefix;
7564         if ( $test_csc_prefix !~ /^#/ ) {
7565             $test_csc_prefix = '#' . $test_csc_prefix;
7566         }
7567
7568         # make a regex to recognize the prefix
7569         my $test_csc_prefix_pattern = $test_csc_prefix;
7570
7571         # escape any special characters
7572         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
7573
7574         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
7575
7576         # allow exact number of intermediate spaces to vary
7577         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
7578
7579         # make sure we have a good pattern
7580         # if we fail this we probably have an error in escaping
7581         # characters.
7582         eval "'##'=~/$test_csc_prefix_pattern/";
7583         if ($@) {
7584
7585             # shouldn't happen..must have screwed up escaping, above
7586             report_definite_bug();
7587             warn
7588 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
7589
7590             # just warn and keep going with defaults
7591             warn "Please consider using a simpler -cscp prefix\n";
7592             warn "Using default -cscp instead; please check output\n";
7593         }
7594         else {
7595             $csc_prefix         = $test_csc_prefix;
7596             $csc_prefix_pattern = $test_csc_prefix_pattern;
7597         }
7598     }
7599     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
7600     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
7601 }
7602
7603 sub dump_want_left_space {
7604     my $fh = shift;
7605     local $" = "\n";
7606     print $fh <<EOM;
7607 These values are the main control of whitespace to the left of a token type;
7608 They may be altered with the -wls parameter.
7609 For a list of token types, use perltidy --dump-token-types (-dtt)
7610  1 means the token wants a space to its left
7611 -1 means the token does not want a space to its left
7612 ------------------------------------------------------------------------
7613 EOM
7614     foreach ( sort keys %want_left_space ) {
7615         print $fh "$_\t$want_left_space{$_}\n";
7616     }
7617 }
7618
7619 sub dump_want_right_space {
7620     my $fh = shift;
7621     local $" = "\n";
7622     print $fh <<EOM;
7623 These values are the main control of whitespace to the right of a token type;
7624 They may be altered with the -wrs parameter.
7625 For a list of token types, use perltidy --dump-token-types (-dtt)
7626  1 means the token wants a space to its right
7627 -1 means the token does not want a space to its right
7628 ------------------------------------------------------------------------
7629 EOM
7630     foreach ( sort keys %want_right_space ) {
7631         print $fh "$_\t$want_right_space{$_}\n";
7632     }
7633 }
7634
7635 {    # begin is_essential_whitespace
7636
7637     my %is_sort_grep_map;
7638     my %is_for_foreach;
7639
7640     BEGIN {
7641
7642         @_ = qw(sort grep map);
7643         @is_sort_grep_map{@_} = (1) x scalar(@_);
7644
7645         @_ = qw(for foreach);
7646         @is_for_foreach{@_} = (1) x scalar(@_);
7647
7648     }
7649
7650     sub is_essential_whitespace {
7651
7652         # Essential whitespace means whitespace which cannot be safely deleted
7653         # without risking the introduction of a syntax error.
7654         # We are given three tokens and their types:
7655         # ($tokenl, $typel) is the token to the left of the space in question
7656         # ($tokenr, $typer) is the token to the right of the space in question
7657         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7658         #
7659         # This is a slow routine but is not needed too often except when -mangle
7660         # is used.
7661         #
7662         # Note: This routine should almost never need to be changed.  It is
7663         # for avoiding syntax problems rather than for formatting.
7664         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7665
7666         my $result =
7667
7668           # never combine two bare words or numbers
7669           # examples:  and ::ok(1)
7670           #            return ::spw(...)
7671           #            for bla::bla:: abc
7672           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7673           #            $input eq"quit" to make $inputeq"quit"
7674           #            my $size=-s::SINK if $file;  <==OK but we won't do it
7675           # don't join something like: for bla::bla:: abc
7676           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7677           ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7678
7679           # do not combine a number with a concatination dot
7680           # example: pom.caputo:
7681           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7682           || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7683           || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7684
7685           # do not join a minus with a bare word, because you might form
7686           # a file test operator.  Example from Complex.pm:
7687           # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7688           || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7689
7690           # and something like this could become ambiguous without space
7691           # after the '-':
7692           #   use constant III=>1;
7693           #   $a = $b - III;
7694           # and even this:
7695           #   $a = - III;
7696           || ( ( $tokenl eq '-' )
7697             && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7698
7699           # '= -' should not become =- or you will get a warning
7700           # about reversed -=
7701           # || ($tokenr eq '-')
7702
7703           # keep a space between a quote and a bareword to prevent the
7704           # bareword from becomming a quote modifier.
7705           || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7706
7707           # keep a space between a token ending in '$' and any word;
7708           # this caused trouble:  "die @$ if $@"
7709           || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7710             && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7711
7712           # perl is very fussy about spaces before <<
7713           || ( $tokenr =~ /^\<\</ )
7714
7715           # avoid combining tokens to create new meanings. Example:
7716           #     $a+ +$b must not become $a++$b
7717           || ( $is_digraph{ $tokenl . $tokenr } )
7718           || ( $is_trigraph{ $tokenl . $tokenr } )
7719
7720           # another example: do not combine these two &'s:
7721           #     allow_options & &OPT_EXECCGI
7722           || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7723
7724           # don't combine $$ or $# with any alphanumeric
7725           # (testfile mangle.t with --mangle)
7726           || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7727
7728           # retain any space after possible filehandle
7729           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7730           || ( $typel eq 'Z' )
7731
7732           # Perl is sensitive to whitespace after the + here:
7733           #  $b = xvals $a + 0.1 * yvals $a;
7734           || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
7735
7736           # keep paren separate in 'use Foo::Bar ()'
7737           || ( $tokenr eq '('
7738             && $typel   eq 'w'
7739             && $typell  eq 'k'
7740             && $tokenll eq 'use' )
7741
7742           # keep any space between filehandle and paren:
7743           # file mangle.t with --mangle:
7744           || ( $typel eq 'Y' && $tokenr eq '(' )
7745
7746           # retain any space after here doc operator ( hereerr.t)
7747           || ( $typel eq 'h' )
7748
7749           # be careful with a space around ++ and --, to avoid ambiguity as to
7750           # which token it applies
7751           || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7752           || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7753
7754           # need space after foreach my; for example, this will fail in
7755           # older versions of Perl:
7756           # foreach my$ft(@filetypes)...
7757           || (
7758             $tokenl eq 'my'
7759
7760             #  /^(for|foreach)$/
7761             && $is_for_foreach{$tokenll}
7762             && $tokenr =~ /^\$/
7763           )
7764
7765           # must have space between grep and left paren; "grep(" will fail
7766           || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7767
7768           # don't stick numbers next to left parens, as in:
7769           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7770           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7771
7772           # We must be sure that a space between a ? and a quoted string
7773           # remains if the space before the ? remains.  [Loca.pm, lockarea]
7774           # ie,
7775           #    $b=join $comma ? ',' : ':', @_;  # ok
7776           #    $b=join $comma?',' : ':', @_;    # ok!
7777           #    $b=join $comma ?',' : ':', @_;   # error!
7778           # Not really required:
7779           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
7780
7781           # do not remove space between an '&' and a bare word because
7782           # it may turn into a function evaluation, like here
7783           # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
7784           #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
7785           || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7786
7787           ;    # the value of this long logic sequence is the result we want
7788         return $result;
7789     }
7790 }
7791
7792 sub set_white_space_flag {
7793
7794     #    This routine examines each pair of nonblank tokens and
7795     #    sets values for array @white_space_flag.
7796     #
7797     #    $white_space_flag[$j] is a flag indicating whether a white space
7798     #    BEFORE token $j is needed, with the following values:
7799     #
7800     #            -1 do not want a space before token $j
7801     #             0 optional space or $j is a whitespace
7802     #             1 want a space before token $j
7803     #
7804     #
7805     #   The values for the first token will be defined based
7806     #   upon the contents of the "to_go" output array.
7807     #
7808     #   Note: retain debug print statements because they are usually
7809     #   required after adding new token types.
7810
7811     BEGIN {
7812
7813         # initialize these global hashes, which control the use of
7814         # whitespace around tokens:
7815         #
7816         # %binary_ws_rules
7817         # %want_left_space
7818         # %want_right_space
7819         # %space_after_keyword
7820         #
7821         # Many token types are identical to the tokens themselves.
7822         # See the tokenizer for a complete list. Here are some special types:
7823         #   k = perl keyword
7824         #   f = semicolon in for statement
7825         #   m = unary minus
7826         #   p = unary plus
7827         # Note that :: is excluded since it should be contained in an identifier
7828         # Note that '->' is excluded because it never gets space
7829         # parentheses and brackets are excluded since they are handled specially
7830         # curly braces are included but may be overridden by logic, such as
7831         # newline logic.
7832
7833         # NEW_TOKENS: create a whitespace rule here.  This can be as
7834         # simple as adding your new letter to @spaces_both_sides, for
7835         # example.
7836
7837         @_ = qw" L { ( [ ";
7838         @is_opening_type{@_} = (1) x scalar(@_);
7839
7840         @_ = qw" R } ) ] ";
7841         @is_closing_type{@_} = (1) x scalar(@_);
7842
7843         my @spaces_both_sides = qw"
7844           + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7845           .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7846           &&= ||= //= <=> A k f w F n C Y U G v
7847           ";
7848
7849         my @spaces_left_side = qw"
7850           t ! ~ m p { \ h pp mm Z j
7851           ";
7852         push( @spaces_left_side, '#' );    # avoids warning message
7853
7854         my @spaces_right_side = qw"
7855           ; } ) ] R J ++ -- **=
7856           ";
7857         push( @spaces_right_side, ',' );    # avoids warning message
7858         @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7859         @want_right_space{@spaces_both_sides} =
7860           (1) x scalar(@spaces_both_sides);
7861         @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
7862         @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7863         @want_left_space{@spaces_right_side} =
7864           (-1) x scalar(@spaces_right_side);
7865         @want_right_space{@spaces_right_side} =
7866           (1) x scalar(@spaces_right_side);
7867         $want_left_space{'L'}   = WS_NO;
7868         $want_left_space{'->'}  = WS_NO;
7869         $want_right_space{'->'} = WS_NO;
7870         $want_left_space{'**'}  = WS_NO;
7871         $want_right_space{'**'} = WS_NO;
7872
7873         # hash type information must stay tightly bound
7874         # as in :  ${xxxx}
7875         $binary_ws_rules{'i'}{'L'} = WS_NO;
7876         $binary_ws_rules{'i'}{'{'} = WS_YES;
7877         $binary_ws_rules{'k'}{'{'} = WS_YES;
7878         $binary_ws_rules{'U'}{'{'} = WS_YES;
7879         $binary_ws_rules{'i'}{'['} = WS_NO;
7880         $binary_ws_rules{'R'}{'L'} = WS_NO;
7881         $binary_ws_rules{'R'}{'{'} = WS_NO;
7882         $binary_ws_rules{'t'}{'L'} = WS_NO;
7883         $binary_ws_rules{'t'}{'{'} = WS_NO;
7884         $binary_ws_rules{'}'}{'L'} = WS_NO;
7885         $binary_ws_rules{'}'}{'{'} = WS_NO;
7886         $binary_ws_rules{'$'}{'L'} = WS_NO;
7887         $binary_ws_rules{'$'}{'{'} = WS_NO;
7888         $binary_ws_rules{'@'}{'L'} = WS_NO;
7889         $binary_ws_rules{'@'}{'{'} = WS_NO;
7890         $binary_ws_rules{'='}{'L'} = WS_YES;
7891
7892         # the following includes ') {'
7893         # as in :    if ( xxx ) { yyy }
7894         $binary_ws_rules{']'}{'L'} = WS_NO;
7895         $binary_ws_rules{']'}{'{'} = WS_NO;
7896         $binary_ws_rules{')'}{'{'} = WS_YES;
7897         $binary_ws_rules{')'}{'['} = WS_NO;
7898         $binary_ws_rules{']'}{'['} = WS_NO;
7899         $binary_ws_rules{']'}{'{'} = WS_NO;
7900         $binary_ws_rules{'}'}{'['} = WS_NO;
7901         $binary_ws_rules{'R'}{'['} = WS_NO;
7902
7903         $binary_ws_rules{']'}{'++'} = WS_NO;
7904         $binary_ws_rules{']'}{'--'} = WS_NO;
7905         $binary_ws_rules{')'}{'++'} = WS_NO;
7906         $binary_ws_rules{')'}{'--'} = WS_NO;
7907
7908         $binary_ws_rules{'R'}{'++'} = WS_NO;
7909         $binary_ws_rules{'R'}{'--'} = WS_NO;
7910
7911         ########################################################
7912         # should no longer be necessary (see niek.pl)
7913         ##$binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
7914         ##$binary_ws_rules{'w'}{':'} = WS_NO;
7915         ########################################################
7916         $binary_ws_rules{'i'}{'Q'} = WS_YES;
7917         $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7918
7919         # FIXME: we need to split 'i' into variables and functions
7920         # and have no space for functions but space for variables.  For now,
7921         # I have a special patch in the special rules below
7922         $binary_ws_rules{'i'}{'('} = WS_NO;
7923
7924         $binary_ws_rules{'w'}{'('} = WS_NO;
7925         $binary_ws_rules{'w'}{'{'} = WS_YES;
7926     }
7927     my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7928     my ( $last_token, $last_type, $last_block_type, $token, $type,
7929         $block_type );
7930     my (@white_space_flag);
7931     my $j_tight_closing_paren = -1;
7932
7933     if ( $max_index_to_go >= 0 ) {
7934         $token      = $tokens_to_go[$max_index_to_go];
7935         $type       = $types_to_go[$max_index_to_go];
7936         $block_type = $block_type_to_go[$max_index_to_go];
7937     }
7938     else {
7939         $token      = ' ';
7940         $type       = 'b';
7941         $block_type = '';
7942     }
7943
7944     # loop over all tokens
7945     my ( $j, $ws );
7946
7947     for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7948
7949         if ( $$rtoken_type[$j] eq 'b' ) {
7950             $white_space_flag[$j] = WS_OPTIONAL;
7951             next;
7952         }
7953
7954         # set a default value, to be changed as needed
7955         $ws              = undef;
7956         $last_token      = $token;
7957         $last_type       = $type;
7958         $last_block_type = $block_type;
7959         $token           = $$rtokens[$j];
7960         $type            = $$rtoken_type[$j];
7961         $block_type      = $$rblock_type[$j];
7962
7963         #---------------------------------------------------------------
7964         # section 1:
7965         # handle space on the inside of opening braces
7966         #---------------------------------------------------------------
7967
7968         #    /^[L\{\(\[]$/
7969         if ( $is_opening_type{$last_type} ) {
7970
7971             $j_tight_closing_paren = -1;
7972
7973             # let's keep empty matched braces together: () {} []
7974             # except for BLOCKS
7975             if ( $token eq $matching_token{$last_token} ) {
7976                 if ($block_type) {
7977                     $ws = WS_YES;
7978                 }
7979                 else {
7980                     $ws = WS_NO;
7981                 }
7982             }
7983             else {
7984
7985                 # we're considering the right of an opening brace
7986                 # tightness = 0 means always pad inside with space
7987                 # tightness = 1 means pad inside if "complex"
7988                 # tightness = 2 means never pad inside with space
7989
7990                 my $tightness;
7991                 if (   $last_type eq '{'
7992                     && $last_token eq '{'
7993                     && $last_block_type )
7994                 {
7995                     $tightness = $rOpts_block_brace_tightness;
7996                 }
7997                 else { $tightness = $tightness{$last_token} }
7998
7999     #=================================================================
8000     # Patch for fabrice_bug.pl
8001     # We must always avoid spaces around a bare word beginning with ^ as in:
8002     #    my $before = ${^PREMATCH};
8003     # Because all of the following cause an error in perl:
8004     #    my $before = ${ ^PREMATCH };
8005     #    my $before = ${ ^PREMATCH};
8006     #    my $before = ${^PREMATCH };
8007     # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1.
8008     # Note that here we must set tightness=1 and not 2 so that the closing space
8009     # is also avoided (via the $j_tight_closing_paren flag in coding)
8010                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8011
8012               #=================================================================
8013
8014                 if ( $tightness <= 0 ) {
8015                     $ws = WS_YES;
8016                 }
8017                 elsif ( $tightness > 1 ) {
8018                     $ws = WS_NO;
8019                 }
8020                 else {
8021
8022                     # Patch to count '-foo' as single token so that
8023                     # each of  $a{-foo} and $a{foo} and $a{'foo'} do
8024                     # not get spaces with default formatting.
8025                     my $j_here = $j;
8026                     ++$j_here
8027                       if ( $token eq '-'
8028                         && $last_token eq '{'
8029                         && $$rtoken_type[ $j + 1 ] eq 'w' );
8030
8031                     # $j_next is where a closing token should be if
8032                     # the container has a single token
8033                     my $j_next =
8034                       ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8035                       ? $j_here + 2
8036                       : $j_here + 1;
8037                     my $tok_next  = $$rtokens[$j_next];
8038                     my $type_next = $$rtoken_type[$j_next];
8039
8040                     # for tightness = 1, if there is just one token
8041                     # within the matching pair, we will keep it tight
8042                     if (
8043                         $tok_next eq $matching_token{$last_token}
8044
8045                         # but watch out for this: [ [ ]    (misc.t)
8046                         && $last_token ne $token
8047                       )
8048                     {
8049
8050                         # remember where to put the space for the closing paren
8051                         $j_tight_closing_paren = $j_next;
8052                         $ws                    = WS_NO;
8053                     }
8054                     else {
8055                         $ws = WS_YES;
8056                     }
8057                 }
8058             }
8059         }    # done with opening braces and brackets
8060         my $ws_1 = $ws
8061           if FORMATTER_DEBUG_FLAG_WHITE;
8062
8063         #---------------------------------------------------------------
8064         # section 2:
8065         # handle space on inside of closing brace pairs
8066         #---------------------------------------------------------------
8067
8068         #   /[\}\)\]R]/
8069         if ( $is_closing_type{$type} ) {
8070
8071             if ( $j == $j_tight_closing_paren ) {
8072
8073                 $j_tight_closing_paren = -1;
8074                 $ws                    = WS_NO;
8075             }
8076             else {
8077
8078                 if ( !defined($ws) ) {
8079
8080                     my $tightness;
8081                     if ( $type eq '}' && $token eq '}' && $block_type ) {
8082                         $tightness = $rOpts_block_brace_tightness;
8083                     }
8084                     else { $tightness = $tightness{$token} }
8085
8086                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8087                 }
8088             }
8089         }
8090
8091         my $ws_2 = $ws
8092           if FORMATTER_DEBUG_FLAG_WHITE;
8093
8094         #---------------------------------------------------------------
8095         # section 3:
8096         # use the binary table
8097         #---------------------------------------------------------------
8098         if ( !defined($ws) ) {
8099             $ws = $binary_ws_rules{$last_type}{$type};
8100         }
8101         my $ws_3 = $ws
8102           if FORMATTER_DEBUG_FLAG_WHITE;
8103
8104         #---------------------------------------------------------------
8105         # section 4:
8106         # some special cases
8107         #---------------------------------------------------------------
8108         if ( $token eq '(' ) {
8109
8110             # This will have to be tweaked as tokenization changes.
8111             # We usually want a space at '} (', for example:
8112             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8113             #
8114             # But not others:
8115             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8116             # At present, the above & block is marked as type L/R so this case
8117             # won't go through here.
8118             if ( $last_type eq '}' ) { $ws = WS_YES }
8119
8120             # NOTE: some older versions of Perl had occasional problems if
8121             # spaces are introduced between keywords or functions and opening
8122             # parens.  So the default is not to do this except is certain
8123             # cases.  The current Perl seems to tolerate spaces.
8124
8125             # Space between keyword and '('
8126             elsif ( $last_type eq 'k' ) {
8127                 $ws = WS_NO
8128                   unless ( $rOpts_space_keyword_paren
8129                     || $space_after_keyword{$last_token} );
8130             }
8131
8132             # Space between function and '('
8133             # -----------------------------------------------------
8134             # 'w' and 'i' checks for something like:
8135             #   myfun(    &myfun(   ->myfun(
8136             # -----------------------------------------------------
8137             elsif (( $last_type =~ /^[wUG]$/ )
8138                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8139             {
8140                 $ws = WS_NO unless ($rOpts_space_function_paren);
8141             }
8142
8143             # space between something like $i and ( in
8144             # for $i ( 0 .. 20 ) {
8145             # FIXME: eventually, type 'i' needs to be split into multiple
8146             # token types so this can be a hardwired rule.
8147             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8148                 $ws = WS_YES;
8149             }
8150
8151             # allow constant function followed by '()' to retain no space
8152             elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8153                 $ws = WS_NO;
8154             }
8155         }
8156
8157         # patch for SWITCH/CASE: make space at ']{' optional
8158         # since the '{' might begin a case or when block
8159         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8160             $ws = WS_OPTIONAL;
8161         }
8162
8163         # keep space between 'sub' and '{' for anonymous sub definition
8164         if ( $type eq '{' ) {
8165             if ( $last_token eq 'sub' ) {
8166                 $ws = WS_YES;
8167             }
8168
8169             # this is needed to avoid no space in '){'
8170             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8171
8172             # avoid any space before the brace or bracket in something like
8173             #  @opts{'a','b',...}
8174             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8175                 $ws = WS_NO;
8176             }
8177         }
8178
8179         elsif ( $type eq 'i' ) {
8180
8181             # never a space before ->
8182             if ( $token =~ /^\-\>/ ) {
8183                 $ws = WS_NO;
8184             }
8185         }
8186
8187         # retain any space between '-' and bare word
8188         elsif ( $type eq 'w' || $type eq 'C' ) {
8189             $ws = WS_OPTIONAL if $last_type eq '-';
8190
8191             # never a space before ->
8192             if ( $token =~ /^\-\>/ ) {
8193                 $ws = WS_NO;
8194             }
8195         }
8196
8197         # retain any space between '-' and bare word
8198         # example: avoid space between 'USER' and '-' here:
8199         #   $myhash{USER-NAME}='steve';
8200         elsif ( $type eq 'm' || $type eq '-' ) {
8201             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8202         }
8203
8204         # always space before side comment
8205         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8206
8207         # always preserver whatever space was used after a possible
8208         # filehandle (except _) or here doc operator
8209         if (
8210             $type ne '#'
8211             && ( ( $last_type eq 'Z' && $last_token ne '_' )
8212                 || $last_type eq 'h' )
8213           )
8214         {
8215             $ws = WS_OPTIONAL;
8216         }
8217
8218         my $ws_4 = $ws
8219           if FORMATTER_DEBUG_FLAG_WHITE;
8220
8221         #---------------------------------------------------------------
8222         # section 5:
8223         # default rules not covered above
8224         #---------------------------------------------------------------
8225         # if we fall through to here,
8226         # look at the pre-defined hash tables for the two tokens, and
8227         # if (they are equal) use the common value
8228         # if (either is zero or undef) use the other
8229         # if (either is -1) use it
8230         # That is,
8231         # left  vs right
8232         #  1    vs    1     -->  1
8233         #  0    vs    0     -->  0
8234         # -1    vs   -1     --> -1
8235         #
8236         #  0    vs   -1     --> -1
8237         #  0    vs    1     -->  1
8238         #  1    vs    0     -->  1
8239         # -1    vs    0     --> -1
8240         #
8241         # -1    vs    1     --> -1
8242         #  1    vs   -1     --> -1
8243         if ( !defined($ws) ) {
8244             my $wl = $want_left_space{$type};
8245             my $wr = $want_right_space{$last_type};
8246             if ( !defined($wl) ) { $wl = 0 }
8247             if ( !defined($wr) ) { $wr = 0 }
8248             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8249         }
8250
8251         if ( !defined($ws) ) {
8252             $ws = 0;
8253             write_diagnostics(
8254                 "WS flag is undefined for tokens $last_token $token\n");
8255         }
8256
8257         # Treat newline as a whitespace. Otherwise, we might combine
8258         # 'Send' and '-recipients' here according to the above rules:
8259         #    my $msg = new Fax::Send
8260         #      -recipients => $to,
8261         #      -data => $data;
8262         if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8263
8264         if (   ( $ws == 0 )
8265             && $j > 0
8266             && $j < $jmax
8267             && ( $last_type !~ /^[Zh]$/ ) )
8268         {
8269
8270             # If this happens, we have a non-fatal but undesirable
8271             # hole in the above rules which should be patched.
8272             write_diagnostics(
8273                 "WS flag is zero for tokens $last_token $token\n");
8274         }
8275         $white_space_flag[$j] = $ws;
8276
8277         FORMATTER_DEBUG_FLAG_WHITE && do {
8278             my $str = substr( $last_token, 0, 15 );
8279             $str .= ' ' x ( 16 - length($str) );
8280             if ( !defined($ws_1) ) { $ws_1 = "*" }
8281             if ( !defined($ws_2) ) { $ws_2 = "*" }
8282             if ( !defined($ws_3) ) { $ws_3 = "*" }
8283             if ( !defined($ws_4) ) { $ws_4 = "*" }
8284             print
8285 "WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8286         };
8287     }
8288     return \@white_space_flag;
8289 }
8290
8291 {    # begin print_line_of_tokens
8292
8293     my $rtoken_type;
8294     my $rtokens;
8295     my $rlevels;
8296     my $rslevels;
8297     my $rblock_type;
8298     my $rcontainer_type;
8299     my $rcontainer_environment;
8300     my $rtype_sequence;
8301     my $input_line;
8302     my $rnesting_tokens;
8303     my $rci_levels;
8304     my $rnesting_blocks;
8305
8306     my $in_quote;
8307     my $python_indentation_level;
8308
8309     # These local token variables are stored by store_token_to_go:
8310     my $block_type;
8311     my $ci_level;
8312     my $container_environment;
8313     my $container_type;
8314     my $in_continued_quote;
8315     my $level;
8316     my $nesting_blocks;
8317     my $no_internal_newlines;
8318     my $slevel;
8319     my $token;
8320     my $type;
8321     my $type_sequence;
8322
8323     # routine to pull the jth token from the line of tokens
8324     sub extract_token {
8325         my $j = shift;
8326         $token                 = $$rtokens[$j];
8327         $type                  = $$rtoken_type[$j];
8328         $block_type            = $$rblock_type[$j];
8329         $container_type        = $$rcontainer_type[$j];
8330         $container_environment = $$rcontainer_environment[$j];
8331         $type_sequence         = $$rtype_sequence[$j];
8332         $level                 = $$rlevels[$j];
8333         $slevel                = $$rslevels[$j];
8334         $nesting_blocks        = $$rnesting_blocks[$j];
8335         $ci_level              = $$rci_levels[$j];
8336     }
8337
8338     {
8339         my @saved_token;
8340
8341         sub save_current_token {
8342
8343             @saved_token = (
8344                 $block_type,            $ci_level,
8345                 $container_environment, $container_type,
8346                 $in_continued_quote,    $level,
8347                 $nesting_blocks,        $no_internal_newlines,
8348                 $slevel,                $token,
8349                 $type,                  $type_sequence,
8350             );
8351         }
8352
8353         sub restore_current_token {
8354             (
8355                 $block_type,            $ci_level,
8356                 $container_environment, $container_type,
8357                 $in_continued_quote,    $level,
8358                 $nesting_blocks,        $no_internal_newlines,
8359                 $slevel,                $token,
8360                 $type,                  $type_sequence,
8361             ) = @saved_token;
8362         }
8363     }
8364
8365     # Routine to place the current token into the output stream.
8366     # Called once per output token.
8367     sub store_token_to_go {
8368
8369         my $flag = $no_internal_newlines;
8370         if ( $_[0] ) { $flag = 1 }
8371
8372         $tokens_to_go[ ++$max_index_to_go ]            = $token;
8373         $types_to_go[$max_index_to_go]                 = $type;
8374         $nobreak_to_go[$max_index_to_go]               = $flag;
8375         $old_breakpoint_to_go[$max_index_to_go]        = 0;
8376         $forced_breakpoint_to_go[$max_index_to_go]     = 0;
8377         $block_type_to_go[$max_index_to_go]            = $block_type;
8378         $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
8379         $container_environment_to_go[$max_index_to_go] = $container_environment;
8380         $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
8381         $ci_levels_to_go[$max_index_to_go]             = $ci_level;
8382         $mate_index_to_go[$max_index_to_go]            = -1;
8383         $matching_token_to_go[$max_index_to_go]        = '';
8384         $bond_strength_to_go[$max_index_to_go]         = 0;
8385
8386         # Note: negative levels are currently retained as a diagnostic so that
8387         # the 'final indentation level' is correctly reported for bad scripts.
8388         # But this means that every use of $level as an index must be checked.
8389         # If this becomes too much of a problem, we might give up and just clip
8390         # them at zero.
8391         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8392         $levels_to_go[$max_index_to_go] = $level;
8393         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8394         $lengths_to_go[ $max_index_to_go + 1 ] =
8395           $lengths_to_go[$max_index_to_go] + length($token);
8396
8397         # Define the indentation that this token would have if it started
8398         # a new line.  We have to do this now because we need to know this
8399         # when considering one-line blocks.
8400         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8401
8402         if ( $type ne 'b' ) {
8403             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8404             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
8405             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8406             $last_nonblank_index_to_go      = $max_index_to_go;
8407             $last_nonblank_type_to_go       = $type;
8408             $last_nonblank_token_to_go      = $token;
8409             if ( $type eq ',' ) {
8410                 $comma_count_in_batch++;
8411             }
8412         }
8413
8414         FORMATTER_DEBUG_FLAG_STORE && do {
8415             my ( $a, $b, $c ) = caller();
8416             print
8417 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8418         };
8419     }
8420
8421     sub insert_new_token_to_go {
8422
8423         # insert a new token into the output stream.  use same level as
8424         # previous token; assumes a character at max_index_to_go.
8425         save_current_token();
8426         ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8427
8428         if ( $max_index_to_go == UNDEFINED_INDEX ) {
8429             warning("code bug: bad call to insert_new_token_to_go\n");
8430         }
8431         $level = $levels_to_go[$max_index_to_go];
8432
8433         # FIXME: it seems to be necessary to use the next, rather than
8434         # previous, value of this variable when creating a new blank (align.t)
8435         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
8436         $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
8437         $ci_level              = $ci_levels_to_go[$max_index_to_go];
8438         $container_environment = $container_environment_to_go[$max_index_to_go];
8439         $in_continued_quote    = 0;
8440         $block_type            = "";
8441         $type_sequence         = "";
8442         store_token_to_go();
8443         restore_current_token();
8444         return;
8445     }
8446
8447     sub print_line_of_tokens {
8448
8449         my $line_of_tokens = shift;
8450
8451         # This routine is called once per input line to process all of
8452         # the tokens on that line.  This is the first stage of
8453         # beautification.
8454         #
8455         # Full-line comments and blank lines may be processed immediately.
8456         #
8457         # For normal lines of code, the tokens are stored one-by-one,
8458         # via calls to 'sub store_token_to_go', until a known line break
8459         # point is reached.  Then, the batch of collected tokens is
8460         # passed along to 'sub output_line_to_go' for further
8461         # processing.  This routine decides if there should be
8462         # whitespace between each pair of non-white tokens, so later
8463         # routines only need to decide on any additional line breaks.
8464         # Any whitespace is initally a single space character.  Later,
8465         # the vertical aligner may expand that to be multiple space
8466         # characters if necessary for alignment.
8467
8468         # extract input line number for error messages
8469         $input_line_number = $line_of_tokens->{_line_number};
8470
8471         $rtoken_type            = $line_of_tokens->{_rtoken_type};
8472         $rtokens                = $line_of_tokens->{_rtokens};
8473         $rlevels                = $line_of_tokens->{_rlevels};
8474         $rslevels               = $line_of_tokens->{_rslevels};
8475         $rblock_type            = $line_of_tokens->{_rblock_type};
8476         $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
8477         $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8478         $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
8479         $input_line             = $line_of_tokens->{_line_text};
8480         $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
8481         $rci_levels             = $line_of_tokens->{_rci_levels};
8482         $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
8483
8484         $in_continued_quote = $starting_in_quote =
8485           $line_of_tokens->{_starting_in_quote};
8486         $in_quote        = $line_of_tokens->{_ending_in_quote};
8487         $ending_in_quote = $in_quote;
8488         $python_indentation_level =
8489           $line_of_tokens->{_python_indentation_level};
8490
8491         my $j;
8492         my $j_next;
8493         my $jmax;
8494         my $next_nonblank_token;
8495         my $next_nonblank_token_type;
8496         my $rwhite_space_flag;
8497
8498         $jmax                    = @$rtokens - 1;
8499         $block_type              = "";
8500         $container_type          = "";
8501         $container_environment   = "";
8502         $type_sequence           = "";
8503         $no_internal_newlines    = 1 - $rOpts_add_newlines;
8504         $is_static_block_comment = 0;
8505
8506         # Handle a continued quote..
8507         if ($in_continued_quote) {
8508
8509             # A line which is entirely a quote or pattern must go out
8510             # verbatim.  Note: the \n is contained in $input_line.
8511             if ( $jmax <= 0 ) {
8512                 if ( ( $input_line =~ "\t" ) ) {
8513                     note_embedded_tab();
8514                 }
8515                 write_unindented_line("$input_line");
8516                 $last_line_had_side_comment = 0;
8517                 return;
8518             }
8519
8520             # prior to version 20010406, perltidy had a bug which placed
8521             # continuation indentation before the last line of some multiline
8522             # quotes and patterns -- exactly the lines passing this way.
8523             # To help find affected lines in scripts run with these
8524             # versions, run with '-chk', and it will warn of any quotes or
8525             # patterns which might have been modified by these early
8526             # versions.
8527             if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8528                 warning(
8529 "-chk: please check this line for extra leading whitespace\n"
8530                 );
8531             }
8532         }
8533
8534         # Write line verbatim if we are in a formatting skip section
8535         if ($in_format_skipping_section) {
8536             write_unindented_line("$input_line");
8537             $last_line_had_side_comment = 0;
8538
8539             # Note: extra space appended to comment simplifies pattern matching
8540             if (   $jmax == 0
8541                 && $$rtoken_type[0] eq '#'
8542                 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8543             {
8544                 $in_format_skipping_section = 0;
8545                 write_logfile_entry("Exiting formatting skip section\n");
8546             }
8547             return;
8548         }
8549
8550         # See if we are entering a formatting skip section
8551         if (   $rOpts_format_skipping
8552             && $jmax == 0
8553             && $$rtoken_type[0] eq '#'
8554             && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8555         {
8556             flush();
8557             $in_format_skipping_section = 1;
8558             write_logfile_entry("Entering formatting skip section\n");
8559             write_unindented_line("$input_line");
8560             $last_line_had_side_comment = 0;
8561             return;
8562         }
8563
8564         # delete trailing blank tokens
8565         if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8566
8567         # Handle a blank line..
8568         if ( $jmax < 0 ) {
8569
8570             # If keep-old-blank-lines is zero, we delete all
8571             # old blank lines and let the blank line rules generate any
8572             # needed blanks.
8573             if ($rOpts_keep_old_blank_lines) {
8574                 flush();
8575                 $file_writer_object->write_blank_code_line(
8576                     $rOpts_keep_old_blank_lines == 2 );
8577                 $last_line_leading_type = 'b';
8578             }
8579             $last_line_had_side_comment = 0;
8580             return;
8581         }
8582
8583         # see if this is a static block comment (starts with ## by default)
8584         my $is_static_block_comment_without_leading_space = 0;
8585         if (   $jmax == 0
8586             && $$rtoken_type[0] eq '#'
8587             && $rOpts->{'static-block-comments'}
8588             && $input_line =~ /$static_block_comment_pattern/o )
8589         {
8590             $is_static_block_comment = 1;
8591             $is_static_block_comment_without_leading_space =
8592               substr( $input_line, 0, 1 ) eq '#';
8593         }
8594
8595         # Check for comments which are line directives
8596         # Treat exactly as static block comments without leading space
8597         # reference: perlsyn, near end, section Plain Old Comments (Not!)
8598         # example: '# line 42 "new_filename.plx"'
8599         if (
8600                $jmax == 0
8601             && $$rtoken_type[0] eq '#'
8602             && $input_line =~ /^\#   \s*
8603                                line \s+ (\d+)   \s*
8604                                (?:\s("?)([^"]+)\2)? \s*
8605                                $/x
8606           )
8607         {
8608             $is_static_block_comment                       = 1;
8609             $is_static_block_comment_without_leading_space = 1;
8610         }
8611
8612         # create a hanging side comment if appropriate
8613         if (
8614                $jmax == 0
8615             && $$rtoken_type[0] eq '#'    # only token is a comment
8616             && $last_line_had_side_comment    # last line had side comment
8617             && $input_line =~ /^\s/           # there is some leading space
8618             && !$is_static_block_comment    # do not make static comment hanging
8619             && $rOpts->{'hanging-side-comments'}    # user is allowing this
8620           )
8621         {
8622
8623             # We will insert an empty qw string at the start of the token list
8624             # to force this comment to be a side comment. The vertical aligner
8625             # should then line it up with the previous side comment.
8626             unshift @$rtoken_type,            'q';
8627             unshift @$rtokens,                '';
8628             unshift @$rlevels,                $$rlevels[0];
8629             unshift @$rslevels,               $$rslevels[0];
8630             unshift @$rblock_type,            '';
8631             unshift @$rcontainer_type,        '';
8632             unshift @$rcontainer_environment, '';
8633             unshift @$rtype_sequence,         '';
8634             unshift @$rnesting_tokens,        $$rnesting_tokens[0];
8635             unshift @$rci_levels,             $$rci_levels[0];
8636             unshift @$rnesting_blocks,        $$rnesting_blocks[0];
8637             $jmax = 1;
8638         }
8639
8640         # remember if this line has a side comment
8641         $last_line_had_side_comment =
8642           ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8643
8644         # Handle a block (full-line) comment..
8645         if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8646
8647             if ( $rOpts->{'delete-block-comments'} ) { return }
8648
8649             if ( $rOpts->{'tee-block-comments'} ) {
8650                 $file_writer_object->tee_on();
8651             }
8652
8653             destroy_one_line_block();
8654             output_line_to_go();
8655
8656             # output a blank line before block comments
8657             if (
8658                    $last_line_leading_type !~ /^[#b]$/
8659                 && $rOpts->{'blanks-before-comments'}    # only if allowed
8660                 && !
8661                 $is_static_block_comment    # never before static block comments
8662               )
8663             {
8664                 flush();                    # switching to new output stream
8665                 $file_writer_object->write_blank_code_line();
8666                 $last_line_leading_type = 'b';
8667             }
8668
8669             # TRIM COMMENTS -- This could be turned off as a option
8670             $$rtokens[0] =~ s/\s*$//;       # trim right end
8671
8672             if (
8673                 $rOpts->{'indent-block-comments'}
8674                 && (  !$rOpts->{'indent-spaced-block-comments'}
8675                     || $input_line =~ /^\s+/ )
8676                 && !$is_static_block_comment_without_leading_space
8677               )
8678             {
8679                 extract_token(0);
8680                 store_token_to_go();
8681                 output_line_to_go();
8682             }
8683             else {
8684                 flush();    # switching to new output stream
8685                 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8686                 $last_line_leading_type = '#';
8687             }
8688             if ( $rOpts->{'tee-block-comments'} ) {
8689                 $file_writer_object->tee_off();
8690             }
8691             return;
8692         }
8693
8694         # compare input/output indentation except for continuation lines
8695         # (because they have an unknown amount of initial blank space)
8696         # and lines which are quotes (because they may have been outdented)
8697         # Note: this test is placed here because we know the continuation flag
8698         # at this point, which allows us to avoid non-meaningful checks.
8699         my $structural_indentation_level = $$rlevels[0];
8700         compare_indentation_levels( $python_indentation_level,
8701             $structural_indentation_level )
8702           unless ( $python_indentation_level < 0
8703             || ( $$rci_levels[0] > 0 )
8704             || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8705           );
8706
8707         #   Patch needed for MakeMaker.  Do not break a statement
8708         #   in which $VERSION may be calculated.  See MakeMaker.pm;
8709         #   this is based on the coding in it.
8710         #   The first line of a file that matches this will be eval'd:
8711         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8712         #   Examples:
8713         #     *VERSION = \'1.01';
8714         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
8715         #   We will pass such a line straight through without breaking
8716         #   it unless -npvl is used
8717
8718         my $is_VERSION_statement = 0;
8719
8720         if (
8721               !$saw_VERSION_in_this_file
8722             && $input_line =~ /VERSION/    # quick check to reject most lines
8723             && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8724           )
8725         {
8726             $saw_VERSION_in_this_file = 1;
8727             $is_VERSION_statement     = 1;
8728             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8729             $no_internal_newlines = 1;
8730         }
8731
8732         # take care of indentation-only
8733         # NOTE: In previous versions we sent all qw lines out immediately here.
8734         # No longer doing this: also write a line which is entirely a 'qw' list
8735         # to allow stacking of opening and closing tokens.  Note that interior
8736         # qw lines will still go out at the end of this routine.
8737         if ( $rOpts->{'indent-only'} ) {
8738             flush();
8739             trim($input_line);
8740
8741             extract_token(0);
8742             $token                 = $input_line;
8743             $type                  = 'q';
8744             $block_type            = "";
8745             $container_type        = "";
8746             $container_environment = "";
8747             $type_sequence         = "";
8748             store_token_to_go();
8749             output_line_to_go();
8750             return;
8751         }
8752
8753         push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
8754         push( @$rtoken_type, 'b', 'b' );
8755         ($rwhite_space_flag) =
8756           set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8757
8758         # find input tabbing to allow checks for tabbing disagreement
8759         ## not used for now
8760         ##$input_line_tabbing = "";
8761         ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8762
8763         # if the buffer hasn't been flushed, add a leading space if
8764         # necessary to keep essential whitespace. This is really only
8765         # necessary if we are squeezing out all ws.
8766         if ( $max_index_to_go >= 0 ) {
8767
8768             $old_line_count_in_batch++;
8769
8770             if (
8771                 is_essential_whitespace(
8772                     $last_last_nonblank_token,
8773                     $last_last_nonblank_type,
8774                     $tokens_to_go[$max_index_to_go],
8775                     $types_to_go[$max_index_to_go],
8776                     $$rtokens[0],
8777                     $$rtoken_type[0]
8778                 )
8779               )
8780             {
8781                 my $slevel = $$rslevels[0];
8782                 insert_new_token_to_go( ' ', 'b', $slevel,
8783                     $no_internal_newlines );
8784             }
8785         }
8786
8787         # If we just saw the end of an elsif block, write nag message
8788         # if we do not see another elseif or an else.
8789         if ($looking_for_else) {
8790
8791             unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8792                 write_logfile_entry("(No else block)\n");
8793             }
8794             $looking_for_else = 0;
8795         }
8796
8797         # This is a good place to kill incomplete one-line blocks
8798         if (   ( $semicolons_before_block_self_destruct == 0 )
8799             && ( $max_index_to_go >= 0 )
8800             && ( $types_to_go[$max_index_to_go] eq ';' )
8801             && ( $$rtokens[0] ne '}' ) )
8802         {
8803             destroy_one_line_block();
8804             output_line_to_go();
8805         }
8806
8807         # loop to process the tokens one-by-one
8808         $type  = 'b';
8809         $token = "";
8810
8811         foreach $j ( 0 .. $jmax ) {
8812
8813             # pull out the local values for this token
8814             extract_token($j);
8815
8816             if ( $type eq '#' ) {
8817
8818                 # trim trailing whitespace
8819                 # (there is no option at present to prevent this)
8820                 $token =~ s/\s*$//;
8821
8822                 if (
8823                     $rOpts->{'delete-side-comments'}
8824
8825                     # delete closing side comments if necessary
8826                     || (   $rOpts->{'delete-closing-side-comments'}
8827                         && $token =~ /$closing_side_comment_prefix_pattern/o
8828                         && $last_nonblank_block_type =~
8829                         /$closing_side_comment_list_pattern/o )
8830                   )
8831                 {
8832                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8833                         unstore_token_to_go();
8834                     }
8835                     last;
8836                 }
8837             }
8838
8839             # If we are continuing after seeing a right curly brace, flush
8840             # buffer unless we see what we are looking for, as in
8841             #   } else ...
8842             if ( $rbrace_follower && $type ne 'b' ) {
8843
8844                 unless ( $rbrace_follower->{$token} ) {
8845                     output_line_to_go();
8846                 }
8847                 $rbrace_follower = undef;
8848             }
8849
8850             $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8851             $next_nonblank_token      = $$rtokens[$j_next];
8852             $next_nonblank_token_type = $$rtoken_type[$j_next];
8853
8854             #--------------------------------------------------------
8855             # Start of section to patch token text
8856             #--------------------------------------------------------
8857
8858             # Modify certain tokens here for whitespace
8859             # The following is not yet done, but could be:
8860             #   sub (x x x)
8861             if ( $type =~ /^[wit]$/ ) {
8862
8863                 # Examples:
8864                 # change '$  var'  to '$var' etc
8865                 #        '-> new'  to '->new'
8866                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8867                     $token =~ s/\s*//g;
8868                 }
8869
8870                 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8871             }
8872
8873             # change 'LABEL   :'   to 'LABEL:'
8874             elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8875
8876             # patch to add space to something like "x10"
8877             # This avoids having to split this token in the pre-tokenizer
8878             elsif ( $type eq 'n' ) {
8879                 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8880             }
8881
8882             elsif ( $type eq 'Q' ) {
8883                 note_embedded_tab() if ( $token =~ "\t" );
8884
8885                 # make note of something like '$var = s/xxx/yyy/;'
8886                 # in case it should have been '$var =~ s/xxx/yyy/;'
8887                 if (
8888                        $token =~ /^(s|tr|y|m|\/)/
8889                     && $last_nonblank_token =~ /^(=|==|!=)$/
8890
8891                     # precededed by simple scalar
8892                     && $last_last_nonblank_type eq 'i'
8893                     && $last_last_nonblank_token =~ /^\$/
8894
8895                     # followed by some kind of termination
8896                     # (but give complaint if we can's see far enough ahead)
8897                     && $next_nonblank_token =~ /^[; \)\}]$/
8898
8899                     # scalar is not decleared
8900                     && !(
8901                            $types_to_go[0] eq 'k'
8902                         && $tokens_to_go[0] =~ /^(my|our|local)$/
8903                     )
8904                   )
8905                 {
8906                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8907                     complain(
8908 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8909                     );
8910                 }
8911             }
8912
8913            # trim blanks from right of qw quotes
8914            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8915             elsif ( $type eq 'q' ) {
8916                 $token =~ s/\s*$//;
8917                 note_embedded_tab() if ( $token =~ "\t" );
8918             }
8919
8920             #--------------------------------------------------------
8921             # End of section to patch token text
8922             #--------------------------------------------------------
8923
8924             # insert any needed whitespace
8925             if (   ( $type ne 'b' )
8926                 && ( $max_index_to_go >= 0 )
8927                 && ( $types_to_go[$max_index_to_go] ne 'b' )
8928                 && $rOpts_add_whitespace )
8929             {
8930                 my $ws = $$rwhite_space_flag[$j];
8931
8932                 if ( $ws == 1 ) {
8933                     insert_new_token_to_go( ' ', 'b', $slevel,
8934                         $no_internal_newlines );
8935                 }
8936             }
8937
8938             # Do not allow breaks which would promote a side comment to a
8939             # block comment.  In order to allow a break before an opening
8940             # or closing BLOCK, followed by a side comment, those sections
8941             # of code will handle this flag separately.
8942             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8943             my $is_opening_BLOCK =
8944               (      $type eq '{'
8945                   && $token eq '{'
8946                   && $block_type
8947                   && $block_type ne 't' );
8948             my $is_closing_BLOCK =
8949               (      $type eq '}'
8950                   && $token eq '}'
8951                   && $block_type
8952                   && $block_type ne 't' );
8953
8954             if (   $side_comment_follows
8955                 && !$is_opening_BLOCK
8956                 && !$is_closing_BLOCK )
8957             {
8958                 $no_internal_newlines = 1;
8959             }
8960
8961             # We're only going to handle breaking for code BLOCKS at this
8962             # (top) level.  Other indentation breaks will be handled by
8963             # sub scan_list, which is better suited to dealing with them.
8964             if ($is_opening_BLOCK) {
8965
8966                 # Tentatively output this token.  This is required before
8967                 # calling starting_one_line_block.  We may have to unstore
8968                 # it, though, if we have to break before it.
8969                 store_token_to_go($side_comment_follows);
8970
8971                 # Look ahead to see if we might form a one-line block
8972                 my $too_long =
8973                   starting_one_line_block( $j, $jmax, $level, $slevel,
8974                     $ci_level, $rtokens, $rtoken_type, $rblock_type );
8975                 clear_breakpoint_undo_stack();
8976
8977                 # to simplify the logic below, set a flag to indicate if
8978                 # this opening brace is far from the keyword which introduces it
8979                 my $keyword_on_same_line = 1;
8980                 if (   ( $max_index_to_go >= 0 )
8981                     && ( $last_nonblank_type eq ')' ) )
8982                 {
8983                     if (   $block_type =~ /^(if|else|elsif)$/
8984                         && ( $tokens_to_go[0] eq '}' )
8985                         && $rOpts_cuddled_else )
8986                     {
8987                         $keyword_on_same_line = 1;
8988                     }
8989                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8990                     {
8991                         $keyword_on_same_line = 0;
8992                     }
8993                 }
8994
8995                 # decide if user requested break before '{'
8996                 my $want_break =
8997
8998                   # use -bl flag if not a sub block of any type
8999                   $block_type !~ /^sub/
9000                   ? $rOpts->{'opening-brace-on-new-line'}
9001
9002                   # use -sbl flag for a named sub block
9003                   : $block_type !~ /^sub\W*$/
9004                   ? $rOpts->{'opening-sub-brace-on-new-line'}
9005
9006                   # use -asbl flag for an anonymous sub block
9007                   : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9008
9009                 # Break before an opening '{' ...
9010                 if (
9011
9012                     # if requested
9013                     $want_break
9014
9015                     # and we were unable to start looking for a block,
9016                     && $index_start_one_line_block == UNDEFINED_INDEX
9017
9018                     # or if it will not be on same line as its keyword, so that
9019                     # it will be outdented (eval.t, overload.t), and the user
9020                     # has not insisted on keeping it on the right
9021                     || (   !$keyword_on_same_line
9022                         && !$rOpts->{'opening-brace-always-on-right'} )
9023
9024                   )
9025                 {
9026
9027                     # but only if allowed
9028                     unless ($no_internal_newlines) {
9029
9030                         # since we already stored this token, we must unstore it
9031                         unstore_token_to_go();
9032
9033                         # then output the line
9034                         output_line_to_go();
9035
9036                         # and now store this token at the start of a new line
9037                         store_token_to_go($side_comment_follows);
9038                     }
9039                 }
9040
9041                 # Now update for side comment
9042                 if ($side_comment_follows) { $no_internal_newlines = 1 }
9043
9044                 # now output this line
9045                 unless ($no_internal_newlines) {
9046                     output_line_to_go();
9047                 }
9048             }
9049
9050             elsif ($is_closing_BLOCK) {
9051
9052                 # If there is a pending one-line block ..
9053                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9054
9055                     # we have to terminate it if..
9056                     if (
9057
9058                     # it is too long (final length may be different from
9059                     # initial estimate). note: must allow 1 space for this token
9060                         excess_line_length( $index_start_one_line_block,
9061                             $max_index_to_go ) >= 0
9062
9063                         # or if it has too many semicolons
9064                         || (   $semicolons_before_block_self_destruct == 0
9065                             && $last_nonblank_type ne ';' )
9066                       )
9067                     {
9068                         destroy_one_line_block();
9069                     }
9070                 }
9071
9072                 # put a break before this closing curly brace if appropriate
9073                 unless ( $no_internal_newlines
9074                     || $index_start_one_line_block != UNDEFINED_INDEX )
9075                 {
9076
9077                     # add missing semicolon if ...
9078                     # there are some tokens
9079                     if (
9080                         ( $max_index_to_go > 0 )
9081
9082                         # and we don't have one
9083                         && ( $last_nonblank_type ne ';' )
9084
9085                         # patch until some block type issues are fixed:
9086                         # Do not add semi-colon for block types '{',
9087                         # '}', and ';' because we cannot be sure yet
9088                         # that this is a block and not an anonomyous
9089                         # hash (blktype.t, blktype1.t)
9090                         && ( $block_type !~ /^[\{\};]$/ )
9091
9092                         # patch: and do not add semi-colons for recently
9093                         # added block types (see tmp/semicolon.t)
9094                         && ( $block_type !~
9095                             /^(switch|case|given|when|default)$/ )
9096
9097                         # it seems best not to add semicolons in these
9098                         # special block types: sort|map|grep
9099                         && ( !$is_sort_map_grep{$block_type} )
9100
9101                         # and we are allowed to do so.
9102                         && $rOpts->{'add-semicolons'}
9103                       )
9104                     {
9105
9106                         save_current_token();
9107                         $token  = ';';
9108                         $type   = ';';
9109                         $level  = $levels_to_go[$max_index_to_go];
9110                         $slevel = $nesting_depth_to_go[$max_index_to_go];
9111                         $nesting_blocks =
9112                           $nesting_blocks_to_go[$max_index_to_go];
9113                         $ci_level       = $ci_levels_to_go[$max_index_to_go];
9114                         $block_type     = "";
9115                         $container_type = "";
9116                         $container_environment = "";
9117                         $type_sequence         = "";
9118
9119                         # Note - we remove any blank AFTER extracting its
9120                         # parameters such as level, etc, above
9121                         if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9122                             unstore_token_to_go();
9123                         }
9124                         store_token_to_go();
9125
9126                         note_added_semicolon();
9127                         restore_current_token();
9128                     }
9129
9130                     # then write out everything before this closing curly brace
9131                     output_line_to_go();
9132
9133                 }
9134
9135                 # Now update for side comment
9136                 if ($side_comment_follows) { $no_internal_newlines = 1 }
9137
9138                 # store the closing curly brace
9139                 store_token_to_go();
9140
9141                 # ok, we just stored a closing curly brace.  Often, but
9142                 # not always, we want to end the line immediately.
9143                 # So now we have to check for special cases.
9144
9145                 # if this '}' successfully ends a one-line block..
9146                 my $is_one_line_block = 0;
9147                 my $keep_going        = 0;
9148                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9149
9150                     # Remember the type of token just before the
9151                     # opening brace.  It would be more general to use
9152                     # a stack, but this will work for one-line blocks.
9153                     $is_one_line_block =
9154                       $types_to_go[$index_start_one_line_block];
9155
9156                     # we have to actually make it by removing tentative
9157                     # breaks that were set within it
9158                     undo_forced_breakpoint_stack(0);
9159                     set_nobreaks( $index_start_one_line_block,
9160                         $max_index_to_go - 1 );
9161
9162                     # then re-initialize for the next one-line block
9163                     destroy_one_line_block();
9164
9165                     # then decide if we want to break after the '}' ..
9166                     # We will keep going to allow certain brace followers as in:
9167                     #   do { $ifclosed = 1; last } unless $losing;
9168                     #
9169                     # But make a line break if the curly ends a
9170                     # significant block:
9171                     if (
9172                         $is_block_without_semicolon{$block_type}
9173
9174                         # if needless semicolon follows we handle it later
9175                         && $next_nonblank_token ne ';'
9176                       )
9177                     {
9178                         output_line_to_go() unless ($no_internal_newlines);
9179                     }
9180                 }
9181
9182                 # set string indicating what we need to look for brace follower
9183                 # tokens
9184                 if ( $block_type eq 'do' ) {
9185                     $rbrace_follower = \%is_do_follower;
9186                 }
9187                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9188                     $rbrace_follower = \%is_if_brace_follower;
9189                 }
9190                 elsif ( $block_type eq 'else' ) {
9191                     $rbrace_follower = \%is_else_brace_follower;
9192                 }
9193
9194                 # added eval for borris.t
9195                 elsif ($is_sort_map_grep_eval{$block_type}
9196                     || $is_one_line_block eq 'G' )
9197                 {
9198                     $rbrace_follower = undef;
9199                     $keep_going      = 1;
9200                 }
9201
9202                 # anonymous sub
9203                 elsif ( $block_type =~ /^sub\W*$/ ) {
9204
9205                     if ($is_one_line_block) {
9206                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
9207                     }
9208                     else {
9209                         $rbrace_follower = \%is_anon_sub_brace_follower;
9210                     }
9211                 }
9212
9213                 # None of the above: specify what can follow a closing
9214                 # brace of a block which is not an
9215                 # if/elsif/else/do/sort/map/grep/eval
9216                 # Testfiles:
9217                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9218                 else {
9219                     $rbrace_follower = \%is_other_brace_follower;
9220                 }
9221
9222                 # See if an elsif block is followed by another elsif or else;
9223                 # complain if not.
9224                 if ( $block_type eq 'elsif' ) {
9225
9226                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
9227                         $looking_for_else = 1;    # ok, check on next line
9228                     }
9229                     else {
9230
9231                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9232                             write_logfile_entry("No else block :(\n");
9233                         }
9234                     }
9235                 }
9236
9237                 # keep going after certain block types (map,sort,grep,eval)
9238                 # added eval for borris.t
9239                 if ($keep_going) {
9240
9241                     # keep going
9242                 }
9243
9244                 # if no more tokens, postpone decision until re-entring
9245                 elsif ( ( $next_nonblank_token_type eq 'b' )
9246                     && $rOpts_add_newlines )
9247                 {
9248                     unless ($rbrace_follower) {
9249                         output_line_to_go() unless ($no_internal_newlines);
9250                     }
9251                 }
9252
9253                 elsif ($rbrace_follower) {
9254
9255                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
9256                         output_line_to_go() unless ($no_internal_newlines);
9257                     }
9258                     $rbrace_follower = undef;
9259                 }
9260
9261                 else {
9262                     output_line_to_go() unless ($no_internal_newlines);
9263                 }
9264
9265             }    # end treatment of closing block token
9266
9267             # handle semicolon
9268             elsif ( $type eq ';' ) {
9269
9270                 # kill one-line blocks with too many semicolons
9271                 $semicolons_before_block_self_destruct--;
9272                 if (
9273                     ( $semicolons_before_block_self_destruct < 0 )
9274                     || (   $semicolons_before_block_self_destruct == 0
9275                         && $next_nonblank_token_type !~ /^[b\}]$/ )
9276                   )
9277                 {
9278                     destroy_one_line_block();
9279                 }
9280
9281                 # Remove unnecessary semicolons, but not after bare
9282                 # blocks, where it could be unsafe if the brace is
9283                 # mistokenized.
9284                 if (
9285                     (
9286                         $last_nonblank_token eq '}'
9287                         && (
9288                             $is_block_without_semicolon{
9289                                 $last_nonblank_block_type}
9290                             || $last_nonblank_block_type =~ /^sub\s+\w/
9291                             || $last_nonblank_block_type =~ /^\w+:$/ )
9292                     )
9293                     || $last_nonblank_type eq ';'
9294                   )
9295                 {
9296
9297                     if (
9298                         $rOpts->{'delete-semicolons'}
9299
9300                         # don't delete ; before a # because it would promote it
9301                         # to a block comment
9302                         && ( $next_nonblank_token_type ne '#' )
9303                       )
9304                     {
9305                         note_deleted_semicolon();
9306                         output_line_to_go()
9307                           unless ( $no_internal_newlines
9308                             || $index_start_one_line_block != UNDEFINED_INDEX );
9309                         next;
9310                     }
9311                     else {
9312                         write_logfile_entry("Extra ';'\n");
9313                     }
9314                 }
9315                 store_token_to_go();
9316
9317                 output_line_to_go()
9318                   unless ( $no_internal_newlines
9319                     || ( $rOpts_keep_interior_semicolons && $j < $jmax )
9320                     || ( $next_nonblank_token eq '}' ) );
9321
9322             }
9323
9324             # handle here_doc target string
9325             elsif ( $type eq 'h' ) {
9326                 $no_internal_newlines =
9327                   1;    # no newlines after seeing here-target
9328                 destroy_one_line_block();
9329                 store_token_to_go();
9330             }
9331
9332             # handle all other token types
9333             else {
9334
9335                 # if this is a blank...
9336                 if ( $type eq 'b' ) {
9337
9338                     # make it just one character
9339                     $token = ' ' if $rOpts_add_whitespace;
9340
9341                     # delete it if unwanted by whitespace rules
9342                     # or we are deleting all whitespace
9343                     my $ws = $$rwhite_space_flag[ $j + 1 ];
9344                     if ( ( defined($ws) && $ws == -1 )
9345                         || $rOpts_delete_old_whitespace )
9346                     {
9347
9348                         # unless it might make a syntax error
9349                         next
9350                           unless is_essential_whitespace(
9351                             $last_last_nonblank_token,
9352                             $last_last_nonblank_type,
9353                             $tokens_to_go[$max_index_to_go],
9354                             $types_to_go[$max_index_to_go],
9355                             $$rtokens[ $j + 1 ],
9356                             $$rtoken_type[ $j + 1 ]
9357                           );
9358                     }
9359                 }
9360                 store_token_to_go();
9361             }
9362
9363             # remember two previous nonblank OUTPUT tokens
9364             if ( $type ne '#' && $type ne 'b' ) {
9365                 $last_last_nonblank_token = $last_nonblank_token;
9366                 $last_last_nonblank_type  = $last_nonblank_type;
9367                 $last_nonblank_token      = $token;
9368                 $last_nonblank_type       = $type;
9369                 $last_nonblank_block_type = $block_type;
9370             }
9371
9372             # unset the continued-quote flag since it only applies to the
9373             # first token, and we want to resume normal formatting if
9374             # there are additional tokens on the line
9375             $in_continued_quote = 0;
9376
9377         }    # end of loop over all tokens in this 'line_of_tokens'
9378
9379         # we have to flush ..
9380         if (
9381
9382             # if there is a side comment
9383             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9384
9385             # if this line ends in a quote
9386             # NOTE: This is critically important for insuring that quoted lines
9387             # do not get processed by things like -sot and -sct
9388             || $in_quote
9389
9390             # if this is a VERSION statement
9391             || $is_VERSION_statement
9392
9393             # to keep a label on one line if that is how it is now
9394             || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9395
9396             # if we are instructed to keep all old line breaks
9397             || !$rOpts->{'delete-old-newlines'}
9398           )
9399         {
9400             destroy_one_line_block();
9401             output_line_to_go();
9402         }
9403
9404         # mark old line breakpoints in current output stream
9405         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9406             $old_breakpoint_to_go[$max_index_to_go] = 1;
9407         }
9408     }    # end sub print_line_of_tokens
9409 }    # end print_line_of_tokens
9410
9411 # sub output_line_to_go sends one logical line of tokens on down the
9412 # pipeline to the VerticalAligner package, breaking the line into continuation
9413 # lines as necessary.  The line of tokens is ready to go in the "to_go"
9414 # arrays.
9415 sub output_line_to_go {
9416
9417     # debug stuff; this routine can be called from many points
9418     FORMATTER_DEBUG_FLAG_OUTPUT && do {
9419         my ( $a, $b, $c ) = caller;
9420         write_diagnostics(
9421 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9422         );
9423         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9424         write_diagnostics("$output_str\n");
9425     };
9426
9427     # just set a tentative breakpoint if we might be in a one-line block
9428     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9429         set_forced_breakpoint($max_index_to_go);
9430         return;
9431     }
9432
9433     my $cscw_block_comment;
9434     $cscw_block_comment = add_closing_side_comment()
9435       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9436
9437     match_opening_and_closing_tokens();
9438
9439     # tell the -lp option we are outputting a batch so it can close
9440     # any unfinished items in its stack
9441     finish_lp_batch();
9442
9443     # If this line ends in a code block brace, set breaks at any
9444     # previous closing code block braces to breakup a chain of code
9445     # blocks on one line.  This is very rare but can happen for
9446     # user-defined subs.  For example we might be looking at this:
9447     #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9448     my $saw_good_break = 0;    # flag to force breaks even if short line
9449     if (
9450
9451         # looking for opening or closing block brace
9452         $block_type_to_go[$max_index_to_go]
9453
9454         # but not one of these which are never duplicated on a line:
9455         # until|while|for|if|elsif|else
9456         && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9457       )
9458     {
9459         my $lev = $nesting_depth_to_go[$max_index_to_go];
9460
9461         # Walk backwards from the end and
9462         # set break at any closing block braces at the same level.
9463         # But quit if we are not in a chain of blocks.
9464         for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9465             last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
9466             next if ( $levels_to_go[$i] > $lev );    # skip past higher level
9467
9468             if ( $block_type_to_go[$i] ) {
9469                 if ( $tokens_to_go[$i] eq '}' ) {
9470                     set_forced_breakpoint($i);
9471                     $saw_good_break = 1;
9472                 }
9473             }
9474
9475             # quit if we see anything besides words, function, blanks
9476             # at this level
9477             elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
9478         }
9479     }
9480
9481     my $imin = 0;
9482     my $imax = $max_index_to_go;
9483
9484     # trim any blank tokens
9485     if ( $max_index_to_go >= 0 ) {
9486         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9487         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9488     }
9489
9490     # anything left to write?
9491     if ( $imin <= $imax ) {
9492
9493         # add a blank line before certain key types
9494         if ( $last_line_leading_type !~ /^[#b]/ ) {
9495             my $want_blank    = 0;
9496             my $leading_token = $tokens_to_go[$imin];
9497             my $leading_type  = $types_to_go[$imin];
9498
9499             # blank lines before subs except declarations and one-liners
9500             # MCONVERSION LOCATION - for sub tokenization change
9501             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9502                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9503                   && (
9504                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9505                         $imax ) !~ /^[\;\}]$/
9506                   );
9507             }
9508
9509             # break before all package declarations
9510             # MCONVERSION LOCATION - for tokenizaton change
9511             elsif ($leading_token =~ /^(package\s)/
9512                 && $leading_type eq 'i' )
9513             {
9514                 $want_blank = ( $rOpts->{'blanks-before-subs'} );
9515             }
9516
9517             # break before certain key blocks except one-liners
9518             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9519                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9520                   && (
9521                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9522                         $imax ) ne '}'
9523                   );
9524             }
9525
9526             # Break before certain block types if we haven't had a
9527             # break at this level for a while.  This is the
9528             # difficult decision..
9529             elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9530                 && $leading_type eq 'k' )
9531             {
9532                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9533                 if ( !defined($lc) ) { $lc = 0 }
9534
9535                 $want_blank =
9536                      $rOpts->{'blanks-before-blocks'}
9537                   && $lc >= $rOpts->{'long-block-line-count'}
9538                   && $file_writer_object->get_consecutive_nonblank_lines() >=
9539                   $rOpts->{'long-block-line-count'}
9540                   && (
9541                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9542                         $imax ) ne '}'
9543                   );
9544             }
9545
9546             if ($want_blank) {
9547
9548                 # future: send blank line down normal path to VerticalAligner
9549                 Perl::Tidy::VerticalAligner::flush();
9550                 $file_writer_object->write_blank_code_line();
9551             }
9552         }
9553
9554         # update blank line variables and count number of consecutive
9555         # non-blank, non-comment lines at this level
9556         $last_last_line_leading_level = $last_line_leading_level;
9557         $last_line_leading_level      = $levels_to_go[$imin];
9558         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9559         $last_line_leading_type = $types_to_go[$imin];
9560         if (   $last_line_leading_level == $last_last_line_leading_level
9561             && $last_line_leading_type ne 'b'
9562             && $last_line_leading_type ne '#'
9563             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9564         {
9565             $nonblank_lines_at_depth[$last_line_leading_level]++;
9566         }
9567         else {
9568             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9569         }
9570
9571         FORMATTER_DEBUG_FLAG_FLUSH && do {
9572             my ( $package, $file, $line ) = caller;
9573             print
9574 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9575         };
9576
9577         # add a couple of extra terminal blank tokens
9578         pad_array_to_go();
9579
9580         # set all forced breakpoints for good list formatting
9581         my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9582
9583         if (
9584             $max_index_to_go > 0
9585             && (
9586                    $is_long_line
9587                 || $old_line_count_in_batch > 1
9588                 || is_unbalanced_batch()
9589                 || (
9590                     $comma_count_in_batch
9591                     && (   $rOpts_maximum_fields_per_table > 0
9592                         || $rOpts_comma_arrow_breakpoints == 0 )
9593                 )
9594             )
9595           )
9596         {
9597             $saw_good_break ||= scan_list();
9598         }
9599
9600         # let $ri_first and $ri_last be references to lists of
9601         # first and last tokens of line fragments to output..
9602         my ( $ri_first, $ri_last );
9603
9604         # write a single line if..
9605         if (
9606
9607             # we aren't allowed to add any newlines
9608             !$rOpts_add_newlines
9609
9610             # or, we don't already have an interior breakpoint
9611             # and we didn't see a good breakpoint
9612             || (
9613                    !$forced_breakpoint_count
9614                 && !$saw_good_break
9615
9616                 # and this line is 'short'
9617                 && !$is_long_line
9618             )
9619           )
9620         {
9621             @$ri_first = ($imin);
9622             @$ri_last  = ($imax);
9623         }
9624
9625         # otherwise use multiple lines
9626         else {
9627
9628             ( $ri_first, $ri_last, my $colon_count ) =
9629               set_continuation_breaks($saw_good_break);
9630
9631             break_all_chain_tokens( $ri_first, $ri_last );
9632
9633             break_equals( $ri_first, $ri_last );
9634
9635             # now we do a correction step to clean this up a bit
9636             # (The only time we would not do this is for debugging)
9637             if ( $rOpts->{'recombine'} ) {
9638                 ( $ri_first, $ri_last ) =
9639                   recombine_breakpoints( $ri_first, $ri_last );
9640             }
9641
9642             insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9643         }
9644
9645         # do corrector step if -lp option is used
9646         my $do_not_pad = 0;
9647         if ($rOpts_line_up_parentheses) {
9648             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9649         }
9650         send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9651     }
9652     prepare_for_new_input_lines();
9653
9654     # output any new -cscw block comment
9655     if ($cscw_block_comment) {
9656         flush();
9657         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9658     }
9659 }
9660
9661 sub note_added_semicolon {
9662     $last_added_semicolon_at = $input_line_number;
9663     if ( $added_semicolon_count == 0 ) {
9664         $first_added_semicolon_at = $last_added_semicolon_at;
9665     }
9666     $added_semicolon_count++;
9667     write_logfile_entry("Added ';' here\n");
9668 }
9669
9670 sub note_deleted_semicolon {
9671     $last_deleted_semicolon_at = $input_line_number;
9672     if ( $deleted_semicolon_count == 0 ) {
9673         $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9674     }
9675     $deleted_semicolon_count++;
9676     write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
9677 }
9678
9679 sub note_embedded_tab {
9680     $embedded_tab_count++;
9681     $last_embedded_tab_at = $input_line_number;
9682     if ( !$first_embedded_tab_at ) {
9683         $first_embedded_tab_at = $last_embedded_tab_at;
9684     }
9685
9686     if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
9687         write_logfile_entry("Embedded tabs in quote or pattern\n");
9688     }
9689 }
9690
9691 sub starting_one_line_block {
9692
9693     # after seeing an opening curly brace, look for the closing brace
9694     # and see if the entire block will fit on a line.  This routine is
9695     # not always right because it uses the old whitespace, so a check
9696     # is made later (at the closing brace) to make sure we really
9697     # have a one-line block.  We have to do this preliminary check,
9698     # though, because otherwise we would always break at a semicolon
9699     # within a one-line block if the block contains multiple statements.
9700
9701     my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9702         $rblock_type )
9703       = @_;
9704
9705     # kill any current block - we can only go 1 deep
9706     destroy_one_line_block();
9707
9708     # return value:
9709     #  1=distance from start of block to opening brace exceeds line length
9710     #  0=otherwise
9711
9712     my $i_start = 0;
9713
9714     # shouldn't happen: there must have been a prior call to
9715     # store_token_to_go to put the opening brace in the output stream
9716     if ( $max_index_to_go < 0 ) {
9717         warning("program bug: store_token_to_go called incorrectly\n");
9718         report_definite_bug();
9719     }
9720     else {
9721
9722         # cannot use one-line blocks with cuddled else else/elsif lines
9723         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9724             return 0;
9725         }
9726     }
9727
9728     my $block_type = $$rblock_type[$j];
9729
9730     # find the starting keyword for this block (such as 'if', 'else', ...)
9731
9732     if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9733         $i_start = $max_index_to_go;
9734     }
9735
9736     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9737
9738         # For something like "if (xxx) {", the keyword "if" will be
9739         # just after the most recent break. This will be 0 unless
9740         # we have just killed a one-line block and are starting another.
9741         # (doif.t)
9742         $i_start = $index_max_forced_break + 1;
9743         if ( $types_to_go[$i_start] eq 'b' ) {
9744             $i_start++;
9745         }
9746
9747         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9748             return 0;
9749         }
9750     }
9751
9752     # the previous nonblank token should start these block types
9753     elsif (
9754         ( $last_last_nonblank_token_to_go eq $block_type )
9755         || (   $block_type =~ /^sub/
9756             && $last_last_nonblank_token_to_go =~ /^sub/ )
9757       )
9758     {
9759         $i_start = $last_last_nonblank_index_to_go;
9760     }
9761
9762     # patch for SWITCH/CASE to retain one-line case/when blocks
9763     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9764         $i_start = $index_max_forced_break + 1;
9765         if ( $types_to_go[$i_start] eq 'b' ) {
9766             $i_start++;
9767         }
9768         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9769             return 0;
9770         }
9771     }
9772
9773     else {
9774         return 1;
9775     }
9776
9777     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9778
9779     my $i;
9780
9781     # see if length is too long to even start
9782     if ( $pos > $rOpts_maximum_line_length ) {
9783         return 1;
9784     }
9785
9786     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9787
9788         # old whitespace could be arbitrarily large, so don't use it
9789         if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9790         else                              { $pos += length( $$rtokens[$i] ) }
9791
9792         # Return false result if we exceed the maximum line length,
9793         if ( $pos > $rOpts_maximum_line_length ) {
9794             return 0;
9795         }
9796
9797         # or encounter another opening brace before finding the closing brace.
9798         elsif ($$rtokens[$i] eq '{'
9799             && $$rtoken_type[$i] eq '{'
9800             && $$rblock_type[$i] )
9801         {
9802             return 0;
9803         }
9804
9805         # if we find our closing brace..
9806         elsif ($$rtokens[$i] eq '}'
9807             && $$rtoken_type[$i] eq '}'
9808             && $$rblock_type[$i] )
9809         {
9810
9811             # be sure any trailing comment also fits on the line
9812             my $i_nonblank =
9813               ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9814
9815             if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9816                 $pos += length( $$rtokens[$i_nonblank] );
9817
9818                 if ( $i_nonblank > $i + 1 ) {
9819                     $pos += length( $$rtokens[ $i + 1 ] );
9820                 }
9821
9822                 if ( $pos > $rOpts_maximum_line_length ) {
9823                     return 0;
9824                 }
9825             }
9826
9827             # ok, it's a one-line block
9828             create_one_line_block( $i_start, 20 );
9829             return 0;
9830         }
9831
9832         # just keep going for other characters
9833         else {
9834         }
9835     }
9836
9837     # Allow certain types of new one-line blocks to form by joining
9838     # input lines.  These can be safely done, but for other block types,
9839     # we keep old one-line blocks but do not form new ones. It is not
9840     # always a good idea to make as many one-line blocks as possible,
9841     # so other types are not done.  The user can always use -mangle.
9842     if ( $is_sort_map_grep_eval{$block_type} ) {
9843         create_one_line_block( $i_start, 1 );
9844     }
9845
9846     return 0;
9847 }
9848
9849 sub unstore_token_to_go {
9850
9851     # remove most recent token from output stream
9852     if ( $max_index_to_go > 0 ) {
9853         $max_index_to_go--;
9854     }
9855     else {
9856         $max_index_to_go = UNDEFINED_INDEX;
9857     }
9858
9859 }
9860
9861 sub want_blank_line {
9862     flush();
9863     $file_writer_object->want_blank_line();
9864 }
9865
9866 sub write_unindented_line {
9867     flush();
9868     $file_writer_object->write_line( $_[0] );
9869 }
9870
9871 sub undo_ci {
9872
9873     # Undo continuation indentation in certain sequences
9874     # For example, we can undo continuation indation in sort/map/grep chains
9875     #    my $dat1 = pack( "n*",
9876     #        map { $_, $lookup->{$_} }
9877     #          sort { $a <=> $b }
9878     #          grep { $lookup->{$_} ne $default } keys %$lookup );
9879     # To align the map/sort/grep keywords like this:
9880     #    my $dat1 = pack( "n*",
9881     #        map { $_, $lookup->{$_} }
9882     #        sort { $a <=> $b }
9883     #        grep { $lookup->{$_} ne $default } keys %$lookup );
9884     my ( $ri_first, $ri_last ) = @_;
9885     my ( $line_1, $line_2, $lev_last );
9886     my $this_line_is_semicolon_terminated;
9887     my $max_line = @$ri_first - 1;
9888
9889     # looking at each line of this batch..
9890     # We are looking at leading tokens and looking for a sequence
9891     # all at the same level and higher level than enclosing lines.
9892     foreach my $line ( 0 .. $max_line ) {
9893
9894         my $ibeg = $$ri_first[$line];
9895         my $lev  = $levels_to_go[$ibeg];
9896         if ( $line > 0 ) {
9897
9898             # if we have started a chain..
9899             if ($line_1) {
9900
9901                 # see if it continues..
9902                 if ( $lev == $lev_last ) {
9903                     if (   $types_to_go[$ibeg] eq 'k'
9904                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9905                     {
9906
9907                         # chain continues...
9908                         # check for chain ending at end of a a statement
9909                         if ( $line == $max_line ) {
9910
9911                             # see of this line ends a statement
9912                             my $iend = $$ri_last[$line];
9913                             $this_line_is_semicolon_terminated =
9914                               $types_to_go[$iend] eq ';'
9915
9916                               # with possible side comment
9917                               || ( $types_to_go[$iend] eq '#'
9918                                 && $iend - $ibeg >= 2
9919                                 && $types_to_go[ $iend - 2 ] eq ';'
9920                                 && $types_to_go[ $iend - 1 ] eq 'b' );
9921                         }
9922                         $line_2 = $line if ($this_line_is_semicolon_terminated);
9923                     }
9924                     else {
9925
9926                         # kill chain
9927                         $line_1 = undef;
9928                     }
9929                 }
9930                 elsif ( $lev < $lev_last ) {
9931
9932                     # chain ends with previous line
9933                     $line_2 = $line - 1;
9934                 }
9935                 elsif ( $lev > $lev_last ) {
9936
9937                     # kill chain
9938                     $line_1 = undef;
9939                 }
9940
9941                 # undo the continuation indentation if a chain ends
9942                 if ( defined($line_2) && defined($line_1) ) {
9943                     my $continuation_line_count = $line_2 - $line_1 + 1;
9944                     @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9945                       (0) x ($continuation_line_count);
9946                     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9947                       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
9948                     $line_1 = undef;
9949                 }
9950             }
9951
9952             # not in a chain yet..
9953             else {
9954
9955                 # look for start of a new sort/map/grep chain
9956                 if ( $lev > $lev_last ) {
9957                     if (   $types_to_go[$ibeg] eq 'k'
9958                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9959                     {
9960                         $line_1 = $line;
9961                     }
9962                 }
9963             }
9964         }
9965         $lev_last = $lev;
9966     }
9967 }
9968
9969 sub undo_lp_ci {
9970
9971     # If there is a single, long parameter within parens, like this:
9972     #
9973     #  $self->command( "/msg "
9974     #        . $infoline->chan
9975     #        . " You said $1, but did you know that it's square was "
9976     #        . $1 * $1 . " ?" );
9977     #
9978     # we can remove the continuation indentation of the 2nd and higher lines
9979     # to achieve this effect, which is more pleasing:
9980     #
9981     #  $self->command("/msg "
9982     #                 . $infoline->chan
9983     #                 . " You said $1, but did you know that it's square was "
9984     #                 . $1 * $1 . " ?");
9985
9986     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9987     my $max_line = @$ri_first - 1;
9988
9989     # must be multiple lines
9990     return unless $max_line > $line_open;
9991
9992     my $lev_start     = $levels_to_go[$i_start];
9993     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9994
9995     # see if all additional lines in this container have continuation
9996     # indentation
9997     my $n;
9998     my $line_1 = 1 + $line_open;
9999     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
10000         my $ibeg = $$ri_first[$n];
10001         my $iend = $$ri_last[$n];
10002         if ( $ibeg eq $closing_index ) { $n--; last }
10003         return if ( $lev_start != $levels_to_go[$ibeg] );
10004         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10005         last   if ( $closing_index <= $iend );
10006     }
10007
10008     # we can reduce the indentation of all continuation lines
10009     my $continuation_line_count = $n - $line_open;
10010     @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10011       (0) x ($continuation_line_count);
10012     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10013       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10014 }
10015
10016 sub set_logical_padding {
10017
10018     # Look at a batch of lines and see if extra padding can improve the
10019     # alignment when there are certain leading operators. Here is an
10020     # example, in which some extra space is introduced before
10021     # '( $year' to make it line up with the subsequent lines:
10022     #
10023     #       if (   ( $Year < 1601 )
10024     #           || ( $Year > 2899 )
10025     #           || ( $EndYear < 1601 )
10026     #           || ( $EndYear > 2899 ) )
10027     #       {
10028     #           &Error_OutOfRange;
10029     #       }
10030     #
10031     my ( $ri_first, $ri_last ) = @_;
10032     my $max_line = @$ri_first - 1;
10033
10034     my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
10035         $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10036
10037     # looking at each line of this batch..
10038     foreach $line ( 0 .. $max_line - 1 ) {
10039
10040         # see if the next line begins with a logical operator
10041         $ibeg      = $$ri_first[$line];
10042         $iend      = $$ri_last[$line];
10043         $ibeg_next = $$ri_first[ $line + 1 ];
10044         $tok_next  = $tokens_to_go[$ibeg_next];
10045         $type_next = $types_to_go[$ibeg_next];
10046
10047         $has_leading_op_next = ( $tok_next =~ /^\w/ )
10048           ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
10049           : $is_chain_operator{$type_next};    # and, or
10050
10051         next unless ($has_leading_op_next);
10052
10053         # next line must not be at lesser depth
10054         next
10055           if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
10056
10057         # identify the token in this line to be padded on the left
10058         $ipad = undef;
10059
10060         # handle lines at same depth...
10061         if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
10062
10063             # if this is not first line of the batch ...
10064             if ( $line > 0 ) {
10065
10066                 # and we have leading operator..
10067                 next if $has_leading_op;
10068
10069                 # Introduce padding if..
10070                 # 1. the previous line is at lesser depth, or
10071                 # 2. the previous line ends in an assignment
10072                 # 3. the previous line ends in a 'return'
10073                 # 4. the previous line ends in a comma
10074                 # Example 1: previous line at lesser depth
10075                 #       if (   ( $Year < 1601 )      # <- we are here but
10076                 #           || ( $Year > 2899 )      #  list has not yet
10077                 #           || ( $EndYear < 1601 )   # collapsed vertically
10078                 #           || ( $EndYear > 2899 ) )
10079                 #       {
10080                 #
10081                 # Example 2: previous line ending in assignment:
10082                 #    $leapyear =
10083                 #        $year % 4   ? 0     # <- We are here
10084                 #      : $year % 100 ? 1
10085                 #      : $year % 400 ? 0
10086                 #      : 1;
10087                 #
10088                 # Example 3: previous line ending in comma:
10089                 #    push @expr,
10090                 #        /test/   ? undef
10091                 #      : eval($_) ? 1
10092                 #      : eval($_) ? 1
10093                 #      :            0;
10094
10095                 # be sure levels agree (do not indent after an indented 'if')
10096                 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10097
10098                 # allow padding on first line after a comma but only if:
10099                 # (1) this is line 2 and
10100                 # (2) there are at more than three lines and
10101                 # (3) lines 3 and 4 have the same leading operator
10102                 # These rules try to prevent padding within a long
10103                 # comma-separated list.
10104                 my $ok_comma;
10105                 if (   $types_to_go[$iendm] eq ','
10106                     && $line == 1
10107                     && $max_line > 2 )
10108                 {
10109                     my $ibeg_next_next = $$ri_first[ $line + 2 ];
10110                     my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
10111                     $ok_comma = $tok_next_next eq $tok_next;
10112                 }
10113
10114                 next
10115                   unless (
10116                        $is_assignment{ $types_to_go[$iendm] }
10117                     || $ok_comma
10118                     || ( $nesting_depth_to_go[$ibegm] <
10119                         $nesting_depth_to_go[$ibeg] )
10120                     || (   $types_to_go[$iendm] eq 'k'
10121                         && $tokens_to_go[$iendm] eq 'return' )
10122                   );
10123
10124                 # we will add padding before the first token
10125                 $ipad = $ibeg;
10126             }
10127
10128             # for first line of the batch..
10129             else {
10130
10131                 # WARNING: Never indent if first line is starting in a
10132                 # continued quote, which would change the quote.
10133                 next if $starting_in_quote;
10134
10135                 # if this is text after closing '}'
10136                 # then look for an interior token to pad
10137                 if ( $types_to_go[$ibeg] eq '}' ) {
10138
10139                 }
10140
10141                 # otherwise, we might pad if it looks really good
10142                 else {
10143
10144                     # we might pad token $ibeg, so be sure that it
10145                     # is at the same depth as the next line.
10146                     next
10147                       if ( $nesting_depth_to_go[$ibeg] !=
10148                         $nesting_depth_to_go[$ibeg_next] );
10149
10150                     # We can pad on line 1 of a statement if at least 3
10151                     # lines will be aligned. Otherwise, it
10152                     # can look very confusing.
10153
10154                  # We have to be careful not to pad if there are too few
10155                  # lines.  The current rule is:
10156                  # (1) in general we require at least 3 consecutive lines
10157                  # with the same leading chain operator token,
10158                  # (2) but an exception is that we only require two lines
10159                  # with leading colons if there are no more lines.  For example,
10160                  # the first $i in the following snippet would get padding
10161                  # by the second rule:
10162                  #
10163                  #   $i == 1 ? ( "First", "Color" )
10164                  # : $i == 2 ? ( "Then",  "Rarity" )
10165                  # :           ( "Then",  "Name" );
10166
10167                     if ( $max_line > 1 ) {
10168                         my $leading_token = $tokens_to_go[$ibeg_next];
10169                         my $tokens_differ;
10170
10171                         # never indent line 1 of a '.' series because
10172                         # previous line is most likely at same level.
10173                         # TODO: we should also look at the leasing_spaces
10174                         # of the last output line and skip if it is same
10175                         # as this line.
10176                         next if ( $leading_token eq '.' );
10177
10178                         my $count = 1;
10179                         foreach my $l ( 2 .. 3 ) {
10180                             last if ( $line + $l > $max_line );
10181                             my $ibeg_next_next = $$ri_first[ $line + $l ];
10182                             if ( $tokens_to_go[$ibeg_next_next] ne
10183                                 $leading_token )
10184                             {
10185                                 $tokens_differ = 1;
10186                                 last;
10187                             }
10188                             $count++;
10189                         }
10190                         next if ($tokens_differ);
10191                         next if ( $count < 3 && $leading_token ne ':' );
10192                         $ipad = $ibeg;
10193                     }
10194                     else {
10195                         next;
10196                     }
10197                 }
10198             }
10199         }
10200
10201         # find interior token to pad if necessary
10202         if ( !defined($ipad) ) {
10203
10204             for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
10205
10206                 # find any unclosed container
10207                 next
10208                   unless ( $type_sequence_to_go[$i]
10209                     && $mate_index_to_go[$i] > $iend );
10210
10211                 # find next nonblank token to pad
10212                 $ipad = $i + 1;
10213                 if ( $types_to_go[$ipad] eq 'b' ) {
10214                     $ipad++;
10215                     last if ( $ipad > $iend );
10216                 }
10217             }
10218             last unless $ipad;
10219         }
10220
10221         # next line must not be at greater depth
10222         my $iend_next = $$ri_last[ $line + 1 ];
10223         next
10224           if ( $nesting_depth_to_go[ $iend_next + 1 ] >
10225             $nesting_depth_to_go[$ipad] );
10226
10227         # lines must be somewhat similar to be padded..
10228         my $inext_next = $ibeg_next + 1;
10229         if ( $types_to_go[$inext_next] eq 'b' ) {
10230             $inext_next++;
10231         }
10232         my $type      = $types_to_go[$ipad];
10233         my $type_next = $types_to_go[ $ipad + 1 ];
10234
10235         # see if there are multiple continuation lines
10236         my $logical_continuation_lines = 1;
10237         if ( $line + 2 <= $max_line ) {
10238             my $leading_token  = $tokens_to_go[$ibeg_next];
10239             my $ibeg_next_next = $$ri_first[ $line + 2 ];
10240             if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
10241                 && $nesting_depth_to_go[$ibeg_next] eq
10242                 $nesting_depth_to_go[$ibeg_next_next] )
10243             {
10244                 $logical_continuation_lines++;
10245             }
10246         }
10247
10248         # see if leading types match
10249         my $types_match = $types_to_go[$inext_next] eq $type;
10250         my $matches_without_bang;
10251
10252         # if first line has leading ! then compare the following token
10253         if ( !$types_match && $type eq '!' ) {
10254             $types_match = $matches_without_bang =
10255               $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
10256         }
10257
10258         if (
10259
10260             # either we have multiple continuation lines to follow
10261             # and we are not padding the first token
10262             ( $logical_continuation_lines > 1 && $ipad > 0 )
10263
10264             # or..
10265             || (
10266
10267                 # types must match
10268                 $types_match
10269
10270                 # and keywords must match if keyword
10271                 && !(
10272                        $type eq 'k'
10273                     && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
10274                 )
10275             )
10276           )
10277         {
10278
10279             #----------------------begin special checks--------------
10280             #
10281             # SPECIAL CHECK 1:
10282             # A check is needed before we can make the pad.
10283             # If we are in a list with some long items, we want each
10284             # item to stand out.  So in the following example, the
10285             # first line begining with '$casefold->' would look good
10286             # padded to align with the next line, but then it
10287             # would be indented more than the last line, so we
10288             # won't do it.
10289             #
10290             #  ok(
10291             #      $casefold->{code}         eq '0041'
10292             #        && $casefold->{status}  eq 'C'
10293             #        && $casefold->{mapping} eq '0061',
10294             #      'casefold 0x41'
10295             #  );
10296             #
10297             # Note:
10298             # It would be faster, and almost as good, to use a comma
10299             # count, and not pad if comma_count > 1 and the previous
10300             # line did not end with a comma.
10301             #
10302             my $ok_to_pad = 1;
10303
10304             my $ibg   = $$ri_first[ $line + 1 ];
10305             my $depth = $nesting_depth_to_go[ $ibg + 1 ];
10306
10307             # just use simplified formula for leading spaces to avoid
10308             # needless sub calls
10309             my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
10310
10311             # look at each line beyond the next ..
10312             my $l = $line + 1;
10313             foreach $l ( $line + 2 .. $max_line ) {
10314                 my $ibg = $$ri_first[$l];
10315
10316                 # quit looking at the end of this container
10317                 last
10318                   if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
10319                   || ( $nesting_depth_to_go[$ibg] < $depth );
10320
10321                 # cannot do the pad if a later line would be
10322                 # outdented more
10323                 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
10324                     $ok_to_pad = 0;
10325                     last;
10326                 }
10327             }
10328
10329             # don't pad if we end in a broken list
10330             if ( $l == $max_line ) {
10331                 my $i2 = $$ri_last[$l];
10332                 if ( $types_to_go[$i2] eq '#' ) {
10333                     my $i1 = $$ri_first[$l];
10334                     next
10335                       if (
10336                         terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10337                             $i2 ) eq ','
10338                       );
10339                 }
10340             }
10341
10342             # SPECIAL CHECK 2:
10343             # a minus may introduce a quoted variable, and we will
10344             # add the pad only if this line begins with a bare word,
10345             # such as for the word 'Button' here:
10346             #    [
10347             #         Button      => "Print letter \"~$_\"",
10348             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10349             #        -accelerator => "Meta+$_"
10350             #    ];
10351             #
10352             #  On the other hand, if 'Button' is quoted, it looks best
10353             #  not to pad:
10354             #    [
10355             #        'Button'     => "Print letter \"~$_\"",
10356             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10357             #        -accelerator => "Meta+$_"
10358             #    ];
10359             if ( $types_to_go[$ibeg_next] eq 'm' ) {
10360                 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10361             }
10362
10363             next unless $ok_to_pad;
10364
10365             #----------------------end special check---------------
10366
10367             my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
10368             my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10369             $pad_spaces = $length_2 - $length_1;
10370
10371             # If the first line has a leading ! and the second does
10372             # not, then remove one space to try to align the next
10373             # leading characters, which are often the same.  For example:
10374             #  if (  !$ts
10375             #      || $ts == $self->Holder
10376             #      || $self->Holder->Type eq "Arena" )
10377             #
10378             # This usually helps readability, but if there are subsequent
10379             # ! operators things will still get messed up.  For example:
10380             #
10381             #  if (  !exists $Net::DNS::typesbyname{$qtype}
10382             #      && exists $Net::DNS::classesbyname{$qtype}
10383             #      && !exists $Net::DNS::classesbyname{$qclass}
10384             #      && exists $Net::DNS::typesbyname{$qclass} )
10385             # We can't fix that.
10386             if ($matches_without_bang) { $pad_spaces-- }
10387
10388             # make sure this won't change if -lp is used
10389             my $indentation_1 = $leading_spaces_to_go[$ibeg];
10390             if ( ref($indentation_1) ) {
10391                 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10392                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10393                     unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10394                         $pad_spaces = 0;
10395                     }
10396                 }
10397             }
10398
10399             # we might be able to handle a pad of -1 by removing a blank
10400             # token
10401             if ( $pad_spaces < 0 ) {
10402
10403                 if ( $pad_spaces == -1 ) {
10404                     if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10405                         $tokens_to_go[ $ipad - 1 ] = '';
10406                     }
10407                 }
10408                 $pad_spaces = 0;
10409             }
10410
10411             # now apply any padding for alignment
10412             if ( $ipad >= 0 && $pad_spaces ) {
10413
10414                 my $length_t = total_line_length( $ibeg, $iend );
10415                 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10416                     $tokens_to_go[$ipad] =
10417                       ' ' x $pad_spaces . $tokens_to_go[$ipad];
10418                 }
10419             }
10420         }
10421     }
10422     continue {
10423         $iendm          = $iend;
10424         $ibegm          = $ibeg;
10425         $has_leading_op = $has_leading_op_next;
10426     }    # end of loop over lines
10427     return;
10428 }
10429
10430 sub correct_lp_indentation {
10431
10432     # When the -lp option is used, we need to make a last pass through
10433     # each line to correct the indentation positions in case they differ
10434     # from the predictions.  This is necessary because perltidy uses a
10435     # predictor/corrector method for aligning with opening parens.  The
10436     # predictor is usually good, but sometimes stumbles.  The corrector
10437     # tries to patch things up once the actual opening paren locations
10438     # are known.
10439     my ( $ri_first, $ri_last ) = @_;
10440     my $do_not_pad = 0;
10441
10442     #  Note on flag '$do_not_pad':
10443     #  We want to avoid a situation like this, where the aligner inserts
10444     #  whitespace before the '=' to align it with a previous '=', because
10445     #  otherwise the parens might become mis-aligned in a situation like
10446     #  this, where the '=' has become aligned with the previous line,
10447     #  pushing the opening '(' forward beyond where we want it.
10448     #
10449     #  $mkFloor::currentRoom = '';
10450     #  $mkFloor::c_entry     = $c->Entry(
10451     #                                 -width        => '10',
10452     #                                 -relief       => 'sunken',
10453     #                                 ...
10454     #                                 );
10455     #
10456     #  We leave it to the aligner to decide how to do this.
10457
10458     # first remove continuation indentation if appropriate
10459     my $max_line = @$ri_first - 1;
10460
10461     # looking at each line of this batch..
10462     my ( $ibeg, $iend );
10463     my $line;
10464     foreach $line ( 0 .. $max_line ) {
10465         $ibeg = $$ri_first[$line];
10466         $iend = $$ri_last[$line];
10467
10468         # looking at each token in this output line..
10469         my $i;
10470         foreach $i ( $ibeg .. $iend ) {
10471
10472             # How many space characters to place before this token
10473             # for special alignment.  Actual padding is done in the
10474             # continue block.
10475
10476             # looking for next unvisited indentation item
10477             my $indentation = $leading_spaces_to_go[$i];
10478             if ( !$indentation->get_MARKED() ) {
10479                 $indentation->set_MARKED(1);
10480
10481                 # looking for indentation item for which we are aligning
10482                 # with parens, braces, and brackets
10483                 next unless ( $indentation->get_ALIGN_PAREN() );
10484
10485                 # skip closed container on this line
10486                 if ( $i > $ibeg ) {
10487                     my $im = $i - 1;
10488                     if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
10489                     if (   $type_sequence_to_go[$im]
10490                         && $mate_index_to_go[$im] <= $iend )
10491                     {
10492                         next;
10493                     }
10494                 }
10495
10496                 if ( $line == 1 && $i == $ibeg ) {
10497                     $do_not_pad = 1;
10498                 }
10499
10500                 # Ok, let's see what the error is and try to fix it
10501                 my $actual_pos;
10502                 my $predicted_pos = $indentation->get_SPACES();
10503                 if ( $i > $ibeg ) {
10504
10505                     # token is mid-line - use length to previous token
10506                     $actual_pos = total_line_length( $ibeg, $i - 1 );
10507
10508                     # for mid-line token, we must check to see if all
10509                     # additional lines have continuation indentation,
10510                     # and remove it if so.  Otherwise, we do not get
10511                     # good alignment.
10512                     my $closing_index = $indentation->get_CLOSED();
10513                     if ( $closing_index > $iend ) {
10514                         my $ibeg_next = $$ri_first[ $line + 1 ];
10515                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
10516                             undo_lp_ci( $line, $i, $closing_index, $ri_first,
10517                                 $ri_last );
10518                         }
10519                     }
10520                 }
10521                 elsif ( $line > 0 ) {
10522
10523                     # handle case where token starts a new line;
10524                     # use length of previous line
10525                     my $ibegm = $$ri_first[ $line - 1 ];
10526                     my $iendm = $$ri_last[ $line - 1 ];
10527                     $actual_pos = total_line_length( $ibegm, $iendm );
10528
10529                     # follow -pt style
10530                     ++$actual_pos
10531                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
10532                 }
10533                 else {
10534
10535                     # token is first character of first line of batch
10536                     $actual_pos = $predicted_pos;
10537                 }
10538
10539                 my $move_right = $actual_pos - $predicted_pos;
10540
10541                 # done if no error to correct (gnu2.t)
10542                 if ( $move_right == 0 ) {
10543                     $indentation->set_RECOVERABLE_SPACES($move_right);
10544                     next;
10545                 }
10546
10547                 # if we have not seen closure for this indentation in
10548                 # this batch, we can only pass on a request to the
10549                 # vertical aligner
10550                 my $closing_index = $indentation->get_CLOSED();
10551
10552                 if ( $closing_index < 0 ) {
10553                     $indentation->set_RECOVERABLE_SPACES($move_right);
10554                     next;
10555                 }
10556
10557                 # If necessary, look ahead to see if there is really any
10558                 # leading whitespace dependent on this whitespace, and
10559                 # also find the longest line using this whitespace.
10560                 # Since it is always safe to move left if there are no
10561                 # dependents, we only need to do this if we may have
10562                 # dependent nodes or need to move right.
10563
10564                 my $right_margin = 0;
10565                 my $have_child   = $indentation->get_HAVE_CHILD();
10566
10567                 my %saw_indentation;
10568                 my $line_count = 1;
10569                 $saw_indentation{$indentation} = $indentation;
10570
10571                 if ( $have_child || $move_right > 0 ) {
10572                     $have_child = 0;
10573                     my $max_length = 0;
10574                     if ( $i == $ibeg ) {
10575                         $max_length = total_line_length( $ibeg, $iend );
10576                     }
10577
10578                     # look ahead at the rest of the lines of this batch..
10579                     my $line_t;
10580                     foreach $line_t ( $line + 1 .. $max_line ) {
10581                         my $ibeg_t = $$ri_first[$line_t];
10582                         my $iend_t = $$ri_last[$line_t];
10583                         last if ( $closing_index <= $ibeg_t );
10584
10585                         # remember all different indentation objects
10586                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
10587                         $saw_indentation{$indentation_t} = $indentation_t;
10588                         $line_count++;
10589
10590                         # remember longest line in the group
10591                         my $length_t = total_line_length( $ibeg_t, $iend_t );
10592                         if ( $length_t > $max_length ) {
10593                             $max_length = $length_t;
10594                         }
10595                     }
10596                     $right_margin = $rOpts_maximum_line_length - $max_length;
10597                     if ( $right_margin < 0 ) { $right_margin = 0 }
10598                 }
10599
10600                 my $first_line_comma_count =
10601                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
10602                 my $comma_count = $indentation->get_COMMA_COUNT();
10603                 my $arrow_count = $indentation->get_ARROW_COUNT();
10604
10605                 # This is a simple approximate test for vertical alignment:
10606                 # if we broke just after an opening paren, brace, bracket,
10607                 # and there are 2 or more commas in the first line,
10608                 # and there are no '=>'s,
10609                 # then we are probably vertically aligned.  We could set
10610                 # an exact flag in sub scan_list, but this is good
10611                 # enough.
10612                 my $indentation_count = keys %saw_indentation;
10613                 my $is_vertically_aligned =
10614                   (      $i == $ibeg
10615                       && $first_line_comma_count > 1
10616                       && $indentation_count == 1
10617                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
10618
10619                 # Make the move if possible ..
10620                 if (
10621
10622                     # we can always move left
10623                     $move_right < 0
10624
10625                     # but we should only move right if we are sure it will
10626                     # not spoil vertical alignment
10627                     || ( $comma_count == 0 )
10628                     || ( $comma_count > 0 && !$is_vertically_aligned )
10629                   )
10630                 {
10631                     my $move =
10632                       ( $move_right <= $right_margin )
10633                       ? $move_right
10634                       : $right_margin;
10635
10636                     foreach ( keys %saw_indentation ) {
10637                         $saw_indentation{$_}
10638                           ->permanently_decrease_AVAILABLE_SPACES( -$move );
10639                     }
10640                 }
10641
10642                 # Otherwise, record what we want and the vertical aligner
10643                 # will try to recover it.
10644                 else {
10645                     $indentation->set_RECOVERABLE_SPACES($move_right);
10646                 }
10647             }
10648         }
10649     }
10650     return $do_not_pad;
10651 }
10652
10653 # flush is called to output any tokens in the pipeline, so that
10654 # an alternate source of lines can be written in the correct order
10655
10656 sub flush {
10657     destroy_one_line_block();
10658     output_line_to_go();
10659     Perl::Tidy::VerticalAligner::flush();
10660 }
10661
10662 sub reset_block_text_accumulator {
10663
10664     # save text after 'if' and 'elsif' to append after 'else'
10665     if ($accumulating_text_for_block) {
10666
10667         if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10668             push @{$rleading_block_if_elsif_text}, $leading_block_text;
10669         }
10670     }
10671     $accumulating_text_for_block        = "";
10672     $leading_block_text                 = "";
10673     $leading_block_text_level           = 0;
10674     $leading_block_text_length_exceeded = 0;
10675     $leading_block_text_line_number     = 0;
10676     $leading_block_text_line_length     = 0;
10677 }
10678
10679 sub set_block_text_accumulator {
10680     my $i = shift;
10681     $accumulating_text_for_block = $tokens_to_go[$i];
10682     if ( $accumulating_text_for_block !~ /^els/ ) {
10683         $rleading_block_if_elsif_text = [];
10684     }
10685     $leading_block_text       = "";
10686     $leading_block_text_level = $levels_to_go[$i];
10687     $leading_block_text_line_number =
10688       $vertical_aligner_object->get_output_line_number();
10689     $leading_block_text_length_exceeded = 0;
10690
10691     # this will contain the column number of the last character
10692     # of the closing side comment
10693     $leading_block_text_line_length =
10694       length($accumulating_text_for_block) +
10695       length( $rOpts->{'closing-side-comment-prefix'} ) +
10696       $leading_block_text_level * $rOpts_indent_columns + 3;
10697 }
10698
10699 sub accumulate_block_text {
10700     my $i = shift;
10701
10702     # accumulate leading text for -csc, ignoring any side comments
10703     if (   $accumulating_text_for_block
10704         && !$leading_block_text_length_exceeded
10705         && $types_to_go[$i] ne '#' )
10706     {
10707
10708         my $added_length = length( $tokens_to_go[$i] );
10709         $added_length += 1 if $i == 0;
10710         my $new_line_length = $leading_block_text_line_length + $added_length;
10711
10712         # we can add this text if we don't exceed some limits..
10713         if (
10714
10715             # we must not have already exceeded the text length limit
10716             length($leading_block_text) <
10717             $rOpts_closing_side_comment_maximum_text
10718
10719             # and either:
10720             # the new total line length must be below the line length limit
10721             # or the new length must be below the text length limit
10722             # (ie, we may allow one token to exceed the text length limit)
10723             && ( $new_line_length < $rOpts_maximum_line_length
10724                 || length($leading_block_text) + $added_length <
10725                 $rOpts_closing_side_comment_maximum_text )
10726
10727             # UNLESS: we are adding a closing paren before the brace we seek.
10728             # This is an attempt to avoid situations where the ... to be
10729             # added are longer than the omitted right paren, as in:
10730
10731             #   foreach my $item (@a_rather_long_variable_name_here) {
10732             #      &whatever;
10733             #   } ## end foreach my $item (@a_rather_long_variable_name_here...
10734
10735             || (
10736                 $tokens_to_go[$i] eq ')'
10737                 && (
10738                     (
10739                            $i + 1 <= $max_index_to_go
10740                         && $block_type_to_go[ $i + 1 ] eq
10741                         $accumulating_text_for_block
10742                     )
10743                     || (   $i + 2 <= $max_index_to_go
10744                         && $block_type_to_go[ $i + 2 ] eq
10745                         $accumulating_text_for_block )
10746                 )
10747             )
10748           )
10749         {
10750
10751             # add an extra space at each newline
10752             if ( $i == 0 ) { $leading_block_text .= ' ' }
10753
10754             # add the token text
10755             $leading_block_text .= $tokens_to_go[$i];
10756             $leading_block_text_line_length = $new_line_length;
10757         }
10758
10759         # show that text was truncated if necessary
10760         elsif ( $types_to_go[$i] ne 'b' ) {
10761             $leading_block_text_length_exceeded = 1;
10762             $leading_block_text .= '...';
10763         }
10764     }
10765 }
10766
10767 {
10768     my %is_if_elsif_else_unless_while_until_for_foreach;
10769
10770     BEGIN {
10771
10772         # These block types may have text between the keyword and opening
10773         # curly.  Note: 'else' does not, but must be included to allow trailing
10774         # if/elsif text to be appended.
10775         # patch for SWITCH/CASE: added 'case' and 'when'
10776         @_ = qw(if elsif else unless while until for foreach case when);
10777         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10778     }
10779
10780     sub accumulate_csc_text {
10781
10782         # called once per output buffer when -csc is used. Accumulates
10783         # the text placed after certain closing block braces.
10784         # Defines and returns the following for this buffer:
10785
10786         my $block_leading_text = "";    # the leading text of the last '}'
10787         my $rblock_leading_if_elsif_text;
10788         my $i_block_leading_text =
10789           -1;    # index of token owning block_leading_text
10790         my $block_line_count    = 100;    # how many lines the block spans
10791         my $terminal_type       = 'b';    # type of last nonblank token
10792         my $i_terminal          = 0;      # index of last nonblank token
10793         my $terminal_block_type = "";
10794
10795         for my $i ( 0 .. $max_index_to_go ) {
10796             my $type       = $types_to_go[$i];
10797             my $block_type = $block_type_to_go[$i];
10798             my $token      = $tokens_to_go[$i];
10799
10800             # remember last nonblank token type
10801             if ( $type ne '#' && $type ne 'b' ) {
10802                 $terminal_type       = $type;
10803                 $terminal_block_type = $block_type;
10804                 $i_terminal          = $i;
10805             }
10806
10807             my $type_sequence = $type_sequence_to_go[$i];
10808             if ( $block_type && $type_sequence ) {
10809
10810                 if ( $token eq '}' ) {
10811
10812                     # restore any leading text saved when we entered this block
10813                     if ( defined( $block_leading_text{$type_sequence} ) ) {
10814                         ( $block_leading_text, $rblock_leading_if_elsif_text ) =
10815                           @{ $block_leading_text{$type_sequence} };
10816                         $i_block_leading_text = $i;
10817                         delete $block_leading_text{$type_sequence};
10818                         $rleading_block_if_elsif_text =
10819                           $rblock_leading_if_elsif_text;
10820                     }
10821
10822                     # if we run into a '}' then we probably started accumulating
10823                     # at something like a trailing 'if' clause..no harm done.
10824                     if (   $accumulating_text_for_block
10825                         && $levels_to_go[$i] <= $leading_block_text_level )
10826                     {
10827                         my $lev = $levels_to_go[$i];
10828                         reset_block_text_accumulator();
10829                     }
10830
10831                     if ( defined( $block_opening_line_number{$type_sequence} ) )
10832                     {
10833                         my $output_line_number =
10834                           $vertical_aligner_object->get_output_line_number();
10835                         $block_line_count =
10836                           $output_line_number -
10837                           $block_opening_line_number{$type_sequence} + 1;
10838                         delete $block_opening_line_number{$type_sequence};
10839                     }
10840                     else {
10841
10842                         # Error: block opening line undefined for this line..
10843                         # This shouldn't be possible, but it is not a
10844                         # significant problem.
10845                     }
10846                 }
10847
10848                 elsif ( $token eq '{' ) {
10849
10850                     my $line_number =
10851                       $vertical_aligner_object->get_output_line_number();
10852                     $block_opening_line_number{$type_sequence} = $line_number;
10853
10854                     if (   $accumulating_text_for_block
10855                         && $levels_to_go[$i] == $leading_block_text_level )
10856                     {
10857
10858                         if ( $accumulating_text_for_block eq $block_type ) {
10859
10860                             # save any leading text before we enter this block
10861                             $block_leading_text{$type_sequence} = [
10862                                 $leading_block_text,
10863                                 $rleading_block_if_elsif_text
10864                             ];
10865                             $block_opening_line_number{$type_sequence} =
10866                               $leading_block_text_line_number;
10867                             reset_block_text_accumulator();
10868                         }
10869                         else {
10870
10871                             # shouldn't happen, but not a serious error.
10872                             # We were accumulating -csc text for block type
10873                             # $accumulating_text_for_block and unexpectedly
10874                             # encountered a '{' for block type $block_type.
10875                         }
10876                     }
10877                 }
10878             }
10879
10880             if (   $type eq 'k'
10881                 && $csc_new_statement_ok
10882                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
10883                 && $token =~ /$closing_side_comment_list_pattern/o )
10884             {
10885                 set_block_text_accumulator($i);
10886             }
10887             else {
10888
10889                 # note: ignoring type 'q' because of tricks being played
10890                 # with 'q' for hanging side comments
10891                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10892                     $csc_new_statement_ok =
10893                       ( $block_type || $type eq 'J' || $type eq ';' );
10894                 }
10895                 if (   $type eq ';'
10896                     && $accumulating_text_for_block
10897                     && $levels_to_go[$i] == $leading_block_text_level )
10898                 {
10899                     reset_block_text_accumulator();
10900                 }
10901                 else {
10902                     accumulate_block_text($i);
10903                 }
10904             }
10905         }
10906
10907         # Treat an 'else' block specially by adding preceding 'if' and
10908         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
10909         # especially for cuddled-else formatting.
10910         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10911             $block_leading_text =
10912               make_else_csc_text( $i_terminal, $terminal_block_type,
10913                 $block_leading_text, $rblock_leading_if_elsif_text );
10914         }
10915
10916         return ( $terminal_type, $i_terminal, $i_block_leading_text,
10917             $block_leading_text, $block_line_count );
10918     }
10919 }
10920
10921 sub make_else_csc_text {
10922
10923     # create additional -csc text for an 'else' and optionally 'elsif',
10924     # depending on the value of switch
10925     # $rOpts_closing_side_comment_else_flag:
10926     #
10927     #  = 0 add 'if' text to trailing else
10928     #  = 1 same as 0 plus:
10929     #      add 'if' to 'elsif's if can fit in line length
10930     #      add last 'elsif' to trailing else if can fit in one line
10931     #  = 2 same as 1 but do not check if exceed line length
10932     #
10933     # $rif_elsif_text = a reference to a list of all previous closing
10934     # side comments created for this if block
10935     #
10936     my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10937     my $csc_text = $block_leading_text;
10938
10939     if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10940     {
10941         return $csc_text;
10942     }
10943
10944     my $count = @{$rif_elsif_text};
10945     return $csc_text unless ($count);
10946
10947     my $if_text = '[ if' . $rif_elsif_text->[0];
10948
10949     # always show the leading 'if' text on 'else'
10950     if ( $block_type eq 'else' ) {
10951         $csc_text .= $if_text;
10952     }
10953
10954     # see if that's all
10955     if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10956         return $csc_text;
10957     }
10958
10959     my $last_elsif_text = "";
10960     if ( $count > 1 ) {
10961         $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10962         if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10963     }
10964
10965     # tentatively append one more item
10966     my $saved_text = $csc_text;
10967     if ( $block_type eq 'else' ) {
10968         $csc_text .= $last_elsif_text;
10969     }
10970     else {
10971         $csc_text .= ' ' . $if_text;
10972     }
10973
10974     # all done if no length checks requested
10975     if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10976         return $csc_text;
10977     }
10978
10979     # undo it if line length exceeded
10980     my $length =
10981       length($csc_text) +
10982       length($block_type) +
10983       length( $rOpts->{'closing-side-comment-prefix'} ) +
10984       $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10985     if ( $length > $rOpts_maximum_line_length ) {
10986         $csc_text = $saved_text;
10987     }
10988     return $csc_text;
10989 }
10990
10991 {    # sub balance_csc_text
10992
10993     my %matching_char;
10994
10995     BEGIN {
10996         %matching_char = (
10997             '{' => '}',
10998             '(' => ')',
10999             '[' => ']',
11000             '}' => '{',
11001             ')' => '(',
11002             ']' => '[',
11003         );
11004     }
11005
11006     sub balance_csc_text {
11007
11008         # Append characters to balance a closing side comment so that editors
11009         # such as vim can correctly jump through code.
11010         # Simple Example:
11011         #  input  = ## end foreach my $foo ( sort { $b  ...
11012         #  output = ## end foreach my $foo ( sort { $b  ...})
11013
11014         # NOTE: This routine does not currently filter out structures within
11015         # quoted text because the bounce algorithims in text editors do not
11016         # necessarily do this either (a version of vim was checked and
11017         # did not do this).
11018
11019         # Some complex examples which will cause trouble for some editors:
11020         #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
11021         #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
11022         #  if ( $1 eq '{' ) {
11023         # test file test1/braces.pl has many such examples.
11024
11025         my ($csc) = @_;
11026
11027         # loop to examine characters one-by-one, RIGHT to LEFT and
11028         # build a balancing ending, LEFT to RIGHT.
11029         for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
11030
11031             my $char = substr( $csc, $pos, 1 );
11032
11033             # ignore everything except structural characters
11034             next unless ( $matching_char{$char} );
11035
11036             # pop most recently appended character
11037             my $top = chop($csc);
11038
11039             # push it back plus the mate to the newest character
11040             # unless they balance each other.
11041             $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
11042         }
11043
11044         # return the balanced string
11045         return $csc;
11046     }
11047 }
11048
11049 sub add_closing_side_comment {
11050
11051     # add closing side comments after closing block braces if -csc used
11052     my $cscw_block_comment;
11053
11054     #---------------------------------------------------------------
11055     # Step 1: loop through all tokens of this line to accumulate
11056     # the text needed to create the closing side comments. Also see
11057     # how the line ends.
11058     #---------------------------------------------------------------
11059
11060     my ( $terminal_type, $i_terminal, $i_block_leading_text,
11061         $block_leading_text, $block_line_count )
11062       = accumulate_csc_text();
11063
11064     #---------------------------------------------------------------
11065     # Step 2: make the closing side comment if this ends a block
11066     #---------------------------------------------------------------
11067     my $have_side_comment = $i_terminal != $max_index_to_go;
11068
11069     # if this line might end in a block closure..
11070     if (
11071         $terminal_type eq '}'
11072
11073         # ..and either
11074         && (
11075
11076             # the block is long enough
11077             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
11078
11079             # or there is an existing comment to check
11080             || (   $have_side_comment
11081                 && $rOpts->{'closing-side-comment-warnings'} )
11082         )
11083
11084         # .. and if this is one of the types of interest
11085         && $block_type_to_go[$i_terminal] =~
11086         /$closing_side_comment_list_pattern/o
11087
11088         # .. but not an anonymous sub
11089         # These are not normally of interest, and their closing braces are
11090         # often followed by commas or semicolons anyway.  This also avoids
11091         # possible erratic output due to line numbering inconsistencies
11092         # in the cases where their closing braces terminate a line.
11093         && $block_type_to_go[$i_terminal] ne 'sub'
11094
11095         # ..and the corresponding opening brace must is not in this batch
11096         # (because we do not need to tag one-line blocks, although this
11097         # should also be caught with a positive -csci value)
11098         && $mate_index_to_go[$i_terminal] < 0
11099
11100         # ..and either
11101         && (
11102
11103             # this is the last token (line doesnt have a side comment)
11104             !$have_side_comment
11105
11106             # or the old side comment is a closing side comment
11107             || $tokens_to_go[$max_index_to_go] =~
11108             /$closing_side_comment_prefix_pattern/o
11109         )
11110       )
11111     {
11112
11113         # then make the closing side comment text
11114         my $token =
11115 "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
11116
11117         # append any extra descriptive text collected above
11118         if ( $i_block_leading_text == $i_terminal ) {
11119             $token .= $block_leading_text;
11120         }
11121
11122         $token = balance_csc_text($token)
11123           if $rOpts->{'closing-side-comments-balanced'};
11124
11125         $token =~ s/\s*$//;    # trim any trailing whitespace
11126
11127         # handle case of existing closing side comment
11128         if ($have_side_comment) {
11129
11130             # warn if requested and tokens differ significantly
11131             if ( $rOpts->{'closing-side-comment-warnings'} ) {
11132                 my $old_csc = $tokens_to_go[$max_index_to_go];
11133                 my $new_csc = $token;
11134                 $new_csc =~ s/\s+//g;            # trim all whitespace
11135                 $old_csc =~ s/\s+//g;            # trim all whitespace
11136                 $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11137                 $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11138                 $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
11139                 my $new_trailing_dots = $1;
11140                 $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
11141
11142                 # Patch to handle multiple closing side comments at
11143                 # else and elsif's.  These have become too complicated
11144                 # to check, so if we see an indication of
11145                 # '[ if' or '[ # elsif', then assume they were made
11146                 # by perltidy.
11147                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
11148                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
11149                 }
11150                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
11151                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
11152                 }
11153
11154                 # if old comment is contained in new comment,
11155                 # only compare the common part.
11156                 if ( length($new_csc) > length($old_csc) ) {
11157                     $new_csc = substr( $new_csc, 0, length($old_csc) );
11158                 }
11159
11160                 # if the new comment is shorter and has been limited,
11161                 # only compare the common part.
11162                 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
11163                 {
11164                     $old_csc = substr( $old_csc, 0, length($new_csc) );
11165                 }
11166
11167                 # any remaining difference?
11168                 if ( $new_csc ne $old_csc ) {
11169
11170                     # just leave the old comment if we are below the threshold
11171                     # for creating side comments
11172                     if ( $block_line_count <
11173                         $rOpts->{'closing-side-comment-interval'} )
11174                     {
11175                         $token = undef;
11176                     }
11177
11178                     # otherwise we'll make a note of it
11179                     else {
11180
11181                         warning(
11182 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
11183                         );
11184
11185                      # save the old side comment in a new trailing block comment
11186                         my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
11187                         $year  += 1900;
11188                         $month += 1;
11189                         $cscw_block_comment =
11190 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
11191                     }
11192                 }
11193                 else {
11194
11195                     # No differences.. we can safely delete old comment if we
11196                     # are below the threshold
11197                     if ( $block_line_count <
11198                         $rOpts->{'closing-side-comment-interval'} )
11199                     {
11200                         $token = undef;
11201                         unstore_token_to_go()
11202                           if ( $types_to_go[$max_index_to_go] eq '#' );
11203                         unstore_token_to_go()
11204                           if ( $types_to_go[$max_index_to_go] eq 'b' );
11205                     }
11206                 }
11207             }
11208
11209             # switch to the new csc (unless we deleted it!)
11210             $tokens_to_go[$max_index_to_go] = $token if $token;
11211         }
11212
11213         # handle case of NO existing closing side comment
11214         else {
11215
11216             # insert the new side comment into the output token stream
11217             my $type          = '#';
11218             my $block_type    = '';
11219             my $type_sequence = '';
11220             my $container_environment =
11221               $container_environment_to_go[$max_index_to_go];
11222             my $level                = $levels_to_go[$max_index_to_go];
11223             my $slevel               = $nesting_depth_to_go[$max_index_to_go];
11224             my $no_internal_newlines = 0;
11225
11226             my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
11227             my $ci_level           = $ci_levels_to_go[$max_index_to_go];
11228             my $in_continued_quote = 0;
11229
11230             # first insert a blank token
11231             insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
11232
11233             # then the side comment
11234             insert_new_token_to_go( $token, $type, $slevel,
11235                 $no_internal_newlines );
11236         }
11237     }
11238     return $cscw_block_comment;
11239 }
11240
11241 sub previous_nonblank_token {
11242     my ($i)  = @_;
11243     my $name = "";
11244     my $im   = $i - 1;
11245     return "" if ( $im < 0 );
11246     if ( $types_to_go[$im] eq 'b' ) { $im--; }
11247     return "" if ( $im < 0 );
11248     $name = $tokens_to_go[$im];
11249
11250     # prepend any sub name to an isolated -> to avoid unwanted alignments
11251     # [test case is test8/penco.pl]
11252     if ( $name eq '->' ) {
11253         $im--;
11254         if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
11255             $name = $tokens_to_go[$im] . $name;
11256         }
11257     }
11258     return $name;
11259 }
11260
11261 sub send_lines_to_vertical_aligner {
11262
11263     my ( $ri_first, $ri_last, $do_not_pad ) = @_;
11264
11265     my $rindentation_list = [0];    # ref to indentations for each line
11266
11267     # define the array @matching_token_to_go for the output tokens
11268     # which will be non-blank for each special token (such as =>)
11269     # for which alignment is required.
11270     set_vertical_alignment_markers( $ri_first, $ri_last );
11271
11272     # flush if necessary to avoid unwanted alignment
11273     my $must_flush = 0;
11274     if ( @$ri_first > 1 ) {
11275
11276         # flush before a long if statement
11277         if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
11278             $must_flush = 1;
11279         }
11280     }
11281     if ($must_flush) {
11282         Perl::Tidy::VerticalAligner::flush();
11283     }
11284
11285     undo_ci( $ri_first, $ri_last );
11286
11287     set_logical_padding( $ri_first, $ri_last );
11288
11289     # loop to prepare each line for shipment
11290     my $n_last_line = @$ri_first - 1;
11291     my $in_comma_list;
11292     for my $n ( 0 .. $n_last_line ) {
11293         my $ibeg = $$ri_first[$n];
11294         my $iend = $$ri_last[$n];
11295
11296         my ( $rtokens, $rfields, $rpatterns ) =
11297           make_alignment_patterns( $ibeg, $iend );
11298
11299         my ( $indentation, $lev, $level_end, $terminal_type,
11300             $is_semicolon_terminated, $is_outdented_line )
11301           = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
11302             $ri_first, $ri_last, $rindentation_list );
11303
11304         # we will allow outdenting of long lines..
11305         my $outdent_long_lines = (
11306
11307             # which are long quotes, if allowed
11308             ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
11309
11310             # which are long block comments, if allowed
11311               || (
11312                    $types_to_go[$ibeg] eq '#'
11313                 && $rOpts->{'outdent-long-comments'}
11314
11315                 # but not if this is a static block comment
11316                 && !$is_static_block_comment
11317               )
11318         );
11319
11320         my $level_jump =
11321           $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
11322
11323         my $rvertical_tightness_flags =
11324           set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
11325             $ri_first, $ri_last );
11326
11327         # flush an outdented line to avoid any unwanted vertical alignment
11328         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11329
11330         my $is_terminal_ternary = 0;
11331         if (   $tokens_to_go[$ibeg] eq ':'
11332             || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
11333         {
11334             if (   ( $terminal_type eq ';' && $level_end <= $lev )
11335                 || ( $level_end < $lev ) )
11336             {
11337                 $is_terminal_ternary = 1;
11338             }
11339         }
11340
11341         # send this new line down the pipe
11342         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
11343         Perl::Tidy::VerticalAligner::append_line(
11344             $lev,
11345             $level_end,
11346             $indentation,
11347             $rfields,
11348             $rtokens,
11349             $rpatterns,
11350             $forced_breakpoint_to_go[$iend] || $in_comma_list,
11351             $outdent_long_lines,
11352             $is_terminal_ternary,
11353             $is_semicolon_terminated,
11354             $do_not_pad,
11355             $rvertical_tightness_flags,
11356             $level_jump,
11357         );
11358         $in_comma_list =
11359           $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
11360
11361         # flush an outdented line to avoid any unwanted vertical alignment
11362         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11363
11364         $do_not_pad = 0;
11365
11366     }    # end of loop to output each line
11367
11368     # remember indentation of lines containing opening containers for
11369     # later use by sub set_adjusted_indentation
11370     save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
11371 }
11372
11373 {        # begin make_alignment_patterns
11374
11375     my %block_type_map;
11376     my %keyword_map;
11377
11378     BEGIN {
11379
11380         # map related block names into a common name to
11381         # allow alignment
11382         %block_type_map = (
11383             'unless'  => 'if',
11384             'else'    => 'if',
11385             'elsif'   => 'if',
11386             'when'    => 'if',
11387             'default' => 'if',
11388             'case'    => 'if',
11389             'sort'    => 'map',
11390             'grep'    => 'map',
11391         );
11392
11393         # map certain keywords to the same 'if' class to align
11394         # long if/elsif sequences. [elsif.pl]
11395         %keyword_map = (
11396             'unless'  => 'if',
11397             'else'    => 'if',
11398             'elsif'   => 'if',
11399             'when'    => 'given',
11400             'default' => 'given',
11401             'case'    => 'switch',
11402
11403             # treat an 'undef' similar to numbers and quotes
11404             'undef' => 'Q',
11405         );
11406     }
11407
11408     sub make_alignment_patterns {
11409
11410         # Here we do some important preliminary work for the
11411         # vertical aligner.  We create three arrays for one
11412         # output line. These arrays contain strings that can
11413         # be tested by the vertical aligner to see if
11414         # consecutive lines can be aligned vertically.
11415         #
11416         # The three arrays are indexed on the vertical
11417         # alignment fields and are:
11418         # @tokens - a list of any vertical alignment tokens for this line.
11419         #   These are tokens, such as '=' '&&' '#' etc which
11420         #   we want to might align vertically.  These are
11421         #   decorated with various information such as
11422         #   nesting depth to prevent unwanted vertical
11423         #   alignment matches.
11424         # @fields - the actual text of the line between the vertical alignment
11425         #   tokens.
11426         # @patterns - a modified list of token types, one for each alignment
11427         #   field.  These should normally each match before alignment is
11428         #   allowed, even when the alignment tokens match.
11429         my ( $ibeg, $iend ) = @_;
11430         my @tokens   = ();
11431         my @fields   = ();
11432         my @patterns = ();
11433         my $i_start  = $ibeg;
11434         my $i;
11435
11436         my $depth                 = 0;
11437         my @container_name        = ("");
11438         my @multiple_comma_arrows = (undef);
11439
11440         my $j = 0;    # field index
11441
11442         $patterns[0] = "";
11443         for $i ( $ibeg .. $iend ) {
11444
11445             # Keep track of containers balanced on this line only.
11446             # These are used below to prevent unwanted cross-line alignments.
11447             # Unbalanced containers already avoid aligning across
11448             # container boundaries.
11449             if ( $tokens_to_go[$i] eq '(' ) {
11450
11451                 # if container is balanced on this line...
11452                 my $i_mate = $mate_index_to_go[$i];
11453                 if ( $i_mate > $i && $i_mate <= $iend ) {
11454                     $depth++;
11455                     my $seqno = $type_sequence_to_go[$i];
11456                     my $count = comma_arrow_count($seqno);
11457                     $multiple_comma_arrows[$depth] = $count && $count > 1;
11458
11459                     # Append the previous token name to make the container name
11460                     # more unique.  This name will also be given to any commas
11461                     # within this container, and it helps avoid undesirable
11462                     # alignments of different types of containers.
11463                     my $name = previous_nonblank_token($i);
11464                     $name =~ s/^->//;
11465                     $container_name[$depth] = "+" . $name;
11466
11467                     # Make the container name even more unique if necessary.
11468                     # If we are not vertically aligning this opening paren,
11469                     # append a character count to avoid bad alignment because
11470                     # it usually looks bad to align commas within continers
11471                     # for which the opening parens do not align.  Here
11472                     # is an example very BAD alignment of commas (because
11473                     # the atan2 functions are not all aligned):
11474                     #    $XY =
11475                     #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
11476                     #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
11477                     #      $X * atan2( $X,            1 ) -
11478                     #      $Y * atan2( $Y,            1 );
11479                     #
11480                     # On the other hand, it is usually okay to align commas if
11481                     # opening parens align, such as:
11482                     #    glVertex3d( $cx + $s * $xs, $cy,            $z );
11483                     #    glVertex3d( $cx,            $cy + $s * $ys, $z );
11484                     #    glVertex3d( $cx - $s * $xs, $cy,            $z );
11485                     #    glVertex3d( $cx,            $cy - $s * $ys, $z );
11486                     #
11487                     # To distinguish between these situations, we will
11488                     # append the length of the line from the previous matching
11489                     # token, or beginning of line, to the function name.  This
11490                     # will allow the vertical aligner to reject undesirable
11491                     # matches.
11492
11493                     # if we are not aligning on this paren...
11494                     if ( $matching_token_to_go[$i] eq '' ) {
11495
11496                         # Sum length from previous alignment, or start of line.
11497                         # Note that we have to sum token lengths here because
11498                         # padding has been done and so array $lengths_to_go
11499                         # is now wrong.
11500                         my $len =
11501                           length(
11502                             join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11503                         $len += leading_spaces_to_go($i_start)
11504                           if ( $i_start == $ibeg );
11505
11506                         # tack length onto the container name to make unique
11507                         $container_name[$depth] .= "-" . $len;
11508                     }
11509                 }
11510             }
11511             elsif ( $tokens_to_go[$i] eq ')' ) {
11512                 $depth-- if $depth > 0;
11513             }
11514
11515             # if we find a new synchronization token, we are done with
11516             # a field
11517             if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
11518
11519                 my $tok = my $raw_tok = $matching_token_to_go[$i];
11520
11521                 # make separators in different nesting depths unique
11522                 # by appending the nesting depth digit.
11523                 if ( $raw_tok ne '#' ) {
11524                     $tok .= "$nesting_depth_to_go[$i]";
11525                 }
11526
11527                 # also decorate commas with any container name to avoid
11528                 # unwanted cross-line alignments.
11529                 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
11530                     if ( $container_name[$depth] ) {
11531                         $tok .= $container_name[$depth];
11532                     }
11533                 }
11534
11535                 # Patch to avoid aligning leading and trailing if, unless.
11536                 # Mark trailing if, unless statements with container names.
11537                 # This makes them different from leading if, unless which
11538                 # are not so marked at present.  If we ever need to name
11539                 # them too, we could use ci to distinguish them.
11540                 # Example problem to avoid:
11541                 #    return ( 2, "DBERROR" )
11542                 #      if ( $retval == 2 );
11543                 #    if   ( scalar @_ ) {
11544                 #        my ( $a, $b, $c, $d, $e, $f ) = @_;
11545                 #    }
11546                 if ( $raw_tok eq '(' ) {
11547                     my $ci = $ci_levels_to_go[$ibeg];
11548                     if (   $container_name[$depth] =~ /^\+(if|unless)/
11549                         && $ci )
11550                     {
11551                         $tok .= $container_name[$depth];
11552                     }
11553                 }
11554
11555                 # Decorate block braces with block types to avoid
11556                 # unwanted alignments such as the following:
11557                 # foreach ( @{$routput_array} ) { $fh->print($_) }
11558                 # eval                          { $fh->close() };
11559                 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
11560                     my $block_type = $block_type_to_go[$i];
11561
11562                     # map certain related block types to allow
11563                     # else blocks to align
11564                     $block_type = $block_type_map{$block_type}
11565                       if ( defined( $block_type_map{$block_type} ) );
11566
11567                     # remove sub names to allow one-line sub braces to align
11568                     # regardless of name
11569                     if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
11570
11571                     # allow all control-type blocks to align
11572                     if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
11573
11574                     $tok .= $block_type;
11575                 }
11576
11577                 # concatenate the text of the consecutive tokens to form
11578                 # the field
11579                 push( @fields,
11580                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11581
11582                 # store the alignment token for this field
11583                 push( @tokens, $tok );
11584
11585                 # get ready for the next batch
11586                 $i_start = $i;
11587                 $j++;
11588                 $patterns[$j] = "";
11589             }
11590
11591             # continue accumulating tokens
11592             # handle non-keywords..
11593             if ( $types_to_go[$i] ne 'k' ) {
11594                 my $type = $types_to_go[$i];
11595
11596                 # Mark most things before arrows as a quote to
11597                 # get them to line up. Testfile: mixed.pl.
11598                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
11599                     my $next_type = $types_to_go[ $i + 1 ];
11600                     my $i_next_nonblank =
11601                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
11602
11603                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
11604                         $type = 'Q';
11605
11606                         # Patch to ignore leading minus before words,
11607                         # by changing pattern 'mQ' into just 'Q',
11608                         # so that we can align things like this:
11609                         #  Button   => "Print letter \"~$_\"",
11610                         #  -command => [ sub { print "$_[0]\n" }, $_ ],
11611                         if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
11612                     }
11613                 }
11614
11615                 # patch to make numbers and quotes align
11616                 if ( $type eq 'n' ) { $type = 'Q' }
11617
11618                 # patch to ignore any ! in patterns
11619                 if ( $type eq '!' ) { $type = '' }
11620
11621                 $patterns[$j] .= $type;
11622             }
11623
11624             # for keywords we have to use the actual text
11625             else {
11626
11627                 my $tok = $tokens_to_go[$i];
11628
11629                 # but map certain keywords to a common string to allow
11630                 # alignment.
11631                 $tok = $keyword_map{$tok}
11632                   if ( defined( $keyword_map{$tok} ) );
11633                 $patterns[$j] .= $tok;
11634             }
11635         }
11636
11637         # done with this line .. join text of tokens to make the last field
11638         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
11639         return ( \@tokens, \@fields, \@patterns );
11640     }
11641
11642 }    # end make_alignment_patterns
11643
11644 {    # begin unmatched_indexes
11645
11646     # closure to keep track of unbalanced containers.
11647     # arrays shared by the routines in this block:
11648     my @unmatched_opening_indexes_in_this_batch;
11649     my @unmatched_closing_indexes_in_this_batch;
11650     my %comma_arrow_count;
11651
11652     sub is_unbalanced_batch {
11653         @unmatched_opening_indexes_in_this_batch +
11654           @unmatched_closing_indexes_in_this_batch;
11655     }
11656
11657     sub comma_arrow_count {
11658         my $seqno = $_[0];
11659         return $comma_arrow_count{$seqno};
11660     }
11661
11662     sub match_opening_and_closing_tokens {
11663
11664         # Match up indexes of opening and closing braces, etc, in this batch.
11665         # This has to be done after all tokens are stored because unstoring
11666         # of tokens would otherwise cause trouble.
11667
11668         @unmatched_opening_indexes_in_this_batch = ();
11669         @unmatched_closing_indexes_in_this_batch = ();
11670         %comma_arrow_count                       = ();
11671
11672         my ( $i, $i_mate, $token );
11673         foreach $i ( 0 .. $max_index_to_go ) {
11674             if ( $type_sequence_to_go[$i] ) {
11675                 $token = $tokens_to_go[$i];
11676                 if ( $token =~ /^[\(\[\{\?]$/ ) {
11677                     push @unmatched_opening_indexes_in_this_batch, $i;
11678                 }
11679                 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
11680
11681                     $i_mate = pop @unmatched_opening_indexes_in_this_batch;
11682                     if ( defined($i_mate) && $i_mate >= 0 ) {
11683                         if ( $type_sequence_to_go[$i_mate] ==
11684                             $type_sequence_to_go[$i] )
11685                         {
11686                             $mate_index_to_go[$i]      = $i_mate;
11687                             $mate_index_to_go[$i_mate] = $i;
11688                         }
11689                         else {
11690                             push @unmatched_opening_indexes_in_this_batch,
11691                               $i_mate;
11692                             push @unmatched_closing_indexes_in_this_batch, $i;
11693                         }
11694                     }
11695                     else {
11696                         push @unmatched_closing_indexes_in_this_batch, $i;
11697                     }
11698                 }
11699             }
11700             elsif ( $tokens_to_go[$i] eq '=>' ) {
11701                 if (@unmatched_opening_indexes_in_this_batch) {
11702                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
11703                     my $seqno = $type_sequence_to_go[$j];
11704                     $comma_arrow_count{$seqno}++;
11705                 }
11706             }
11707         }
11708     }
11709
11710     sub save_opening_indentation {
11711
11712         # This should be called after each batch of tokens is output. It
11713         # saves indentations of lines of all unmatched opening tokens.
11714         # These will be used by sub get_opening_indentation.
11715
11716         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11717
11718         # we no longer need indentations of any saved indentations which
11719         # are unmatched closing tokens in this batch, because we will
11720         # never encounter them again.  So we can delete them to keep
11721         # the hash size down.
11722         foreach (@unmatched_closing_indexes_in_this_batch) {
11723             my $seqno = $type_sequence_to_go[$_];
11724             delete $saved_opening_indentation{$seqno};
11725         }
11726
11727         # we need to save indentations of any unmatched opening tokens
11728         # in this batch because we may need them in a subsequent batch.
11729         foreach (@unmatched_opening_indexes_in_this_batch) {
11730             my $seqno = $type_sequence_to_go[$_];
11731             $saved_opening_indentation{$seqno} = [
11732                 lookup_opening_indentation(
11733                     $_, $ri_first, $ri_last, $rindentation_list
11734                 )
11735             ];
11736         }
11737     }
11738 }    # end unmatched_indexes
11739
11740 sub get_opening_indentation {
11741
11742     # get the indentation of the line which output the opening token
11743     # corresponding to a given closing token in the current output batch.
11744     #
11745     # given:
11746     # $i_closing - index in this line of a closing token ')' '}' or ']'
11747     #
11748     # $ri_first - reference to list of the first index $i for each output
11749     #               line in this batch
11750     # $ri_last - reference to list of the last index $i for each output line
11751     #              in this batch
11752     # $rindentation_list - reference to a list containing the indentation
11753     #            used for each line.
11754     #
11755     # return:
11756     #   -the indentation of the line which contained the opening token
11757     #    which matches the token at index $i_opening
11758     #   -and its offset (number of columns) from the start of the line
11759     #
11760     my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
11761
11762     # first, see if the opening token is in the current batch
11763     my $i_opening = $mate_index_to_go[$i_closing];
11764     my ( $indent, $offset, $is_leading, $exists );
11765     $exists = 1;
11766     if ( $i_opening >= 0 ) {
11767
11768         # it is..look up the indentation
11769         ( $indent, $offset, $is_leading ) =
11770           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
11771             $rindentation_list );
11772     }
11773
11774     # if not, it should have been stored in the hash by a previous batch
11775     else {
11776         my $seqno = $type_sequence_to_go[$i_closing];
11777         if ($seqno) {
11778             if ( $saved_opening_indentation{$seqno} ) {
11779                 ( $indent, $offset, $is_leading ) =
11780                   @{ $saved_opening_indentation{$seqno} };
11781             }
11782
11783             # some kind of serious error
11784             # (example is badfile.t)
11785             else {
11786                 $indent     = 0;
11787                 $offset     = 0;
11788                 $is_leading = 0;
11789                 $exists     = 0;
11790             }
11791         }
11792
11793         # if no sequence number it must be an unbalanced container
11794         else {
11795             $indent     = 0;
11796             $offset     = 0;
11797             $is_leading = 0;
11798             $exists     = 0;
11799         }
11800     }
11801     return ( $indent, $offset, $is_leading, $exists );
11802 }
11803
11804 sub lookup_opening_indentation {
11805
11806     # get the indentation of the line in the current output batch
11807     # which output a selected opening token
11808     #
11809     # given:
11810     #   $i_opening - index of an opening token in the current output batch
11811     #                whose line indentation we need
11812     #   $ri_first - reference to list of the first index $i for each output
11813     #               line in this batch
11814     #   $ri_last - reference to list of the last index $i for each output line
11815     #              in this batch
11816     #   $rindentation_list - reference to a list containing the indentation
11817     #            used for each line.  (NOTE: the first slot in
11818     #            this list is the last returned line number, and this is
11819     #            followed by the list of indentations).
11820     #
11821     # return
11822     #   -the indentation of the line which contained token $i_opening
11823     #   -and its offset (number of columns) from the start of the line
11824
11825     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11826
11827     my $nline = $rindentation_list->[0];    # line number of previous lookup
11828
11829     # reset line location if necessary
11830     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
11831
11832     # find the correct line
11833     unless ( $i_opening > $ri_last->[-1] ) {
11834         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11835     }
11836
11837     # error - token index is out of bounds - shouldn't happen
11838     else {
11839         warning(
11840 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
11841         );
11842         report_definite_bug();
11843         $nline = $#{$ri_last};
11844     }
11845
11846     $rindentation_list->[0] =
11847       $nline;    # save line number to start looking next call
11848     my $ibeg       = $ri_start->[$nline];
11849     my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
11850     my $is_leading = ( $ibeg == $i_opening );
11851     return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
11852 }
11853
11854 {
11855     my %is_if_elsif_else_unless_while_until_for_foreach;
11856
11857     BEGIN {
11858
11859         # These block types may have text between the keyword and opening
11860         # curly.  Note: 'else' does not, but must be included to allow trailing
11861         # if/elsif text to be appended.
11862         # patch for SWITCH/CASE: added 'case' and 'when'
11863         @_ = qw(if elsif else unless while until for foreach case when);
11864         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11865     }
11866
11867     sub set_adjusted_indentation {
11868
11869         # This routine has the final say regarding the actual indentation of
11870         # a line.  It starts with the basic indentation which has been
11871         # defined for the leading token, and then takes into account any
11872         # options that the user has set regarding special indenting and
11873         # outdenting.
11874
11875         my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11876             $rindentation_list )
11877           = @_;
11878
11879         # we need to know the last token of this line
11880         my ( $terminal_type, $i_terminal ) =
11881           terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11882
11883         my $is_outdented_line = 0;
11884
11885         my $is_semicolon_terminated = $terminal_type eq ';'
11886           && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11887
11888         ##########################################################
11889         # Section 1: set a flag and a default indentation
11890         #
11891         # Most lines are indented according to the initial token.
11892         # But it is common to outdent to the level just after the
11893         # terminal token in certain cases...
11894         # adjust_indentation flag:
11895         #       0 - do not adjust
11896         #       1 - outdent
11897         #       2 - vertically align with opening token
11898         #       3 - indent
11899         ##########################################################
11900         my $adjust_indentation         = 0;
11901         my $default_adjust_indentation = $adjust_indentation;
11902
11903         my (
11904             $opening_indentation, $opening_offset,
11905             $is_leading,          $opening_exists
11906         );
11907
11908         # if we are at a closing token of some type..
11909         if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11910
11911             # get the indentation of the line containing the corresponding
11912             # opening token
11913             (
11914                 $opening_indentation, $opening_offset,
11915                 $is_leading,          $opening_exists
11916               )
11917               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11918                 $rindentation_list );
11919
11920             # First set the default behavior:
11921             # default behavior is to outdent closing lines
11922             # of the form:   ");  };  ];  )->xxx;"
11923             if (
11924                 $is_semicolon_terminated
11925
11926                 # and 'cuddled parens' of the form:   ")->pack("
11927                 || (
11928                        $terminal_type eq '('
11929                     && $types_to_go[$ibeg] eq ')'
11930                     && ( $nesting_depth_to_go[$iend] + 1 ==
11931                         $nesting_depth_to_go[$ibeg] )
11932                 )
11933               )
11934             {
11935                 $adjust_indentation = 1;
11936             }
11937
11938             # TESTING: outdent something like '),'
11939             if (
11940                 $terminal_type eq ','
11941
11942                 # allow just one character before the comma
11943                 && $i_terminal == $ibeg + 1
11944
11945                 # requre LIST environment; otherwise, we may outdent too much --
11946                 # this can happen in calls without parentheses (overload.t);
11947                 && $container_environment_to_go[$i_terminal] eq 'LIST'
11948               )
11949             {
11950                 $adjust_indentation = 1;
11951             }
11952
11953             # undo continuation indentation of a terminal closing token if
11954             # it is the last token before a level decrease.  This will allow
11955             # a closing token to line up with its opening counterpart, and
11956             # avoids a indentation jump larger than 1 level.
11957             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11958                 && $i_terminal == $ibeg )
11959             {
11960                 my $ci        = $ci_levels_to_go[$ibeg];
11961                 my $lev       = $levels_to_go[$ibeg];
11962                 my $next_type = $types_to_go[ $ibeg + 1 ];
11963                 my $i_next_nonblank =
11964                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11965                 if (   $i_next_nonblank <= $max_index_to_go
11966                     && $levels_to_go[$i_next_nonblank] < $lev )
11967                 {
11968                     $adjust_indentation = 1;
11969                 }
11970             }
11971
11972             # YVES patch 1 of 2:
11973             # Undo ci of line with leading closing eval brace,
11974             # but not beyond the indention of the line with
11975             # the opening brace.
11976             if (   $block_type_to_go[$ibeg] eq 'eval'
11977                 && !$rOpts->{'line-up-parentheses'}
11978                 && !$rOpts->{'indent-closing-brace'} )
11979             {
11980                 (
11981                     $opening_indentation, $opening_offset,
11982                     $is_leading,          $opening_exists
11983                   )
11984                   = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11985                     $rindentation_list );
11986                 my $indentation = $leading_spaces_to_go[$ibeg];
11987                 if ( defined($opening_indentation)
11988                     && $indentation > $opening_indentation )
11989                 {
11990                     $adjust_indentation = 1;
11991                 }
11992             }
11993
11994             $default_adjust_indentation = $adjust_indentation;
11995
11996             # Now modify default behavior according to user request:
11997             # handle option to indent non-blocks of the form );  };  ];
11998             # But don't do special indentation to something like ')->pack('
11999             if ( !$block_type_to_go[$ibeg] ) {
12000                 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
12001                 if ( $cti == 1 ) {
12002                     if (   $i_terminal <= $ibeg + 1
12003                         || $is_semicolon_terminated )
12004                     {
12005                         $adjust_indentation = 2;
12006                     }
12007                     else {
12008                         $adjust_indentation = 0;
12009                     }
12010                 }
12011                 elsif ( $cti == 2 ) {
12012                     if ($is_semicolon_terminated) {
12013                         $adjust_indentation = 3;
12014                     }
12015                     else {
12016                         $adjust_indentation = 0;
12017                     }
12018                 }
12019                 elsif ( $cti == 3 ) {
12020                     $adjust_indentation = 3;
12021                 }
12022             }
12023
12024             # handle option to indent blocks
12025             else {
12026                 if (
12027                     $rOpts->{'indent-closing-brace'}
12028                     && (
12029                         $i_terminal == $ibeg    #  isolated terminal '}'
12030                         || $is_semicolon_terminated
12031                     )
12032                   )                             #  } xxxx ;
12033                 {
12034                     $adjust_indentation = 3;
12035                 }
12036             }
12037         }
12038
12039         # if at ');', '};', '>;', and '];' of a terminal qw quote
12040         elsif ($$rpatterns[0] =~ /^qb*;$/
12041             && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
12042         {
12043             if ( $closing_token_indentation{$1} == 0 ) {
12044                 $adjust_indentation = 1;
12045             }
12046             else {
12047                 $adjust_indentation = 3;
12048             }
12049         }
12050
12051         # if line begins with a ':', align it with any
12052         # previous line leading with corresponding ?
12053         elsif ( $types_to_go[$ibeg] eq ':' ) {
12054             (
12055                 $opening_indentation, $opening_offset,
12056                 $is_leading,          $opening_exists
12057               )
12058               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12059                 $rindentation_list );
12060             if ($is_leading) { $adjust_indentation = 2; }
12061         }
12062
12063         ##########################################################
12064         # Section 2: set indentation according to flag set above
12065         #
12066         # Select the indentation object to define leading
12067         # whitespace.  If we are outdenting something like '} } );'
12068         # then we want to use one level below the last token
12069         # ($i_terminal) in order to get it to fully outdent through
12070         # all levels.
12071         ##########################################################
12072         my $indentation;
12073         my $lev;
12074         my $level_end = $levels_to_go[$iend];
12075
12076         if ( $adjust_indentation == 0 ) {
12077             $indentation = $leading_spaces_to_go[$ibeg];
12078             $lev         = $levels_to_go[$ibeg];
12079         }
12080         elsif ( $adjust_indentation == 1 ) {
12081             $indentation = $reduced_spaces_to_go[$i_terminal];
12082             $lev         = $levels_to_go[$i_terminal];
12083         }
12084
12085         # handle indented closing token which aligns with opening token
12086         elsif ( $adjust_indentation == 2 ) {
12087
12088             # handle option to align closing token with opening token
12089             $lev = $levels_to_go[$ibeg];
12090
12091             # calculate spaces needed to align with opening token
12092             my $space_count =
12093               get_SPACES($opening_indentation) + $opening_offset;
12094
12095             # Indent less than the previous line.
12096             #
12097             # Problem: For -lp we don't exactly know what it was if there
12098             # were recoverable spaces sent to the aligner.  A good solution
12099             # would be to force a flush of the vertical alignment buffer, so
12100             # that we would know.  For now, this rule is used for -lp:
12101             #
12102             # When the last line did not start with a closing token we will
12103             # be optimistic that the aligner will recover everything wanted.
12104             #
12105             # This rule will prevent us from breaking a hierarchy of closing
12106             # tokens, and in a worst case will leave a closing paren too far
12107             # indented, but this is better than frequently leaving it not
12108             # indented enough.
12109             my $last_spaces = get_SPACES($last_indentation_written);
12110             if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
12111                 $last_spaces +=
12112                   get_RECOVERABLE_SPACES($last_indentation_written);
12113             }
12114
12115             # reset the indentation to the new space count if it works
12116             # only options are all or none: nothing in-between looks good
12117             $lev = $levels_to_go[$ibeg];
12118             if ( $space_count < $last_spaces ) {
12119                 if ($rOpts_line_up_parentheses) {
12120                     my $lev = $levels_to_go[$ibeg];
12121                     $indentation =
12122                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12123                 }
12124                 else {
12125                     $indentation = $space_count;
12126                 }
12127             }
12128
12129             # revert to default if it doesnt work
12130             else {
12131                 $space_count = leading_spaces_to_go($ibeg);
12132                 if ( $default_adjust_indentation == 0 ) {
12133                     $indentation = $leading_spaces_to_go[$ibeg];
12134                 }
12135                 elsif ( $default_adjust_indentation == 1 ) {
12136                     $indentation = $reduced_spaces_to_go[$i_terminal];
12137                     $lev         = $levels_to_go[$i_terminal];
12138                 }
12139             }
12140         }
12141
12142         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
12143         else {
12144
12145             # handle -icb (indented closing code block braces)
12146             # Updated method for indented block braces: indent one full level if
12147             # there is no continuation indentation.  This will occur for major
12148             # structures such as sub, if, else, but not for things like map
12149             # blocks.
12150             #
12151             # Note: only code blocks without continuation indentation are
12152             # handled here (if, else, unless, ..). In the following snippet,
12153             # the terminal brace of the sort block will have continuation
12154             # indentation as shown so it will not be handled by the coding
12155             # here.  We would have to undo the continuation indentation to do
12156             # this, but it probably looks ok as is.  This is a possible future
12157             # update for semicolon terminated lines.
12158             #
12159             #     if ($sortby eq 'date' or $sortby eq 'size') {
12160             #         @files = sort {
12161             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
12162             #                 or $a cmp $b
12163             #                 } @files;
12164             #         }
12165             #
12166             if (   $block_type_to_go[$ibeg]
12167                 && $ci_levels_to_go[$i_terminal] == 0 )
12168             {
12169                 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
12170                 $indentation = $spaces + $rOpts_indent_columns;
12171
12172                 # NOTE: for -lp we could create a new indentation object, but
12173                 # there is probably no need to do it
12174             }
12175
12176             # handle -icp and any -icb block braces which fall through above
12177             # test such as the 'sort' block mentioned above.
12178             else {
12179
12180                 # There are currently two ways to handle -icp...
12181                 # One way is to use the indentation of the previous line:
12182                 # $indentation = $last_indentation_written;
12183
12184                 # The other way is to use the indentation that the previous line
12185                 # would have had if it hadn't been adjusted:
12186                 $indentation = $last_unadjusted_indentation;
12187
12188                 # Current method: use the minimum of the two. This avoids
12189                 # inconsistent indentation.
12190                 if ( get_SPACES($last_indentation_written) <
12191                     get_SPACES($indentation) )
12192                 {
12193                     $indentation = $last_indentation_written;
12194                 }
12195             }
12196
12197             # use previous indentation but use own level
12198             # to cause list to be flushed properly
12199             $lev = $levels_to_go[$ibeg];
12200         }
12201
12202         # remember indentation except for multi-line quotes, which get
12203         # no indentation
12204         unless ( $ibeg == 0 && $starting_in_quote ) {
12205             $last_indentation_written    = $indentation;
12206             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
12207             $last_leading_token          = $tokens_to_go[$ibeg];
12208         }
12209
12210         # be sure lines with leading closing tokens are not outdented more
12211         # than the line which contained the corresponding opening token.
12212
12213         #############################################################
12214         # updated per bug report in alex_bug.pl: we must not
12215         # mess with the indentation of closing logical braces so
12216         # we must treat something like '} else {' as if it were
12217         # an isolated brace my $is_isolated_block_brace = (
12218         # $iend == $ibeg ) && $block_type_to_go[$ibeg];
12219         #############################################################
12220         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
12221           && ( $iend == $ibeg
12222             || $is_if_elsif_else_unless_while_until_for_foreach{
12223                 $block_type_to_go[$ibeg] } );
12224
12225         # only do this for a ':; which is aligned with its leading '?'
12226         my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
12227         if (   defined($opening_indentation)
12228             && !$is_isolated_block_brace
12229             && !$is_unaligned_colon )
12230         {
12231             if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
12232                 $indentation = $opening_indentation;
12233             }
12234         }
12235
12236         # remember the indentation of each line of this batch
12237         push @{$rindentation_list}, $indentation;
12238
12239         # outdent lines with certain leading tokens...
12240         if (
12241
12242             # must be first word of this batch
12243             $ibeg == 0
12244
12245             # and ...
12246             && (
12247
12248                 # certain leading keywords if requested
12249                 (
12250                        $rOpts->{'outdent-keywords'}
12251                     && $types_to_go[$ibeg] eq 'k'
12252                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
12253                 )
12254
12255                 # or labels if requested
12256                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
12257
12258                 # or static block comments if requested
12259                 || (   $types_to_go[$ibeg] eq '#'
12260                     && $rOpts->{'outdent-static-block-comments'}
12261                     && $is_static_block_comment )
12262             )
12263           )
12264
12265         {
12266             my $space_count = leading_spaces_to_go($ibeg);
12267             if ( $space_count > 0 ) {
12268                 $space_count -= $rOpts_continuation_indentation;
12269                 $is_outdented_line = 1;
12270                 if ( $space_count < 0 ) { $space_count = 0 }
12271
12272                 # do not promote a spaced static block comment to non-spaced;
12273                 # this is not normally necessary but could be for some
12274                 # unusual user inputs (such as -ci = -i)
12275                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
12276                     $space_count = 1;
12277                 }
12278
12279                 if ($rOpts_line_up_parentheses) {
12280                     $indentation =
12281                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12282                 }
12283                 else {
12284                     $indentation = $space_count;
12285                 }
12286             }
12287         }
12288
12289         return ( $indentation, $lev, $level_end, $terminal_type,
12290             $is_semicolon_terminated, $is_outdented_line );
12291     }
12292 }
12293
12294 sub set_vertical_tightness_flags {
12295
12296     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
12297
12298     # Define vertical tightness controls for the nth line of a batch.
12299     # We create an array of parameters which tell the vertical aligner
12300     # if we should combine this line with the next line to achieve the
12301     # desired vertical tightness.  The array of parameters contains:
12302     #
12303     #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
12304     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
12305     #             if closing: spaces of padding to use
12306     #   [2] sequence number of container
12307     #   [3] valid flag: do not append if this flag is false. Will be
12308     #       true if appropriate -vt flag is set.  Otherwise, Will be
12309     #       made true only for 2 line container in parens with -lp
12310     #
12311     # These flags are used by sub set_leading_whitespace in
12312     # the vertical aligner
12313
12314     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
12315
12316     # For non-BLOCK tokens, we will need to examine the next line
12317     # too, so we won't consider the last line.
12318     if ( $n < $n_last_line ) {
12319
12320         # see if last token is an opening token...not a BLOCK...
12321         my $ibeg_next = $$ri_first[ $n + 1 ];
12322         my $token_end = $tokens_to_go[$iend];
12323         my $iend_next = $$ri_last[ $n + 1 ];
12324         if (
12325                $type_sequence_to_go[$iend]
12326             && !$block_type_to_go[$iend]
12327             && $is_opening_token{$token_end}
12328             && (
12329                 $opening_vertical_tightness{$token_end} > 0
12330
12331                 # allow 2-line method call to be closed up
12332                 || (   $rOpts_line_up_parentheses
12333                     && $token_end eq '('
12334                     && $iend > $ibeg
12335                     && $types_to_go[ $iend - 1 ] ne 'b' )
12336             )
12337           )
12338         {
12339
12340             # avoid multiple jumps in nesting depth in one line if
12341             # requested
12342             my $ovt       = $opening_vertical_tightness{$token_end};
12343             my $iend_next = $$ri_last[ $n + 1 ];
12344             unless (
12345                 $ovt < 2
12346                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
12347                     $nesting_depth_to_go[$ibeg_next] )
12348               )
12349             {
12350
12351                 # If -vt flag has not been set, mark this as invalid
12352                 # and aligner will validate it if it sees the closing paren
12353                 # within 2 lines.
12354                 my $valid_flag = $ovt;
12355                 @{$rvertical_tightness_flags} =
12356                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
12357             }
12358         }
12359
12360         # see if first token of next line is a closing token...
12361         # ..and be sure this line does not have a side comment
12362         my $token_next = $tokens_to_go[$ibeg_next];
12363         if (   $type_sequence_to_go[$ibeg_next]
12364             && !$block_type_to_go[$ibeg_next]
12365             && $is_closing_token{$token_next}
12366             && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
12367         {
12368             my $ovt = $opening_vertical_tightness{$token_next};
12369             my $cvt = $closing_vertical_tightness{$token_next};
12370             if (
12371
12372                 # never append a trailing line like   )->pack(
12373                 # because it will throw off later alignment
12374                 (
12375                     $nesting_depth_to_go[$ibeg_next] ==
12376                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
12377                 )
12378                 && (
12379                     $cvt == 2
12380                     || (
12381                         $container_environment_to_go[$ibeg_next] ne 'LIST'
12382                         && (
12383                             $cvt == 1
12384
12385                             # allow closing up 2-line method calls
12386                             || (   $rOpts_line_up_parentheses
12387                                 && $token_next eq ')' )
12388                         )
12389                     )
12390                 )
12391               )
12392             {
12393
12394                 # decide which trailing closing tokens to append..
12395                 my $ok = 0;
12396                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
12397                 else {
12398                     my $str = join( '',
12399                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
12400
12401                     # append closing token if followed by comment or ';'
12402                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
12403                 }
12404
12405                 if ($ok) {
12406                     my $valid_flag = $cvt;
12407                     @{$rvertical_tightness_flags} = (
12408                         2,
12409                         $tightness{$token_next} == 2 ? 0 : 1,
12410                         $type_sequence_to_go[$ibeg_next], $valid_flag,
12411                     );
12412                 }
12413             }
12414         }
12415
12416         # Opening Token Right
12417         # If requested, move an isolated trailing opening token to the end of
12418         # the previous line which ended in a comma.  We could do this
12419         # in sub recombine_breakpoints but that would cause problems
12420         # with -lp formatting.  The problem is that indentation will
12421         # quickly move far to the right in nested expressions.  By
12422         # doing it after indentation has been set, we avoid changes
12423         # to the indentation.  Actual movement of the token takes place
12424         # in sub write_leader_and_string.
12425         if (
12426             $opening_token_right{ $tokens_to_go[$ibeg_next] }
12427
12428             # previous line is not opening
12429             # (use -sot to combine with it)
12430             && !$is_opening_token{$token_end}
12431
12432             # previous line ended in one of these
12433             # (add other cases if necessary; '=>' and '.' are not necessary
12434             ##&& ($is_opening_token{$token_end} || $token_end eq ',')
12435             && !$block_type_to_go[$ibeg_next]
12436
12437             # this is a line with just an opening token
12438             && (   $iend_next == $ibeg_next
12439                 || $iend_next == $ibeg_next + 2
12440                 && $types_to_go[$iend_next] eq '#' )
12441
12442             # looks bad if we align vertically with the wrong container
12443             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
12444           )
12445         {
12446             my $valid_flag = 1;
12447             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12448             @{$rvertical_tightness_flags} =
12449               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
12450         }
12451
12452         # Stacking of opening and closing tokens
12453         my $stackable;
12454         my $token_beg_next = $tokens_to_go[$ibeg_next];
12455
12456         # patch to make something like 'qw(' behave like an opening paren
12457         # (aran.t)
12458         if ( $types_to_go[$ibeg_next] eq 'q' ) {
12459             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
12460                 $token_beg_next = $1;
12461             }
12462         }
12463
12464         if (   $is_closing_token{$token_end}
12465             && $is_closing_token{$token_beg_next} )
12466         {
12467             $stackable = $stack_closing_token{$token_beg_next}
12468               unless ( $block_type_to_go[$ibeg_next] )
12469               ;    # shouldn't happen; just checking
12470         }
12471         elsif ($is_opening_token{$token_end}
12472             && $is_opening_token{$token_beg_next} )
12473         {
12474             $stackable = $stack_opening_token{$token_beg_next}
12475               unless ( $block_type_to_go[$ibeg_next] )
12476               ;    # shouldn't happen; just checking
12477         }
12478
12479         if ($stackable) {
12480
12481             my $is_semicolon_terminated;
12482             if ( $n + 1 == $n_last_line ) {
12483                 my ( $terminal_type, $i_terminal ) = terminal_type(
12484                     \@types_to_go, \@block_type_to_go,
12485                     $ibeg_next,    $iend_next
12486                 );
12487                 $is_semicolon_terminated = $terminal_type eq ';'
12488                   && $nesting_depth_to_go[$iend_next] <
12489                   $nesting_depth_to_go[$ibeg_next];
12490             }
12491
12492             # this must be a line with just an opening token
12493             # or end in a semicolon
12494             if (
12495                 $is_semicolon_terminated
12496                 || (   $iend_next == $ibeg_next
12497                     || $iend_next == $ibeg_next + 2
12498                     && $types_to_go[$iend_next] eq '#' )
12499               )
12500             {
12501                 my $valid_flag = 1;
12502                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12503                 @{$rvertical_tightness_flags} =
12504                   ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
12505                   );
12506             }
12507         }
12508     }
12509
12510     # Check for a last line with isolated opening BLOCK curly
12511     elsif ($rOpts_block_brace_vertical_tightness
12512         && $ibeg eq $iend
12513         && $types_to_go[$iend] eq '{'
12514         && $block_type_to_go[$iend] =~
12515         /$block_brace_vertical_tightness_pattern/o )
12516     {
12517         @{$rvertical_tightness_flags} =
12518           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
12519     }
12520
12521     # pack in the sequence numbers of the ends of this line
12522     $rvertical_tightness_flags->[4] = get_seqno($ibeg);
12523     $rvertical_tightness_flags->[5] = get_seqno($iend);
12524     return $rvertical_tightness_flags;
12525 }
12526
12527 sub get_seqno {
12528
12529     # get opening and closing sequence numbers of a token for the vertical
12530     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
12531     # to be treated somewhat like opening and closing tokens for stacking
12532     # tokens by the vertical aligner.
12533     my ($ii) = @_;
12534     my $seqno = $type_sequence_to_go[$ii];
12535     if ( $types_to_go[$ii] eq 'q' ) {
12536         my $SEQ_QW = -1;
12537         if ( $ii > 0 ) {
12538             $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
12539         }
12540         else {
12541             if ( !$ending_in_quote ) {
12542                 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
12543             }
12544         }
12545     }
12546     return ($seqno);
12547 }
12548
12549 {
12550     my %is_vertical_alignment_type;
12551     my %is_vertical_alignment_keyword;
12552
12553     BEGIN {
12554
12555         @_ = qw#
12556           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
12557           { ? : => =~ && || // ~~ !~~
12558           #;
12559         @is_vertical_alignment_type{@_} = (1) x scalar(@_);
12560
12561         @_ = qw(if unless and or err eq ne for foreach while until);
12562         @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
12563     }
12564
12565     sub set_vertical_alignment_markers {
12566
12567         # This routine takes the first step toward vertical alignment of the
12568         # lines of output text.  It looks for certain tokens which can serve as
12569         # vertical alignment markers (such as an '=').
12570         #
12571         # Method: We look at each token $i in this output batch and set
12572         # $matching_token_to_go[$i] equal to those tokens at which we would
12573         # accept vertical alignment.
12574
12575         # nothing to do if we aren't allowed to change whitespace
12576         if ( !$rOpts_add_whitespace ) {
12577             for my $i ( 0 .. $max_index_to_go ) {
12578                 $matching_token_to_go[$i] = '';
12579             }
12580             return;
12581         }
12582
12583         my ( $ri_first, $ri_last ) = @_;
12584
12585         # remember the index of last nonblank token before any sidecomment
12586         my $i_terminal = $max_index_to_go;
12587         if ( $types_to_go[$i_terminal] eq '#' ) {
12588             if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
12589                 if ( $i_terminal > 0 ) { --$i_terminal }
12590             }
12591         }
12592
12593         # look at each line of this batch..
12594         my $last_vertical_alignment_before_index;
12595         my $vert_last_nonblank_type;
12596         my $vert_last_nonblank_token;
12597         my $vert_last_nonblank_block_type;
12598         my $max_line = @$ri_first - 1;
12599         my ( $i, $type, $token, $block_type, $alignment_type );
12600         my ( $ibeg, $iend, $line );
12601
12602         foreach $line ( 0 .. $max_line ) {
12603             $ibeg                                 = $$ri_first[$line];
12604             $iend                                 = $$ri_last[$line];
12605             $last_vertical_alignment_before_index = -1;
12606             $vert_last_nonblank_type              = '';
12607             $vert_last_nonblank_token             = '';
12608             $vert_last_nonblank_block_type        = '';
12609
12610             # look at each token in this output line..
12611             foreach $i ( $ibeg .. $iend ) {
12612                 $alignment_type = '';
12613                 $type           = $types_to_go[$i];
12614                 $block_type     = $block_type_to_go[$i];
12615                 $token          = $tokens_to_go[$i];
12616
12617                 # check for flag indicating that we should not align
12618                 # this token
12619                 if ( $matching_token_to_go[$i] ) {
12620                     $matching_token_to_go[$i] = '';
12621                     next;
12622                 }
12623
12624                 #--------------------------------------------------------
12625                 # First see if we want to align BEFORE this token
12626                 #--------------------------------------------------------
12627
12628                 # The first possible token that we can align before
12629                 # is index 2 because: 1) it doesn't normally make sense to
12630                 # align before the first token and 2) the second
12631                 # token must be a blank if we are to align before
12632                 # the third
12633                 if ( $i < $ibeg + 2 ) { }
12634
12635                 # must follow a blank token
12636                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
12637
12638                 # align a side comment --
12639                 elsif ( $type eq '#' ) {
12640
12641                     unless (
12642
12643                         # it is a static side comment
12644                         (
12645                                $rOpts->{'static-side-comments'}
12646                             && $token =~ /$static_side_comment_pattern/o
12647                         )
12648
12649                         # or a closing side comment
12650                         || (   $vert_last_nonblank_block_type
12651                             && $token =~
12652                             /$closing_side_comment_prefix_pattern/o )
12653                       )
12654                     {
12655                         $alignment_type = $type;
12656                     }    ## Example of a static side comment
12657                 }
12658
12659                 # otherwise, do not align two in a row to create a
12660                 # blank field
12661                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
12662
12663                 # align before one of these keywords
12664                 # (within a line, since $i>1)
12665                 elsif ( $type eq 'k' ) {
12666
12667                     #  /^(if|unless|and|or|eq|ne)$/
12668                     if ( $is_vertical_alignment_keyword{$token} ) {
12669                         $alignment_type = $token;
12670                     }
12671                 }
12672
12673                 # align before one of these types..
12674                 # Note: add '.' after new vertical aligner is operational
12675                 elsif ( $is_vertical_alignment_type{$type} ) {
12676                     $alignment_type = $token;
12677
12678                     # Do not align a terminal token.  Although it might
12679                     # occasionally look ok to do this, it has been found to be
12680                     # a good general rule.  The main problems are:
12681                     # (1) that the terminal token (such as an = or :) might get
12682                     # moved far to the right where it is hard to see because
12683                     # nothing follows it, and
12684                     # (2) doing so may prevent other good alignments.
12685                     if ( $i == $iend || $i >= $i_terminal ) {
12686                         $alignment_type = "";
12687                     }
12688
12689                     # Do not align leading ': (' or '. ('.  This would prevent
12690                     # alignment in something like the following:
12691                     #   $extra_space .=
12692                     #       ( $input_line_number < 10 )  ? "  "
12693                     #     : ( $input_line_number < 100 ) ? " "
12694                     #     :                                "";
12695                     # or
12696                     #  $code =
12697                     #      ( $case_matters ? $accessor : " lc($accessor) " )
12698                     #    . ( $yesno        ? " eq "       : " ne " )
12699                     if (   $i == $ibeg + 2
12700                         && $types_to_go[$ibeg] =~ /^[\.\:]$/
12701                         && $types_to_go[ $i - 1 ] eq 'b' )
12702                     {
12703                         $alignment_type = "";
12704                     }
12705
12706                     # For a paren after keyword, only align something like this:
12707                     #    if    ( $a ) { &a }
12708                     #    elsif ( $b ) { &b }
12709                     if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
12710                         $alignment_type = ""
12711                           unless $vert_last_nonblank_token =~
12712                               /^(if|unless|elsif)$/;
12713                     }
12714
12715                     # be sure the alignment tokens are unique
12716                     # This didn't work well: reason not determined
12717                     # if ($token ne $type) {$alignment_type .= $type}
12718                 }
12719
12720                 # NOTE: This is deactivated because it causes the previous
12721                 # if/elsif alignment to fail
12722                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
12723                 #{ $alignment_type = $type; }
12724
12725                 if ($alignment_type) {
12726                     $last_vertical_alignment_before_index = $i;
12727                 }
12728
12729                 #--------------------------------------------------------
12730                 # Next see if we want to align AFTER the previous nonblank
12731                 #--------------------------------------------------------
12732
12733                 # We want to line up ',' and interior ';' tokens, with the added
12734                 # space AFTER these tokens.  (Note: interior ';' is included
12735                 # because it may occur in short blocks).
12736                 if (
12737
12738                     # we haven't already set it
12739                     !$alignment_type
12740
12741                     # and its not the first token of the line
12742                     && ( $i > $ibeg )
12743
12744                     # and it follows a blank
12745                     && $types_to_go[ $i - 1 ] eq 'b'
12746
12747                     # and previous token IS one of these:
12748                     && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12749
12750                     # and it's NOT one of these
12751                     && ( $type !~ /^[b\#\)\]\}]$/ )
12752
12753                     # then go ahead and align
12754                   )
12755
12756                 {
12757                     $alignment_type = $vert_last_nonblank_type;
12758                 }
12759
12760                 #--------------------------------------------------------
12761                 # then store the value
12762                 #--------------------------------------------------------
12763                 $matching_token_to_go[$i] = $alignment_type;
12764                 if ( $type ne 'b' ) {
12765                     $vert_last_nonblank_type       = $type;
12766                     $vert_last_nonblank_token      = $token;
12767                     $vert_last_nonblank_block_type = $block_type;
12768                 }
12769             }
12770         }
12771     }
12772 }
12773
12774 sub terminal_type {
12775
12776     #    returns type of last token on this line (terminal token), as follows:
12777     #    returns # for a full-line comment
12778     #    returns ' ' for a blank line
12779     #    otherwise returns final token type
12780
12781     my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
12782
12783     # check for full-line comment..
12784     if ( $$rtype[$ibeg] eq '#' ) {
12785         return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
12786     }
12787     else {
12788
12789         # start at end and walk bakwards..
12790         for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
12791
12792             # skip past any side comment and blanks
12793             next if ( $$rtype[$i] eq 'b' );
12794             next if ( $$rtype[$i] eq '#' );
12795
12796             # found it..make sure it is a BLOCK termination,
12797             # but hide a terminal } after sort/grep/map because it is not
12798             # necessarily the end of the line.  (terminal.t)
12799             my $terminal_type = $$rtype[$i];
12800             if (
12801                 $terminal_type eq '}'
12802                 && ( !$$rblock_type[$i]
12803                     || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
12804               )
12805             {
12806                 $terminal_type = 'b';
12807             }
12808             return wantarray ? ( $terminal_type, $i ) : $terminal_type;
12809         }
12810
12811         # empty line
12812         return wantarray ? ( ' ', $ibeg ) : ' ';
12813     }
12814 }
12815
12816 {
12817     my %is_good_keyword_breakpoint;
12818     my %is_lt_gt_le_ge;
12819
12820     sub set_bond_strengths {
12821
12822         BEGIN {
12823
12824             @_ = qw(if unless while until for foreach);
12825             @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12826
12827             @_ = qw(lt gt le ge);
12828             @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12829
12830             ###############################################################
12831             # NOTE: NO_BREAK's set here are HINTS which may not be honored;
12832             # essential NO_BREAKS's must be enforced in section 2, below.
12833             ###############################################################
12834
12835             # adding NEW_TOKENS: add a left and right bond strength by
12836             # mimmicking what is done for an existing token type.  You
12837             # can skip this step at first and take the default, then
12838             # tweak later to get desired results.
12839
12840             # The bond strengths should roughly follow precenence order where
12841             # possible.  If you make changes, please check the results very
12842             # carefully on a variety of scripts.
12843
12844             # no break around possible filehandle
12845             $left_bond_strength{'Z'}  = NO_BREAK;
12846             $right_bond_strength{'Z'} = NO_BREAK;
12847
12848             # never put a bare word on a new line:
12849             # example print (STDERR, "bla"); will fail with break after (
12850             $left_bond_strength{'w'} = NO_BREAK;
12851
12852         # blanks always have infinite strength to force breaks after real tokens
12853             $right_bond_strength{'b'} = NO_BREAK;
12854
12855             # try not to break on exponentation
12856             @_                       = qw" ** .. ... <=> ";
12857             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12858             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12859
12860             # The comma-arrow has very low precedence but not a good break point
12861             $left_bond_strength{'=>'}  = NO_BREAK;
12862             $right_bond_strength{'=>'} = NOMINAL;
12863
12864             # ok to break after label
12865             $left_bond_strength{'J'}  = NO_BREAK;
12866             $right_bond_strength{'J'} = NOMINAL;
12867             $left_bond_strength{'j'}  = STRONG;
12868             $right_bond_strength{'j'} = STRONG;
12869             $left_bond_strength{'A'}  = STRONG;
12870             $right_bond_strength{'A'} = STRONG;
12871
12872             $left_bond_strength{'->'}  = STRONG;
12873             $right_bond_strength{'->'} = VERY_STRONG;
12874
12875             # breaking AFTER modulus operator is ok:
12876             @_ = qw" % ";
12877             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12878             @right_bond_strength{@_} =
12879               ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
12880
12881             # Break AFTER math operators * and /
12882             @_                       = qw" * / x  ";
12883             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12884             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12885
12886             # Break AFTER weakest math operators + and -
12887             # Make them weaker than * but a bit stronger than '.'
12888             @_ = qw" + - ";
12889             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12890             @right_bond_strength{@_} =
12891               ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
12892
12893             # breaking BEFORE these is just ok:
12894             @_                       = qw" >> << ";
12895             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12896             @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
12897
12898             # breaking before the string concatenation operator seems best
12899             # because it can be hard to see at the end of a line
12900             $right_bond_strength{'.'} = STRONG;
12901             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
12902
12903             @_                       = qw"} ] ) ";
12904             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12905             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12906
12907             # make these a little weaker than nominal so that they get
12908             # favored for end-of-line characters
12909             @_ = qw"!= == =~ !~ ~~ !~~";
12910             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12911             @right_bond_strength{@_} =
12912               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12913
12914             # break AFTER these
12915             @_ = qw" < >  | & >= <=";
12916             @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12917             @right_bond_strength{@_} =
12918               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12919
12920             # breaking either before or after a quote is ok
12921             # but bias for breaking before a quote
12922             $left_bond_strength{'Q'}  = NOMINAL;
12923             $right_bond_strength{'Q'} = NOMINAL + 0.02;
12924             $left_bond_strength{'q'}  = NOMINAL;
12925             $right_bond_strength{'q'} = NOMINAL;
12926
12927             # starting a line with a keyword is usually ok
12928             $left_bond_strength{'k'} = NOMINAL;
12929
12930             # we usually want to bond a keyword strongly to what immediately
12931             # follows, rather than leaving it stranded at the end of a line
12932             $right_bond_strength{'k'} = STRONG;
12933
12934             $left_bond_strength{'G'}  = NOMINAL;
12935             $right_bond_strength{'G'} = STRONG;
12936
12937             # it is good to break AFTER various assignment operators
12938             @_ = qw(
12939               = **= += *= &= <<= &&=
12940               -= /= |= >>= ||= //=
12941               .= %= ^=
12942               x=
12943             );
12944             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12945             @right_bond_strength{@_} =
12946               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12947
12948             # break BEFORE '&&' and '||' and '//'
12949             # set strength of '||' to same as '=' so that chains like
12950             # $a = $b || $c || $d   will break before the first '||'
12951             $right_bond_strength{'||'} = NOMINAL;
12952             $left_bond_strength{'||'}  = $right_bond_strength{'='};
12953
12954             # same thing for '//'
12955             $right_bond_strength{'//'} = NOMINAL;
12956             $left_bond_strength{'//'}  = $right_bond_strength{'='};
12957
12958             # set strength of && a little higher than ||
12959             $right_bond_strength{'&&'} = NOMINAL;
12960             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
12961
12962             $left_bond_strength{';'}  = VERY_STRONG;
12963             $right_bond_strength{';'} = VERY_WEAK;
12964             $left_bond_strength{'f'}  = VERY_STRONG;
12965
12966             # make right strength of for ';' a little less than '='
12967             # to make for contents break after the ';' to avoid this:
12968             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12969             #     $number_of_fields )
12970             # and make it weaker than ',' and 'and' too
12971             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12972
12973             # The strengths of ?/: should be somewhere between
12974             # an '=' and a quote (NOMINAL),
12975             # make strength of ':' slightly less than '?' to help
12976             # break long chains of ? : after the colons
12977             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
12978             $right_bond_strength{':'} = NO_BREAK;
12979             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
12980             $right_bond_strength{'?'} = NO_BREAK;
12981
12982             $left_bond_strength{','}  = VERY_STRONG;
12983             $right_bond_strength{','} = VERY_WEAK;
12984
12985             # Set bond strengths of certain keywords
12986             # make 'or', 'err', 'and' slightly weaker than a ','
12987             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
12988             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
12989             $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
12990             $left_bond_strength{'xor'}  = NOMINAL;
12991             $right_bond_strength{'and'} = NOMINAL;
12992             $right_bond_strength{'or'}  = NOMINAL;
12993             $right_bond_strength{'err'} = NOMINAL;
12994             $right_bond_strength{'xor'} = STRONG;
12995         }
12996
12997         # patch-its always ok to break at end of line
12998         $nobreak_to_go[$max_index_to_go] = 0;
12999
13000         # adding a small 'bias' to strengths is a simple way to make a line
13001         # break at the first of a sequence of identical terms.  For example,
13002         # to force long string of conditional operators to break with
13003         # each line ending in a ':', we can add a small number to the bond
13004         # strength of each ':'
13005         my $colon_bias = 0;
13006         my $amp_bias   = 0;
13007         my $bar_bias   = 0;
13008         my $and_bias   = 0;
13009         my $or_bias    = 0;
13010         my $dot_bias   = 0;
13011         my $f_bias     = 0;
13012         my $code_bias  = -.01;
13013         my $type       = 'b';
13014         my $token      = ' ';
13015         my $last_type;
13016         my $last_nonblank_type  = $type;
13017         my $last_nonblank_token = $token;
13018         my $delta_bias          = 0.0001;
13019         my $list_str            = $left_bond_strength{'?'};
13020
13021         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
13022             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
13023         );
13024
13025         # preliminary loop to compute bond strengths
13026         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
13027             $last_type = $type;
13028             if ( $type ne 'b' ) {
13029                 $last_nonblank_type  = $type;
13030                 $last_nonblank_token = $token;
13031             }
13032             $type = $types_to_go[$i];
13033
13034             # strength on both sides of a blank is the same
13035             if ( $type eq 'b' && $last_type ne 'b' ) {
13036                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
13037                 next;
13038             }
13039
13040             $token               = $tokens_to_go[$i];
13041             $block_type          = $block_type_to_go[$i];
13042             $i_next              = $i + 1;
13043             $next_type           = $types_to_go[$i_next];
13044             $next_token          = $tokens_to_go[$i_next];
13045             $total_nesting_depth = $nesting_depth_to_go[$i_next];
13046             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13047             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
13048             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13049
13050             # Some token chemistry...  The decision about where to break a
13051             # line depends upon a "bond strength" between tokens.  The LOWER
13052             # the bond strength, the MORE likely a break.  The strength
13053             # values are based on trial-and-error, and need to be tweaked
13054             # occasionally to get desired results.  Things to keep in mind
13055             # are:
13056             #   1. relative strengths are important.  small differences
13057             #      in strengths can make big formatting differences.
13058             #   2. each indentation level adds one unit of bond strength
13059             #   3. a value of NO_BREAK makes an unbreakable bond
13060             #   4. a value of VERY_WEAK is the strength of a ','
13061             #   5. values below NOMINAL are considered ok break points
13062             #   6. values above NOMINAL are considered poor break points
13063             # We are computing the strength of the bond between the current
13064             # token and the NEXT token.
13065             my $bond_str = VERY_STRONG;    # a default, high strength
13066
13067             #---------------------------------------------------------------
13068             # section 1:
13069             # use minimum of left and right bond strengths if defined;
13070             # digraphs and trigraphs like to break on their left
13071             #---------------------------------------------------------------
13072             my $bsr = $right_bond_strength{$type};
13073
13074             if ( !defined($bsr) ) {
13075
13076                 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
13077                     $bsr = STRONG;
13078                 }
13079                 else {
13080                     $bsr = VERY_STRONG;
13081                 }
13082             }
13083
13084             # define right bond strengths of certain keywords
13085             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
13086                 $bsr = $right_bond_strength{$token};
13087             }
13088             elsif ( $token eq 'ne' or $token eq 'eq' ) {
13089                 $bsr = NOMINAL;
13090             }
13091             my $bsl = $left_bond_strength{$next_nonblank_type};
13092
13093             # set terminal bond strength to the nominal value
13094             # this will cause good preceding breaks to be retained
13095             if ( $i_next_nonblank > $max_index_to_go ) {
13096                 $bsl = NOMINAL;
13097             }
13098
13099             if ( !defined($bsl) ) {
13100
13101                 if (   $is_digraph{$next_nonblank_type}
13102                     || $is_trigraph{$next_nonblank_type} )
13103                 {
13104                     $bsl = WEAK;
13105                 }
13106                 else {
13107                     $bsl = VERY_STRONG;
13108                 }
13109             }
13110
13111             # define right bond strengths of certain keywords
13112             if ( $next_nonblank_type eq 'k'
13113                 && defined( $left_bond_strength{$next_nonblank_token} ) )
13114             {
13115                 $bsl = $left_bond_strength{$next_nonblank_token};
13116             }
13117             elsif ($next_nonblank_token eq 'ne'
13118                 or $next_nonblank_token eq 'eq' )
13119             {
13120                 $bsl = NOMINAL;
13121             }
13122             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
13123                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
13124             }
13125
13126             # Note: it might seem that we would want to keep a NO_BREAK if
13127             # either token has this value.  This didn't work, because in an
13128             # arrow list, it prevents the comma from separating from the
13129             # following bare word (which is probably quoted by its arrow).
13130             # So necessary NO_BREAK's have to be handled as special cases
13131             # in the final section.
13132             $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
13133             my $bond_str_1 = $bond_str;
13134
13135             #---------------------------------------------------------------
13136             # section 2:
13137             # special cases
13138             #---------------------------------------------------------------
13139
13140             # allow long lines before final { in an if statement, as in:
13141             #    if (..........
13142             #      ..........)
13143             #    {
13144             #
13145             # Otherwise, the line before the { tends to be too short.
13146             if ( $type eq ')' ) {
13147                 if ( $next_nonblank_type eq '{' ) {
13148                     $bond_str = VERY_WEAK + 0.03;
13149                 }
13150             }
13151
13152             elsif ( $type eq '(' ) {
13153                 if ( $next_nonblank_type eq '{' ) {
13154                     $bond_str = NOMINAL;
13155                 }
13156             }
13157
13158             # break on something like '} (', but keep this stronger than a ','
13159             # example is in 'howe.pl'
13160             elsif ( $type eq 'R' or $type eq '}' ) {
13161                 if ( $next_nonblank_type eq '(' ) {
13162                     $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
13163                 }
13164             }
13165
13166             #-----------------------------------------------------------------
13167             # adjust bond strength bias
13168             #-----------------------------------------------------------------
13169
13170             # TESTING: add any bias set by sub scan_list at old comma
13171             # break points.
13172             elsif ( $type eq ',' ) {
13173                 $bond_str += $bond_strength_to_go[$i];
13174             }
13175
13176             elsif ( $type eq 'f' ) {
13177                 $bond_str += $f_bias;
13178                 $f_bias   += $delta_bias;
13179             }
13180
13181           # in long ?: conditionals, bias toward just one set per line (colon.t)
13182             elsif ( $type eq ':' ) {
13183                 if ( !$want_break_before{$type} ) {
13184                     $bond_str   += $colon_bias;
13185                     $colon_bias += $delta_bias;
13186                 }
13187             }
13188
13189             if (   $next_nonblank_type eq ':'
13190                 && $want_break_before{$next_nonblank_type} )
13191             {
13192                 $bond_str   += $colon_bias;
13193                 $colon_bias += $delta_bias;
13194             }
13195
13196             # if leading '.' is used, align all but 'short' quotes;
13197             # the idea is to not place something like "\n" on a single line.
13198             elsif ( $next_nonblank_type eq '.' ) {
13199                 if ( $want_break_before{'.'} ) {
13200                     unless (
13201                         $last_nonblank_type eq '.'
13202                         && (
13203                             length($token) <=
13204                             $rOpts_short_concatenation_item_length )
13205                         && ( $token !~ /^[\)\]\}]$/ )
13206                       )
13207                     {
13208                         $dot_bias += $delta_bias;
13209                     }
13210                     $bond_str += $dot_bias;
13211                 }
13212             }
13213             elsif ($next_nonblank_type eq '&&'
13214                 && $want_break_before{$next_nonblank_type} )
13215             {
13216                 $bond_str += $amp_bias;
13217                 $amp_bias += $delta_bias;
13218             }
13219             elsif ($next_nonblank_type eq '||'
13220                 && $want_break_before{$next_nonblank_type} )
13221             {
13222                 $bond_str += $bar_bias;
13223                 $bar_bias += $delta_bias;
13224             }
13225             elsif ( $next_nonblank_type eq 'k' ) {
13226
13227                 if (   $next_nonblank_token eq 'and'
13228                     && $want_break_before{$next_nonblank_token} )
13229                 {
13230                     $bond_str += $and_bias;
13231                     $and_bias += $delta_bias;
13232                 }
13233                 elsif ($next_nonblank_token =~ /^(or|err)$/
13234                     && $want_break_before{$next_nonblank_token} )
13235                 {
13236                     $bond_str += $or_bias;
13237                     $or_bias  += $delta_bias;
13238                 }
13239
13240                 # FIXME: needs more testing
13241                 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
13242                     $bond_str = $list_str if ( $bond_str > $list_str );
13243                 }
13244                 elsif ( $token eq 'err'
13245                     && !$want_break_before{$token} )
13246                 {
13247                     $bond_str += $or_bias;
13248                     $or_bias  += $delta_bias;
13249                 }
13250             }
13251
13252             if ( $type eq ':'
13253                 && !$want_break_before{$type} )
13254             {
13255                 $bond_str   += $colon_bias;
13256                 $colon_bias += $delta_bias;
13257             }
13258             elsif ( $type eq '&&'
13259                 && !$want_break_before{$type} )
13260             {
13261                 $bond_str += $amp_bias;
13262                 $amp_bias += $delta_bias;
13263             }
13264             elsif ( $type eq '||'
13265                 && !$want_break_before{$type} )
13266             {
13267                 $bond_str += $bar_bias;
13268                 $bar_bias += $delta_bias;
13269             }
13270             elsif ( $type eq 'k' ) {
13271
13272                 if ( $token eq 'and'
13273                     && !$want_break_before{$token} )
13274                 {
13275                     $bond_str += $and_bias;
13276                     $and_bias += $delta_bias;
13277                 }
13278                 elsif ( $token eq 'or'
13279                     && !$want_break_before{$token} )
13280                 {
13281                     $bond_str += $or_bias;
13282                     $or_bias  += $delta_bias;
13283                 }
13284             }
13285
13286             # keep matrix and hash indices together
13287             # but make them a little below STRONG to allow breaking open
13288             # something like {'some-word'}{'some-very-long-word'} at the }{
13289             # (bracebrk.t)
13290             if (   ( $type eq ']' or $type eq 'R' )
13291                 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
13292               )
13293             {
13294                 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
13295             }
13296
13297             if ( $next_nonblank_token =~ /^->/ ) {
13298
13299                 # increase strength to the point where a break in the following
13300                 # will be after the opening paren rather than at the arrow:
13301                 #    $a->$b($c);
13302                 if ( $type eq 'i' ) {
13303                     $bond_str = 1.45 * STRONG;
13304                 }
13305
13306                 elsif ( $type =~ /^[\)\]\}R]$/ ) {
13307                     $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
13308                 }
13309
13310                 # otherwise make strength before an '->' a little over a '+'
13311                 else {
13312                     if ( $bond_str <= NOMINAL ) {
13313                         $bond_str = NOMINAL + 0.01;
13314                     }
13315                 }
13316             }
13317
13318             if ( $token eq ')' && $next_nonblank_token eq '[' ) {
13319                 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
13320             }
13321
13322             # map1.t -- correct for a quirk in perl
13323             if (   $token eq '('
13324                 && $next_nonblank_type eq 'i'
13325                 && $last_nonblank_type eq 'k'
13326                 && $is_sort_map_grep{$last_nonblank_token} )
13327
13328               #     /^(sort|map|grep)$/ )
13329             {
13330                 $bond_str = NO_BREAK;
13331             }
13332
13333             # extrude.t: do not break before paren at:
13334             #    -l pid_filename(
13335             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
13336                 $bond_str = NO_BREAK;
13337             }
13338
13339             # good to break after end of code blocks
13340             if ( $type eq '}' && $block_type ) {
13341
13342                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
13343                 $code_bias += $delta_bias;
13344             }
13345
13346             if ( $type eq 'k' ) {
13347
13348                 # allow certain control keywords to stand out
13349                 if (   $next_nonblank_type eq 'k'
13350                     && $is_last_next_redo_return{$token} )
13351                 {
13352                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
13353                 }
13354
13355 # Don't break after keyword my.  This is a quick fix for a
13356 # rare problem with perl. An example is this line from file
13357 # Container.pm:
13358 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
13359
13360                 if ( $token eq 'my' ) {
13361                     $bond_str = NO_BREAK;
13362                 }
13363
13364             }
13365
13366             # good to break before 'if', 'unless', etc
13367             if ( $is_if_brace_follower{$next_nonblank_token} ) {
13368                 $bond_str = VERY_WEAK;
13369             }
13370
13371             if ( $next_nonblank_type eq 'k' ) {
13372
13373                 # keywords like 'unless', 'if', etc, within statements
13374                 # make good breaks
13375                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
13376                     $bond_str = VERY_WEAK / 1.05;
13377                 }
13378             }
13379
13380             # try not to break before a comma-arrow
13381             elsif ( $next_nonblank_type eq '=>' ) {
13382                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
13383             }
13384
13385          #----------------------------------------------------------------------
13386          # only set NO_BREAK's from here on
13387          #----------------------------------------------------------------------
13388             if ( $type eq 'C' or $type eq 'U' ) {
13389
13390                 # use strict requires that bare word and => not be separated
13391                 if ( $next_nonblank_type eq '=>' ) {
13392                     $bond_str = NO_BREAK;
13393                 }
13394
13395                 # Never break between a bareword and a following paren because
13396                 # perl may give an error.  For example, if a break is placed
13397                 # between 'to_filehandle' and its '(' the following line will
13398                 # give a syntax error [Carp.pm]: my( $no) =fileno(
13399                 # to_filehandle( $in)) ;
13400                 if ( $next_nonblank_token eq '(' ) {
13401                     $bond_str = NO_BREAK;
13402                 }
13403             }
13404
13405            # use strict requires that bare word within braces not start new line
13406             elsif ( $type eq 'L' ) {
13407
13408                 if ( $next_nonblank_type eq 'w' ) {
13409                     $bond_str = NO_BREAK;
13410                 }
13411             }
13412
13413             # in older version of perl, use strict can cause problems with
13414             # breaks before bare words following opening parens.  For example,
13415             # this will fail under older versions if a break is made between
13416             # '(' and 'MAIL':
13417             #  use strict;
13418             #  open( MAIL, "a long filename or command");
13419             #  close MAIL;
13420             elsif ( $type eq '{' ) {
13421
13422                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
13423
13424                     # but it's fine to break if the word is followed by a '=>'
13425                     # or if it is obviously a sub call
13426                     my $i_next_next_nonblank = $i_next_nonblank + 1;
13427                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
13428                     if (   $next_next_type eq 'b'
13429                         && $i_next_nonblank < $max_index_to_go )
13430                     {
13431                         $i_next_next_nonblank++;
13432                         $next_next_type = $types_to_go[$i_next_next_nonblank];
13433                     }
13434
13435                     ##if ( $next_next_type ne '=>' ) {
13436                     # these are ok: '->xxx', '=>', '('
13437
13438                     # We'll check for an old breakpoint and keep a leading
13439                     # bareword if it was that way in the input file.
13440                     # Presumably it was ok that way.  For example, the
13441                     # following would remain unchanged:
13442                     #
13443                     # @months = (
13444                     #   January,   February, March,    April,
13445                     #   May,       June,     July,     August,
13446                     #   September, October,  November, December,
13447                     # );
13448                     #
13449                     # This should be sufficient:
13450                     if ( !$old_breakpoint_to_go[$i]
13451                         && ( $next_next_type eq ',' || $next_next_type eq '}' )
13452                       )
13453                     {
13454                         $bond_str = NO_BREAK;
13455                     }
13456                 }
13457             }
13458
13459             elsif ( $type eq 'w' ) {
13460
13461                 if ( $next_nonblank_type eq 'R' ) {
13462                     $bond_str = NO_BREAK;
13463                 }
13464
13465                 # use strict requires that bare word and => not be separated
13466                 if ( $next_nonblank_type eq '=>' ) {
13467                     $bond_str = NO_BREAK;
13468                 }
13469             }
13470
13471             # in fact, use strict hates bare words on any new line.  For
13472             # example, a break before the underscore here provokes the
13473             # wrath of use strict:
13474             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
13475             elsif ( $type eq 'F' ) {
13476                 $bond_str = NO_BREAK;
13477             }
13478
13479             # use strict does not allow separating type info from trailing { }
13480             # testfile is readmail.pl
13481             elsif ( $type eq 't' or $type eq 'i' ) {
13482
13483                 if ( $next_nonblank_type eq 'L' ) {
13484                     $bond_str = NO_BREAK;
13485                 }
13486             }
13487
13488             # Do not break between a possible filehandle and a ? or / and do
13489             # not introduce a break after it if there is no blank
13490             # (extrude.t)
13491             elsif ( $type eq 'Z' ) {
13492
13493                 # dont break..
13494                 if (
13495
13496                     # if there is no blank and we do not want one. Examples:
13497                     #    print $x++    # do not break after $x
13498                     #    print HTML"HELLO"   # break ok after HTML
13499                     (
13500                            $next_type ne 'b'
13501                         && defined( $want_left_space{$next_type} )
13502                         && $want_left_space{$next_type} == WS_NO
13503                     )
13504
13505                     # or we might be followed by the start of a quote
13506                     || $next_nonblank_type =~ /^[\/\?]$/
13507                   )
13508                 {
13509                     $bond_str = NO_BREAK;
13510                 }
13511             }
13512
13513             # Do not break before a possible file handle
13514             if ( $next_nonblank_type eq 'Z' ) {
13515                 $bond_str = NO_BREAK;
13516             }
13517
13518             # As a defensive measure, do not break between a '(' and a
13519             # filehandle.  In some cases, this can cause an error.  For
13520             # example, the following program works:
13521             #    my $msg="hi!\n";
13522             #    print
13523             #    ( STDOUT
13524             #    $msg
13525             #    );
13526             #
13527             # But this program fails:
13528             #    my $msg="hi!\n";
13529             #    print
13530             #    (
13531             #    STDOUT
13532             #    $msg
13533             #    );
13534             #
13535             # This is normally only a problem with the 'extrude' option
13536             if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
13537                 $bond_str = NO_BREAK;
13538             }
13539
13540             # Breaking before a ++ can cause perl to guess wrong. For
13541             # example the following line will cause a syntax error
13542             # with -extrude if we break between '$i' and '++' [fixstyle2]
13543             #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
13544             elsif ( $next_nonblank_type eq '++' ) {
13545                 $bond_str = NO_BREAK;
13546             }
13547
13548             # Breaking before a ? before a quote can cause trouble if
13549             # they are not separated by a blank.
13550             # Example: a syntax error occurs if you break before the ? here
13551             #  my$logic=join$all?' && ':' || ',@regexps;
13552             # From: Professional_Perl_Programming_Code/multifind.pl
13553             elsif ( $next_nonblank_type eq '?' ) {
13554                 $bond_str = NO_BREAK
13555                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
13556             }
13557
13558             # Breaking before a . followed by a number
13559             # can cause trouble if there is no intervening space
13560             # Example: a syntax error occurs if you break before the .2 here
13561             #  $str .= pack($endian.2, ensurrogate($ord));
13562             # From: perl58/Unicode.pm
13563             elsif ( $next_nonblank_type eq '.' ) {
13564                 $bond_str = NO_BREAK
13565                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
13566             }
13567
13568             # patch to put cuddled elses back together when on multiple
13569             # lines, as in: } \n else \n { \n
13570             if ($rOpts_cuddled_else) {
13571
13572                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
13573                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
13574                 {
13575                     $bond_str = NO_BREAK;
13576                 }
13577             }
13578
13579             # keep '}' together with ';'
13580             if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
13581                 $bond_str = NO_BREAK;
13582             }
13583
13584             # never break between sub name and opening paren
13585             if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
13586                 $bond_str = NO_BREAK;
13587             }
13588
13589             #---------------------------------------------------------------
13590             # section 3:
13591             # now take nesting depth into account
13592             #---------------------------------------------------------------
13593             # final strength incorporates the bond strength and nesting depth
13594             my $strength;
13595
13596             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
13597                 if ( $total_nesting_depth > 0 ) {
13598                     $strength = $bond_str + $total_nesting_depth;
13599                 }
13600                 else {
13601                     $strength = $bond_str;
13602                 }
13603             }
13604             else {
13605                 $strength = NO_BREAK;
13606             }
13607
13608             # always break after side comment
13609             if ( $type eq '#' ) { $strength = 0 }
13610
13611             $bond_strength_to_go[$i] = $strength;
13612
13613             FORMATTER_DEBUG_FLAG_BOND && do {
13614                 my $str = substr( $token, 0, 15 );
13615                 $str .= ' ' x ( 16 - length($str) );
13616                 print
13617 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
13618             };
13619         }
13620     }
13621
13622 }
13623
13624 sub pad_array_to_go {
13625
13626     # to simplify coding in scan_list and set_bond_strengths, it helps
13627     # to create some extra blank tokens at the end of the arrays
13628     $tokens_to_go[ $max_index_to_go + 1 ] = '';
13629     $tokens_to_go[ $max_index_to_go + 2 ] = '';
13630     $types_to_go[ $max_index_to_go + 1 ]  = 'b';
13631     $types_to_go[ $max_index_to_go + 2 ]  = 'b';
13632     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
13633       $nesting_depth_to_go[$max_index_to_go];
13634
13635     #    /^[R\}\)\]]$/
13636     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
13637         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
13638
13639             # shouldn't happen:
13640             unless ( get_saw_brace_error() ) {
13641                 warning(
13642 "Program bug in scan_list: hit nesting error which should have been caught\n"
13643                 );
13644                 report_definite_bug();
13645             }
13646         }
13647         else {
13648             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
13649         }
13650     }
13651
13652     #       /^[L\{\(\[]$/
13653     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
13654         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
13655     }
13656 }
13657
13658 {    # begin scan_list
13659
13660     my (
13661         $block_type,                $current_depth,
13662         $depth,                     $i,
13663         $i_last_nonblank_token,     $last_colon_sequence_number,
13664         $last_nonblank_token,       $last_nonblank_type,
13665         $last_old_breakpoint_count, $minimum_depth,
13666         $next_nonblank_block_type,  $next_nonblank_token,
13667         $next_nonblank_type,        $old_breakpoint_count,
13668         $starting_breakpoint_count, $starting_depth,
13669         $token,                     $type,
13670         $type_sequence,
13671     );
13672
13673     my (
13674         @breakpoint_stack,              @breakpoint_undo_stack,
13675         @comma_index,                   @container_type,
13676         @identifier_count_stack,        @index_before_arrow,
13677         @interrupted_list,              @item_count_stack,
13678         @last_comma_index,              @last_dot_index,
13679         @last_nonblank_type,            @old_breakpoint_count_stack,
13680         @opening_structure_index_stack, @rfor_semicolon_list,
13681         @has_old_logical_breakpoints,   @rand_or_list,
13682         @i_equals,
13683     );
13684
13685     # routine to define essential variables when we go 'up' to
13686     # a new depth
13687     sub check_for_new_minimum_depth {
13688         my $depth = shift;
13689         if ( $depth < $minimum_depth ) {
13690
13691             $minimum_depth = $depth;
13692
13693             # these arrays need not retain values between calls
13694             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
13695             $container_type[$depth]                = "";
13696             $identifier_count_stack[$depth]        = 0;
13697             $index_before_arrow[$depth]            = -1;
13698             $interrupted_list[$depth]              = 1;
13699             $item_count_stack[$depth]              = 0;
13700             $last_nonblank_type[$depth]            = "";
13701             $opening_structure_index_stack[$depth] = -1;
13702
13703             $breakpoint_undo_stack[$depth]       = undef;
13704             $comma_index[$depth]                 = undef;
13705             $last_comma_index[$depth]            = undef;
13706             $last_dot_index[$depth]              = undef;
13707             $old_breakpoint_count_stack[$depth]  = undef;
13708             $has_old_logical_breakpoints[$depth] = 0;
13709             $rand_or_list[$depth]                = [];
13710             $rfor_semicolon_list[$depth]         = [];
13711             $i_equals[$depth]                    = -1;
13712
13713             # these arrays must retain values between calls
13714             if ( !defined( $has_broken_sublist[$depth] ) ) {
13715                 $dont_align[$depth]         = 0;
13716                 $has_broken_sublist[$depth] = 0;
13717                 $want_comma_break[$depth]   = 0;
13718             }
13719         }
13720     }
13721
13722     # routine to decide which commas to break at within a container;
13723     # returns:
13724     #   $bp_count = number of comma breakpoints set
13725     #   $do_not_break_apart = a flag indicating if container need not
13726     #     be broken open
13727     sub set_comma_breakpoints {
13728
13729         my $dd                 = shift;
13730         my $bp_count           = 0;
13731         my $do_not_break_apart = 0;
13732
13733         # anything to do?
13734         if ( $item_count_stack[$dd] ) {
13735
13736             # handle commas not in containers...
13737             if ( $dont_align[$dd] ) {
13738                 do_uncontained_comma_breaks($dd);
13739             }
13740
13741             # handle commas within containers...
13742             else {
13743                 my $fbc = $forced_breakpoint_count;
13744
13745                 # always open comma lists not preceded by keywords,
13746                 # barewords, identifiers (that is, anything that doesn't
13747                 # look like a function call)
13748                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
13749
13750                 set_comma_breakpoints_do(
13751                     $dd,
13752                     $opening_structure_index_stack[$dd],
13753                     $i,
13754                     $item_count_stack[$dd],
13755                     $identifier_count_stack[$dd],
13756                     $comma_index[$dd],
13757                     $next_nonblank_type,
13758                     $container_type[$dd],
13759                     $interrupted_list[$dd],
13760                     \$do_not_break_apart,
13761                     $must_break_open,
13762                 );
13763                 $bp_count = $forced_breakpoint_count - $fbc;
13764                 $do_not_break_apart = 0 if $must_break_open;
13765             }
13766         }
13767         return ( $bp_count, $do_not_break_apart );
13768     }
13769
13770     sub do_uncontained_comma_breaks {
13771
13772         # Handle commas not in containers...
13773         # This is a catch-all routine for commas that we
13774         # don't know what to do with because the don't fall
13775         # within containers.  We will bias the bond strength
13776         # to break at commas which ended lines in the input
13777         # file.  This usually works better than just trying
13778         # to put as many items on a line as possible.  A
13779         # downside is that if the input file is garbage it
13780         # won't work very well. However, the user can always
13781         # prevent following the old breakpoints with the
13782         # -iob flag.
13783         my $dd   = shift;
13784         my $bias = -.01;
13785         foreach my $ii ( @{ $comma_index[$dd] } ) {
13786             if ( $old_breakpoint_to_go[$ii] ) {
13787                 $bond_strength_to_go[$ii] = $bias;
13788
13789                 # reduce bias magnitude to force breaks in order
13790                 $bias *= 0.99;
13791             }
13792         }
13793
13794         # Also put a break before the first comma if
13795         # (1) there was a break there in the input, and
13796         # (2) that was exactly one previous break in the input
13797         #
13798         # For example, we will follow the user and break after
13799         # 'print' in this snippet:
13800         #    print
13801         #      "conformability (Not the same dimension)\n",
13802         #      "\t", $have, " is ", text_unit($hu), "\n",
13803         #      "\t", $want, " is ", text_unit($wu), "\n",
13804         #      ;
13805         my $i_first_comma = $comma_index[$dd]->[0];
13806         if ( $old_breakpoint_to_go[$i_first_comma] ) {
13807             my $level_comma = $levels_to_go[$i_first_comma];
13808             my $ibreak      = -1;
13809             my $obp_count   = 0;
13810             for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
13811                 if ( $old_breakpoint_to_go[$ii] ) {
13812                     $obp_count++;
13813                     last if ( $obp_count > 1 );
13814                     $ibreak = $ii
13815                       if ( $levels_to_go[$ii] == $level_comma );
13816                 }
13817             }
13818             if ( $ibreak >= 0 && $obp_count == 1 ) {
13819                 set_forced_breakpoint($ibreak);
13820             }
13821         }
13822     }
13823
13824     my %is_logical_container;
13825
13826     BEGIN {
13827         @_ = qw# if elsif unless while and or err not && | || ? : ! #;
13828         @is_logical_container{@_} = (1) x scalar(@_);
13829     }
13830
13831     sub set_for_semicolon_breakpoints {
13832         my $dd = shift;
13833         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
13834             set_forced_breakpoint($_);
13835         }
13836     }
13837
13838     sub set_logical_breakpoints {
13839         my $dd = shift;
13840         if (
13841                $item_count_stack[$dd] == 0
13842             && $is_logical_container{ $container_type[$dd] }
13843
13844             # TESTING:
13845             || $has_old_logical_breakpoints[$dd]
13846           )
13847         {
13848
13849             # Look for breaks in this order:
13850             # 0   1    2   3
13851             # or  and  ||  &&
13852             foreach my $i ( 0 .. 3 ) {
13853                 if ( $rand_or_list[$dd][$i] ) {
13854                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
13855                         set_forced_breakpoint($_);
13856                     }
13857
13858                     # break at any 'if' and 'unless' too
13859                     foreach ( @{ $rand_or_list[$dd][4] } ) {
13860                         set_forced_breakpoint($_);
13861                     }
13862                     $rand_or_list[$dd] = [];
13863                     last;
13864                 }
13865             }
13866         }
13867     }
13868
13869     sub is_unbreakable_container {
13870
13871         # never break a container of one of these types
13872         # because bad things can happen (map1.t)
13873         my $dd = shift;
13874         $is_sort_map_grep{ $container_type[$dd] };
13875     }
13876
13877     sub scan_list {
13878
13879         # This routine is responsible for setting line breaks for all lists,
13880         # so that hierarchical structure can be displayed and so that list
13881         # items can be vertically aligned.  The output of this routine is
13882         # stored in the array @forced_breakpoint_to_go, which is used to set
13883         # final breakpoints.
13884
13885         $starting_depth = $nesting_depth_to_go[0];
13886
13887         $block_type                 = ' ';
13888         $current_depth              = $starting_depth;
13889         $i                          = -1;
13890         $last_colon_sequence_number = -1;
13891         $last_nonblank_token        = ';';
13892         $last_nonblank_type         = ';';
13893         $last_nonblank_block_type   = ' ';
13894         $last_old_breakpoint_count  = 0;
13895         $minimum_depth = $current_depth + 1;    # forces update in check below
13896         $old_breakpoint_count      = 0;
13897         $starting_breakpoint_count = $forced_breakpoint_count;
13898         $token                     = ';';
13899         $type                      = ';';
13900         $type_sequence             = '';
13901
13902         check_for_new_minimum_depth($current_depth);
13903
13904         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13905         my $want_previous_breakpoint = -1;
13906
13907         my $saw_good_breakpoint;
13908         my $i_line_end   = -1;
13909         my $i_line_start = -1;
13910
13911         # loop over all tokens in this batch
13912         while ( ++$i <= $max_index_to_go ) {
13913             if ( $type ne 'b' ) {
13914                 $i_last_nonblank_token    = $i - 1;
13915                 $last_nonblank_type       = $type;
13916                 $last_nonblank_token      = $token;
13917                 $last_nonblank_block_type = $block_type;
13918             }
13919             $type          = $types_to_go[$i];
13920             $block_type    = $block_type_to_go[$i];
13921             $token         = $tokens_to_go[$i];
13922             $type_sequence = $type_sequence_to_go[$i];
13923             my $next_type       = $types_to_go[ $i + 1 ];
13924             my $next_token      = $tokens_to_go[ $i + 1 ];
13925             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13926             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
13927             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
13928             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13929
13930             # set break if flag was set
13931             if ( $want_previous_breakpoint >= 0 ) {
13932                 set_forced_breakpoint($want_previous_breakpoint);
13933                 $want_previous_breakpoint = -1;
13934             }
13935
13936             $last_old_breakpoint_count = $old_breakpoint_count;
13937             if ( $old_breakpoint_to_go[$i] ) {
13938                 $i_line_end   = $i;
13939                 $i_line_start = $i_next_nonblank;
13940
13941                 $old_breakpoint_count++;
13942
13943                 # Break before certain keywords if user broke there and
13944                 # this is a 'safe' break point. The idea is to retain
13945                 # any preferred breaks for sequential list operations,
13946                 # like a schwartzian transform.
13947                 if ($rOpts_break_at_old_keyword_breakpoints) {
13948                     if (
13949                            $next_nonblank_type eq 'k'
13950                         && $is_keyword_returning_list{$next_nonblank_token}
13951                         && (   $type =~ /^[=\)\]\}Riw]$/
13952                             || $type eq 'k'
13953                             && $is_keyword_returning_list{$token} )
13954                       )
13955                     {
13956
13957                         # we actually have to set this break next time through
13958                         # the loop because if we are at a closing token (such
13959                         # as '}') which forms a one-line block, this break might
13960                         # get undone.
13961                         $want_previous_breakpoint = $i;
13962                     }
13963                 }
13964             }
13965             next if ( $type eq 'b' );
13966             $depth = $nesting_depth_to_go[ $i + 1 ];
13967
13968             # safety check - be sure we always break after a comment
13969             # Shouldn't happen .. an error here probably means that the
13970             # nobreak flag did not get turned off correctly during
13971             # formatting.
13972             if ( $type eq '#' ) {
13973                 if ( $i != $max_index_to_go ) {
13974                     warning(
13975 "Non-fatal program bug: backup logic needed to break after a comment\n"
13976                     );
13977                     report_definite_bug();
13978                     $nobreak_to_go[$i] = 0;
13979                     set_forced_breakpoint($i);
13980                 }
13981             }
13982
13983             # Force breakpoints at certain tokens in long lines.
13984             # Note that such breakpoints will be undone later if these tokens
13985             # are fully contained within parens on a line.
13986             if (
13987
13988                 # break before a keyword within a line
13989                 $type eq 'k'
13990                 && $i > 0
13991
13992                 # if one of these keywords:
13993                 && $token =~ /^(if|unless|while|until|for)$/
13994
13995                 # but do not break at something like '1 while'
13996                 && ( $last_nonblank_type ne 'n' || $i > 2 )
13997
13998                 # and let keywords follow a closing 'do' brace
13999                 && $last_nonblank_block_type ne 'do'
14000
14001                 && (
14002                     $is_long_line
14003
14004                     # or container is broken (by side-comment, etc)
14005                     || (   $next_nonblank_token eq '('
14006                         && $mate_index_to_go[$i_next_nonblank] < $i )
14007                 )
14008               )
14009             {
14010                 set_forced_breakpoint( $i - 1 );
14011             }
14012
14013             # remember locations of '||'  and '&&' for possible breaks if we
14014             # decide this is a long logical expression.
14015             if ( $type eq '||' ) {
14016                 push @{ $rand_or_list[$depth][2] }, $i;
14017                 ++$has_old_logical_breakpoints[$depth]
14018                   if ( ( $i == $i_line_start || $i == $i_line_end )
14019                     && $rOpts_break_at_old_logical_breakpoints );
14020             }
14021             elsif ( $type eq '&&' ) {
14022                 push @{ $rand_or_list[$depth][3] }, $i;
14023                 ++$has_old_logical_breakpoints[$depth]
14024                   if ( ( $i == $i_line_start || $i == $i_line_end )
14025                     && $rOpts_break_at_old_logical_breakpoints );
14026             }
14027             elsif ( $type eq 'f' ) {
14028                 push @{ $rfor_semicolon_list[$depth] }, $i;
14029             }
14030             elsif ( $type eq 'k' ) {
14031                 if ( $token eq 'and' ) {
14032                     push @{ $rand_or_list[$depth][1] }, $i;
14033                     ++$has_old_logical_breakpoints[$depth]
14034                       if ( ( $i == $i_line_start || $i == $i_line_end )
14035                         && $rOpts_break_at_old_logical_breakpoints );
14036                 }
14037
14038                 # break immediately at 'or's which are probably not in a logical
14039                 # block -- but we will break in logical breaks below so that
14040                 # they do not add to the forced_breakpoint_count
14041                 elsif ( $token eq 'or' ) {
14042                     push @{ $rand_or_list[$depth][0] }, $i;
14043                     ++$has_old_logical_breakpoints[$depth]
14044                       if ( ( $i == $i_line_start || $i == $i_line_end )
14045                         && $rOpts_break_at_old_logical_breakpoints );
14046                     if ( $is_logical_container{ $container_type[$depth] } ) {
14047                     }
14048                     else {
14049                         if ($is_long_line) { set_forced_breakpoint($i) }
14050                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
14051                             && $rOpts_break_at_old_logical_breakpoints )
14052                         {
14053                             $saw_good_breakpoint = 1;
14054                         }
14055                     }
14056                 }
14057                 elsif ( $token eq 'if' || $token eq 'unless' ) {
14058                     push @{ $rand_or_list[$depth][4] }, $i;
14059                     if ( ( $i == $i_line_start || $i == $i_line_end )
14060                         && $rOpts_break_at_old_logical_breakpoints )
14061                     {
14062                         set_forced_breakpoint($i);
14063                     }
14064                 }
14065             }
14066             elsif ( $is_assignment{$type} ) {
14067                 $i_equals[$depth] = $i;
14068             }
14069
14070             if ($type_sequence) {
14071
14072                 # handle any postponed closing breakpoints
14073                 if ( $token =~ /^[\)\]\}\:]$/ ) {
14074                     if ( $type eq ':' ) {
14075                         $last_colon_sequence_number = $type_sequence;
14076
14077                         # TESTING: retain break at a ':' line break
14078                         if ( ( $i == $i_line_start || $i == $i_line_end )
14079                             && $rOpts_break_at_old_ternary_breakpoints )
14080                         {
14081
14082                             # TESTING:
14083                             set_forced_breakpoint($i);
14084
14085                             # break at previous '='
14086                             if ( $i_equals[$depth] > 0 ) {
14087                                 set_forced_breakpoint( $i_equals[$depth] );
14088                                 $i_equals[$depth] = -1;
14089                             }
14090                         }
14091                     }
14092                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
14093                         my $inc = ( $type eq ':' ) ? 0 : 1;
14094                         set_forced_breakpoint( $i - $inc );
14095                         delete $postponed_breakpoint{$type_sequence};
14096                     }
14097                 }
14098
14099                 # set breaks at ?/: if they will get separated (and are
14100                 # not a ?/: chain), or if the '?' is at the end of the
14101                 # line
14102                 elsif ( $token eq '?' ) {
14103                     my $i_colon = $mate_index_to_go[$i];
14104                     if (
14105                         $i_colon <= 0  # the ':' is not in this batch
14106                         || $i == 0     # this '?' is the first token of the line
14107                         || $i ==
14108                         $max_index_to_go    # or this '?' is the last token
14109                       )
14110                     {
14111
14112                         # don't break at a '?' if preceded by ':' on
14113                         # this line of previous ?/: pair on this line.
14114                         # This is an attempt to preserve a chain of ?/:
14115                         # expressions (elsif2.t).  And don't break if
14116                         # this has a side comment.
14117                         set_forced_breakpoint($i)
14118                           unless (
14119                             $type_sequence == (
14120                                 $last_colon_sequence_number +
14121                                   TYPE_SEQUENCE_INCREMENT
14122                             )
14123                             || $tokens_to_go[$max_index_to_go] eq '#'
14124                           );
14125                         set_closing_breakpoint($i);
14126                     }
14127                 }
14128             }
14129
14130 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
14131
14132             #------------------------------------------------------------
14133             # Handle Increasing Depth..
14134             #
14135             # prepare for a new list when depth increases
14136             # token $i is a '(','{', or '['
14137             #------------------------------------------------------------
14138             if ( $depth > $current_depth ) {
14139
14140                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
14141                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
14142                 $has_broken_sublist[$depth]     = 0;
14143                 $identifier_count_stack[$depth] = 0;
14144                 $index_before_arrow[$depth]     = -1;
14145                 $interrupted_list[$depth]       = 0;
14146                 $item_count_stack[$depth]       = 0;
14147                 $last_comma_index[$depth]       = undef;
14148                 $last_dot_index[$depth]         = undef;
14149                 $last_nonblank_type[$depth]     = $last_nonblank_type;
14150                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
14151                 $opening_structure_index_stack[$depth] = $i;
14152                 $rand_or_list[$depth]                  = [];
14153                 $rfor_semicolon_list[$depth]           = [];
14154                 $i_equals[$depth]                      = -1;
14155                 $want_comma_break[$depth]              = 0;
14156                 $container_type[$depth] =
14157                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
14158                   ? $last_nonblank_token
14159                   : "";
14160                 $has_old_logical_breakpoints[$depth] = 0;
14161
14162                 # if line ends here then signal closing token to break
14163                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
14164                 {
14165                     set_closing_breakpoint($i);
14166                 }
14167
14168                 # Not all lists of values should be vertically aligned..
14169                 $dont_align[$depth] =
14170
14171                   # code BLOCKS are handled at a higher level
14172                   ( $block_type ne "" )
14173
14174                   # certain paren lists
14175                   || ( $type eq '(' ) && (
14176
14177                     # it does not usually look good to align a list of
14178                     # identifiers in a parameter list, as in:
14179                     #    my($var1, $var2, ...)
14180                     # (This test should probably be refined, for now I'm just
14181                     # testing for any keyword)
14182                     ( $last_nonblank_type eq 'k' )
14183
14184                     # a trailing '(' usually indicates a non-list
14185                     || ( $next_nonblank_type eq '(' )
14186                   );
14187
14188                 # patch to outdent opening brace of long if/for/..
14189                 # statements (like this one).  See similar coding in
14190                 # set_continuation breaks.  We have also catch it here for
14191                 # short line fragments which otherwise will not go through
14192                 # set_continuation_breaks.
14193                 if (
14194                     $block_type
14195
14196                     # if we have the ')' but not its '(' in this batch..
14197                     && ( $last_nonblank_token eq ')' )
14198                     && $mate_index_to_go[$i_last_nonblank_token] < 0
14199
14200                     # and user wants brace to left
14201                     && !$rOpts->{'opening-brace-always-on-right'}
14202
14203                     && ( $type  eq '{' )    # should be true
14204                     && ( $token eq '{' )    # should be true
14205                   )
14206                 {
14207                     set_forced_breakpoint( $i - 1 );
14208                 }
14209             }
14210
14211             #------------------------------------------------------------
14212             # Handle Decreasing Depth..
14213             #
14214             # finish off any old list when depth decreases
14215             # token $i is a ')','}', or ']'
14216             #------------------------------------------------------------
14217             elsif ( $depth < $current_depth ) {
14218
14219                 check_for_new_minimum_depth($depth);
14220
14221                 # force all outer logical containers to break after we see on
14222                 # old breakpoint
14223                 $has_old_logical_breakpoints[$depth] ||=
14224                   $has_old_logical_breakpoints[$current_depth];
14225
14226                 # Patch to break between ') {' if the paren list is broken.
14227                 # There is similar logic in set_continuation_breaks for
14228                 # non-broken lists.
14229                 if (   $token eq ')'
14230                     && $next_nonblank_block_type
14231                     && $interrupted_list[$current_depth]
14232                     && $next_nonblank_type eq '{'
14233                     && !$rOpts->{'opening-brace-always-on-right'} )
14234                 {
14235                     set_forced_breakpoint($i);
14236                 }
14237
14238 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
14239
14240                 # set breaks at commas if necessary
14241                 my ( $bp_count, $do_not_break_apart ) =
14242                   set_comma_breakpoints($current_depth);
14243
14244                 my $i_opening = $opening_structure_index_stack[$current_depth];
14245                 my $saw_opening_structure = ( $i_opening >= 0 );
14246
14247                 # this term is long if we had to break at interior commas..
14248                 my $is_long_term = $bp_count > 0;
14249
14250                 # ..or if the length between opening and closing parens exceeds
14251                 # allowed line length
14252                 if ( !$is_long_term && $saw_opening_structure ) {
14253                     my $i_opening_minus = find_token_starting_list($i_opening);
14254
14255                     # Note: we have to allow for one extra space after a
14256                     # closing token so that we do not strand a comma or
14257                     # semicolon, hence the '>=' here (oneline.t)
14258                     $is_long_term =
14259                       excess_line_length( $i_opening_minus, $i ) >= 0;
14260                 }
14261
14262                 # We've set breaks after all comma-arrows.  Now we have to
14263                 # undo them if this can be a one-line block
14264                 # (the only breakpoints set will be due to comma-arrows)
14265                 if (
14266
14267                     # user doesn't require breaking after all comma-arrows
14268                     ( $rOpts_comma_arrow_breakpoints != 0 )
14269
14270                     # and if the opening structure is in this batch
14271                     && $saw_opening_structure
14272
14273                     # and either on the same old line
14274                     && (
14275                         $old_breakpoint_count_stack[$current_depth] ==
14276                         $last_old_breakpoint_count
14277
14278                         # or user wants to form long blocks with arrows
14279                         || $rOpts_comma_arrow_breakpoints == 2
14280                     )
14281
14282                   # and we made some breakpoints between the opening and closing
14283                     && ( $breakpoint_undo_stack[$current_depth] <
14284                         $forced_breakpoint_undo_count )
14285
14286                     # and this block is short enough to fit on one line
14287                     # Note: use < because need 1 more space for possible comma
14288                     && !$is_long_term
14289
14290                   )
14291                 {
14292                     undo_forced_breakpoint_stack(
14293                         $breakpoint_undo_stack[$current_depth] );
14294                 }
14295
14296                 # now see if we have any comma breakpoints left
14297                 my $has_comma_breakpoints =
14298                   ( $breakpoint_stack[$current_depth] !=
14299                       $forced_breakpoint_count );
14300
14301                 # update broken-sublist flag of the outer container
14302                 $has_broken_sublist[$depth] =
14303                      $has_broken_sublist[$depth]
14304                   || $has_broken_sublist[$current_depth]
14305                   || $is_long_term
14306                   || $has_comma_breakpoints;
14307
14308 # Having come to the closing ')', '}', or ']', now we have to decide if we
14309 # should 'open up' the structure by placing breaks at the opening and
14310 # closing containers.  This is a tricky decision.  Here are some of the
14311 # basic considerations:
14312 #
14313 # -If this is a BLOCK container, then any breakpoints will have already
14314 # been set (and according to user preferences), so we need do nothing here.
14315 #
14316 # -If we have a comma-separated list for which we can align the list items,
14317 # then we need to do so because otherwise the vertical aligner cannot
14318 # currently do the alignment.
14319 #
14320 # -If this container does itself contain a container which has been broken
14321 # open, then it should be broken open to properly show the structure.
14322 #
14323 # -If there is nothing to align, and no other reason to break apart,
14324 # then do not do it.
14325 #
14326 # We will not break open the parens of a long but 'simple' logical expression.
14327 # For example:
14328 #
14329 # This is an example of a simple logical expression and its formatting:
14330 #
14331 #     if ( $bigwasteofspace1 && $bigwasteofspace2
14332 #         || $bigwasteofspace3 && $bigwasteofspace4 )
14333 #
14334 # Most people would prefer this than the 'spacey' version:
14335 #
14336 #     if (
14337 #         $bigwasteofspace1 && $bigwasteofspace2
14338 #         || $bigwasteofspace3 && $bigwasteofspace4
14339 #     )
14340 #
14341 # To illustrate the rules for breaking logical expressions, consider:
14342 #
14343 #             FULLY DENSE:
14344 #             if ( $opt_excl
14345 #                 and ( exists $ids_excl_uc{$id_uc}
14346 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
14347 #
14348 # This is on the verge of being difficult to read.  The current default is to
14349 # open it up like this:
14350 #
14351 #             DEFAULT:
14352 #             if (
14353 #                 $opt_excl
14354 #                 and ( exists $ids_excl_uc{$id_uc}
14355 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
14356 #               )
14357 #
14358 # This is a compromise which tries to avoid being too dense and to spacey.
14359 # A more spaced version would be:
14360 #
14361 #             SPACEY:
14362 #             if (
14363 #                 $opt_excl
14364 #                 and (
14365 #                     exists $ids_excl_uc{$id_uc}
14366 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
14367 #                 )
14368 #               )
14369 #
14370 # Some people might prefer the spacey version -- an option could be added.  The
14371 # innermost expression contains a long block '( exists $ids_...  ')'.
14372 #
14373 # Here is how the logic goes: We will force a break at the 'or' that the
14374 # innermost expression contains, but we will not break apart its opening and
14375 # closing containers because (1) it contains no multi-line sub-containers itself,
14376 # and (2) there is no alignment to be gained by breaking it open like this
14377 #
14378 #             and (
14379 #                 exists $ids_excl_uc{$id_uc}
14380 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
14381 #             )
14382 #
14383 # (although this looks perfectly ok and might be good for long expressions).  The
14384 # outer 'if' container, though, contains a broken sub-container, so it will be
14385 # broken open to avoid too much density.  Also, since it contains no 'or's, there
14386 # will be a forced break at its 'and'.
14387
14388                 # set some flags telling something about this container..
14389                 my $is_simple_logical_expression = 0;
14390                 if (   $item_count_stack[$current_depth] == 0
14391                     && $saw_opening_structure
14392                     && $tokens_to_go[$i_opening] eq '('
14393                     && $is_logical_container{ $container_type[$current_depth] }
14394                   )
14395                 {
14396
14397                     # This seems to be a simple logical expression with
14398                     # no existing breakpoints.  Set a flag to prevent
14399                     # opening it up.
14400                     if ( !$has_comma_breakpoints ) {
14401                         $is_simple_logical_expression = 1;
14402                     }
14403
14404                     # This seems to be a simple logical expression with
14405                     # breakpoints (broken sublists, for example).  Break
14406                     # at all 'or's and '||'s.
14407                     else {
14408                         set_logical_breakpoints($current_depth);
14409                     }
14410                 }
14411
14412                 if ( $is_long_term
14413                     && @{ $rfor_semicolon_list[$current_depth] } )
14414                 {
14415                     set_for_semicolon_breakpoints($current_depth);
14416
14417                     # open up a long 'for' or 'foreach' container to allow
14418                     # leading term alignment unless -lp is used.
14419                     $has_comma_breakpoints = 1
14420                       unless $rOpts_line_up_parentheses;
14421                 }
14422
14423                 if (
14424
14425                     # breaks for code BLOCKS are handled at a higher level
14426                     !$block_type
14427
14428                     # we do not need to break at the top level of an 'if'
14429                     # type expression
14430                     && !$is_simple_logical_expression
14431
14432                     ## modification to keep ': (' containers vertically tight;
14433                     ## but probably better to let user set -vt=1 to avoid
14434                     ## inconsistency with other paren types
14435                     ## && ($container_type[$current_depth] ne ':')
14436
14437                     # otherwise, we require one of these reasons for breaking:
14438                     && (
14439
14440                         # - this term has forced line breaks
14441                         $has_comma_breakpoints
14442
14443                        # - the opening container is separated from this batch
14444                        #   for some reason (comment, blank line, code block)
14445                        # - this is a non-paren container spanning multiple lines
14446                         || !$saw_opening_structure
14447
14448                         # - this is a long block contained in another breakable
14449                         #   container
14450                         || (   $is_long_term
14451                             && $container_environment_to_go[$i_opening] ne
14452                             'BLOCK' )
14453                     )
14454                   )
14455                 {
14456
14457                     # For -lp option, we must put a breakpoint before
14458                     # the token which has been identified as starting
14459                     # this indentation level.  This is necessary for
14460                     # proper alignment.
14461                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
14462                     {
14463                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
14464                         if (   $i_opening + 1 < $max_index_to_go
14465                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
14466                         {
14467                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
14468                         }
14469                         if ( defined($item) ) {
14470                             my $i_start_2 = $item->get_STARTING_INDEX();
14471                             if (
14472                                 defined($i_start_2)
14473
14474                                 # we are breaking after an opening brace, paren,
14475                                 # so don't break before it too
14476                                 && $i_start_2 ne $i_opening
14477                               )
14478                             {
14479
14480                                 # Only break for breakpoints at the same
14481                                 # indentation level as the opening paren
14482                                 my $test1 = $nesting_depth_to_go[$i_opening];
14483                                 my $test2 = $nesting_depth_to_go[$i_start_2];
14484                                 if ( $test2 == $test1 ) {
14485                                     set_forced_breakpoint( $i_start_2 - 1 );
14486                                 }
14487                             }
14488                         }
14489                     }
14490
14491                     # break after opening structure.
14492                     # note: break before closing structure will be automatic
14493                     if ( $minimum_depth <= $current_depth ) {
14494
14495                         set_forced_breakpoint($i_opening)
14496                           unless ( $do_not_break_apart
14497                             || is_unbreakable_container($current_depth) );
14498
14499                         # break at '.' of lower depth level before opening token
14500                         if ( $last_dot_index[$depth] ) {
14501                             set_forced_breakpoint( $last_dot_index[$depth] );
14502                         }
14503
14504                         # break before opening structure if preeced by another
14505                         # closing structure and a comma.  This is normally
14506                         # done by the previous closing brace, but not
14507                         # if it was a one-line block.
14508                         if ( $i_opening > 2 ) {
14509                             my $i_prev =
14510                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
14511                               ? $i_opening - 2
14512                               : $i_opening - 1;
14513
14514                             if (   $types_to_go[$i_prev] eq ','
14515                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14516                             {
14517                                 set_forced_breakpoint($i_prev);
14518                             }
14519
14520                             # also break before something like ':('  or '?('
14521                             # if appropriate.
14522                             elsif (
14523                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14524                             {
14525                                 my $token_prev = $tokens_to_go[$i_prev];
14526                                 if ( $want_break_before{$token_prev} ) {
14527                                     set_forced_breakpoint($i_prev);
14528                                 }
14529                             }
14530                         }
14531                     }
14532
14533                     # break after comma following closing structure
14534                     if ( $next_type eq ',' ) {
14535                         set_forced_breakpoint( $i + 1 );
14536                     }
14537
14538                     # break before an '=' following closing structure
14539                     if (
14540                         $is_assignment{$next_nonblank_type}
14541                         && ( $breakpoint_stack[$current_depth] !=
14542                             $forced_breakpoint_count )
14543                       )
14544                     {
14545                         set_forced_breakpoint($i);
14546                     }
14547
14548                     # break at any comma before the opening structure Added
14549                     # for -lp, but seems to be good in general.  It isn't
14550                     # obvious how far back to look; the '5' below seems to
14551                     # work well and will catch the comma in something like
14552                     #  push @list, myfunc( $param, $param, ..
14553
14554                     my $icomma = $last_comma_index[$depth];
14555                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
14556                         unless ( $forced_breakpoint_to_go[$icomma] ) {
14557                             set_forced_breakpoint($icomma);
14558                         }
14559                     }
14560                 }    # end logic to open up a container
14561
14562                 # Break open a logical container open if it was already open
14563                 elsif ($is_simple_logical_expression
14564                     && $has_old_logical_breakpoints[$current_depth] )
14565                 {
14566                     set_logical_breakpoints($current_depth);
14567                 }
14568
14569                 # Handle long container which does not get opened up
14570                 elsif ($is_long_term) {
14571
14572                     # must set fake breakpoint to alert outer containers that
14573                     # they are complex
14574                     set_fake_breakpoint();
14575                 }
14576             }
14577
14578             #------------------------------------------------------------
14579             # Handle this token
14580             #------------------------------------------------------------
14581
14582             $current_depth = $depth;
14583
14584             # handle comma-arrow
14585             if ( $type eq '=>' ) {
14586                 next if ( $last_nonblank_type eq '=>' );
14587                 next if $rOpts_break_at_old_comma_breakpoints;
14588                 next if $rOpts_comma_arrow_breakpoints == 3;
14589                 $want_comma_break[$depth]   = 1;
14590                 $index_before_arrow[$depth] = $i_last_nonblank_token;
14591                 next;
14592             }
14593
14594             elsif ( $type eq '.' ) {
14595                 $last_dot_index[$depth] = $i;
14596             }
14597
14598             # Turn off alignment if we are sure that this is not a list
14599             # environment.  To be safe, we will do this if we see certain
14600             # non-list tokens, such as ';', and also the environment is
14601             # not a list.  Note that '=' could be in any of the = operators
14602             # (lextest.t). We can't just use the reported environment
14603             # because it can be incorrect in some cases.
14604             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
14605                 && $container_environment_to_go[$i] ne 'LIST' )
14606             {
14607                 $dont_align[$depth]         = 1;
14608                 $want_comma_break[$depth]   = 0;
14609                 $index_before_arrow[$depth] = -1;
14610             }
14611
14612             # now just handle any commas
14613             next unless ( $type eq ',' );
14614
14615             $last_dot_index[$depth]   = undef;
14616             $last_comma_index[$depth] = $i;
14617
14618             # break here if this comma follows a '=>'
14619             # but not if there is a side comment after the comma
14620             if ( $want_comma_break[$depth] ) {
14621
14622                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
14623                     $want_comma_break[$depth]   = 0;
14624                     $index_before_arrow[$depth] = -1;
14625                     next;
14626                 }
14627
14628                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14629
14630                 # break before the previous token if it looks safe
14631                 # Example of something that we will not try to break before:
14632                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
14633                 # Also we don't want to break at a binary operator (like +):
14634                 # $c->createOval(
14635                 #    $x + $R, $y +
14636                 #    $R => $x - $R,
14637                 #    $y - $R, -fill   => 'black',
14638                 # );
14639                 my $ibreak = $index_before_arrow[$depth] - 1;
14640                 if (   $ibreak > 0
14641                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
14642                 {
14643                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
14644                     if ( $types_to_go[$ibreak]  eq 'b' ) { $ibreak-- }
14645                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
14646
14647                         # don't break pointer calls, such as the following:
14648                         #  File::Spec->curdir  => 1,
14649                         # (This is tokenized as adjacent 'w' tokens)
14650                         if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
14651                             set_forced_breakpoint($ibreak);
14652                         }
14653                     }
14654                 }
14655
14656                 $want_comma_break[$depth]   = 0;
14657                 $index_before_arrow[$depth] = -1;
14658
14659                 # handle list which mixes '=>'s and ','s:
14660                 # treat any list items so far as an interrupted list
14661                 $interrupted_list[$depth] = 1;
14662                 next;
14663             }
14664
14665             # break after all commas above starting depth
14666             if ( $depth < $starting_depth && !$dont_align[$depth] ) {
14667                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14668                 next;
14669             }
14670
14671             # add this comma to the list..
14672             my $item_count = $item_count_stack[$depth];
14673             if ( $item_count == 0 ) {
14674
14675                 # but do not form a list with no opening structure
14676                 # for example:
14677
14678                 #            open INFILE_COPY, ">$input_file_copy"
14679                 #              or die ("very long message");
14680
14681                 if ( ( $opening_structure_index_stack[$depth] < 0 )
14682                     && $container_environment_to_go[$i] eq 'BLOCK' )
14683                 {
14684                     $dont_align[$depth] = 1;
14685                 }
14686             }
14687
14688             $comma_index[$depth][$item_count] = $i;
14689             ++$item_count_stack[$depth];
14690             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
14691                 $identifier_count_stack[$depth]++;
14692             }
14693         }
14694
14695         #-------------------------------------------
14696         # end of loop over all tokens in this batch
14697         #-------------------------------------------
14698
14699         # set breaks for any unfinished lists ..
14700         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
14701
14702             $interrupted_list[$dd] = 1;
14703             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
14704             set_comma_breakpoints($dd);
14705             set_logical_breakpoints($dd)
14706               if ( $has_old_logical_breakpoints[$dd] );
14707             set_for_semicolon_breakpoints($dd);
14708
14709             # break open container...
14710             my $i_opening = $opening_structure_index_stack[$dd];
14711             set_forced_breakpoint($i_opening)
14712               unless (
14713                 is_unbreakable_container($dd)
14714
14715                 # Avoid a break which would place an isolated ' or "
14716                 # on a line
14717                 || (   $type eq 'Q'
14718                     && $i_opening >= $max_index_to_go - 2
14719                     && $token =~ /^['"]$/ )
14720               );
14721         }
14722
14723         # Return a flag indicating if the input file had some good breakpoints.
14724         # This flag will be used to force a break in a line shorter than the
14725         # allowed line length.
14726         if ( $has_old_logical_breakpoints[$current_depth] ) {
14727             $saw_good_breakpoint = 1;
14728         }
14729         return $saw_good_breakpoint;
14730     }
14731 }    # end scan_list
14732
14733 sub find_token_starting_list {
14734
14735     # When testing to see if a block will fit on one line, some
14736     # previous token(s) may also need to be on the line; particularly
14737     # if this is a sub call.  So we will look back at least one
14738     # token. NOTE: This isn't perfect, but not critical, because
14739     # if we mis-identify a block, it will be wrapped and therefore
14740     # fixed the next time it is formatted.
14741     my $i_opening_paren = shift;
14742     my $i_opening_minus = $i_opening_paren;
14743     my $im1             = $i_opening_paren - 1;
14744     my $im2             = $i_opening_paren - 2;
14745     my $im3             = $i_opening_paren - 3;
14746     my $typem1          = $types_to_go[$im1];
14747     my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
14748     if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
14749         $i_opening_minus = $i_opening_paren;
14750     }
14751     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
14752         $i_opening_minus = $im1 if $im1 >= 0;
14753
14754         # walk back to improve length estimate
14755         for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
14756             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
14757             $i_opening_minus = $j;
14758         }
14759         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
14760     }
14761     elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
14762     elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
14763         $i_opening_minus = $im2;
14764     }
14765     return $i_opening_minus;
14766 }
14767
14768 {    # begin set_comma_breakpoints_do
14769
14770     my %is_keyword_with_special_leading_term;
14771
14772     BEGIN {
14773
14774         # These keywords have prototypes which allow a special leading item
14775         # followed by a list
14776         @_ =
14777           qw(formline grep kill map printf sprintf push chmod join pack unshift);
14778         @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
14779     }
14780
14781     sub set_comma_breakpoints_do {
14782
14783         # Given a list with some commas, set breakpoints at some of the
14784         # commas, if necessary, to make it easy to read.  This list is
14785         # an example:
14786         my (
14787             $depth,               $i_opening_paren,  $i_closing_paren,
14788             $item_count,          $identifier_count, $rcomma_index,
14789             $next_nonblank_type,  $list_type,        $interrupted,
14790             $rdo_not_break_apart, $must_break_open,
14791         ) = @_;
14792
14793         # nothing to do if no commas seen
14794         return if ( $item_count < 1 );
14795         my $i_first_comma     = $$rcomma_index[0];
14796         my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
14797         my $i_last_comma      = $i_true_last_comma;
14798         if ( $i_last_comma >= $max_index_to_go ) {
14799             $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
14800             return if ( $item_count < 1 );
14801         }
14802
14803         #---------------------------------------------------------------
14804         # find lengths of all items in the list to calculate page layout
14805         #---------------------------------------------------------------
14806         my $comma_count = $item_count;
14807         my @item_lengths;
14808         my @i_term_begin;
14809         my @i_term_end;
14810         my @i_term_comma;
14811         my $i_prev_plus;
14812         my @max_length = ( 0, 0 );
14813         my $first_term_length;
14814         my $i      = $i_opening_paren;
14815         my $is_odd = 1;
14816
14817         for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
14818             $is_odd      = 1 - $is_odd;
14819             $i_prev_plus = $i + 1;
14820             $i           = $$rcomma_index[$j];
14821
14822             my $i_term_end =
14823               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
14824             my $i_term_begin =
14825               ( $types_to_go[$i_prev_plus] eq 'b' )
14826               ? $i_prev_plus + 1
14827               : $i_prev_plus;
14828             push @i_term_begin, $i_term_begin;
14829             push @i_term_end,   $i_term_end;
14830             push @i_term_comma, $i;
14831
14832             # note: currently adding 2 to all lengths (for comma and space)
14833             my $length =
14834               2 + token_sequence_length( $i_term_begin, $i_term_end );
14835             push @item_lengths, $length;
14836
14837             if ( $j == 0 ) {
14838                 $first_term_length = $length;
14839             }
14840             else {
14841
14842                 if ( $length > $max_length[$is_odd] ) {
14843                     $max_length[$is_odd] = $length;
14844                 }
14845             }
14846         }
14847
14848         # now we have to make a distinction between the comma count and item
14849         # count, because the item count will be one greater than the comma
14850         # count if the last item is not terminated with a comma
14851         my $i_b =
14852           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
14853           ? $i_last_comma + 1
14854           : $i_last_comma;
14855         my $i_e =
14856           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
14857           ? $i_closing_paren - 2
14858           : $i_closing_paren - 1;
14859         my $i_effective_last_comma = $i_last_comma;
14860
14861         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
14862
14863         if ( $last_item_length > 0 ) {
14864
14865             # add 2 to length because other lengths include a comma and a blank
14866             $last_item_length += 2;
14867             push @item_lengths, $last_item_length;
14868             push @i_term_begin, $i_b + 1;
14869             push @i_term_end,   $i_e;
14870             push @i_term_comma, undef;
14871
14872             my $i_odd = $item_count % 2;
14873
14874             if ( $last_item_length > $max_length[$i_odd] ) {
14875                 $max_length[$i_odd] = $last_item_length;
14876             }
14877
14878             $item_count++;
14879             $i_effective_last_comma = $i_e + 1;
14880
14881             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
14882                 $identifier_count++;
14883             }
14884         }
14885
14886         #---------------------------------------------------------------
14887         # End of length calculations
14888         #---------------------------------------------------------------
14889
14890         #---------------------------------------------------------------
14891         # Compound List Rule 1:
14892         # Break at (almost) every comma for a list containing a broken
14893         # sublist.  This has higher priority than the Interrupted List
14894         # Rule.
14895         #---------------------------------------------------------------
14896         if ( $has_broken_sublist[$depth] ) {
14897
14898             # Break at every comma except for a comma between two
14899             # simple, small terms.  This prevents long vertical
14900             # columns of, say, just 0's.
14901             my $small_length = 10;    # 2 + actual maximum length wanted
14902
14903             # We'll insert a break in long runs of small terms to
14904             # allow alignment in uniform tables.
14905             my $skipped_count = 0;
14906             my $columns       = table_columns_available($i_first_comma);
14907             my $fields        = int( $columns / $small_length );
14908             if (   $rOpts_maximum_fields_per_table
14909                 && $fields > $rOpts_maximum_fields_per_table )
14910             {
14911                 $fields = $rOpts_maximum_fields_per_table;
14912             }
14913             my $max_skipped_count = $fields - 1;
14914
14915             my $is_simple_last_term = 0;
14916             my $is_simple_next_term = 0;
14917             foreach my $j ( 0 .. $item_count ) {
14918                 $is_simple_last_term = $is_simple_next_term;
14919                 $is_simple_next_term = 0;
14920                 if (   $j < $item_count
14921                     && $i_term_end[$j] == $i_term_begin[$j]
14922                     && $item_lengths[$j] <= $small_length )
14923                 {
14924                     $is_simple_next_term = 1;
14925                 }
14926                 next if $j == 0;
14927                 if (   $is_simple_last_term
14928                     && $is_simple_next_term
14929                     && $skipped_count < $max_skipped_count )
14930                 {
14931                     $skipped_count++;
14932                 }
14933                 else {
14934                     $skipped_count = 0;
14935                     my $i = $i_term_comma[ $j - 1 ];
14936                     last unless defined $i;
14937                     set_forced_breakpoint($i);
14938                 }
14939             }
14940
14941             # always break at the last comma if this list is
14942             # interrupted; we wouldn't want to leave a terminal '{', for
14943             # example.
14944             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14945             return;
14946         }
14947
14948 #my ( $a, $b, $c ) = caller();
14949 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14950 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
14951 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14952
14953         #---------------------------------------------------------------
14954         # Interrupted List Rule:
14955         # A list is is forced to use old breakpoints if it was interrupted
14956         # by side comments or blank lines, or requested by user.
14957         #---------------------------------------------------------------
14958         if (   $rOpts_break_at_old_comma_breakpoints
14959             || $interrupted
14960             || $i_opening_paren < 0 )
14961         {
14962             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14963             return;
14964         }
14965
14966         #---------------------------------------------------------------
14967         # Looks like a list of items.  We have to look at it and size it up.
14968         #---------------------------------------------------------------
14969
14970         my $opening_token = $tokens_to_go[$i_opening_paren];
14971         my $opening_environment =
14972           $container_environment_to_go[$i_opening_paren];
14973
14974         #-------------------------------------------------------------------
14975         # Return if this will fit on one line
14976         #-------------------------------------------------------------------
14977
14978         my $i_opening_minus = find_token_starting_list($i_opening_paren);
14979         return
14980           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14981
14982         #-------------------------------------------------------------------
14983         # Now we know that this block spans multiple lines; we have to set
14984         # at least one breakpoint -- real or fake -- as a signal to break
14985         # open any outer containers.
14986         #-------------------------------------------------------------------
14987         set_fake_breakpoint();
14988
14989         # be sure we do not extend beyond the current list length
14990         if ( $i_effective_last_comma >= $max_index_to_go ) {
14991             $i_effective_last_comma = $max_index_to_go - 1;
14992         }
14993
14994         # Set a flag indicating if we need to break open to keep -lp
14995         # items aligned.  This is necessary if any of the list terms
14996         # exceeds the available space after the '('.
14997         my $need_lp_break_open = $must_break_open;
14998         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14999             my $columns_if_unbroken = $rOpts_maximum_line_length -
15000               total_line_length( $i_opening_minus, $i_opening_paren );
15001             $need_lp_break_open =
15002                  ( $max_length[0] > $columns_if_unbroken )
15003               || ( $max_length[1] > $columns_if_unbroken )
15004               || ( $first_term_length > $columns_if_unbroken );
15005         }
15006
15007         # Specify if the list must have an even number of fields or not.
15008         # It is generally safest to assume an even number, because the
15009         # list items might be a hash list.  But if we can be sure that
15010         # it is not a hash, then we can allow an odd number for more
15011         # flexibility.
15012         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
15013
15014         if (   $identifier_count >= $item_count - 1
15015             || $is_assignment{$next_nonblank_type}
15016             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
15017           )
15018         {
15019             $odd_or_even = 1;
15020         }
15021
15022         # do we have a long first term which should be
15023         # left on a line by itself?
15024         my $use_separate_first_term = (
15025             $odd_or_even == 1       # only if we can use 1 field/line
15026               && $item_count > 3    # need several items
15027               && $first_term_length >
15028               2 * $max_length[0] - 2    # need long first term
15029               && $first_term_length >
15030               2 * $max_length[1] - 2    # need long first term
15031         );
15032
15033         # or do we know from the type of list that the first term should
15034         # be placed alone?
15035         if ( !$use_separate_first_term ) {
15036             if ( $is_keyword_with_special_leading_term{$list_type} ) {
15037                 $use_separate_first_term = 1;
15038
15039                 # should the container be broken open?
15040                 if ( $item_count < 3 ) {
15041                     if ( $i_first_comma - $i_opening_paren < 4 ) {
15042                         $$rdo_not_break_apart = 1;
15043                     }
15044                 }
15045                 elsif ($first_term_length < 20
15046                     && $i_first_comma - $i_opening_paren < 4 )
15047                 {
15048                     my $columns = table_columns_available($i_first_comma);
15049                     if ( $first_term_length < $columns ) {
15050                         $$rdo_not_break_apart = 1;
15051                     }
15052                 }
15053             }
15054         }
15055
15056         # if so,
15057         if ($use_separate_first_term) {
15058
15059             # ..set a break and update starting values
15060             $use_separate_first_term = 1;
15061             set_forced_breakpoint($i_first_comma);
15062             $i_opening_paren = $i_first_comma;
15063             $i_first_comma   = $$rcomma_index[1];
15064             $item_count--;
15065             return if $comma_count == 1;
15066             shift @item_lengths;
15067             shift @i_term_begin;
15068             shift @i_term_end;
15069             shift @i_term_comma;
15070         }
15071
15072         # if not, update the metrics to include the first term
15073         else {
15074             if ( $first_term_length > $max_length[0] ) {
15075                 $max_length[0] = $first_term_length;
15076             }
15077         }
15078
15079         # Field width parameters
15080         my $pair_width = ( $max_length[0] + $max_length[1] );
15081         my $max_width =
15082           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
15083
15084         # Number of free columns across the page width for laying out tables
15085         my $columns = table_columns_available($i_first_comma);
15086
15087         # Estimated maximum number of fields which fit this space
15088         # This will be our first guess
15089         my $number_of_fields_max =
15090           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
15091             $pair_width );
15092         my $number_of_fields = $number_of_fields_max;
15093
15094         # Find the best-looking number of fields
15095         # and make this our second guess if possible
15096         my ( $number_of_fields_best, $ri_ragged_break_list,
15097             $new_identifier_count )
15098           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
15099             $max_width );
15100
15101         if (   $number_of_fields_best != 0
15102             && $number_of_fields_best < $number_of_fields_max )
15103         {
15104             $number_of_fields = $number_of_fields_best;
15105         }
15106
15107         # ----------------------------------------------------------------------
15108         # If we are crowded and the -lp option is being used, try to
15109         # undo some indentation
15110         # ----------------------------------------------------------------------
15111         if (
15112             $rOpts_line_up_parentheses
15113             && (
15114                 $number_of_fields == 0
15115                 || (   $number_of_fields == 1
15116                     && $number_of_fields != $number_of_fields_best )
15117             )
15118           )
15119         {
15120             my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
15121             if ( $available_spaces > 0 ) {
15122
15123                 my $spaces_wanted = $max_width - $columns;    # for 1 field
15124
15125                 if ( $number_of_fields_best == 0 ) {
15126                     $number_of_fields_best =
15127                       get_maximum_fields_wanted( \@item_lengths );
15128                 }
15129
15130                 if ( $number_of_fields_best != 1 ) {
15131                     my $spaces_wanted_2 =
15132                       1 + $pair_width - $columns;             # for 2 fields
15133                     if ( $available_spaces > $spaces_wanted_2 ) {
15134                         $spaces_wanted = $spaces_wanted_2;
15135                     }
15136                 }
15137
15138                 if ( $spaces_wanted > 0 ) {
15139                     my $deleted_spaces =
15140                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
15141
15142                     # redo the math
15143                     if ( $deleted_spaces > 0 ) {
15144                         $columns = table_columns_available($i_first_comma);
15145                         $number_of_fields_max =
15146                           maximum_number_of_fields( $columns, $odd_or_even,
15147                             $max_width, $pair_width );
15148                         $number_of_fields = $number_of_fields_max;
15149
15150                         if (   $number_of_fields_best == 1
15151                             && $number_of_fields >= 1 )
15152                         {
15153                             $number_of_fields = $number_of_fields_best;
15154                         }
15155                     }
15156                 }
15157             }
15158         }
15159
15160         # try for one column if two won't work
15161         if ( $number_of_fields <= 0 ) {
15162             $number_of_fields = int( $columns / $max_width );
15163         }
15164
15165         # The user can place an upper bound on the number of fields,
15166         # which can be useful for doing maintenance on tables
15167         if (   $rOpts_maximum_fields_per_table
15168             && $number_of_fields > $rOpts_maximum_fields_per_table )
15169         {
15170             $number_of_fields = $rOpts_maximum_fields_per_table;
15171         }
15172
15173         # How many columns (characters) and lines would this container take
15174         # if no additional whitespace were added?
15175         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
15176             $i_effective_last_comma + 1 );
15177         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
15178         my $packed_lines = 1 + int( $packed_columns / $columns );
15179
15180         # are we an item contained in an outer list?
15181         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
15182
15183         if ( $number_of_fields <= 0 ) {
15184
15185 #         #---------------------------------------------------------------
15186 #         # We're in trouble.  We can't find a single field width that works.
15187 #         # There is no simple answer here; we may have a single long list
15188 #         # item, or many.
15189 #         #---------------------------------------------------------------
15190 #
15191 #         In many cases, it may be best to not force a break if there is just one
15192 #         comma, because the standard continuation break logic will do a better
15193 #         job without it.
15194 #
15195 #         In the common case that all but one of the terms can fit
15196 #         on a single line, it may look better not to break open the
15197 #         containing parens.  Consider, for example
15198 #
15199 #             $color =
15200 #               join ( '/',
15201 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
15202 #                 keys %colors );
15203 #
15204 #         which will look like this with the container broken:
15205 #
15206 #             $color = join (
15207 #                 '/',
15208 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
15209 #             );
15210 #
15211 #         Here is an example of this rule for a long last term:
15212 #
15213 #             log_message( 0, 256, 128,
15214 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
15215 #
15216 #         And here is an example with a long first term:
15217 #
15218 #         $s = sprintf(
15219 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
15220 #             $r, $pu, $ps, $cu, $cs, $tt
15221 #           )
15222 #           if $style eq 'all';
15223
15224             my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
15225             my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
15226             my $long_first_term =
15227               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
15228
15229             # break at every comma ...
15230             if (
15231
15232                 # if requested by user or is best looking
15233                 $number_of_fields_best == 1
15234
15235                 # or if this is a sublist of a larger list
15236                 || $in_hierarchical_list
15237
15238                 # or if multiple commas and we dont have a long first or last
15239                 # term
15240                 || ( $comma_count > 1
15241                     && !( $long_last_term || $long_first_term ) )
15242               )
15243             {
15244                 foreach ( 0 .. $comma_count - 1 ) {
15245                     set_forced_breakpoint( $$rcomma_index[$_] );
15246                 }
15247             }
15248             elsif ($long_last_term) {
15249
15250                 set_forced_breakpoint($i_last_comma);
15251                 $$rdo_not_break_apart = 1 unless $must_break_open;
15252             }
15253             elsif ($long_first_term) {
15254
15255                 set_forced_breakpoint($i_first_comma);
15256             }
15257             else {
15258
15259                 # let breaks be defined by default bond strength logic
15260             }
15261             return;
15262         }
15263
15264         # --------------------------------------------------------
15265         # We have a tentative field count that seems to work.
15266         # How many lines will this require?
15267         # --------------------------------------------------------
15268         my $formatted_lines = $item_count / ($number_of_fields);
15269         if ( $formatted_lines != int $formatted_lines ) {
15270             $formatted_lines = 1 + int $formatted_lines;
15271         }
15272
15273         # So far we've been trying to fill out to the right margin.  But
15274         # compact tables are easier to read, so let's see if we can use fewer
15275         # fields without increasing the number of lines.
15276         $number_of_fields =
15277           compactify_table( $item_count, $number_of_fields, $formatted_lines,
15278             $odd_or_even );
15279
15280         # How many spaces across the page will we fill?
15281         my $columns_per_line =
15282           ( int $number_of_fields / 2 ) * $pair_width +
15283           ( $number_of_fields % 2 ) * $max_width;
15284
15285         my $formatted_columns;
15286
15287         if ( $number_of_fields > 1 ) {
15288             $formatted_columns =
15289               ( $pair_width * ( int( $item_count / 2 ) ) +
15290                   ( $item_count % 2 ) * $max_width );
15291         }
15292         else {
15293             $formatted_columns = $max_width * $item_count;
15294         }
15295         if ( $formatted_columns < $packed_columns ) {
15296             $formatted_columns = $packed_columns;
15297         }
15298
15299         my $unused_columns = $formatted_columns - $packed_columns;
15300
15301         # set some empirical parameters to help decide if we should try to
15302         # align; high sparsity does not look good, especially with few lines
15303         my $sparsity = ($unused_columns) / ($formatted_columns);
15304         my $max_allowed_sparsity =
15305             ( $item_count < 3 )    ? 0.1
15306           : ( $packed_lines == 1 ) ? 0.15
15307           : ( $packed_lines == 2 ) ? 0.4
15308           :                          0.7;
15309
15310         # Begin check for shortcut methods, which avoid treating a list
15311         # as a table for relatively small parenthesized lists.  These
15312         # are usually easier to read if not formatted as tables.
15313         if (
15314             $packed_lines <= 2    # probably can fit in 2 lines
15315             && $item_count < 9    # doesn't have too many items
15316             && $opening_environment eq 'BLOCK'    # not a sub-container
15317             && $opening_token       eq '('        # is paren list
15318           )
15319         {
15320
15321             # Shortcut method 1: for -lp and just one comma:
15322             # This is a no-brainer, just break at the comma.
15323             if (
15324                 $rOpts_line_up_parentheses        # -lp
15325                 && $item_count == 2               # two items, one comma
15326                 && !$must_break_open
15327               )
15328             {
15329                 my $i_break = $$rcomma_index[0];
15330                 set_forced_breakpoint($i_break);
15331                 $$rdo_not_break_apart = 1;
15332                 set_non_alignment_flags( $comma_count, $rcomma_index );
15333                 return;
15334
15335             }
15336
15337             # method 2 is for most small ragged lists which might look
15338             # best if not displayed as a table.
15339             if (
15340                 ( $number_of_fields == 2 && $item_count == 3 )
15341                 || (
15342                     $new_identifier_count > 0    # isn't all quotes
15343                     && $sparsity > 0.15
15344                 )    # would be fairly spaced gaps if aligned
15345               )
15346             {
15347
15348                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15349                     $ri_ragged_break_list );
15350                 ++$break_count if ($use_separate_first_term);
15351
15352                 # NOTE: we should really use the true break count here,
15353                 # which can be greater if there are large terms and
15354                 # little space, but usually this will work well enough.
15355                 unless ($must_break_open) {
15356
15357                     if ( $break_count <= 1 ) {
15358                         $$rdo_not_break_apart = 1;
15359                     }
15360                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15361                     {
15362                         $$rdo_not_break_apart = 1;
15363                     }
15364                 }
15365                 set_non_alignment_flags( $comma_count, $rcomma_index );
15366                 return;
15367             }
15368
15369         }    # end shortcut methods
15370
15371         # debug stuff
15372
15373         FORMATTER_DEBUG_FLAG_SPARSE && do {
15374             print
15375 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
15376
15377         };
15378
15379         #---------------------------------------------------------------
15380         # Compound List Rule 2:
15381         # If this list is too long for one line, and it is an item of a
15382         # larger list, then we must format it, regardless of sparsity
15383         # (ian.t).  One reason that we have to do this is to trigger
15384         # Compound List Rule 1, above, which causes breaks at all commas of
15385         # all outer lists.  In this way, the structure will be properly
15386         # displayed.
15387         #---------------------------------------------------------------
15388
15389         # Decide if this list is too long for one line unless broken
15390         my $total_columns = table_columns_available($i_opening_paren);
15391         my $too_long      = $packed_columns > $total_columns;
15392
15393         # For a paren list, include the length of the token just before the
15394         # '(' because this is likely a sub call, and we would have to
15395         # include the sub name on the same line as the list.  This is still
15396         # imprecise, but not too bad.  (steve.t)
15397         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
15398
15399             $too_long = excess_line_length( $i_opening_minus,
15400                 $i_effective_last_comma + 1 ) > 0;
15401         }
15402
15403         # FIXME: For an item after a '=>', try to include the length of the
15404         # thing before the '=>'.  This is crude and should be improved by
15405         # actually looking back token by token.
15406         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
15407             my $i_opening_minus = $i_opening_paren - 4;
15408             if ( $i_opening_minus >= 0 ) {
15409                 $too_long = excess_line_length( $i_opening_minus,
15410                     $i_effective_last_comma + 1 ) > 0;
15411             }
15412         }
15413
15414         # Always break lists contained in '[' and '{' if too long for 1 line,
15415         # and always break lists which are too long and part of a more complex
15416         # structure.
15417         my $must_break_open_container = $must_break_open
15418           || ( $too_long
15419             && ( $in_hierarchical_list || $opening_token ne '(' ) );
15420
15421 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
15422
15423         #---------------------------------------------------------------
15424         # The main decision:
15425         # Now decide if we will align the data into aligned columns.  Do not
15426         # attempt to align columns if this is a tiny table or it would be
15427         # too spaced.  It seems that the more packed lines we have, the
15428         # sparser the list that can be allowed and still look ok.
15429         #---------------------------------------------------------------
15430
15431         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
15432             || ( $formatted_lines < 2 )
15433             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
15434           )
15435         {
15436
15437             #---------------------------------------------------------------
15438             # too sparse: would look ugly if aligned in a table;
15439             #---------------------------------------------------------------
15440
15441             # use old breakpoints if this is a 'big' list
15442             # FIXME: goal is to improve set_ragged_breakpoints so that
15443             # this is not necessary.
15444             if ( $packed_lines > 2 && $item_count > 10 ) {
15445                 write_logfile_entry("List sparse: using old breakpoints\n");
15446                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
15447             }
15448
15449             # let the continuation logic handle it if 2 lines
15450             else {
15451
15452                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15453                     $ri_ragged_break_list );
15454                 ++$break_count if ($use_separate_first_term);
15455
15456                 unless ($must_break_open_container) {
15457                     if ( $break_count <= 1 ) {
15458                         $$rdo_not_break_apart = 1;
15459                     }
15460                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15461                     {
15462                         $$rdo_not_break_apart = 1;
15463                     }
15464                 }
15465                 set_non_alignment_flags( $comma_count, $rcomma_index );
15466             }
15467             return;
15468         }
15469
15470         #---------------------------------------------------------------
15471         # go ahead and format as a table
15472         #---------------------------------------------------------------
15473         write_logfile_entry(
15474             "List: auto formatting with $number_of_fields fields/row\n");
15475
15476         my $j_first_break =
15477           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15478
15479         for (
15480             my $j = $j_first_break ;
15481             $j < $comma_count ;
15482             $j += $number_of_fields
15483           )
15484         {
15485             my $i = $$rcomma_index[$j];
15486             set_forced_breakpoint($i);
15487         }
15488         return;
15489     }
15490 }
15491
15492 sub set_non_alignment_flags {
15493
15494     # set flag which indicates that these commas should not be
15495     # aligned
15496     my ( $comma_count, $rcomma_index ) = @_;
15497     foreach ( 0 .. $comma_count - 1 ) {
15498         $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
15499     }
15500 }
15501
15502 sub study_list_complexity {
15503
15504     # Look for complex tables which should be formatted with one term per line.
15505     # Returns the following:
15506     #
15507     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
15508     #    which are hard to read
15509     #  $number_of_fields_best = suggested number of fields based on
15510     #    complexity; = 0 if any number may be used.
15511     #
15512     my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
15513     my $item_count            = @{$ri_term_begin};
15514     my $complex_item_count    = 0;
15515     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
15516     my $i_max                 = @{$ritem_lengths} - 1;
15517     ##my @item_complexity;
15518
15519     my $i_last_last_break = -3;
15520     my $i_last_break      = -2;
15521     my @i_ragged_break_list;
15522
15523     my $definitely_complex = 30;
15524     my $definitely_simple  = 12;
15525     my $quote_count        = 0;
15526
15527     for my $i ( 0 .. $i_max ) {
15528         my $ib = $ri_term_begin->[$i];
15529         my $ie = $ri_term_end->[$i];
15530
15531         # define complexity: start with the actual term length
15532         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
15533
15534         ##TBD: join types here and check for variations
15535         ##my $str=join "", @tokens_to_go[$ib..$ie];
15536
15537         my $is_quote = 0;
15538         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
15539             $is_quote = 1;
15540             $quote_count++;
15541         }
15542         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
15543             $quote_count++;
15544         }
15545
15546         if ( $ib eq $ie ) {
15547             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
15548                 $complex_item_count++;
15549                 $weighted_length *= 2;
15550             }
15551             else {
15552             }
15553         }
15554         else {
15555             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
15556                 $complex_item_count++;
15557                 $weighted_length *= 2;
15558             }
15559             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
15560                 $weighted_length += 4;
15561             }
15562         }
15563
15564         # add weight for extra tokens.
15565         $weighted_length += 2 * ( $ie - $ib );
15566
15567 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
15568 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
15569
15570 ##push @item_complexity, $weighted_length;
15571
15572         # now mark a ragged break after this item it if it is 'long and
15573         # complex':
15574         if ( $weighted_length >= $definitely_complex ) {
15575
15576             # if we broke after the previous term
15577             # then break before it too
15578             if (   $i_last_break == $i - 1
15579                 && $i > 1
15580                 && $i_last_last_break != $i - 2 )
15581             {
15582
15583                 ## FIXME: don't strand a small term
15584                 pop @i_ragged_break_list;
15585                 push @i_ragged_break_list, $i - 2;
15586                 push @i_ragged_break_list, $i - 1;
15587             }
15588
15589             push @i_ragged_break_list, $i;
15590             $i_last_last_break = $i_last_break;
15591             $i_last_break      = $i;
15592         }
15593
15594         # don't break before a small last term -- it will
15595         # not look good on a line by itself.
15596         elsif ($i == $i_max
15597             && $i_last_break == $i - 1
15598             && $weighted_length <= $definitely_simple )
15599         {
15600             pop @i_ragged_break_list;
15601         }
15602     }
15603
15604     my $identifier_count = $i_max + 1 - $quote_count;
15605
15606     # Need more tuning here..
15607     if (   $max_width > 12
15608         && $complex_item_count > $item_count / 2
15609         && $number_of_fields_best != 2 )
15610     {
15611         $number_of_fields_best = 1;
15612     }
15613
15614     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
15615 }
15616
15617 sub get_maximum_fields_wanted {
15618
15619     # Not all tables look good with more than one field of items.
15620     # This routine looks at a table and decides if it should be
15621     # formatted with just one field or not.
15622     # This coding is still under development.
15623     my ($ritem_lengths) = @_;
15624
15625     my $number_of_fields_best = 0;
15626
15627     # For just a few items, we tentatively assume just 1 field.
15628     my $item_count = @{$ritem_lengths};
15629     if ( $item_count <= 5 ) {
15630         $number_of_fields_best = 1;
15631     }
15632
15633     # For larger tables, look at it both ways and see what looks best
15634     else {
15635
15636         my $is_odd            = 1;
15637         my @max_length        = ( 0, 0 );
15638         my @last_length_2     = ( undef, undef );
15639         my @first_length_2    = ( undef, undef );
15640         my $last_length       = undef;
15641         my $total_variation_1 = 0;
15642         my $total_variation_2 = 0;
15643         my @total_variation_2 = ( 0, 0 );
15644         for ( my $j = 0 ; $j < $item_count ; $j++ ) {
15645
15646             $is_odd = 1 - $is_odd;
15647             my $length = $ritem_lengths->[$j];
15648             if ( $length > $max_length[$is_odd] ) {
15649                 $max_length[$is_odd] = $length;
15650             }
15651
15652             if ( defined($last_length) ) {
15653                 my $dl = abs( $length - $last_length );
15654                 $total_variation_1 += $dl;
15655             }
15656             $last_length = $length;
15657
15658             my $ll = $last_length_2[$is_odd];
15659             if ( defined($ll) ) {
15660                 my $dl = abs( $length - $ll );
15661                 $total_variation_2[$is_odd] += $dl;
15662             }
15663             else {
15664                 $first_length_2[$is_odd] = $length;
15665             }
15666             $last_length_2[$is_odd] = $length;
15667         }
15668         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
15669
15670         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
15671         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
15672             $number_of_fields_best = 1;
15673         }
15674     }
15675     return ($number_of_fields_best);
15676 }
15677
15678 sub table_columns_available {
15679     my $i_first_comma = shift;
15680     my $columns =
15681       $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
15682
15683     # Patch: the vertical formatter does not line up lines whose lengths
15684     # exactly equal the available line length because of allowances
15685     # that must be made for side comments.  Therefore, the number of
15686     # available columns is reduced by 1 character.
15687     $columns -= 1;
15688     return $columns;
15689 }
15690
15691 sub maximum_number_of_fields {
15692
15693     # how many fields will fit in the available space?
15694     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
15695     my $max_pairs        = int( $columns / $pair_width );
15696     my $number_of_fields = $max_pairs * 2;
15697     if (   $odd_or_even == 1
15698         && $max_pairs * $pair_width + $max_width <= $columns )
15699     {
15700         $number_of_fields++;
15701     }
15702     return $number_of_fields;
15703 }
15704
15705 sub compactify_table {
15706
15707     # given a table with a certain number of fields and a certain number
15708     # of lines, see if reducing the number of fields will make it look
15709     # better.
15710     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
15711     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
15712         my $min_fields;
15713
15714         for (
15715             $min_fields = $number_of_fields ;
15716             $min_fields >= $odd_or_even
15717             && $min_fields * $formatted_lines >= $item_count ;
15718             $min_fields -= $odd_or_even
15719           )
15720         {
15721             $number_of_fields = $min_fields;
15722         }
15723     }
15724     return $number_of_fields;
15725 }
15726
15727 sub set_ragged_breakpoints {
15728
15729     # Set breakpoints in a list that cannot be formatted nicely as a
15730     # table.
15731     my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
15732
15733     my $break_count = 0;
15734     foreach (@$ri_ragged_break_list) {
15735         my $j = $ri_term_comma->[$_];
15736         if ($j) {
15737             set_forced_breakpoint($j);
15738             $break_count++;
15739         }
15740     }
15741     return $break_count;
15742 }
15743
15744 sub copy_old_breakpoints {
15745     my ( $i_first_comma, $i_last_comma ) = @_;
15746     for my $i ( $i_first_comma .. $i_last_comma ) {
15747         if ( $old_breakpoint_to_go[$i] ) {
15748             set_forced_breakpoint($i);
15749         }
15750     }
15751 }
15752
15753 sub set_nobreaks {
15754     my ( $i, $j ) = @_;
15755     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
15756
15757         FORMATTER_DEBUG_FLAG_NOBREAK && do {
15758             my ( $a, $b, $c ) = caller();
15759             print(
15760 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
15761             );
15762         };
15763
15764         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
15765     }
15766
15767     # shouldn't happen; non-critical error
15768     else {
15769         FORMATTER_DEBUG_FLAG_NOBREAK && do {
15770             my ( $a, $b, $c ) = caller();
15771             print(
15772 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
15773             );
15774         };
15775     }
15776 }
15777
15778 sub set_fake_breakpoint {
15779
15780     # Just bump up the breakpoint count as a signal that there are breaks.
15781     # This is useful if we have breaks but may want to postpone deciding where
15782     # to make them.
15783     $forced_breakpoint_count++;
15784 }
15785
15786 sub set_forced_breakpoint {
15787     my $i = shift;
15788
15789     return unless defined $i && $i >= 0;
15790
15791     # when called with certain tokens, use bond strengths to decide
15792     # if we break before or after it
15793     my $token = $tokens_to_go[$i];
15794
15795     if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
15796         if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
15797     }
15798
15799     # breaks are forced before 'if' and 'unless'
15800     elsif ( $is_if_unless{$token} ) { $i-- }
15801
15802     if ( $i >= 0 && $i <= $max_index_to_go ) {
15803         my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
15804
15805         FORMATTER_DEBUG_FLAG_FORCE && do {
15806             my ( $a, $b, $c ) = caller();
15807             print
15808 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
15809         };
15810
15811         if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
15812             $forced_breakpoint_to_go[$i_nonblank] = 1;
15813
15814             if ( $i_nonblank > $index_max_forced_break ) {
15815                 $index_max_forced_break = $i_nonblank;
15816             }
15817             $forced_breakpoint_count++;
15818             $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
15819               $i_nonblank;
15820
15821             # if we break at an opening container..break at the closing
15822             if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
15823                 set_closing_breakpoint($i_nonblank);
15824             }
15825         }
15826     }
15827 }
15828
15829 sub clear_breakpoint_undo_stack {
15830     $forced_breakpoint_undo_count = 0;
15831 }
15832
15833 sub undo_forced_breakpoint_stack {
15834
15835     my $i_start = shift;
15836     if ( $i_start < 0 ) {
15837         $i_start = 0;
15838         my ( $a, $b, $c ) = caller();
15839         warning(
15840 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
15841         );
15842     }
15843
15844     while ( $forced_breakpoint_undo_count > $i_start ) {
15845         my $i =
15846           $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
15847         if ( $i >= 0 && $i <= $max_index_to_go ) {
15848             $forced_breakpoint_to_go[$i] = 0;
15849             $forced_breakpoint_count--;
15850
15851             FORMATTER_DEBUG_FLAG_UNDOBP && do {
15852                 my ( $a, $b, $c ) = caller();
15853                 print(
15854 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
15855                 );
15856             };
15857         }
15858
15859         # shouldn't happen, but not a critical error
15860         else {
15861             FORMATTER_DEBUG_FLAG_UNDOBP && do {
15862                 my ( $a, $b, $c ) = caller();
15863                 print(
15864 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
15865                 );
15866             };
15867         }
15868     }
15869 }
15870
15871 {    # begin recombine_breakpoints
15872
15873     my %is_amp_amp;
15874     my %is_ternary;
15875     my %is_math_op;
15876
15877     BEGIN {
15878
15879         @_ = qw( && || );
15880         @is_amp_amp{@_} = (1) x scalar(@_);
15881
15882         @_ = qw( ? : );
15883         @is_ternary{@_} = (1) x scalar(@_);
15884
15885         @_ = qw( + - * / );
15886         @is_math_op{@_} = (1) x scalar(@_);
15887     }
15888
15889     sub recombine_breakpoints {
15890
15891         # sub set_continuation_breaks is very liberal in setting line breaks
15892         # for long lines, always setting breaks at good breakpoints, even
15893         # when that creates small lines.  Occasionally small line fragments
15894         # are produced which would look better if they were combined.
15895         # That's the task of this routine, recombine_breakpoints.
15896         #
15897         # $ri_beg = ref to array of BEGinning indexes of each line
15898         # $ri_end = ref to array of ENDing indexes of each line
15899         my ( $ri_beg, $ri_end ) = @_;
15900
15901         my $more_to_do = 1;
15902
15903         # We keep looping over all of the lines of this batch
15904         # until there are no more possible recombinations
15905         my $nmax_last = @$ri_end;
15906         while ($more_to_do) {
15907             my $n_best = 0;
15908             my $bs_best;
15909             my $n;
15910             my $nmax = @$ri_end - 1;
15911
15912             # safety check for infinite loop
15913             unless ( $nmax < $nmax_last ) {
15914
15915             # shouldn't happen because splice below decreases nmax on each pass:
15916             # but i get paranoid sometimes
15917                 die "Program bug-infinite loop in recombine breakpoints\n";
15918             }
15919             $nmax_last  = $nmax;
15920             $more_to_do = 0;
15921             my $previous_outdentable_closing_paren;
15922             my $leading_amp_count = 0;
15923             my $this_line_is_semicolon_terminated;
15924
15925             # loop over all remaining lines in this batch
15926             for $n ( 1 .. $nmax ) {
15927
15928                 #----------------------------------------------------------
15929                 # If we join the current pair of lines,
15930                 # line $n-1 will become the left part of the joined line
15931                 # line $n will become the right part of the joined line
15932                 #
15933                 # Here are Indexes of the endpoint tokens of the two lines:
15934                 #
15935                 #  -----line $n-1--- | -----line $n-----
15936                 #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
15937                 #                    ^
15938                 #                    |
15939                 # We want to decide if we should remove the line break
15940                 # betwen the tokens at $iend_1 and $ibeg_2
15941                 #
15942                 # We will apply a number of ad-hoc tests to see if joining
15943                 # here will look ok.  The code will just issue a 'next'
15944                 # command if the join doesn't look good.  If we get through
15945                 # the gauntlet of tests, the lines will be recombined.
15946                 #----------------------------------------------------------
15947                 #
15948                 # beginning and ending tokens of the lines we are working on
15949                 my $ibeg_1 = $$ri_beg[ $n - 1 ];
15950                 my $iend_1 = $$ri_end[ $n - 1 ];
15951                 my $iend_2 = $$ri_end[$n];
15952                 my $ibeg_2 = $$ri_beg[$n];
15953
15954                 my $ibeg_nmax = $$ri_beg[$nmax];
15955
15956                 # some beginning indexes of other lines, which may not exist
15957                 my $ibeg_0 = $n > 1          ? $$ri_beg[ $n - 2 ] : -1;
15958                 my $ibeg_3 = $n < $nmax      ? $$ri_beg[ $n + 1 ] : -1;
15959                 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
15960
15961                 my $bs_tweak = 0;
15962
15963                 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15964                 #        $nesting_depth_to_go[$ibeg_1] );
15965
15966 ##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
15967
15968                 # If line $n is the last line, we set some flags and
15969                 # do any special checks for it
15970                 if ( $n == $nmax ) {
15971
15972                     # a terminal '{' should stay where it is
15973                     next if $types_to_go[$ibeg_2] eq '{';
15974
15975                     # set flag if statement $n ends in ';'
15976                     $this_line_is_semicolon_terminated =
15977                       $types_to_go[$iend_2] eq ';'
15978
15979                       # with possible side comment
15980                       || ( $types_to_go[$iend_2] eq '#'
15981                         && $iend_2 - $ibeg_2 >= 2
15982                         && $types_to_go[ $iend_2 - 2 ] eq ';'
15983                         && $types_to_go[ $iend_2 - 1 ] eq 'b' );
15984                 }
15985
15986                 #----------------------------------------------------------
15987                 # Section 1: examine token at $iend_1 (right end of first line
15988                 # of pair)
15989                 #----------------------------------------------------------
15990
15991                 # an isolated '}' may join with a ';' terminated segment
15992                 if ( $types_to_go[$iend_1] eq '}' ) {
15993
15994                     # Check for cases where combining a semicolon terminated
15995                     # statement with a previous isolated closing paren will
15996                     # allow the combined line to be outdented.  This is
15997                     # generally a good move.  For example, we can join up
15998                     # the last two lines here:
15999                     #  (
16000                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16001                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16002                     #    )
16003                     #    = stat($file);
16004                     #
16005                     # to get:
16006                     #  (
16007                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16008                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16009                     #  ) = stat($file);
16010                     #
16011                     # which makes the parens line up.
16012                     #
16013                     # Another example, from Joe Matarazzo, probably looks best
16014                     # with the 'or' clause appended to the trailing paren:
16015                     #  $self->some_method(
16016                     #      PARAM1 => 'foo',
16017                     #      PARAM2 => 'bar'
16018                     #  ) or die "Some_method didn't work";
16019                     #
16020                     $previous_outdentable_closing_paren =
16021                       $this_line_is_semicolon_terminated    # ends in ';'
16022                       && $ibeg_1 == $iend_1    # only one token on last line
16023                       && $tokens_to_go[$iend_1] eq
16024                       ')'                      # must be structural paren
16025
16026                       # only &&, ||, and : if no others seen
16027                       # (but note: our count made below could be wrong
16028                       # due to intervening comments)
16029                       && ( $leading_amp_count == 0
16030                         || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
16031
16032                       # but leading colons probably line up with with a
16033                       # previous colon or question (count could be wrong).
16034                       && $types_to_go[$ibeg_2] ne ':'
16035
16036                       # only one step in depth allowed.  this line must not
16037                       # begin with a ')' itself.
16038                       && ( $nesting_depth_to_go[$iend_1] ==
16039                         $nesting_depth_to_go[$iend_2] + 1 );
16040
16041                     # YVES patch 2 of 2:
16042                     # Allow cuddled eval chains, like this:
16043                     #   eval {
16044                     #       #STUFF;
16045                     #       1; # return true
16046                     #   } or do {
16047                     #       #handle error
16048                     #   };
16049                     # This patch works together with a patch in
16050                     # setting adjusted indentation (where the closing eval
16051                     # brace is outdented if possible).
16052                     # The problem is that an 'eval' block has continuation
16053                     # indentation and it looks better to undo it in some
16054                     # cases.  If we do not use this patch we would get:
16055                     #   eval {
16056                     #       #STUFF;
16057                     #       1; # return true
16058                     #       }
16059                     #       or do {
16060                     #       #handle error
16061                     #     };
16062                     # The alternative, for uncuddled style, is to create
16063                     # a patch in set_adjusted_indentation which undoes
16064                     # the indentation of a leading line like 'or do {'.
16065                     # This doesn't work well with -icb through
16066                     if (
16067                            $block_type_to_go[$iend_1] eq 'eval'
16068                         && !$rOpts->{'line-up-parentheses'}
16069                         && !$rOpts->{'indent-closing-brace'}
16070                         && $tokens_to_go[$iend_2] eq '{'
16071                         && (
16072                             ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ )
16073                             || (   $types_to_go[$ibeg_2] eq 'k'
16074                                 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16075                             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16076                         )
16077                       )
16078                     {
16079                         $previous_outdentable_closing_paren ||= 1;
16080                     }
16081
16082                     next
16083                       unless (
16084                         $previous_outdentable_closing_paren
16085
16086                         # handle '.' and '?' specially below
16087                         || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
16088                       );
16089                 }
16090
16091                 # YVES
16092                 # honor breaks at opening brace
16093                 # Added to prevent recombining something like this:
16094                 #  } || eval { package main;
16095                 elsif ( $types_to_go[$iend_1] eq '{' ) {
16096                     next if $forced_breakpoint_to_go[$iend_1];
16097                 }
16098
16099                 # do not recombine lines with ending &&, ||,
16100                 elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
16101                     next unless $want_break_before{ $types_to_go[$iend_1] };
16102                 }
16103
16104                 # keep a terminal colon
16105                 elsif ( $types_to_go[$iend_1] eq ':' ) {
16106                     next unless $want_break_before{ $types_to_go[$iend_1] };
16107                 }
16108
16109                 # Identify and recombine a broken ?/: chain
16110                 elsif ( $types_to_go[$iend_1] eq '?' ) {
16111
16112                     # Do not recombine different levels
16113                     next
16114                       if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16115
16116                     # do not recombine unless next line ends in :
16117                     next unless $types_to_go[$iend_2] eq ':';
16118                 }
16119
16120                 # for lines ending in a comma...
16121                 elsif ( $types_to_go[$iend_1] eq ',' ) {
16122
16123                     # Do not recombine at comma which is following the
16124                     # input bias.
16125                     # TODO: might be best to make a special flag
16126                     next if ( $old_breakpoint_to_go[$iend_1] );
16127
16128                  # an isolated '},' may join with an identifier + ';'
16129                  # this is useful for the class of a 'bless' statement (bless.t)
16130                     if (   $types_to_go[$ibeg_1] eq '}'
16131                         && $types_to_go[$ibeg_2] eq 'i' )
16132                     {
16133                         next
16134                           unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16135                             && ( $iend_2 == ( $ibeg_2 + 1 ) )
16136                             && $this_line_is_semicolon_terminated );
16137
16138                         # override breakpoint
16139                         $forced_breakpoint_to_go[$iend_1] = 0;
16140                     }
16141
16142                     # but otherwise ..
16143                     else {
16144
16145                         # do not recombine after a comma unless this will leave
16146                         # just 1 more line
16147                         next unless ( $n + 1 >= $nmax );
16148
16149                     # do not recombine if there is a change in indentation depth
16150                         next
16151                           if (
16152                             $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
16153
16154                         # do not recombine a "complex expression" after a
16155                         # comma.  "complex" means no parens.
16156                         my $saw_paren;
16157                         foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16158                             if ( $tokens_to_go[$ii] eq '(' ) {
16159                                 $saw_paren = 1;
16160                                 last;
16161                             }
16162                         }
16163                         next if $saw_paren;
16164                     }
16165                 }
16166
16167                 # opening paren..
16168                 elsif ( $types_to_go[$iend_1] eq '(' ) {
16169
16170                     # No longer doing this
16171                 }
16172
16173                 elsif ( $types_to_go[$iend_1] eq ')' ) {
16174
16175                     # No longer doing this
16176                 }
16177
16178                 # keep a terminal for-semicolon
16179                 elsif ( $types_to_go[$iend_1] eq 'f' ) {
16180                     next;
16181                 }
16182
16183                 # if '=' at end of line ...
16184                 elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
16185
16186                     my $is_short_quote =
16187                       (      $types_to_go[$ibeg_2] eq 'Q'
16188                           && $ibeg_2 == $iend_2
16189                           && length( $tokens_to_go[$ibeg_2] ) <
16190                           $rOpts_short_concatenation_item_length );
16191                     my $is_ternary =
16192                       ( $types_to_go[$ibeg_1] eq '?'
16193                           && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
16194
16195                     # always join an isolated '=', a short quote, or if this
16196                     # will put ?/: at start of adjacent lines
16197                     if (   $ibeg_1 != $iend_1
16198                         && !$is_short_quote
16199                         && !$is_ternary )
16200                     {
16201                         next
16202                           unless (
16203                             (
16204
16205                                 # unless we can reduce this to two lines
16206                                 $nmax < $n + 2
16207
16208                              # or three lines, the last with a leading semicolon
16209                                 || (   $nmax == $n + 2
16210                                     && $types_to_go[$ibeg_nmax] eq ';' )
16211
16212                                 # or the next line ends with a here doc
16213                                 || $types_to_go[$iend_2] eq 'h'
16214
16215                                # or the next line ends in an open paren or brace
16216                                # and the break hasn't been forced [dima.t]
16217                                 || (  !$forced_breakpoint_to_go[$iend_1]
16218                                     && $types_to_go[$iend_2] eq '{' )
16219                             )
16220
16221                             # do not recombine if the two lines might align well
16222                             # this is a very approximate test for this
16223                             && (   $ibeg_3 >= 0
16224                                 && $types_to_go[$ibeg_2] ne
16225                                 $types_to_go[$ibeg_3] )
16226                           );
16227
16228                         # -lp users often prefer this:
16229                         #  my $title = function($env, $env, $sysarea,
16230                         #                       "bubba Borrower Entry");
16231                         #  so we will recombine if -lp is used we have ending
16232                         #  comma
16233                         if (  !$rOpts_line_up_parentheses
16234                             || $types_to_go[$iend_2] ne ',' )
16235                         {
16236
16237                            # otherwise, scan the rhs line up to last token for
16238                            # complexity.  Note that we are not counting the last
16239                            # token in case it is an opening paren.
16240                             my $tv    = 0;
16241                             my $depth = $nesting_depth_to_go[$ibeg_2];
16242                             for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
16243                                 if ( $nesting_depth_to_go[$i] != $depth ) {
16244                                     $tv++;
16245                                     last if ( $tv > 1 );
16246                                 }
16247                                 $depth = $nesting_depth_to_go[$i];
16248                             }
16249
16250                          # ok to recombine if no level changes before last token
16251                             if ( $tv > 0 ) {
16252
16253                                 # otherwise, do not recombine if more than two
16254                                 # level changes.
16255                                 next if ( $tv > 1 );
16256
16257                               # check total complexity of the two adjacent lines
16258                               # that will occur if we do this join
16259                                 my $istop =
16260                                   ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
16261                                 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
16262                                     if ( $nesting_depth_to_go[$i] != $depth ) {
16263                                         $tv++;
16264                                         last if ( $tv > 2 );
16265                                     }
16266                                     $depth = $nesting_depth_to_go[$i];
16267                                 }
16268
16269                         # do not recombine if total is more than 2 level changes
16270                                 next if ( $tv > 2 );
16271                             }
16272                         }
16273                     }
16274
16275                     unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16276                         $forced_breakpoint_to_go[$iend_1] = 0;
16277                     }
16278                 }
16279
16280                 # for keywords..
16281                 elsif ( $types_to_go[$iend_1] eq 'k' ) {
16282
16283                     # make major control keywords stand out
16284                     # (recombine.t)
16285                     next
16286                       if (
16287
16288                         #/^(last|next|redo|return)$/
16289                         $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16290
16291                         # but only if followed by multiple lines
16292                         && $n < $nmax
16293                       );
16294
16295                     if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16296                         next
16297                           unless $want_break_before{ $tokens_to_go[$iend_1] };
16298                     }
16299                 }
16300
16301                 # handle trailing + - * /
16302                 elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
16303
16304                     # combine lines if next line has single number
16305                     # or a short term followed by same operator
16306                     my $i_next_nonblank = $ibeg_2;
16307                     my $i_next_next     = $i_next_nonblank + 1;
16308                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16309                     my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
16310                       && (
16311                         $i_next_nonblank == $iend_2
16312                         || (   $i_next_next == $iend_2
16313                             && $is_math_op{ $types_to_go[$i_next_next] } )
16314                         || $types_to_go[$i_next_next] eq ';'
16315                       );
16316
16317                     # find token before last operator of previous line
16318                     my $iend_1_minus = $iend_1;
16319                     $iend_1_minus--
16320                       if ( $iend_1_minus > $ibeg_1 );
16321                     $iend_1_minus--
16322                       if ( $types_to_go[$iend_1_minus] eq 'b'
16323                         && $iend_1_minus > $ibeg_1 );
16324
16325                     my $short_term_follows =
16326                       (      $types_to_go[$iend_2] eq $types_to_go[$iend_1]
16327                           && $types_to_go[$iend_1_minus] =~ /^[in]$/
16328                           && $iend_2 <= $ibeg_2 + 2
16329                           && length( $tokens_to_go[$ibeg_2] ) <
16330                           $rOpts_short_concatenation_item_length );
16331
16332                     next
16333                       unless ( $number_follows || $short_term_follows );
16334                 }
16335
16336                 #----------------------------------------------------------
16337                 # Section 2: Now examine token at $ibeg_2 (left end of second
16338                 # line of pair)
16339                 #----------------------------------------------------------
16340
16341                 # join lines identified above as capable of
16342                 # causing an outdented line with leading closing paren
16343                 if ($previous_outdentable_closing_paren) {
16344                     $forced_breakpoint_to_go[$iend_1] = 0;
16345                 }
16346
16347                 # do not recombine lines with leading :
16348                 elsif ( $types_to_go[$ibeg_2] eq ':' ) {
16349                     $leading_amp_count++;
16350                     next if $want_break_before{ $types_to_go[$ibeg_2] };
16351                 }
16352
16353                 # handle lines with leading &&, ||
16354                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16355
16356                     $leading_amp_count++;
16357
16358                     # ok to recombine if it follows a ? or :
16359                     # and is followed by an open paren..
16360                     my $ok =
16361                       (      $is_ternary{ $types_to_go[$ibeg_1] }
16362                           && $tokens_to_go[$iend_2] eq '(' )
16363
16364                     # or is followed by a ? or : at same depth
16365                     #
16366                     # We are looking for something like this. We can
16367                     # recombine the && line with the line above to make the
16368                     # structure more clear:
16369                     #  return
16370                     #    exists $G->{Attr}->{V}
16371                     #    && exists $G->{Attr}->{V}->{$u}
16372                     #    ? %{ $G->{Attr}->{V}->{$u} }
16373                     #    : ();
16374                     #
16375                     # We should probably leave something like this alone:
16376                     #  return
16377                     #       exists $G->{Attr}->{E}
16378                     #    && exists $G->{Attr}->{E}->{$u}
16379                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
16380                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16381                     #    : ();
16382                     # so that we either have all of the &&'s (or ||'s)
16383                     # on one line, as in the first example, or break at
16384                     # each one as in the second example.  However, it
16385                     # sometimes makes things worse to check for this because
16386                     # it prevents multiple recombinations.  So this is not done.
16387                       || ( $ibeg_3 >= 0
16388                         && $is_ternary{ $types_to_go[$ibeg_3] }
16389                         && $nesting_depth_to_go[$ibeg_3] ==
16390                         $nesting_depth_to_go[$ibeg_2] );
16391
16392                     next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
16393                     $forced_breakpoint_to_go[$iend_1] = 0;
16394
16395                     # tweak the bond strength to give this joint priority
16396                     # over ? and :
16397                     $bs_tweak = 0.25;
16398                 }
16399
16400                 # Identify and recombine a broken ?/: chain
16401                 elsif ( $types_to_go[$ibeg_2] eq '?' ) {
16402
16403                     # Do not recombine different levels
16404                     my $lev = $levels_to_go[$ibeg_2];
16405                     next if ( $lev ne $levels_to_go[$ibeg_1] );
16406
16407                     # Do not recombine a '?' if either next line or
16408                     # previous line does not start with a ':'.  The reasons
16409                     # are that (1) no alignment of the ? will be possible
16410                     # and (2) the expression is somewhat complex, so the
16411                     # '?' is harder to see in the interior of the line.
16412                     my $follows_colon =
16413                       $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
16414                     my $precedes_colon =
16415                       $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16416                     next unless ( $follows_colon || $precedes_colon );
16417
16418                     # we will always combining a ? line following a : line
16419                     if ( !$follows_colon ) {
16420
16421                         # ...otherwise recombine only if it looks like a chain.
16422                         # we will just look at a few nearby lines to see if
16423                         # this looks like a chain.
16424                         my $local_count = 0;
16425                         foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
16426                             $local_count++
16427                               if $ii >= 0
16428                                   && $types_to_go[$ii] eq ':'
16429                                   && $levels_to_go[$ii] == $lev;
16430                         }
16431                         next unless ( $local_count > 1 );
16432                     }
16433                     $forced_breakpoint_to_go[$iend_1] = 0;
16434                 }
16435
16436                 # do not recombine lines with leading '.'
16437                 elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
16438                     my $i_next_nonblank = $ibeg_2 + 1;
16439                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16440                         $i_next_nonblank++;
16441                     }
16442
16443                     next
16444                       unless (
16445
16446                    # ... unless there is just one and we can reduce
16447                    # this to two lines if we do.  For example, this
16448                    #
16449                    #
16450                    #  $bodyA .=
16451                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16452                    #
16453                    #  looks better than this:
16454                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16455                    #    . '$args .= $pat;'
16456
16457                         (
16458                                $n == 2
16459                             && $n == $nmax
16460                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
16461                         )
16462
16463                         #  ... or this would strand a short quote , like this
16464                         #                . "some long qoute"
16465                         #                . "\n";
16466                         || (   $types_to_go[$i_next_nonblank] eq 'Q'
16467                             && $i_next_nonblank >= $iend_2 - 1
16468                             && length( $tokens_to_go[$i_next_nonblank] ) <
16469                             $rOpts_short_concatenation_item_length )
16470                       );
16471                 }
16472
16473                 # handle leading keyword..
16474                 elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
16475
16476                     # handle leading "or"
16477                     if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16478                         next
16479                           unless (
16480                             $this_line_is_semicolon_terminated
16481                             && (
16482
16483                                 # following 'if' or 'unless' or 'or'
16484                                 $types_to_go[$ibeg_1] eq 'k'
16485                                 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16486
16487                                 # important: only combine a very simple or
16488                                 # statement because the step below may have
16489                                 # combined a trailing 'and' with this or,
16490                                 # and we do not want to then combine
16491                                 # everything together
16492                                 && ( $iend_2 - $ibeg_2 <= 7 )
16493                             )
16494                           );
16495                     }
16496
16497                     # handle leading 'and'
16498                     elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16499
16500                         # Decide if we will combine a single terminal 'and'
16501                         # after an 'if' or 'unless'.
16502
16503                         #     This looks best with the 'and' on the same
16504                         #     line as the 'if':
16505                         #
16506                         #         $a = 1
16507                         #           if $seconds and $nu < 2;
16508                         #
16509                         #     But this looks better as shown:
16510                         #
16511                         #         $a = 1
16512                         #           if !$this->{Parents}{$_}
16513                         #           or $this->{Parents}{$_} eq $_;
16514                         #
16515                         next
16516                           unless (
16517                             $this_line_is_semicolon_terminated
16518                             && (
16519
16520                                 # following 'if' or 'unless' or 'or'
16521                                 $types_to_go[$ibeg_1] eq 'k'
16522                                 && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
16523                                     || $tokens_to_go[$ibeg_1] eq 'or' )
16524                             )
16525                           );
16526                     }
16527
16528                     # handle leading "if" and "unless"
16529                     elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16530
16531                       # FIXME: This is still experimental..may not be too useful
16532                         next
16533                           unless (
16534                             $this_line_is_semicolon_terminated
16535
16536                             #  previous line begins with 'and' or 'or'
16537                             && $types_to_go[$ibeg_1] eq 'k'
16538                             && $is_and_or{ $tokens_to_go[$ibeg_1] }
16539
16540                           );
16541                     }
16542
16543                     # handle all other leading keywords
16544                     else {
16545
16546                         # keywords look best at start of lines,
16547                         # but combine things like "1 while"
16548                         unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
16549                             next
16550                               if ( ( $types_to_go[$iend_1] ne 'k' )
16551                                 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16552                         }
16553                     }
16554                 }
16555
16556                 # similar treatment of && and || as above for 'and' and 'or':
16557                 # NOTE: This block of code is currently bypassed because
16558                 # of a previous block but is retained for possible future use.
16559                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16560
16561                     # maybe looking at something like:
16562                     # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16563
16564                     next
16565                       unless (
16566                         $this_line_is_semicolon_terminated
16567
16568                         # previous line begins with an 'if' or 'unless' keyword
16569                         && $types_to_go[$ibeg_1] eq 'k'
16570                         && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16571
16572                       );
16573                 }
16574
16575                 # handle leading + - * /
16576                 elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
16577                     my $i_next_nonblank = $ibeg_2 + 1;
16578                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16579                         $i_next_nonblank++;
16580                     }
16581
16582                     my $i_next_next = $i_next_nonblank + 1;
16583                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16584
16585                     my $is_number = (
16586                         $types_to_go[$i_next_nonblank] eq 'n'
16587                           && ( $i_next_nonblank >= $iend_2 - 1
16588                             || $types_to_go[$i_next_next] eq ';' )
16589                     );
16590
16591                     my $iend_1_nonblank =
16592                       $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
16593                     my $iend_2_nonblank =
16594                       $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
16595
16596                     my $is_short_term =
16597                       (      $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
16598                           && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
16599                           && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
16600                           && $iend_2_nonblank <= $ibeg_2 + 2
16601                           && length( $tokens_to_go[$iend_2_nonblank] ) <
16602                           $rOpts_short_concatenation_item_length );
16603
16604                     # Combine these lines if this line is a single
16605                     # number, or if it is a short term with same
16606                     # operator as the previous line.  For example, in
16607                     # the following code we will combine all of the
16608                     # short terms $A, $B, $C, $D, $E, $F, together
16609                     # instead of leaving them one per line:
16610                     #  my $time =
16611                     #    $A * $B * $C * $D * $E * $F *
16612                     #    ( 2. * $eps * $sigma * $area ) *
16613                     #    ( 1. / $tcold**3 - 1. / $thot**3 );
16614                     # This can be important in math-intensive code.
16615                     next
16616                       unless (
16617                            $is_number
16618                         || $is_short_term
16619
16620                         # or if we can reduce this to two lines if we do.
16621                         || (   $n == 2
16622                             && $n == $nmax
16623                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
16624                       );
16625                 }
16626
16627                 # handle line with leading = or similar
16628                 elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
16629                     next unless $n == 1;
16630                     next
16631                       unless (
16632
16633                         # unless we can reduce this to two lines
16634                         $nmax == 2
16635
16636                         # or three lines, the last with a leading semicolon
16637                         || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16638
16639                         # or the next line ends with a here doc
16640                         || $types_to_go[$iend_2] eq 'h'
16641                       );
16642                 }
16643
16644                 #----------------------------------------------------------
16645                 # Section 3:
16646                 # Combine the lines if we arrive here and it is possible
16647                 #----------------------------------------------------------
16648
16649                 # honor hard breakpoints
16650                 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16651
16652                 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16653
16654                 # combined line cannot be too long
16655                 next
16656                   if excess_line_length( $ibeg_1, $iend_2 ) > 0;
16657
16658                 # do not recombine if we would skip in indentation levels
16659                 if ( $n < $nmax ) {
16660                     my $if_next = $$ri_beg[ $n + 1 ];
16661                     next
16662                       if (
16663                            $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16664                         && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16665
16666                         # but an isolated 'if (' is undesirable
16667                         && !(
16668                                $n == 1
16669                             && $iend_1 - $ibeg_1 <= 2
16670                             && $types_to_go[$ibeg_1]  eq 'k'
16671                             && $tokens_to_go[$ibeg_1] eq 'if'
16672                             && $tokens_to_go[$iend_1] ne '('
16673                         )
16674                       );
16675                 }
16676
16677                 # honor no-break's
16678                 next if ( $bs == NO_BREAK );
16679
16680                 # remember the pair with the greatest bond strength
16681                 if ( !$n_best ) {
16682                     $n_best  = $n;
16683                     $bs_best = $bs;
16684                 }
16685                 else {
16686
16687                     if ( $bs > $bs_best ) {
16688                         $n_best  = $n;
16689                         $bs_best = $bs;
16690                     }
16691                 }
16692             }
16693
16694             # recombine the pair with the greatest bond strength
16695             if ($n_best) {
16696                 splice @$ri_beg, $n_best, 1;
16697                 splice @$ri_end, $n_best - 1, 1;
16698
16699                 # keep going if we are still making progress
16700                 $more_to_do++;
16701             }
16702         }
16703         return ( $ri_beg, $ri_end );
16704     }
16705 }    # end recombine_breakpoints
16706
16707 sub break_all_chain_tokens {
16708
16709     # scan the current breakpoints looking for breaks at certain "chain
16710     # operators" (. : && || + etc) which often occur repeatedly in a long
16711     # statement.  If we see a break at any one, break at all similar tokens
16712     # within the same container.
16713     #
16714     my ( $ri_left, $ri_right ) = @_;
16715
16716     my %saw_chain_type;
16717     my %left_chain_type;
16718     my %right_chain_type;
16719     my %interior_chain_type;
16720     my $nmax = @$ri_right - 1;
16721
16722     # scan the left and right end tokens of all lines
16723     my $count = 0;
16724     for my $n ( 0 .. $nmax ) {
16725         my $il    = $$ri_left[$n];
16726         my $ir    = $$ri_right[$n];
16727         my $typel = $types_to_go[$il];
16728         my $typer = $types_to_go[$ir];
16729         $typel = '+' if ( $typel eq '-' );    # treat + and - the same
16730         $typer = '+' if ( $typer eq '-' );
16731         $typel = '*' if ( $typel eq '/' );    # treat * and / the same
16732         $typer = '*' if ( $typer eq '/' );
16733         my $tokenl = $tokens_to_go[$il];
16734         my $tokenr = $tokens_to_go[$ir];
16735
16736         if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
16737             next if ( $typel eq '?' );
16738             push @{ $left_chain_type{$typel} }, $il;
16739             $saw_chain_type{$typel} = 1;
16740             $count++;
16741         }
16742         if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
16743             next if ( $typer eq '?' );
16744             push @{ $right_chain_type{$typer} }, $ir;
16745             $saw_chain_type{$typer} = 1;
16746             $count++;
16747         }
16748     }
16749     return unless $count;
16750
16751     # now look for any interior tokens of the same types
16752     $count = 0;
16753     for my $n ( 0 .. $nmax ) {
16754         my $il = $$ri_left[$n];
16755         my $ir = $$ri_right[$n];
16756         for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
16757             my $type = $types_to_go[$i];
16758             $type = '+' if ( $type eq '-' );
16759             $type = '*' if ( $type eq '/' );
16760             if ( $saw_chain_type{$type} ) {
16761                 push @{ $interior_chain_type{$type} }, $i;
16762                 $count++;
16763             }
16764         }
16765     }
16766     return unless $count;
16767
16768     # now make a list of all new break points
16769     my @insert_list;
16770
16771     # loop over all chain types
16772     foreach my $type ( keys %saw_chain_type ) {
16773
16774         # quit if just ONE continuation line with leading .  For example--
16775         # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
16776         #  . $contents;
16777         last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
16778
16779         # loop over all interior chain tokens
16780         foreach my $itest ( @{ $interior_chain_type{$type} } ) {
16781
16782             # loop over all left end tokens of same type
16783             if ( $left_chain_type{$type} ) {
16784                 next if $nobreak_to_go[ $itest - 1 ];
16785                 foreach my $i ( @{ $left_chain_type{$type} } ) {
16786                     next unless in_same_container( $i, $itest );
16787                     push @insert_list, $itest - 1;
16788
16789                     # Break at matching ? if this : is at a different level.
16790                     # For example, the ? before $THRf_DEAD in the following
16791                     # should get a break if its : gets a break.
16792                     #
16793                     # my $flags =
16794                     #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
16795                     #   : ( $_ & 4 ) ? $THRf_R_DETACHED
16796                     #   :              $THRf_R_JOINABLE;
16797                     if (   $type eq ':'
16798                         && $levels_to_go[$i] != $levels_to_go[$itest] )
16799                     {
16800                         my $i_question = $mate_index_to_go[$itest];
16801                         if ( $i_question > 0 ) {
16802                             push @insert_list, $i_question - 1;
16803                         }
16804                     }
16805                     last;
16806                 }
16807             }
16808
16809             # loop over all right end tokens of same type
16810             if ( $right_chain_type{$type} ) {
16811                 next if $nobreak_to_go[$itest];
16812                 foreach my $i ( @{ $right_chain_type{$type} } ) {
16813                     next unless in_same_container( $i, $itest );
16814                     push @insert_list, $itest;
16815
16816                     # break at matching ? if this : is at a different level
16817                     if (   $type eq ':'
16818                         && $levels_to_go[$i] != $levels_to_go[$itest] )
16819                     {
16820                         my $i_question = $mate_index_to_go[$itest];
16821                         if ( $i_question >= 0 ) {
16822                             push @insert_list, $i_question;
16823                         }
16824                     }
16825                     last;
16826                 }
16827             }
16828         }
16829     }
16830
16831     # insert any new break points
16832     if (@insert_list) {
16833         insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16834     }
16835 }
16836
16837 sub break_equals {
16838
16839     # Look for assignment operators that could use a breakpoint.
16840     # For example, in the following snippet
16841     #
16842     #    $HOME = $ENV{HOME}
16843     #      || $ENV{LOGDIR}
16844     #      || $pw[7]
16845     #      || die "no home directory for user $<";
16846     #
16847     # we could break at the = to get this, which is a little nicer:
16848     #    $HOME =
16849     #         $ENV{HOME}
16850     #      || $ENV{LOGDIR}
16851     #      || $pw[7]
16852     #      || die "no home directory for user $<";
16853     #
16854     # The logic here follows the logic in set_logical_padding, which
16855     # will add the padding in the second line to improve alignment.
16856     #
16857     my ( $ri_left, $ri_right ) = @_;
16858     my $nmax = @$ri_right - 1;
16859     return unless ( $nmax >= 2 );
16860
16861     # scan the left ends of first two lines
16862     my $tokbeg = "";
16863     my $depth_beg;
16864     for my $n ( 1 .. 2 ) {
16865         my $il     = $$ri_left[$n];
16866         my $typel  = $types_to_go[$il];
16867         my $tokenl = $tokens_to_go[$il];
16868
16869         my $has_leading_op = ( $tokenl =~ /^\w/ )
16870           ? $is_chain_operator{$tokenl}    # + - * / : ? && ||
16871           : $is_chain_operator{$typel};    # and, or
16872         return unless ($has_leading_op);
16873         if ( $n > 1 ) {
16874             return
16875               unless ( $tokenl eq $tokbeg
16876                 && $nesting_depth_to_go[$il] eq $depth_beg );
16877         }
16878         $tokbeg    = $tokenl;
16879         $depth_beg = $nesting_depth_to_go[$il];
16880     }
16881
16882     # now look for any interior tokens of the same types
16883     my $il = $$ri_left[0];
16884     my $ir = $$ri_right[0];
16885
16886     # now make a list of all new break points
16887     my @insert_list;
16888     for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
16889         my $type = $types_to_go[$i];
16890         if (   $is_assignment{$type}
16891             && $nesting_depth_to_go[$i] eq $depth_beg )
16892         {
16893             if ( $want_break_before{$type} ) {
16894                 push @insert_list, $i - 1;
16895             }
16896             else {
16897                 push @insert_list, $i;
16898             }
16899         }
16900     }
16901
16902     # Break after a 'return' followed by a chain of operators
16903     #  return ( $^O !~ /win32|dos/i )
16904     #    && ( $^O ne 'VMS' )
16905     #    && ( $^O ne 'OS2' )
16906     #    && ( $^O ne 'MacOS' );
16907     # To give:
16908     #  return
16909     #       ( $^O !~ /win32|dos/i )
16910     #    && ( $^O ne 'VMS' )
16911     #    && ( $^O ne 'OS2' )
16912     #    && ( $^O ne 'MacOS' );
16913     my $i = 0;
16914     if (   $types_to_go[$i] eq 'k'
16915         && $tokens_to_go[$i] eq 'return'
16916         && $ir > $il
16917         && $nesting_depth_to_go[$i] eq $depth_beg )
16918     {
16919         push @insert_list, $i;
16920     }
16921
16922     return unless (@insert_list);
16923
16924     # One final check...
16925     # scan second and thrid lines and be sure there are no assignments
16926     # we want to avoid breaking at an = to make something like this:
16927     #    unless ( $icon =
16928     #           $html_icons{"$type-$state"}
16929     #        or $icon = $html_icons{$type}
16930     #        or $icon = $html_icons{$state} )
16931     for my $n ( 1 .. 2 ) {
16932         my $il = $$ri_left[$n];
16933         my $ir = $$ri_right[$n];
16934         for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
16935             my $type = $types_to_go[$i];
16936             return
16937               if ( $is_assignment{$type}
16938                 && $nesting_depth_to_go[$i] eq $depth_beg );
16939         }
16940     }
16941
16942     # ok, insert any new break point
16943     if (@insert_list) {
16944         insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16945     }
16946 }
16947
16948 sub insert_final_breaks {
16949
16950     my ( $ri_left, $ri_right ) = @_;
16951
16952     my $nmax = @$ri_right - 1;
16953
16954     # scan the left and right end tokens of all lines
16955     my $count         = 0;
16956     my $i_first_colon = -1;
16957     for my $n ( 0 .. $nmax ) {
16958         my $il    = $$ri_left[$n];
16959         my $ir    = $$ri_right[$n];
16960         my $typel = $types_to_go[$il];
16961         my $typer = $types_to_go[$ir];
16962         return if ( $typel eq '?' );
16963         return if ( $typer eq '?' );
16964         if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
16965         elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16966     }
16967
16968     # For long ternary chains,
16969     # if the first : we see has its # ? is in the interior
16970     # of a preceding line, then see if there are any good
16971     # breakpoints before the ?.
16972     if ( $i_first_colon > 0 ) {
16973         my $i_question = $mate_index_to_go[$i_first_colon];
16974         if ( $i_question > 0 ) {
16975             my @insert_list;
16976             for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16977                 my $token = $tokens_to_go[$ii];
16978                 my $type  = $types_to_go[$ii];
16979
16980                 # For now, a good break is either a comma or a 'return'.
16981                 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
16982                     && in_same_container( $ii, $i_question ) )
16983                 {
16984                     push @insert_list, $ii;
16985                     last;
16986                 }
16987             }
16988
16989             # insert any new break points
16990             if (@insert_list) {
16991                 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16992             }
16993         }
16994     }
16995 }
16996
16997 sub in_same_container {
16998
16999     # check to see if tokens at i1 and i2 are in the
17000     # same container, and not separated by a comma, ? or :
17001     my ( $i1, $i2 ) = @_;
17002     my $type  = $types_to_go[$i1];
17003     my $depth = $nesting_depth_to_go[$i1];
17004     return unless ( $nesting_depth_to_go[$i2] == $depth );
17005     if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
17006
17007     ###########################################################
17008     # This is potentially a very slow routine and not critical.
17009     # For safety just give up for large differences.
17010     # See test file 'infinite_loop.txt'
17011     # TODO: replace this loop with a data structure
17012     ###########################################################
17013     return if ( $i2 - $i1 > 200 );
17014
17015     for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
17016         next   if ( $nesting_depth_to_go[$i] > $depth );
17017         return if ( $nesting_depth_to_go[$i] < $depth );
17018
17019         my $tok = $tokens_to_go[$i];
17020         $tok = ',' if $tok eq '=>';    # treat => same as ,
17021
17022         # Example: we would not want to break at any of these .'s
17023         #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
17024         if ( $type ne ':' ) {
17025             return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
17026         }
17027         else {
17028             return if ( $tok =~ /^[\,]$/ );
17029         }
17030     }
17031     return 1;
17032 }
17033
17034 sub set_continuation_breaks {
17035
17036     # Define an array of indexes for inserting newline characters to
17037     # keep the line lengths below the maximum desired length.  There is
17038     # an implied break after the last token, so it need not be included.
17039
17040     # Method:
17041     # This routine is part of series of routines which adjust line
17042     # lengths.  It is only called if a statement is longer than the
17043     # maximum line length, or if a preliminary scanning located
17044     # desirable break points.   Sub scan_list has already looked at
17045     # these tokens and set breakpoints (in array
17046     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17047     # after commas, after opening parens, and before closing parens).
17048     # This routine will honor these breakpoints and also add additional
17049     # breakpoints as necessary to keep the line length below the maximum
17050     # requested.  It bases its decision on where the 'bond strength' is
17051     # lowest.
17052
17053     # Output: returns references to the arrays:
17054     #  @i_first
17055     #  @i_last
17056     # which contain the indexes $i of the first and last tokens on each
17057     # line.
17058
17059     # In addition, the array:
17060     #   $forced_breakpoint_to_go[$i]
17061     # may be updated to be =1 for any index $i after which there must be
17062     # a break.  This signals later routines not to undo the breakpoint.
17063
17064     my $saw_good_break = shift;
17065     my @i_first        = ();      # the first index to output
17066     my @i_last         = ();      # the last index to output
17067     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
17068     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17069
17070     set_bond_strengths();
17071
17072     my $imin = 0;
17073     my $imax = $max_index_to_go;
17074     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17075     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17076     my $i_begin = $imin;          # index for starting next iteration
17077
17078     my $leading_spaces          = leading_spaces_to_go($imin);
17079     my $line_count              = 0;
17080     my $last_break_strength     = NO_BREAK;
17081     my $i_last_break            = -1;
17082     my $max_bias                = 0.001;
17083     my $tiny_bias               = 0.0001;
17084     my $leading_alignment_token = "";
17085     my $leading_alignment_type  = "";
17086
17087     # see if any ?/:'s are in order
17088     my $colons_in_order = 1;
17089     my $last_tok        = "";
17090     my @colon_list  = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
17091     my $colon_count = @colon_list;
17092     foreach (@colon_list) {
17093         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17094         $last_tok = $_;
17095     }
17096
17097     # This is a sufficient but not necessary condition for colon chain
17098     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
17099
17100     #-------------------------------------------------------
17101     # BEGINNING of main loop to set continuation breakpoints
17102     # Keep iterating until we reach the end
17103     #-------------------------------------------------------
17104     while ( $i_begin <= $imax ) {
17105         my $lowest_strength        = NO_BREAK;
17106         my $starting_sum           = $lengths_to_go[$i_begin];
17107         my $i_lowest               = -1;
17108         my $i_test                 = -1;
17109         my $lowest_next_token      = '';
17110         my $lowest_next_type       = 'b';
17111         my $i_lowest_next_nonblank = -1;
17112
17113         #-------------------------------------------------------
17114         # BEGINNING of inner loop to find the best next breakpoint
17115         #-------------------------------------------------------
17116         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17117             my $type       = $types_to_go[$i_test];
17118             my $token      = $tokens_to_go[$i_test];
17119             my $next_type  = $types_to_go[ $i_test + 1 ];
17120             my $next_token = $tokens_to_go[ $i_test + 1 ];
17121             my $i_next_nonblank =
17122               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
17123             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
17124             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
17125             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17126             my $strength                 = $bond_strength_to_go[$i_test];
17127             my $must_break               = 0;
17128
17129             # FIXME: TESTING: Might want to be able to break after these
17130             # force an immediate break at certain operators
17131             # with lower level than the start of the line
17132             if (
17133                 (
17134                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17135                     || (   $next_nonblank_type eq 'k'
17136                         && $next_nonblank_token =~ /^(and|or)$/ )
17137                 )
17138                 && ( $nesting_depth_to_go[$i_begin] >
17139                     $nesting_depth_to_go[$i_next_nonblank] )
17140               )
17141             {
17142                 set_forced_breakpoint($i_next_nonblank);
17143             }
17144
17145             if (
17146
17147                 # Try to put a break where requested by scan_list
17148                 $forced_breakpoint_to_go[$i_test]
17149
17150                 # break between ) { in a continued line so that the '{' can
17151                 # be outdented
17152                 # See similar logic in scan_list which catches instances
17153                 # where a line is just something like ') {'
17154                 || (   $line_count
17155                     && ( $token              eq ')' )
17156                     && ( $next_nonblank_type eq '{' )
17157                     && ($next_nonblank_block_type)
17158                     && !$rOpts->{'opening-brace-always-on-right'} )
17159
17160                 # There is an implied forced break at a terminal opening brace
17161                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17162               )
17163             {
17164
17165                 # Forced breakpoints must sometimes be overridden, for example
17166                 # because of a side comment causing a NO_BREAK.  It is easier
17167                 # to catch this here than when they are set.
17168                 if ( $strength < NO_BREAK ) {
17169                     $strength   = $lowest_strength - $tiny_bias;
17170                     $must_break = 1;
17171                 }
17172             }
17173
17174             # quit if a break here would put a good terminal token on
17175             # the next line and we already have a possible break
17176             if (
17177                    !$must_break
17178                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
17179                 && (
17180                     (
17181                         $leading_spaces +
17182                         $lengths_to_go[ $i_next_nonblank + 1 ] -
17183                         $starting_sum
17184                     ) > $rOpts_maximum_line_length
17185                 )
17186               )
17187             {
17188                 last if ( $i_lowest >= 0 );
17189             }
17190
17191             # Avoid a break which would strand a single punctuation
17192             # token.  For example, we do not want to strand a leading
17193             # '.' which is followed by a long quoted string.
17194             if (
17195                    !$must_break
17196                 && ( $i_test == $i_begin )
17197                 && ( $i_test < $imax )
17198                 && ( $token eq $type )
17199                 && (
17200                     (
17201                         $leading_spaces +
17202                         $lengths_to_go[ $i_test + 1 ] -
17203                         $starting_sum
17204                     ) <= $rOpts_maximum_line_length
17205                 )
17206               )
17207             {
17208                 $i_test++;
17209
17210                 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
17211                     $i_test++;
17212                 }
17213                 redo;
17214             }
17215
17216             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17217             {
17218
17219                 # break at previous best break if it would have produced
17220                 # a leading alignment of certain common tokens, and it
17221                 # is different from the latest candidate break
17222                 last
17223                   if ($leading_alignment_type);
17224
17225                 # Force at least one breakpoint if old code had good
17226                 # break It is only called if a breakpoint is required or
17227                 # desired.  This will probably need some adjustments
17228                 # over time.  A goal is to try to be sure that, if a new
17229                 # side comment is introduced into formated text, then
17230                 # the same breakpoints will occur.  scbreak.t
17231                 last
17232                   if (
17233                     $i_test == $imax                # we are at the end
17234                     && !$forced_breakpoint_count    #
17235                     && $saw_good_break              # old line had good break
17236                     && $type =~ /^[#;\{]$/          # and this line ends in
17237                                                     # ';' or side comment
17238                     && $i_last_break < 0        # and we haven't made a break
17239                     && $i_lowest > 0            # and we saw a possible break
17240                     && $i_lowest < $imax - 1    # (but not just before this ;)
17241                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17242                   );
17243
17244                 $lowest_strength        = $strength;
17245                 $i_lowest               = $i_test;
17246                 $lowest_next_token      = $next_nonblank_token;
17247                 $lowest_next_type       = $next_nonblank_type;
17248                 $i_lowest_next_nonblank = $i_next_nonblank;
17249                 last if $must_break;
17250
17251                 # set flags to remember if a break here will produce a
17252                 # leading alignment of certain common tokens
17253                 if (   $line_count > 0
17254                     && $i_test < $imax
17255                     && ( $lowest_strength - $last_break_strength <= $max_bias )
17256                   )
17257                 {
17258                     my $i_last_end = $i_begin - 1;
17259                     if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
17260                     my $tok_beg  = $tokens_to_go[$i_begin];
17261                     my $type_beg = $types_to_go[$i_begin];
17262                     if (
17263
17264                         # check for leading alignment of certain tokens
17265                         (
17266                                $tok_beg eq $next_nonblank_token
17267                             && $is_chain_operator{$tok_beg}
17268                             && (   $type_beg eq 'k'
17269                                 || $type_beg eq $tok_beg )
17270                             && $nesting_depth_to_go[$i_begin] >=
17271                             $nesting_depth_to_go[$i_next_nonblank]
17272                         )
17273
17274                         || (   $tokens_to_go[$i_last_end] eq $token
17275                             && $is_chain_operator{$token}
17276                             && ( $type eq 'k' || $type eq $token )
17277                             && $nesting_depth_to_go[$i_last_end] >=
17278                             $nesting_depth_to_go[$i_test] )
17279                       )
17280                     {
17281                         $leading_alignment_token = $next_nonblank_token;
17282                         $leading_alignment_type  = $next_nonblank_type;
17283                     }
17284                 }
17285             }
17286
17287             my $too_long =
17288               ( $i_test >= $imax )
17289               ? 1
17290               : (
17291                 (
17292                     $leading_spaces +
17293                       $lengths_to_go[ $i_test + 2 ] -
17294                       $starting_sum
17295                 ) > $rOpts_maximum_line_length
17296               );
17297
17298             FORMATTER_DEBUG_FLAG_BREAK
17299               && print
17300 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
17301
17302             # allow one extra terminal token after exceeding line length
17303             # if it would strand this token.
17304             if (   $rOpts_fuzzy_line_length
17305                 && $too_long
17306                 && ( $i_lowest == $i_test )
17307                 && ( length($token) > 1 )
17308                 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
17309             {
17310                 $too_long = 0;
17311             }
17312
17313             last
17314               if (
17315                 ( $i_test == $imax )    # we're done if no more tokens,
17316                 || (
17317                     ( $i_lowest >= 0 )    # or no more space and we have a break
17318                     && $too_long
17319                 )
17320               );
17321         }
17322
17323         #-------------------------------------------------------
17324         # END of inner loop to find the best next breakpoint
17325         # Now decide exactly where to put the breakpoint
17326         #-------------------------------------------------------
17327
17328         # it's always ok to break at imax if no other break was found
17329         if ( $i_lowest < 0 ) { $i_lowest = $imax }
17330
17331         # semi-final index calculation
17332         my $i_next_nonblank = (
17333             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17334             ? $i_lowest + 2
17335             : $i_lowest + 1
17336         );
17337         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17338         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17339
17340         #-------------------------------------------------------
17341         # ?/: rule 1 : if a break here will separate a '?' on this
17342         # line from its closing ':', then break at the '?' instead.
17343         #-------------------------------------------------------
17344         my $i;
17345         foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17346             next unless ( $tokens_to_go[$i] eq '?' );
17347
17348             # do not break if probable sequence of ?/: statements
17349             next if ($is_colon_chain);
17350
17351             # do not break if statement is broken by side comment
17352             next
17353               if (
17354                 $tokens_to_go[$max_index_to_go] eq '#'
17355                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
17356                     $max_index_to_go ) !~ /^[\;\}]$/
17357               );
17358
17359             # no break needed if matching : is also on the line
17360             next
17361               if ( $mate_index_to_go[$i] >= 0
17362                 && $mate_index_to_go[$i] <= $i_next_nonblank );
17363
17364             $i_lowest = $i;
17365             if ( $want_break_before{'?'} ) { $i_lowest-- }
17366             last;
17367         }
17368
17369         #-------------------------------------------------------
17370         # END of inner loop to find the best next breakpoint:
17371         # Break the line after the token with index i=$i_lowest
17372         #-------------------------------------------------------
17373
17374         # final index calculation
17375         $i_next_nonblank = (
17376             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17377             ? $i_lowest + 2
17378             : $i_lowest + 1
17379         );
17380         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17381         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17382
17383         FORMATTER_DEBUG_FLAG_BREAK
17384           && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
17385
17386         #-------------------------------------------------------
17387         # ?/: rule 2 : if we break at a '?', then break at its ':'
17388         #
17389         # Note: this rule is also in sub scan_list to handle a break
17390         # at the start and end of a line (in case breaks are dictated
17391         # by side comments).
17392         #-------------------------------------------------------
17393         if ( $next_nonblank_type eq '?' ) {
17394             set_closing_breakpoint($i_next_nonblank);
17395         }
17396         elsif ( $types_to_go[$i_lowest] eq '?' ) {
17397             set_closing_breakpoint($i_lowest);
17398         }
17399
17400         #-------------------------------------------------------
17401         # ?/: rule 3 : if we break at a ':' then we save
17402         # its location for further work below.  We may need to go
17403         # back and break at its '?'.
17404         #-------------------------------------------------------
17405         if ( $next_nonblank_type eq ':' ) {
17406             push @i_colon_breaks, $i_next_nonblank;
17407         }
17408         elsif ( $types_to_go[$i_lowest] eq ':' ) {
17409             push @i_colon_breaks, $i_lowest;
17410         }
17411
17412         # here we should set breaks for all '?'/':' pairs which are
17413         # separated by this line
17414
17415         $line_count++;
17416
17417         # save this line segment, after trimming blanks at the ends
17418         push( @i_first,
17419             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17420         push( @i_last,
17421             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17422
17423         # set a forced breakpoint at a container opening, if necessary, to
17424         # signal a break at a closing container.  Excepting '(' for now.
17425         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
17426             && !$forced_breakpoint_to_go[$i_lowest] )
17427         {
17428             set_closing_breakpoint($i_lowest);
17429         }
17430
17431         # get ready to go again
17432         $i_begin                 = $i_lowest + 1;
17433         $last_break_strength     = $lowest_strength;
17434         $i_last_break            = $i_lowest;
17435         $leading_alignment_token = "";
17436         $leading_alignment_type  = "";
17437         $lowest_next_token       = '';
17438         $lowest_next_type        = 'b';
17439
17440         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17441             $i_begin++;
17442         }
17443
17444         # update indentation size
17445         if ( $i_begin <= $imax ) {
17446             $leading_spaces = leading_spaces_to_go($i_begin);
17447         }
17448     }
17449
17450     #-------------------------------------------------------
17451     # END of main loop to set continuation breakpoints
17452     # Now go back and make any necessary corrections
17453     #-------------------------------------------------------
17454
17455     #-------------------------------------------------------
17456     # ?/: rule 4 -- if we broke at a ':', then break at
17457     # corresponding '?' unless this is a chain of ?: expressions
17458     #-------------------------------------------------------
17459     if (@i_colon_breaks) {
17460
17461         # using a simple method for deciding if we are in a ?/: chain --
17462         # this is a chain if it has multiple ?/: pairs all in order;
17463         # otherwise not.
17464         # Note that if line starts in a ':' we count that above as a break
17465         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17466
17467         unless ($is_chain) {
17468             my @insert_list = ();
17469             foreach (@i_colon_breaks) {
17470                 my $i_question = $mate_index_to_go[$_];
17471                 if ( $i_question >= 0 ) {
17472                     if ( $want_break_before{'?'} ) {
17473                         $i_question--;
17474                         if (   $i_question > 0
17475                             && $types_to_go[$i_question] eq 'b' )
17476                         {
17477                             $i_question--;
17478                         }
17479                     }
17480
17481                     if ( $i_question >= 0 ) {
17482                         push @insert_list, $i_question;
17483                     }
17484                 }
17485                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
17486             }
17487         }
17488     }
17489     return ( \@i_first, \@i_last, $colon_count );
17490 }
17491
17492 sub insert_additional_breaks {
17493
17494     # this routine will add line breaks at requested locations after
17495     # sub set_continuation_breaks has made preliminary breaks.
17496
17497     my ( $ri_break_list, $ri_first, $ri_last ) = @_;
17498     my $i_f;
17499     my $i_l;
17500     my $line_number = 0;
17501     my $i_break_left;
17502     foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
17503
17504         $i_f = $$ri_first[$line_number];
17505         $i_l = $$ri_last[$line_number];
17506         while ( $i_break_left >= $i_l ) {
17507             $line_number++;
17508
17509             # shouldn't happen unless caller passes bad indexes
17510             if ( $line_number >= @$ri_last ) {
17511                 warning(
17512 "Non-fatal program bug: couldn't set break at $i_break_left\n"
17513                 );
17514                 report_definite_bug();
17515                 return;
17516             }
17517             $i_f = $$ri_first[$line_number];
17518             $i_l = $$ri_last[$line_number];
17519         }
17520
17521         my $i_break_right = $i_break_left + 1;
17522         if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
17523
17524         if (   $i_break_left >= $i_f
17525             && $i_break_left < $i_l
17526             && $i_break_right > $i_f
17527             && $i_break_right <= $i_l )
17528         {
17529             splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
17530             splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
17531         }
17532     }
17533 }
17534
17535 sub set_closing_breakpoint {
17536
17537     # set a breakpoint at a matching closing token
17538     # at present, this is only used to break at a ':' which matches a '?'
17539     my $i_break = shift;
17540
17541     if ( $mate_index_to_go[$i_break] >= 0 ) {
17542
17543         # CAUTION: infinite recursion possible here:
17544         #   set_closing_breakpoint calls set_forced_breakpoint, and
17545         #   set_forced_breakpoint call set_closing_breakpoint
17546         #   ( test files attrib.t, BasicLyx.pm.html).
17547         # Don't reduce the '2' in the statement below
17548         if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
17549
17550             # break before } ] and ), but sub set_forced_breakpoint will decide
17551             # to break before or after a ? and :
17552             my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
17553             set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
17554         }
17555     }
17556     else {
17557         my $type_sequence = $type_sequence_to_go[$i_break];
17558         if ($type_sequence) {
17559             my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
17560             $postponed_breakpoint{$type_sequence} = 1;
17561         }
17562     }
17563 }
17564
17565 # check to see if output line tabbing agrees with input line
17566 # this can be very useful for debugging a script which has an extra
17567 # or missing brace
17568 sub compare_indentation_levels {
17569
17570     my ( $python_indentation_level, $structural_indentation_level ) = @_;
17571     if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
17572         $last_tabbing_disagreement = $input_line_number;
17573
17574         if ($in_tabbing_disagreement) {
17575         }
17576         else {
17577             $tabbing_disagreement_count++;
17578
17579             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17580                 write_logfile_entry(
17581 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
17582                 );
17583             }
17584             $in_tabbing_disagreement    = $input_line_number;
17585             $first_tabbing_disagreement = $in_tabbing_disagreement
17586               unless ($first_tabbing_disagreement);
17587         }
17588     }
17589     else {
17590
17591         if ($in_tabbing_disagreement) {
17592
17593             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17594                 write_logfile_entry(
17595 "End indentation disagreement from input line $in_tabbing_disagreement\n"
17596                 );
17597
17598                 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
17599                     write_logfile_entry(
17600                         "No further tabbing disagreements will be noted\n");
17601                 }
17602             }
17603             $in_tabbing_disagreement = 0;
17604         }
17605     }
17606 }
17607
17608 #####################################################################
17609 #
17610 # the Perl::Tidy::IndentationItem class supplies items which contain
17611 # how much whitespace should be used at the start of a line
17612 #
17613 #####################################################################
17614
17615 package Perl::Tidy::IndentationItem;
17616
17617 # Indexes for indentation items
17618 use constant SPACES             => 0;     # total leading white spaces
17619 use constant LEVEL              => 1;     # the indentation 'level'
17620 use constant CI_LEVEL           => 2;     # the 'continuation level'
17621 use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
17622                                           # for this level
17623 use constant CLOSED             => 4;     # index where we saw closing '}'
17624 use constant COMMA_COUNT        => 5;     # how many commas at this level?
17625 use constant SEQUENCE_NUMBER    => 6;     # output batch number
17626 use constant INDEX              => 7;     # index in output batch list
17627 use constant HAVE_CHILD         => 8;     # any dependents?
17628 use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
17629                                           # we would like to move to get
17630                                           # alignment (negative if left)
17631 use constant ALIGN_PAREN        => 10;    # do we want to try to align
17632                                           # with an opening structure?
17633 use constant MARKED             => 11;    # if visited by corrector logic
17634 use constant STACK_DEPTH        => 12;    # indentation nesting depth
17635 use constant STARTING_INDEX     => 13;    # first token index of this level
17636 use constant ARROW_COUNT        => 14;    # how many =>'s
17637
17638 sub new {
17639
17640     # Create an 'indentation_item' which describes one level of leading
17641     # whitespace when the '-lp' indentation is used.  We return
17642     # a reference to an anonymous array of associated variables.
17643     # See above constants for storage scheme.
17644     my (
17645         $class,               $spaces,           $level,
17646         $ci_level,            $available_spaces, $index,
17647         $gnu_sequence_number, $align_paren,      $stack_depth,
17648         $starting_index,
17649     ) = @_;
17650     my $closed            = -1;
17651     my $arrow_count       = 0;
17652     my $comma_count       = 0;
17653     my $have_child        = 0;
17654     my $want_right_spaces = 0;
17655     my $marked            = 0;
17656     bless [
17657         $spaces,              $level,          $ci_level,
17658         $available_spaces,    $closed,         $comma_count,
17659         $gnu_sequence_number, $index,          $have_child,
17660         $want_right_spaces,   $align_paren,    $marked,
17661         $stack_depth,         $starting_index, $arrow_count,
17662     ], $class;
17663 }
17664
17665 sub permanently_decrease_AVAILABLE_SPACES {
17666
17667     # make a permanent reduction in the available indentation spaces
17668     # at one indentation item.  NOTE: if there are child nodes, their
17669     # total SPACES must be reduced by the caller.
17670
17671     my ( $item, $spaces_needed ) = @_;
17672     my $available_spaces = $item->get_AVAILABLE_SPACES();
17673     my $deleted_spaces =
17674       ( $available_spaces > $spaces_needed )
17675       ? $spaces_needed
17676       : $available_spaces;
17677     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
17678     $item->decrease_SPACES($deleted_spaces);
17679     $item->set_RECOVERABLE_SPACES(0);
17680
17681     return $deleted_spaces;
17682 }
17683
17684 sub tentatively_decrease_AVAILABLE_SPACES {
17685
17686     # We are asked to tentatively delete $spaces_needed of indentation
17687     # for a indentation item.  We may want to undo this later.  NOTE: if
17688     # there are child nodes, their total SPACES must be reduced by the
17689     # caller.
17690     my ( $item, $spaces_needed ) = @_;
17691     my $available_spaces = $item->get_AVAILABLE_SPACES();
17692     my $deleted_spaces =
17693       ( $available_spaces > $spaces_needed )
17694       ? $spaces_needed
17695       : $available_spaces;
17696     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
17697     $item->decrease_SPACES($deleted_spaces);
17698     $item->increase_RECOVERABLE_SPACES($deleted_spaces);
17699     return $deleted_spaces;
17700 }
17701
17702 sub get_STACK_DEPTH {
17703     my $self = shift;
17704     return $self->[STACK_DEPTH];
17705 }
17706
17707 sub get_SPACES {
17708     my $self = shift;
17709     return $self->[SPACES];
17710 }
17711
17712 sub get_MARKED {
17713     my $self = shift;
17714     return $self->[MARKED];
17715 }
17716
17717 sub set_MARKED {
17718     my ( $self, $value ) = @_;
17719     if ( defined($value) ) {
17720         $self->[MARKED] = $value;
17721     }
17722     return $self->[MARKED];
17723 }
17724
17725 sub get_AVAILABLE_SPACES {
17726     my $self = shift;
17727     return $self->[AVAILABLE_SPACES];
17728 }
17729
17730 sub decrease_SPACES {
17731     my ( $self, $value ) = @_;
17732     if ( defined($value) ) {
17733         $self->[SPACES] -= $value;
17734     }
17735     return $self->[SPACES];
17736 }
17737
17738 sub decrease_AVAILABLE_SPACES {
17739     my ( $self, $value ) = @_;
17740     if ( defined($value) ) {
17741         $self->[AVAILABLE_SPACES] -= $value;
17742     }
17743     return $self->[AVAILABLE_SPACES];
17744 }
17745
17746 sub get_ALIGN_PAREN {
17747     my $self = shift;
17748     return $self->[ALIGN_PAREN];
17749 }
17750
17751 sub get_RECOVERABLE_SPACES {
17752     my $self = shift;
17753     return $self->[RECOVERABLE_SPACES];
17754 }
17755
17756 sub set_RECOVERABLE_SPACES {
17757     my ( $self, $value ) = @_;
17758     if ( defined($value) ) {
17759         $self->[RECOVERABLE_SPACES] = $value;
17760     }
17761     return $self->[RECOVERABLE_SPACES];
17762 }
17763
17764 sub increase_RECOVERABLE_SPACES {
17765     my ( $self, $value ) = @_;
17766     if ( defined($value) ) {
17767         $self->[RECOVERABLE_SPACES] += $value;
17768     }
17769     return $self->[RECOVERABLE_SPACES];
17770 }
17771
17772 sub get_CI_LEVEL {
17773     my $self = shift;
17774     return $self->[CI_LEVEL];
17775 }
17776
17777 sub get_LEVEL {
17778     my $self = shift;
17779     return $self->[LEVEL];
17780 }
17781
17782 sub get_SEQUENCE_NUMBER {
17783     my $self = shift;
17784     return $self->[SEQUENCE_NUMBER];
17785 }
17786
17787 sub get_INDEX {
17788     my $self = shift;
17789     return $self->[INDEX];
17790 }
17791
17792 sub get_STARTING_INDEX {
17793     my $self = shift;
17794     return $self->[STARTING_INDEX];
17795 }
17796
17797 sub set_HAVE_CHILD {
17798     my ( $self, $value ) = @_;
17799     if ( defined($value) ) {
17800         $self->[HAVE_CHILD] = $value;
17801     }
17802     return $self->[HAVE_CHILD];
17803 }
17804
17805 sub get_HAVE_CHILD {
17806     my $self = shift;
17807     return $self->[HAVE_CHILD];
17808 }
17809
17810 sub set_ARROW_COUNT {
17811     my ( $self, $value ) = @_;
17812     if ( defined($value) ) {
17813         $self->[ARROW_COUNT] = $value;
17814     }
17815     return $self->[ARROW_COUNT];
17816 }
17817
17818 sub get_ARROW_COUNT {
17819     my $self = shift;
17820     return $self->[ARROW_COUNT];
17821 }
17822
17823 sub set_COMMA_COUNT {
17824     my ( $self, $value ) = @_;
17825     if ( defined($value) ) {
17826         $self->[COMMA_COUNT] = $value;
17827     }
17828     return $self->[COMMA_COUNT];
17829 }
17830
17831 sub get_COMMA_COUNT {
17832     my $self = shift;
17833     return $self->[COMMA_COUNT];
17834 }
17835
17836 sub set_CLOSED {
17837     my ( $self, $value ) = @_;
17838     if ( defined($value) ) {
17839         $self->[CLOSED] = $value;
17840     }
17841     return $self->[CLOSED];
17842 }
17843
17844 sub get_CLOSED {
17845     my $self = shift;
17846     return $self->[CLOSED];
17847 }
17848
17849 #####################################################################
17850 #
17851 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
17852 # contain a single output line
17853 #
17854 #####################################################################
17855
17856 package Perl::Tidy::VerticalAligner::Line;
17857
17858 {
17859
17860     use strict;
17861     use Carp;
17862
17863     use constant JMAX                      => 0;
17864     use constant JMAX_ORIGINAL_LINE        => 1;
17865     use constant RTOKENS                   => 2;
17866     use constant RFIELDS                   => 3;
17867     use constant RPATTERNS                 => 4;
17868     use constant INDENTATION               => 5;
17869     use constant LEADING_SPACE_COUNT       => 6;
17870     use constant OUTDENT_LONG_LINES        => 7;
17871     use constant LIST_TYPE                 => 8;
17872     use constant IS_HANGING_SIDE_COMMENT   => 9;
17873     use constant RALIGNMENTS               => 10;
17874     use constant MAXIMUM_LINE_LENGTH       => 11;
17875     use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
17876
17877     my %_index_map;
17878     $_index_map{jmax}                      = JMAX;
17879     $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
17880     $_index_map{rtokens}                   = RTOKENS;
17881     $_index_map{rfields}                   = RFIELDS;
17882     $_index_map{rpatterns}                 = RPATTERNS;
17883     $_index_map{indentation}               = INDENTATION;
17884     $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
17885     $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
17886     $_index_map{list_type}                 = LIST_TYPE;
17887     $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
17888     $_index_map{ralignments}               = RALIGNMENTS;
17889     $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
17890     $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
17891
17892     my @_default_data = ();
17893     $_default_data[JMAX]                      = undef;
17894     $_default_data[JMAX_ORIGINAL_LINE]        = undef;
17895     $_default_data[RTOKENS]                   = undef;
17896     $_default_data[RFIELDS]                   = undef;
17897     $_default_data[RPATTERNS]                 = undef;
17898     $_default_data[INDENTATION]               = undef;
17899     $_default_data[LEADING_SPACE_COUNT]       = undef;
17900     $_default_data[OUTDENT_LONG_LINES]        = undef;
17901     $_default_data[LIST_TYPE]                 = undef;
17902     $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
17903     $_default_data[RALIGNMENTS]               = [];
17904     $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
17905     $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
17906
17907     {
17908
17909         # methods to count object population
17910         my $_count = 0;
17911         sub get_count        { $_count; }
17912         sub _increment_count { ++$_count }
17913         sub _decrement_count { --$_count }
17914     }
17915
17916     # Constructor may be called as a class method
17917     sub new {
17918         my ( $caller, %arg ) = @_;
17919         my $caller_is_obj = ref($caller);
17920         my $class = $caller_is_obj || $caller;
17921         no strict "refs";
17922         my $self = bless [], $class;
17923
17924         $self->[RALIGNMENTS] = [];
17925
17926         my $index;
17927         foreach ( keys %_index_map ) {
17928             $index = $_index_map{$_};
17929             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
17930             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
17931             else { $self->[$index] = $_default_data[$index] }
17932         }
17933
17934         $self->_increment_count();
17935         return $self;
17936     }
17937
17938     sub DESTROY {
17939         $_[0]->_decrement_count();
17940     }
17941
17942     sub get_jmax                      { $_[0]->[JMAX] }
17943     sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
17944     sub get_rtokens                   { $_[0]->[RTOKENS] }
17945     sub get_rfields                   { $_[0]->[RFIELDS] }
17946     sub get_rpatterns                 { $_[0]->[RPATTERNS] }
17947     sub get_indentation               { $_[0]->[INDENTATION] }
17948     sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
17949     sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
17950     sub get_list_type                 { $_[0]->[LIST_TYPE] }
17951     sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
17952     sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
17953
17954     sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
17955     sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
17956     sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
17957     sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
17958
17959     sub get_starting_column {
17960         $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
17961     }
17962
17963     sub increment_column {
17964         $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
17965     }
17966     sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
17967
17968     sub current_field_width {
17969         my $self = shift;
17970         my ($j) = @_;
17971         if ( $j == 0 ) {
17972             return $self->get_column($j);
17973         }
17974         else {
17975             return $self->get_column($j) - $self->get_column( $j - 1 );
17976         }
17977     }
17978
17979     sub field_width_growth {
17980         my $self = shift;
17981         my $j    = shift;
17982         return $self->get_column($j) - $self->get_starting_column($j);
17983     }
17984
17985     sub starting_field_width {
17986         my $self = shift;
17987         my $j    = shift;
17988         if ( $j == 0 ) {
17989             return $self->get_starting_column($j);
17990         }
17991         else {
17992             return $self->get_starting_column($j) -
17993               $self->get_starting_column( $j - 1 );
17994         }
17995     }
17996
17997     sub increase_field_width {
17998
17999         my $self = shift;
18000         my ( $j, $pad ) = @_;
18001         my $jmax = $self->get_jmax();
18002         for my $k ( $j .. $jmax ) {
18003             $self->increment_column( $k, $pad );
18004         }
18005     }
18006
18007     sub get_available_space_on_right {
18008         my $self = shift;
18009         my $jmax = $self->get_jmax();
18010         return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
18011     }
18012
18013     sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
18014     sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
18015     sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
18016     sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
18017     sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
18018     sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
18019     sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
18020     sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
18021     sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
18022     sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
18023     sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }
18024
18025 }
18026
18027 #####################################################################
18028 #
18029 # the Perl::Tidy::VerticalAligner::Alignment class holds information
18030 # on a single column being aligned
18031 #
18032 #####################################################################
18033 package Perl::Tidy::VerticalAligner::Alignment;
18034
18035 {
18036
18037     use strict;
18038
18039     #use Carp;
18040
18041     # Symbolic array indexes
18042     use constant COLUMN          => 0;    # the current column number
18043     use constant STARTING_COLUMN => 1;    # column number when created
18044     use constant MATCHING_TOKEN  => 2;    # what token we are matching
18045     use constant STARTING_LINE   => 3;    # the line index of creation
18046     use constant ENDING_LINE     => 4;    # the most recent line to use it
18047     use constant SAVED_COLUMN    => 5;    # the most recent line to use it
18048     use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
18049                                           # (just its index in an array)
18050
18051     # Correspondence between variables and array indexes
18052     my %_index_map;
18053     $_index_map{column}          = COLUMN;
18054     $_index_map{starting_column} = STARTING_COLUMN;
18055     $_index_map{matching_token}  = MATCHING_TOKEN;
18056     $_index_map{starting_line}   = STARTING_LINE;
18057     $_index_map{ending_line}     = ENDING_LINE;
18058     $_index_map{saved_column}    = SAVED_COLUMN;
18059     $_index_map{serial_number}   = SERIAL_NUMBER;
18060
18061     my @_default_data = ();
18062     $_default_data[COLUMN]          = undef;
18063     $_default_data[STARTING_COLUMN] = undef;
18064     $_default_data[MATCHING_TOKEN]  = undef;
18065     $_default_data[STARTING_LINE]   = undef;
18066     $_default_data[ENDING_LINE]     = undef;
18067     $_default_data[SAVED_COLUMN]    = undef;
18068     $_default_data[SERIAL_NUMBER]   = undef;
18069
18070     # class population count
18071     {
18072         my $_count = 0;
18073         sub get_count        { $_count; }
18074         sub _increment_count { ++$_count }
18075         sub _decrement_count { --$_count }
18076     }
18077
18078     # constructor
18079     sub new {
18080         my ( $caller, %arg ) = @_;
18081         my $caller_is_obj = ref($caller);
18082         my $class = $caller_is_obj || $caller;
18083         no strict "refs";
18084         my $self = bless [], $class;
18085
18086         foreach ( keys %_index_map ) {
18087             my $index = $_index_map{$_};
18088             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
18089             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
18090             else { $self->[$index] = $_default_data[$index] }
18091         }
18092         $self->_increment_count();
18093         return $self;
18094     }
18095
18096     sub DESTROY {
18097         $_[0]->_decrement_count();
18098     }
18099
18100     sub get_column          { return $_[0]->[COLUMN] }
18101     sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
18102     sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
18103     sub get_starting_line   { return $_[0]->[STARTING_LINE] }
18104     sub get_ending_line     { return $_[0]->[ENDING_LINE] }
18105     sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
18106
18107     sub set_column          { $_[0]->[COLUMN]          = $_[1] }
18108     sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
18109     sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
18110     sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
18111     sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
18112     sub increment_column { $_[0]->[COLUMN] += $_[1] }
18113
18114     sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
18115     sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
18116
18117 }
18118
18119 package Perl::Tidy::VerticalAligner;
18120
18121 # The Perl::Tidy::VerticalAligner package collects output lines and
18122 # attempts to line up certain common tokens, such as => and #, which are
18123 # identified by the calling routine.
18124 #
18125 # There are two main routines: append_line and flush.  Append acts as a
18126 # storage buffer, collecting lines into a group which can be vertically
18127 # aligned.  When alignment is no longer possible or desirable, it dumps
18128 # the group to flush.
18129 #
18130 #     append_line -----> flush
18131 #
18132 #     collects          writes
18133 #     vertical          one
18134 #     groups            group
18135
18136 BEGIN {
18137
18138     # Caution: these debug flags produce a lot of output
18139     # They should all be 0 except when debugging small scripts
18140
18141     use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
18142     use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
18143     use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
18144
18145     my $debug_warning = sub {
18146         print "VALIGN_DEBUGGING with key $_[0]\n";
18147     };
18148
18149     VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
18150     VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
18151
18152 }
18153
18154 use vars qw(
18155   $vertical_aligner_self
18156   $current_line
18157   $maximum_alignment_index
18158   $ralignment_list
18159   $maximum_jmax_seen
18160   $minimum_jmax_seen
18161   $previous_minimum_jmax_seen
18162   $previous_maximum_jmax_seen
18163   $maximum_line_index
18164   $group_level
18165   $group_type
18166   $group_maximum_gap
18167   $marginal_match
18168   $last_group_level_written
18169   $last_leading_space_count
18170   $extra_indent_ok
18171   $zero_count
18172   @group_lines
18173   $last_comment_column
18174   $last_side_comment_line_number
18175   $last_side_comment_length
18176   $last_side_comment_level
18177   $outdented_line_count
18178   $first_outdented_line_at
18179   $last_outdented_line_at
18180   $diagnostics_object
18181   $logger_object
18182   $file_writer_object
18183   @side_comment_history
18184   $comment_leading_space_count
18185   $is_matching_terminal_line
18186
18187   $cached_line_text
18188   $cached_line_type
18189   $cached_line_flag
18190   $cached_seqno
18191   $cached_line_valid
18192   $cached_line_leading_space_count
18193   $cached_seqno_string
18194
18195   $seqno_string
18196   $last_nonblank_seqno_string
18197
18198   $rOpts
18199
18200   $rOpts_maximum_line_length
18201   $rOpts_continuation_indentation
18202   $rOpts_indent_columns
18203   $rOpts_tabs
18204   $rOpts_entab_leading_whitespace
18205   $rOpts_valign
18206
18207   $rOpts_fixed_position_side_comment
18208   $rOpts_minimum_space_to_comment
18209
18210 );
18211
18212 sub initialize {
18213
18214     my $class;
18215
18216     ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
18217       = @_;
18218
18219     # variables describing the entire space group:
18220     $ralignment_list            = [];
18221     $group_level                = 0;
18222     $last_group_level_written   = -1;
18223     $extra_indent_ok            = 0;    # can we move all lines to the right?
18224     $last_side_comment_length   = 0;
18225     $maximum_jmax_seen          = 0;
18226     $minimum_jmax_seen          = 0;
18227     $previous_minimum_jmax_seen = 0;
18228     $previous_maximum_jmax_seen = 0;
18229
18230     # variables describing each line of the group
18231     @group_lines = ();                  # list of all lines in group
18232
18233     $outdented_line_count          = 0;
18234     $first_outdented_line_at       = 0;
18235     $last_outdented_line_at        = 0;
18236     $last_side_comment_line_number = 0;
18237     $last_side_comment_level       = -1;
18238     $is_matching_terminal_line     = 0;
18239
18240     # most recent 3 side comments; [ line number, column ]
18241     $side_comment_history[0] = [ -300, 0 ];
18242     $side_comment_history[1] = [ -200, 0 ];
18243     $side_comment_history[2] = [ -100, 0 ];
18244
18245     # write_leader_and_string cache:
18246     $cached_line_text                = "";
18247     $cached_line_type                = 0;
18248     $cached_line_flag                = 0;
18249     $cached_seqno                    = 0;
18250     $cached_line_valid               = 0;
18251     $cached_line_leading_space_count = 0;
18252     $cached_seqno_string             = "";
18253
18254     # string of sequence numbers joined together
18255     $seqno_string               = "";
18256     $last_nonblank_seqno_string = "";
18257
18258     # frequently used parameters
18259     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
18260     $rOpts_tabs                     = $rOpts->{'tabs'};
18261     $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
18262     $rOpts_fixed_position_side_comment =
18263       $rOpts->{'fixed-position-side-comment'};
18264     $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
18265     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
18266     $rOpts_valign                   = $rOpts->{'valign'};
18267
18268     forget_side_comment();
18269
18270     initialize_for_new_group();
18271
18272     $vertical_aligner_self = {};
18273     bless $vertical_aligner_self, $class;
18274     return $vertical_aligner_self;
18275 }
18276
18277 sub initialize_for_new_group {
18278     $maximum_line_index      = -1;      # lines in the current group
18279     $maximum_alignment_index = -1;      # alignments in current group
18280     $zero_count              = 0;       # count consecutive lines without tokens
18281     $current_line            = undef;   # line being matched for alignment
18282     $group_maximum_gap       = 0;       # largest gap introduced
18283     $group_type              = "";
18284     $marginal_match          = 0;
18285     $comment_leading_space_count = 0;
18286     $last_leading_space_count    = 0;
18287 }
18288
18289 # interface to Perl::Tidy::Diagnostics routines
18290 sub write_diagnostics {
18291     if ($diagnostics_object) {
18292         $diagnostics_object->write_diagnostics(@_);
18293     }
18294 }
18295
18296 # interface to Perl::Tidy::Logger routines
18297 sub warning {
18298     if ($logger_object) {
18299         $logger_object->warning(@_);
18300     }
18301 }
18302
18303 sub write_logfile_entry {
18304     if ($logger_object) {
18305         $logger_object->write_logfile_entry(@_);
18306     }
18307 }
18308
18309 sub report_definite_bug {
18310     if ($logger_object) {
18311         $logger_object->report_definite_bug();
18312     }
18313 }
18314
18315 sub get_SPACES {
18316
18317     # return the number of leading spaces associated with an indentation
18318     # variable $indentation is either a constant number of spaces or an
18319     # object with a get_SPACES method.
18320     my $indentation = shift;
18321     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
18322 }
18323
18324 sub get_RECOVERABLE_SPACES {
18325
18326     # return the number of spaces (+ means shift right, - means shift left)
18327     # that we would like to shift a group of lines with the same indentation
18328     # to get them to line up with their opening parens
18329     my $indentation = shift;
18330     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
18331 }
18332
18333 sub get_STACK_DEPTH {
18334
18335     my $indentation = shift;
18336     return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
18337 }
18338
18339 sub make_alignment {
18340     my ( $col, $token ) = @_;
18341
18342     # make one new alignment at column $col which aligns token $token
18343     ++$maximum_alignment_index;
18344     my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
18345         column          => $col,
18346         starting_column => $col,
18347         matching_token  => $token,
18348         starting_line   => $maximum_line_index,
18349         ending_line     => $maximum_line_index,
18350         serial_number   => $maximum_alignment_index,
18351     );
18352     $ralignment_list->[$maximum_alignment_index] = $alignment;
18353     return $alignment;
18354 }
18355
18356 sub dump_alignments {
18357     print
18358 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
18359     for my $i ( 0 .. $maximum_alignment_index ) {
18360         my $column          = $ralignment_list->[$i]->get_column();
18361         my $starting_column = $ralignment_list->[$i]->get_starting_column();
18362         my $matching_token  = $ralignment_list->[$i]->get_matching_token();
18363         my $starting_line   = $ralignment_list->[$i]->get_starting_line();
18364         my $ending_line     = $ralignment_list->[$i]->get_ending_line();
18365         print
18366 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
18367     }
18368 }
18369
18370 sub save_alignment_columns {
18371     for my $i ( 0 .. $maximum_alignment_index ) {
18372         $ralignment_list->[$i]->save_column();
18373     }
18374 }
18375
18376 sub restore_alignment_columns {
18377     for my $i ( 0 .. $maximum_alignment_index ) {
18378         $ralignment_list->[$i]->restore_column();
18379     }
18380 }
18381
18382 sub forget_side_comment {
18383     $last_comment_column = 0;
18384 }
18385
18386 sub append_line {
18387
18388     # sub append is called to place one line in the current vertical group.
18389     #
18390     # The input parameters are:
18391     #     $level = indentation level of this line
18392     #     $rfields = reference to array of fields
18393     #     $rpatterns = reference to array of patterns, one per field
18394     #     $rtokens   = reference to array of tokens starting fields 1,2,..
18395     #
18396     # Here is an example of what this package does.  In this example,
18397     # we are trying to line up both the '=>' and the '#'.
18398     #
18399     #         '18' => 'grave',    #   \`
18400     #         '19' => 'acute',    #   `'
18401     #         '20' => 'caron',    #   \v
18402     # <-tabs-><f1-><--field 2 ---><-f3->
18403     # |            |              |    |
18404     # |            |              |    |
18405     # col1        col2         col3 col4
18406     #
18407     # The calling routine has already broken the entire line into 3 fields as
18408     # indicated.  (So the work of identifying promising common tokens has
18409     # already been done).
18410     #
18411     # In this example, there will be 2 tokens being matched: '=>' and '#'.
18412     # They are the leading parts of fields 2 and 3, but we do need to know
18413     # what they are so that we can dump a group of lines when these tokens
18414     # change.
18415     #
18416     # The fields contain the actual characters of each field.  The patterns
18417     # are like the fields, but they contain mainly token types instead
18418     # of tokens, so they have fewer characters.  They are used to be
18419     # sure we are matching fields of similar type.
18420     #
18421     # In this example, there will be 4 column indexes being adjusted.  The
18422     # first one is always at zero.  The interior columns are at the start of
18423     # the matching tokens, and the last one tracks the maximum line length.
18424     #
18425     # Basically, each time a new line comes in, it joins the current vertical
18426     # group if possible.  Otherwise it causes the current group to be dumped
18427     # and a new group is started.
18428     #
18429     # For each new group member, the column locations are increased, as
18430     # necessary, to make room for the new fields.  When the group is finally
18431     # output, these column numbers are used to compute the amount of spaces of
18432     # padding needed for each field.
18433     #
18434     # Programming note: the fields are assumed not to have any tab characters.
18435     # Tabs have been previously removed except for tabs in quoted strings and
18436     # side comments.  Tabs in these fields can mess up the column counting.
18437     # The log file warns the user if there are any such tabs.
18438
18439     my (
18440         $level,               $level_end,
18441         $indentation,         $rfields,
18442         $rtokens,             $rpatterns,
18443         $is_forced_break,     $outdent_long_lines,
18444         $is_terminal_ternary, $is_terminal_statement,
18445         $do_not_pad,          $rvertical_tightness_flags,
18446         $level_jump,
18447     ) = @_;
18448
18449     # number of fields is $jmax
18450     # number of tokens between fields is $jmax-1
18451     my $jmax = $#{$rfields};
18452
18453     my $leading_space_count = get_SPACES($indentation);
18454
18455     # set outdented flag to be sure we either align within statements or
18456     # across statement boundaries, but not both.
18457     my $is_outdented = $last_leading_space_count > $leading_space_count;
18458     $last_leading_space_count = $leading_space_count;
18459
18460     # Patch: undo for hanging side comment
18461     my $is_hanging_side_comment =
18462       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
18463     $is_outdented = 0 if $is_hanging_side_comment;
18464
18465     VALIGN_DEBUG_FLAG_APPEND0 && do {
18466         print
18467 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
18468     };
18469
18470     # Validate cached line if necessary: If we can produce a container
18471     # with just 2 lines total by combining an existing cached opening
18472     # token with the closing token to follow, then we will mark both
18473     # cached flags as valid.
18474     if ($rvertical_tightness_flags) {
18475         if (   $maximum_line_index <= 0
18476             && $cached_line_type
18477             && $cached_seqno
18478             && $rvertical_tightness_flags->[2]
18479             && $rvertical_tightness_flags->[2] == $cached_seqno )
18480         {
18481             $rvertical_tightness_flags->[3] ||= 1;
18482             $cached_line_valid ||= 1;
18483         }
18484     }
18485
18486     # do not join an opening block brace with an unbalanced line
18487     # unless requested with a flag value of 2
18488     if (   $cached_line_type == 3
18489         && $maximum_line_index < 0
18490         && $cached_line_flag < 2
18491         && $level_jump != 0 )
18492     {
18493         $cached_line_valid = 0;
18494     }
18495
18496     # patch until new aligner is finished
18497     if ($do_not_pad) { my_flush() }
18498
18499     # shouldn't happen:
18500     if ( $level < 0 ) { $level = 0 }
18501
18502     # do not align code across indentation level changes
18503     # or if vertical alignment is turned off for debugging
18504     if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
18505
18506         # we are allowed to shift a group of lines to the right if its
18507         # level is greater than the previous and next group
18508         $extra_indent_ok =
18509           ( $level < $group_level && $last_group_level_written < $group_level );
18510
18511         my_flush();
18512
18513         # If we know that this line will get flushed out by itself because
18514         # of level changes, we can leave the extra_indent_ok flag set.
18515         # That way, if we get an external flush call, we will still be
18516         # able to do some -lp alignment if necessary.
18517         $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
18518
18519         $group_level = $level;
18520
18521         # wait until after the above flush to get the leading space
18522         # count because it may have been changed if the -icp flag is in
18523         # effect
18524         $leading_space_count = get_SPACES($indentation);
18525
18526     }
18527
18528     # --------------------------------------------------------------------
18529     # Patch to collect outdentable block COMMENTS
18530     # --------------------------------------------------------------------
18531     my $is_blank_line = "";
18532     my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
18533     if ( $group_type eq 'COMMENT' ) {
18534         if (
18535             (
18536                    $is_block_comment
18537                 && $outdent_long_lines
18538                 && $leading_space_count == $comment_leading_space_count
18539             )
18540             || $is_blank_line
18541           )
18542         {
18543             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
18544             return;
18545         }
18546         else {
18547             my_flush();
18548         }
18549     }
18550
18551     # --------------------------------------------------------------------
18552     # add dummy fields for terminal ternary
18553     # --------------------------------------------------------------------
18554     my $j_terminal_match;
18555     if ( $is_terminal_ternary && $current_line ) {
18556         $j_terminal_match =
18557           fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
18558         $jmax = @{$rfields} - 1;
18559     }
18560
18561     # --------------------------------------------------------------------
18562     # add dummy fields for else statement
18563     # --------------------------------------------------------------------
18564     if (   $rfields->[0] =~ /^else\s*$/
18565         && $current_line
18566         && $level_jump == 0 )
18567     {
18568         $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
18569         $jmax = @{$rfields} - 1;
18570     }
18571
18572     # --------------------------------------------------------------------
18573     # Step 1. Handle simple line of code with no fields to match.
18574     # --------------------------------------------------------------------
18575     if ( $jmax <= 0 ) {
18576         $zero_count++;
18577
18578         if ( $maximum_line_index >= 0
18579             && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
18580         {
18581
18582             # flush the current group if it has some aligned columns..
18583             if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
18584
18585             # flush current group if we are just collecting side comments..
18586             elsif (
18587
18588                 # ...and we haven't seen a comment lately
18589                 ( $zero_count > 3 )
18590
18591                 # ..or if this new line doesn't fit to the left of the comments
18592                 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
18593                     $group_lines[0]->get_column(0) )
18594               )
18595             {
18596                 my_flush();
18597             }
18598         }
18599
18600         # patch to start new COMMENT group if this comment may be outdented
18601         if (   $is_block_comment
18602             && $outdent_long_lines
18603             && $maximum_line_index < 0 )
18604         {
18605             $group_type                           = 'COMMENT';
18606             $comment_leading_space_count          = $leading_space_count;
18607             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
18608             return;
18609         }
18610
18611         # just write this line directly if no current group, no side comment,
18612         # and no space recovery is needed.
18613         if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
18614         {
18615             write_leader_and_string( $leading_space_count, $$rfields[0], 0,
18616                 $outdent_long_lines, $rvertical_tightness_flags );
18617             return;
18618         }
18619     }
18620     else {
18621         $zero_count = 0;
18622     }
18623
18624     # programming check: (shouldn't happen)
18625     # an error here implies an incorrect call was made
18626     if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
18627         warning(
18628 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
18629         );
18630         report_definite_bug();
18631     }
18632
18633     # --------------------------------------------------------------------
18634     # create an object to hold this line
18635     # --------------------------------------------------------------------
18636     my $new_line = new Perl::Tidy::VerticalAligner::Line(
18637         jmax                      => $jmax,
18638         jmax_original_line        => $jmax,
18639         rtokens                   => $rtokens,
18640         rfields                   => $rfields,
18641         rpatterns                 => $rpatterns,
18642         indentation               => $indentation,
18643         leading_space_count       => $leading_space_count,
18644         outdent_long_lines        => $outdent_long_lines,
18645         list_type                 => "",
18646         is_hanging_side_comment   => $is_hanging_side_comment,
18647         maximum_line_length       => $rOpts->{'maximum-line-length'},
18648         rvertical_tightness_flags => $rvertical_tightness_flags,
18649     );
18650
18651     # Initialize a global flag saying if the last line of the group should
18652     # match end of group and also terminate the group.  There should be no
18653     # returns between here and where the flag is handled at the bottom.
18654     my $col_matching_terminal = 0;
18655     if ( defined($j_terminal_match) ) {
18656
18657         # remember the column of the terminal ? or { to match with
18658         $col_matching_terminal = $current_line->get_column($j_terminal_match);
18659
18660         # set global flag for sub decide_if_aligned
18661         $is_matching_terminal_line = 1;
18662     }
18663
18664     # --------------------------------------------------------------------
18665     # It simplifies things to create a zero length side comment
18666     # if none exists.
18667     # --------------------------------------------------------------------
18668     make_side_comment( $new_line, $level_end );
18669
18670     # --------------------------------------------------------------------
18671     # Decide if this is a simple list of items.
18672     # There are 3 list types: none, comma, comma-arrow.
18673     # We use this below to be less restrictive in deciding what to align.
18674     # --------------------------------------------------------------------
18675     if ($is_forced_break) {
18676         decide_if_list($new_line);
18677     }
18678
18679     if ($current_line) {
18680
18681         # --------------------------------------------------------------------
18682         # Allow hanging side comment to join current group, if any
18683         # This will help keep side comments aligned, because otherwise we
18684         # will have to start a new group, making alignment less likely.
18685         # --------------------------------------------------------------------
18686         join_hanging_comment( $new_line, $current_line )
18687           if $is_hanging_side_comment;
18688
18689         # --------------------------------------------------------------------
18690         # If there is just one previous line, and it has more fields
18691         # than the new line, try to join fields together to get a match with
18692         # the new line.  At the present time, only a single leading '=' is
18693         # allowed to be compressed out.  This is useful in rare cases where
18694         # a table is forced to use old breakpoints because of side comments,
18695         # and the table starts out something like this:
18696         #   my %MonthChars = ('0', 'Jan',   # side comment
18697         #                     '1', 'Feb',
18698         #                     '2', 'Mar',
18699         # Eliminating the '=' field will allow the remaining fields to line up.
18700         # This situation does not occur if there are no side comments
18701         # because scan_list would put a break after the opening '('.
18702         # --------------------------------------------------------------------
18703         eliminate_old_fields( $new_line, $current_line );
18704
18705         # --------------------------------------------------------------------
18706         # If the new line has more fields than the current group,
18707         # see if we can match the first fields and combine the remaining
18708         # fields of the new line.
18709         # --------------------------------------------------------------------
18710         eliminate_new_fields( $new_line, $current_line );
18711
18712         # --------------------------------------------------------------------
18713         # Flush previous group unless all common tokens and patterns match..
18714         # --------------------------------------------------------------------
18715         check_match( $new_line, $current_line );
18716
18717         # --------------------------------------------------------------------
18718         # See if there is space for this line in the current group (if any)
18719         # --------------------------------------------------------------------
18720         if ($current_line) {
18721             check_fit( $new_line, $current_line );
18722         }
18723     }
18724
18725     # --------------------------------------------------------------------
18726     # Append this line to the current group (or start new group)
18727     # --------------------------------------------------------------------
18728     accept_line($new_line);
18729
18730     # Future update to allow this to vary:
18731     $current_line = $new_line if ( $maximum_line_index == 0 );
18732
18733     # output this group if it ends in a terminal else or ternary line
18734     if ( defined($j_terminal_match) ) {
18735
18736         # if there is only one line in the group (maybe due to failure to match
18737         # perfectly with previous lines), then align the ? or { of this
18738         # terminal line with the previous one unless that would make the line
18739         # too long
18740         if ( $maximum_line_index == 0 ) {
18741             my $col_now = $current_line->get_column($j_terminal_match);
18742             my $pad     = $col_matching_terminal - $col_now;
18743             my $padding_available =
18744               $current_line->get_available_space_on_right();
18745             if ( $pad > 0 && $pad <= $padding_available ) {
18746                 $current_line->increase_field_width( $j_terminal_match, $pad );
18747             }
18748         }
18749         my_flush();
18750         $is_matching_terminal_line = 0;
18751     }
18752
18753     # --------------------------------------------------------------------
18754     # Step 8. Some old debugging stuff
18755     # --------------------------------------------------------------------
18756     VALIGN_DEBUG_FLAG_APPEND && do {
18757         print "APPEND fields:";
18758         dump_array(@$rfields);
18759         print "APPEND tokens:";
18760         dump_array(@$rtokens);
18761         print "APPEND patterns:";
18762         dump_array(@$rpatterns);
18763         dump_alignments();
18764     };
18765
18766     return;
18767 }
18768
18769 sub join_hanging_comment {
18770
18771     my $line = shift;
18772     my $jmax = $line->get_jmax();
18773     return 0 unless $jmax == 1;    # must be 2 fields
18774     my $rtokens = $line->get_rtokens();
18775     return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
18776     my $rfields = $line->get_rfields();
18777     return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
18778     my $old_line            = shift;
18779     my $maximum_field_index = $old_line->get_jmax();
18780     return 0
18781       unless $maximum_field_index > $jmax;    # the current line has more fields
18782     my $rpatterns = $line->get_rpatterns();
18783
18784     $line->set_is_hanging_side_comment(1);
18785     $jmax = $maximum_field_index;
18786     $line->set_jmax($jmax);
18787     $$rfields[$jmax]         = $$rfields[1];
18788     $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
18789     $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
18790     for ( my $j = 1 ; $j < $jmax ; $j++ ) {
18791         $$rfields[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
18792         $$rtokens[ $j - 1 ]   = "";
18793         $$rpatterns[ $j - 1 ] = "";
18794     }
18795     return 1;
18796 }
18797
18798 sub eliminate_old_fields {
18799
18800     my $new_line = shift;
18801     my $jmax     = $new_line->get_jmax();
18802     if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
18803     if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
18804
18805     # there must be one previous line
18806     return unless ( $maximum_line_index == 0 );
18807
18808     my $old_line            = shift;
18809     my $maximum_field_index = $old_line->get_jmax();
18810
18811     ###############################################
18812     # this line must have fewer fields
18813     return unless $maximum_field_index > $jmax;
18814     ###############################################
18815
18816     # Identify specific cases where field elimination is allowed:
18817     # case=1: both lines have comma-separated lists, and the first
18818     #         line has an equals
18819     # case=2: both lines have leading equals
18820
18821     # case 1 is the default
18822     my $case = 1;
18823
18824     # See if case 2: both lines have leading '='
18825     # We'll require smiliar leading patterns in this case
18826     my $old_rtokens   = $old_line->get_rtokens();
18827     my $rtokens       = $new_line->get_rtokens();
18828     my $rpatterns     = $new_line->get_rpatterns();
18829     my $old_rpatterns = $old_line->get_rpatterns();
18830     if (   $rtokens->[0] =~ /^=\d*$/
18831         && $old_rtokens->[0]   eq $rtokens->[0]
18832         && $old_rpatterns->[0] eq $rpatterns->[0] )
18833     {
18834         $case = 2;
18835     }
18836
18837     # not too many fewer fields in new line for case 1
18838     return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
18839
18840     # case 1 must have side comment
18841     my $old_rfields = $old_line->get_rfields();
18842     return
18843       if ( $case == 1
18844         && length( $$old_rfields[$maximum_field_index] ) == 0 );
18845
18846     my $rfields = $new_line->get_rfields();
18847
18848     my $hid_equals = 0;
18849
18850     my @new_alignments        = ();
18851     my @new_fields            = ();
18852     my @new_matching_patterns = ();
18853     my @new_matching_tokens   = ();
18854
18855     my $j = 0;
18856     my $k;
18857     my $current_field   = '';
18858     my $current_pattern = '';
18859
18860     # loop over all old tokens
18861     my $in_match = 0;
18862     for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
18863         $current_field   .= $$old_rfields[$k];
18864         $current_pattern .= $$old_rpatterns[$k];
18865         last if ( $j > $jmax - 1 );
18866
18867         if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
18868             $in_match                  = 1;
18869             $new_fields[$j]            = $current_field;
18870             $new_matching_patterns[$j] = $current_pattern;
18871             $current_field             = '';
18872             $current_pattern           = '';
18873             $new_matching_tokens[$j]   = $$old_rtokens[$k];
18874             $new_alignments[$j]        = $old_line->get_alignment($k);
18875             $j++;
18876         }
18877         else {
18878
18879             if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
18880                 last if ( $case == 2 );    # avoid problems with stuff
18881                                            # like:   $a=$b=$c=$d;
18882                 $hid_equals = 1;
18883             }
18884             last
18885               if ( $in_match && $case == 1 )
18886               ;    # disallow gaps in matching field types in case 1
18887         }
18888     }
18889
18890     # Modify the current state if we are successful.
18891     # We must exactly reach the ends of both lists for success.
18892     if (   ( $j == $jmax )
18893         && ( $current_field eq '' )
18894         && ( $case != 1 || $hid_equals ) )
18895     {
18896         $k = $maximum_field_index;
18897         $current_field   .= $$old_rfields[$k];
18898         $current_pattern .= $$old_rpatterns[$k];
18899         $new_fields[$j]            = $current_field;
18900         $new_matching_patterns[$j] = $current_pattern;
18901
18902         $new_alignments[$j] = $old_line->get_alignment($k);
18903         $maximum_field_index = $j;
18904
18905         $old_line->set_alignments(@new_alignments);
18906         $old_line->set_jmax($jmax);
18907         $old_line->set_rtokens( \@new_matching_tokens );
18908         $old_line->set_rfields( \@new_fields );
18909         $old_line->set_rpatterns( \@$rpatterns );
18910     }
18911 }
18912
18913 # create an empty side comment if none exists
18914 sub make_side_comment {
18915     my $new_line  = shift;
18916     my $level_end = shift;
18917     my $jmax      = $new_line->get_jmax();
18918     my $rtokens   = $new_line->get_rtokens();
18919
18920     # if line does not have a side comment...
18921     if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
18922         my $rfields   = $new_line->get_rfields();
18923         my $rpatterns = $new_line->get_rpatterns();
18924         $$rtokens[$jmax]     = '#';
18925         $$rfields[ ++$jmax ] = '';
18926         $$rpatterns[$jmax]   = '#';
18927         $new_line->set_jmax($jmax);
18928         $new_line->set_jmax_original_line($jmax);
18929     }
18930
18931     # line has a side comment..
18932     else {
18933
18934         # don't remember old side comment location for very long
18935         my $line_number = $vertical_aligner_self->get_output_line_number();
18936         my $rfields     = $new_line->get_rfields();
18937         if (
18938             $line_number - $last_side_comment_line_number > 12
18939
18940             # and don't remember comment location across block level changes
18941             || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
18942           )
18943         {
18944             forget_side_comment();
18945         }
18946         $last_side_comment_line_number = $line_number;
18947         $last_side_comment_level       = $level_end;
18948     }
18949 }
18950
18951 sub decide_if_list {
18952
18953     my $line = shift;
18954
18955     # A list will be taken to be a line with a forced break in which all
18956     # of the field separators are commas or comma-arrows (except for the
18957     # trailing #)
18958
18959     # List separator tokens are things like ',3'   or '=>2',
18960     # where the trailing digit is the nesting depth.  Allow braces
18961     # to allow nested list items.
18962     my $rtokens    = $line->get_rtokens();
18963     my $test_token = $$rtokens[0];
18964     if ( $test_token =~ /^(\,|=>)/ ) {
18965         my $list_type = $test_token;
18966         my $jmax      = $line->get_jmax();
18967
18968         foreach ( 1 .. $jmax - 2 ) {
18969             if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
18970                 $list_type = "";
18971                 last;
18972             }
18973         }
18974         $line->set_list_type($list_type);
18975     }
18976 }
18977
18978 sub eliminate_new_fields {
18979
18980     return unless ( $maximum_line_index >= 0 );
18981     my ( $new_line, $old_line ) = @_;
18982     my $jmax = $new_line->get_jmax();
18983
18984     my $old_rtokens = $old_line->get_rtokens();
18985     my $rtokens     = $new_line->get_rtokens();
18986     my $is_assignment =
18987       ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
18988
18989     # must be monotonic variation
18990     return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
18991
18992     # must be more fields in the new line
18993     my $maximum_field_index = $old_line->get_jmax();
18994     return unless ( $maximum_field_index < $jmax );
18995
18996     unless ($is_assignment) {
18997         return
18998           unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
18999           ;    # only if monotonic
19000
19001         # never combine fields of a comma list
19002         return
19003           unless ( $maximum_field_index > 1 )
19004           && ( $new_line->get_list_type() !~ /^,/ );
19005     }
19006
19007     my $rfields       = $new_line->get_rfields();
19008     my $rpatterns     = $new_line->get_rpatterns();
19009     my $old_rpatterns = $old_line->get_rpatterns();
19010
19011     # loop over all OLD tokens except comment and check match
19012     my $match = 1;
19013     my $k;
19014     for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
19015         if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
19016             || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
19017         {
19018             $match = 0;
19019             last;
19020         }
19021     }
19022
19023     # first tokens agree, so combine extra new tokens
19024     if ($match) {
19025         for $k ( $maximum_field_index .. $jmax - 1 ) {
19026
19027             $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
19028             $$rfields[$k] = "";
19029             $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
19030             $$rpatterns[$k] = "";
19031         }
19032
19033         $$rtokens[ $maximum_field_index - 1 ] = '#';
19034         $$rfields[$maximum_field_index]       = $$rfields[$jmax];
19035         $$rpatterns[$maximum_field_index]     = $$rpatterns[$jmax];
19036         $jmax                                 = $maximum_field_index;
19037     }
19038     $new_line->set_jmax($jmax);
19039 }
19040
19041 sub fix_terminal_ternary {
19042
19043     # Add empty fields as necessary to align a ternary term
19044     # like this:
19045     #
19046     #  my $leapyear =
19047     #      $year % 4   ? 0
19048     #    : $year % 100 ? 1
19049     #    : $year % 400 ? 0
19050     #    :               1;
19051     #
19052     # returns 1 if the terminal item should be indented
19053
19054     my ( $rfields, $rtokens, $rpatterns ) = @_;
19055
19056     my $jmax        = @{$rfields} - 1;
19057     my $old_line    = $group_lines[$maximum_line_index];
19058     my $rfields_old = $old_line->get_rfields();
19059
19060     my $rpatterns_old       = $old_line->get_rpatterns();
19061     my $rtokens_old         = $old_line->get_rtokens();
19062     my $maximum_field_index = $old_line->get_jmax();
19063
19064     # look for the question mark after the :
19065     my ($jquestion);
19066     my $depth_question;
19067     my $pad = "";
19068     for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
19069         my $tok = $rtokens_old->[$j];
19070         if ( $tok =~ /^\?(\d+)$/ ) {
19071             $depth_question = $1;
19072
19073             # depth must be correct
19074             next unless ( $depth_question eq $group_level );
19075
19076             $jquestion = $j;
19077             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
19078                 $pad = " " x length($1);
19079             }
19080             else {
19081                 return;    # shouldn't happen
19082             }
19083             last;
19084         }
19085     }
19086     return unless ( defined($jquestion) );    # shouldn't happen
19087
19088     # Now splice the tokens and patterns of the previous line
19089     # into the else line to insure a match.  Add empty fields
19090     # as necessary.
19091     my $jadd = $jquestion;
19092
19093     # Work on copies of the actual arrays in case we have
19094     # to return due to an error
19095     my @fields   = @{$rfields};
19096     my @patterns = @{$rpatterns};
19097     my @tokens   = @{$rtokens};
19098
19099     VALIGN_DEBUG_FLAG_TERNARY && do {
19100         local $" = '><';
19101         print "CURRENT FIELDS=<@{$rfields_old}>\n";
19102         print "CURRENT TOKENS=<@{$rtokens_old}>\n";
19103         print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
19104         print "UNMODIFIED FIELDS=<@{$rfields}>\n";
19105         print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
19106         print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
19107     };
19108
19109     # handle cases of leading colon on this line
19110     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
19111
19112         my ( $colon, $therest ) = ( $1, $2 );
19113
19114         # Handle sub-case of first field with leading colon plus additional code
19115         # This is the usual situation as at the '1' below:
19116         #  ...
19117         #  : $year % 400 ? 0
19118         #  :               1;
19119         if ($therest) {
19120
19121             # Split the first field after the leading colon and insert padding.
19122             # Note that this padding will remain even if the terminal value goes
19123             # out on a separate line.  This does not seem to look to bad, so no
19124             # mechanism has been included to undo it.
19125             my $field1 = shift @fields;
19126             unshift @fields, ( $colon, $pad . $therest );
19127
19128             # change the leading pattern from : to ?
19129             return unless ( $patterns[0] =~ s/^\:/?/ );
19130
19131             # install leading tokens and patterns of existing line
19132             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
19133             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
19134
19135             # insert appropriate number of empty fields
19136             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
19137         }
19138
19139         # handle sub-case of first field just equal to leading colon.
19140         # This can happen for example in the example below where
19141         # the leading '(' would create a new alignment token
19142         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
19143         # :                        ( $mname = $name . '->' );
19144         else {
19145
19146             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
19147
19148             # prepend a leading ? onto the second pattern
19149             $patterns[1] = "?b" . $patterns[1];
19150
19151             # pad the second field
19152             $fields[1] = $pad . $fields[1];
19153
19154             # install leading tokens and patterns of existing line, replacing
19155             # leading token and inserting appropriate number of empty fields
19156             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
19157             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
19158             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
19159         }
19160     }
19161
19162     # Handle case of no leading colon on this line.  This will
19163     # be the case when -wba=':' is used.  For example,
19164     #  $year % 400 ? 0 :
19165     #                1;
19166     else {
19167
19168         # install leading tokens and patterns of existing line
19169         $patterns[0] = '?' . 'b' . $patterns[0];
19170         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
19171         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
19172
19173         # insert appropriate number of empty fields
19174         $jadd = $jquestion + 1;
19175         $fields[0] = $pad . $fields[0];
19176         splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
19177     }
19178
19179     VALIGN_DEBUG_FLAG_TERNARY && do {
19180         local $" = '><';
19181         print "MODIFIED TOKENS=<@tokens>\n";
19182         print "MODIFIED PATTERNS=<@patterns>\n";
19183         print "MODIFIED FIELDS=<@fields>\n";
19184     };
19185
19186     # all ok .. update the arrays
19187     @{$rfields}   = @fields;
19188     @{$rtokens}   = @tokens;
19189     @{$rpatterns} = @patterns;
19190
19191     # force a flush after this line
19192     return $jquestion;
19193 }
19194
19195 sub fix_terminal_else {
19196
19197     # Add empty fields as necessary to align a balanced terminal
19198     # else block to a previous if/elsif/unless block,
19199     # like this:
19200     #
19201     #  if   ( 1 || $x ) { print "ok 13\n"; }
19202     #  else             { print "not ok 13\n"; }
19203     #
19204     # returns 1 if the else block should be indented
19205     #
19206     my ( $rfields, $rtokens, $rpatterns ) = @_;
19207     my $jmax = @{$rfields} - 1;
19208     return unless ( $jmax > 0 );
19209
19210     # check for balanced else block following if/elsif/unless
19211     my $rfields_old = $current_line->get_rfields();
19212
19213     # TBD: add handling for 'case'
19214     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
19215
19216     # look for the opening brace after the else, and extrace the depth
19217     my $tok_brace = $rtokens->[0];
19218     my $depth_brace;
19219     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
19220
19221     # probably:  "else # side_comment"
19222     else { return }
19223
19224     my $rpatterns_old       = $current_line->get_rpatterns();
19225     my $rtokens_old         = $current_line->get_rtokens();
19226     my $maximum_field_index = $current_line->get_jmax();
19227
19228     # be sure the previous if/elsif is followed by an opening paren
19229     my $jparen    = 0;
19230     my $tok_paren = '(' . $depth_brace;
19231     my $tok_test  = $rtokens_old->[$jparen];
19232     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
19233
19234     # Now find the opening block brace
19235     my ($jbrace);
19236     for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
19237         my $tok = $rtokens_old->[$j];
19238         if ( $tok eq $tok_brace ) {
19239             $jbrace = $j;
19240             last;
19241         }
19242     }
19243     return unless ( defined($jbrace) );           # shouldn't happen
19244
19245     # Now splice the tokens and patterns of the previous line
19246     # into the else line to insure a match.  Add empty fields
19247     # as necessary.
19248     my $jadd = $jbrace - $jparen;
19249     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
19250     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
19251     splice( @{$rfields}, 1, 0, ('') x $jadd );
19252
19253     # force a flush after this line if it does not follow a case
19254     return $jbrace
19255       unless ( $rfields_old->[0] =~ /^case\s*$/ );
19256 }
19257
19258 {    # sub check_match
19259     my %is_good_alignment;
19260
19261     BEGIN {
19262
19263         # Vertically aligning on certain "good" tokens is usually okay
19264         # so we can be less restrictive in marginal cases.
19265         @_ = qw( { ? => = );
19266         push @_, (',');
19267         @is_good_alignment{@_} = (1) x scalar(@_);
19268     }
19269
19270     sub check_match {
19271
19272         # See if the current line matches the current vertical alignment group.
19273         # If not, flush the current group.
19274         my $new_line = shift;
19275         my $old_line = shift;
19276
19277         # uses global variables:
19278         #  $previous_minimum_jmax_seen
19279         #  $maximum_jmax_seen
19280         #  $maximum_line_index
19281         #  $marginal_match
19282         my $jmax                = $new_line->get_jmax();
19283         my $maximum_field_index = $old_line->get_jmax();
19284
19285         # flush if this line has too many fields
19286         if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
19287
19288         # flush if adding this line would make a non-monotonic field count
19289         if (
19290             ( $maximum_field_index > $jmax )    # this has too few fields
19291             && (
19292                 ( $previous_minimum_jmax_seen <
19293                     $jmax )                     # and wouldn't be monotonic
19294                 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
19295             )
19296           )
19297         {
19298             goto NO_MATCH;
19299         }
19300
19301         # otherwise see if this line matches the current group
19302         my $jmax_original_line      = $new_line->get_jmax_original_line();
19303         my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19304         my $rtokens                 = $new_line->get_rtokens();
19305         my $rfields                 = $new_line->get_rfields();
19306         my $rpatterns               = $new_line->get_rpatterns();
19307         my $list_type               = $new_line->get_list_type();
19308
19309         my $group_list_type = $old_line->get_list_type();
19310         my $old_rpatterns   = $old_line->get_rpatterns();
19311         my $old_rtokens     = $old_line->get_rtokens();
19312
19313         my $jlimit = $jmax - 1;
19314         if ( $maximum_field_index > $jmax ) {
19315             $jlimit = $jmax_original_line;
19316             --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
19317         }
19318
19319         # handle comma-separated lists ..
19320         if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
19321             for my $j ( 0 .. $jlimit ) {
19322                 my $old_tok = $$old_rtokens[$j];
19323                 next unless $old_tok;
19324                 my $new_tok = $$rtokens[$j];
19325                 next unless $new_tok;
19326
19327                 # lists always match ...
19328                 # unless they would align any '=>'s with ','s
19329                 goto NO_MATCH
19330                   if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
19331                     || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
19332             }
19333         }
19334
19335         # do detailed check for everything else except hanging side comments
19336         elsif ( !$is_hanging_side_comment ) {
19337
19338             my $leading_space_count = $new_line->get_leading_space_count();
19339
19340             my $max_pad = 0;
19341             my $min_pad = 0;
19342             my $saw_good_alignment;
19343
19344             for my $j ( 0 .. $jlimit ) {
19345
19346                 my $old_tok = $$old_rtokens[$j];
19347                 my $new_tok = $$rtokens[$j];
19348
19349                 # Note on encoding used for alignment tokens:
19350                 # -------------------------------------------
19351                 # Tokens are "decorated" with information which can help
19352                 # prevent unwanted alignments.  Consider for example the
19353                 # following two lines:
19354                 #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
19355                 #   local ( $i, $f ) = &'bdiv( $xn, $xd );
19356                 # There are three alignment tokens in each line, a comma,
19357                 # an =, and a comma.  In the first line these three tokens
19358                 # are encoded as:
19359                 #    ,4+local-18     =3      ,4+split-7
19360                 # and in the second line they are encoded as
19361                 #    ,4+local-18     =3      ,4+&'bdiv-8
19362                 # Tokens always at least have token name and nesting
19363                 # depth.  So in this example the ='s are at depth 3 and
19364                 # the ,'s are at depth 4.  This prevents aligning tokens
19365                 # of different depths.  Commas contain additional
19366                 # information, as follows:
19367                 # ,  {depth} + {container name} - {spaces to opening paren}
19368                 # This allows us to reject matching the rightmost commas
19369                 # in the above two lines, since they are for different
19370                 # function calls.  This encoding is done in
19371                 # 'sub send_lines_to_vertical_aligner'.
19372
19373                 # Pick off actual token.
19374                 # Everything up to the first digit is the actual token.
19375                 my $alignment_token = $new_tok;
19376                 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
19377
19378                 # see if the decorated tokens match
19379                 my $tokens_match = $new_tok eq $old_tok
19380
19381                   # Exception for matching terminal : of ternary statement..
19382                   # consider containers prefixed by ? and : a match
19383                   || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
19384
19385                 # No match if the alignment tokens differ...
19386                 if ( !$tokens_match ) {
19387
19388                     # ...Unless this is a side comment
19389                     if (
19390                         $j == $jlimit
19391
19392                         # and there is either at least one alignment token
19393                         # or this is a single item following a list.  This
19394                         # latter rule is required for 'December' to join
19395                         # the following list:
19396                         # my (@months) = (
19397                         #     '',       'January',   'February', 'March',
19398                         #     'April',  'May',       'June',     'July',
19399                         #     'August', 'September', 'October',  'November',
19400                         #     'December'
19401                         # );
19402                         # If it doesn't then the -lp formatting will fail.
19403                         && ( $j > 0 || $old_tok =~ /^,/ )
19404                       )
19405                     {
19406                         $marginal_match = 1
19407                           if ( $marginal_match == 0
19408                             && $maximum_line_index == 0 );
19409                         last;
19410                     }
19411
19412                     goto NO_MATCH;
19413                 }
19414
19415                 # Calculate amount of padding required to fit this in.
19416                 # $pad is the number of spaces by which we must increase
19417                 # the current field to squeeze in this field.
19418                 my $pad =
19419                   length( $$rfields[$j] ) - $old_line->current_field_width($j);
19420                 if ( $j == 0 ) { $pad += $leading_space_count; }
19421
19422                 # remember max pads to limit marginal cases
19423                 if ( $alignment_token ne '#' ) {
19424                     if ( $pad > $max_pad ) { $max_pad = $pad }
19425                     if ( $pad < $min_pad ) { $min_pad = $pad }
19426                 }
19427                 if ( $is_good_alignment{$alignment_token} ) {
19428                     $saw_good_alignment = 1;
19429                 }
19430
19431                 # If patterns don't match, we have to be careful...
19432                 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
19433
19434                     # flag this as a marginal match since patterns differ
19435                     $marginal_match = 1
19436                       if ( $marginal_match == 0 && $maximum_line_index == 0 );
19437
19438                     # We have to be very careful about aligning commas
19439                     # when the pattern's don't match, because it can be
19440                     # worse to create an alignment where none is needed
19441                     # than to omit one.  Here's an example where the ','s
19442                     # are not in named continers.  The first line below
19443                     # should not match the next two:
19444                     #   ( $a, $b ) = ( $b, $r );
19445                     #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
19446                     #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
19447                     if ( $alignment_token eq ',' ) {
19448
19449                        # do not align commas unless they are in named containers
19450                         goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
19451                     }
19452
19453                     # do not align parens unless patterns match;
19454                     # large ugly spaces can occur in math expressions.
19455                     elsif ( $alignment_token eq '(' ) {
19456
19457                         # But we can allow a match if the parens don't
19458                         # require any padding.
19459                         if ( $pad != 0 ) { goto NO_MATCH }
19460                     }
19461
19462                     # Handle an '=' alignment with different patterns to
19463                     # the left.
19464                     elsif ( $alignment_token eq '=' ) {
19465
19466                         # It is best to be a little restrictive when
19467                         # aligning '=' tokens.  Here is an example of
19468                         # two lines that we will not align:
19469                         #       my $variable=6;
19470                         #       $bb=4;
19471                         # The problem is that one is a 'my' declaration,
19472                         # and the other isn't, so they're not very similar.
19473                         # We will filter these out by comparing the first
19474                         # letter of the pattern.  This is crude, but works
19475                         # well enough.
19476                         if (
19477                             substr( $$old_rpatterns[$j], 0, 1 ) ne
19478                             substr( $$rpatterns[$j], 0, 1 ) )
19479                         {
19480                             goto NO_MATCH;
19481                         }
19482
19483                         # If we pass that test, we'll call it a marginal match.
19484                         # Here is an example of a marginal match:
19485                         #       $done{$$op} = 1;
19486                         #       $op         = compile_bblock($op);
19487                         # The left tokens are both identifiers, but
19488                         # one accesses a hash and the other doesn't.
19489                         # We'll let this be a tentative match and undo
19490                         # it later if we don't find more than 2 lines
19491                         # in the group.
19492                         elsif ( $maximum_line_index == 0 ) {
19493                             $marginal_match =
19494                               2;    # =2 prevents being undone below
19495                         }
19496                     }
19497                 }
19498
19499                 # Don't let line with fewer fields increase column widths
19500                 # ( align3.t )
19501                 if ( $maximum_field_index > $jmax ) {
19502
19503                     # Exception: suspend this rule to allow last lines to join
19504                     if ( $pad > 0 ) { goto NO_MATCH; }
19505                 }
19506             } ## end for my $j ( 0 .. $jlimit)
19507
19508             # Turn off the "marginal match" flag in some cases...
19509             # A "marginal match" occurs when the alignment tokens agree
19510             # but there are differences in the other tokens (patterns).
19511             # If we leave the marginal match flag set, then the rule is that we
19512             # will align only if there are more than two lines in the group.
19513             # We will turn of the flag if we almost have a match
19514             # and either we have seen a good alignment token or we
19515             # just need a small pad (2 spaces) to fit.  These rules are
19516             # the result of experimentation.  Tokens which misaligned by just
19517             # one or two characters are annoying.  On the other hand,
19518             # large gaps to less important alignment tokens are also annoying.
19519             if (   $marginal_match == 1
19520                 && $jmax == $maximum_field_index
19521                 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
19522               )
19523             {
19524                 $marginal_match = 0;
19525             }
19526             ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
19527         }
19528
19529         # We have a match (even if marginal).
19530         # If the current line has fewer fields than the current group
19531         # but otherwise matches, copy the remaining group fields to
19532         # make it a perfect match.
19533         if ( $maximum_field_index > $jmax ) {
19534             my $comment = $$rfields[$jmax];
19535             for $jmax ( $jlimit .. $maximum_field_index ) {
19536                 $$rtokens[$jmax]     = $$old_rtokens[$jmax];
19537                 $$rfields[ ++$jmax ] = '';
19538                 $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
19539             }
19540             $$rfields[$jmax] = $comment;
19541             $new_line->set_jmax($jmax);
19542         }
19543         return;
19544
19545       NO_MATCH:
19546         ##print "BUBBA: no match jmax=$jmax  max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
19547         my_flush();
19548         return;
19549     }
19550 }
19551
19552 sub check_fit {
19553
19554     return unless ( $maximum_line_index >= 0 );
19555     my $new_line = shift;
19556     my $old_line = shift;
19557
19558     my $jmax                    = $new_line->get_jmax();
19559     my $leading_space_count     = $new_line->get_leading_space_count();
19560     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19561     my $rtokens                 = $new_line->get_rtokens();
19562     my $rfields                 = $new_line->get_rfields();
19563     my $rpatterns               = $new_line->get_rpatterns();
19564
19565     my $group_list_type = $group_lines[0]->get_list_type();
19566
19567     my $padding_so_far    = 0;
19568     my $padding_available = $old_line->get_available_space_on_right();
19569
19570     # save current columns in case this doesn't work
19571     save_alignment_columns();
19572
19573     my ( $j, $pad, $eight );
19574     my $maximum_field_index = $old_line->get_jmax();
19575     for $j ( 0 .. $jmax ) {
19576
19577         $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
19578
19579         if ( $j == 0 ) {
19580             $pad += $leading_space_count;
19581         }
19582
19583         # remember largest gap of the group, excluding gap to side comment
19584         if (   $pad < 0
19585             && $group_maximum_gap < -$pad
19586             && $j > 0
19587             && $j < $jmax - 1 )
19588         {
19589             $group_maximum_gap = -$pad;
19590         }
19591
19592         next if $pad < 0;
19593
19594         ## This patch helps sometimes, but it doesn't check to see if
19595         ## the line is too long even without the side comment.  It needs
19596         ## to be reworked.
19597         ##don't let a long token with no trailing side comment push
19598         ##side comments out, or end a group.  (sidecmt1.t)
19599         ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
19600
19601         # This line will need space; lets see if we want to accept it..
19602         if (
19603
19604             # not if this won't fit
19605             ( $pad > $padding_available )
19606
19607             # previously, there were upper bounds placed on padding here
19608             # (maximum_whitespace_columns), but they were not really helpful
19609
19610           )
19611         {
19612
19613             # revert to starting state then flush; things didn't work out
19614             restore_alignment_columns();
19615             my_flush();
19616             last;
19617         }
19618
19619         # patch to avoid excessive gaps in previous lines,
19620         # due to a line of fewer fields.
19621         #   return join( ".",
19622         #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
19623         #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
19624         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
19625
19626         # looks ok, squeeze this field in
19627         $old_line->increase_field_width( $j, $pad );
19628         $padding_available -= $pad;
19629
19630         # remember largest gap of the group, excluding gap to side comment
19631         if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
19632             $group_maximum_gap = $pad;
19633         }
19634     }
19635 }
19636
19637 sub accept_line {
19638
19639     # The current line either starts a new alignment group or is
19640     # accepted into the current alignment group.
19641     my $new_line = shift;
19642     $group_lines[ ++$maximum_line_index ] = $new_line;
19643
19644     # initialize field lengths if starting new group
19645     if ( $maximum_line_index == 0 ) {
19646
19647         my $jmax    = $new_line->get_jmax();
19648         my $rfields = $new_line->get_rfields();
19649         my $rtokens = $new_line->get_rtokens();
19650         my $j;
19651         my $col = $new_line->get_leading_space_count();
19652
19653         for $j ( 0 .. $jmax ) {
19654             $col += length( $$rfields[$j] );
19655
19656             # create initial alignments for the new group
19657             my $token = "";
19658             if ( $j < $jmax ) { $token = $$rtokens[$j] }
19659             my $alignment = make_alignment( $col, $token );
19660             $new_line->set_alignment( $j, $alignment );
19661         }
19662
19663         $maximum_jmax_seen = $jmax;
19664         $minimum_jmax_seen = $jmax;
19665     }
19666
19667     # use previous alignments otherwise
19668     else {
19669         my @new_alignments =
19670           $group_lines[ $maximum_line_index - 1 ]->get_alignments();
19671         $new_line->set_alignments(@new_alignments);
19672     }
19673
19674     # remember group jmax extremes for next call to append_line
19675     $previous_minimum_jmax_seen = $minimum_jmax_seen;
19676     $previous_maximum_jmax_seen = $maximum_jmax_seen;
19677 }
19678
19679 sub dump_array {
19680
19681     # debug routine to dump array contents
19682     local $" = ')(';
19683     print "(@_)\n";
19684 }
19685
19686 # flush() sends the current Perl::Tidy::VerticalAligner group down the
19687 # pipeline to Perl::Tidy::FileWriter.
19688
19689 # This is the external flush, which also empties the cache
19690 sub flush {
19691
19692     if ( $maximum_line_index < 0 ) {
19693         if ($cached_line_type) {
19694             $seqno_string = $cached_seqno_string;
19695             entab_and_output( $cached_line_text,
19696                 $cached_line_leading_space_count,
19697                 $last_group_level_written );
19698             $cached_line_type    = 0;
19699             $cached_line_text    = "";
19700             $cached_seqno_string = "";
19701         }
19702     }
19703     else {
19704         my_flush();
19705     }
19706 }
19707
19708 # This is the internal flush, which leaves the cache intact
19709 sub my_flush {
19710
19711     return if ( $maximum_line_index < 0 );
19712
19713     # handle a group of comment lines
19714     if ( $group_type eq 'COMMENT' ) {
19715
19716         VALIGN_DEBUG_FLAG_APPEND0 && do {
19717             my ( $a, $b, $c ) = caller();
19718             print
19719 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
19720
19721         };
19722         my $leading_space_count = $comment_leading_space_count;
19723         my $leading_string      = get_leading_string($leading_space_count);
19724
19725         # zero leading space count if any lines are too long
19726         my $max_excess = 0;
19727         for my $i ( 0 .. $maximum_line_index ) {
19728             my $str = $group_lines[$i];
19729             my $excess =
19730               length($str) + $leading_space_count - $rOpts_maximum_line_length;
19731             if ( $excess > $max_excess ) {
19732                 $max_excess = $excess;
19733             }
19734         }
19735
19736         if ( $max_excess > 0 ) {
19737             $leading_space_count -= $max_excess;
19738             if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
19739             $last_outdented_line_at =
19740               $file_writer_object->get_output_line_number();
19741             unless ($outdented_line_count) {
19742                 $first_outdented_line_at = $last_outdented_line_at;
19743             }
19744             $outdented_line_count += ( $maximum_line_index + 1 );
19745         }
19746
19747         # write the group of lines
19748         my $outdent_long_lines = 0;
19749         for my $i ( 0 .. $maximum_line_index ) {
19750             write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
19751                 $outdent_long_lines, "" );
19752         }
19753     }
19754
19755     # handle a group of code lines
19756     else {
19757
19758         VALIGN_DEBUG_FLAG_APPEND0 && do {
19759             my $group_list_type = $group_lines[0]->get_list_type();
19760             my ( $a, $b, $c ) = caller();
19761             my $maximum_field_index = $group_lines[0]->get_jmax();
19762             print
19763 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
19764
19765         };
19766
19767         # some small groups are best left unaligned
19768         my $do_not_align = decide_if_aligned();
19769
19770         # optimize side comment location
19771         $do_not_align = adjust_side_comment($do_not_align);
19772
19773         # recover spaces for -lp option if possible
19774         my $extra_leading_spaces = get_extra_leading_spaces();
19775
19776         # all lines of this group have the same basic leading spacing
19777         my $group_leader_length = $group_lines[0]->get_leading_space_count();
19778
19779         # add extra leading spaces if helpful
19780         my $min_ci_gap = improve_continuation_indentation( $do_not_align,
19781             $group_leader_length );
19782
19783         # loop to output all lines
19784         for my $i ( 0 .. $maximum_line_index ) {
19785             my $line = $group_lines[$i];
19786             write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
19787                 $group_leader_length, $extra_leading_spaces );
19788         }
19789     }
19790     initialize_for_new_group();
19791 }
19792
19793 sub decide_if_aligned {
19794
19795     # Do not try to align two lines which are not really similar
19796     return unless $maximum_line_index == 1;
19797     return if ($is_matching_terminal_line);
19798
19799     my $group_list_type = $group_lines[0]->get_list_type();
19800
19801     my $do_not_align = (
19802
19803         # always align lists
19804         !$group_list_type
19805
19806           && (
19807
19808             # don't align if it was just a marginal match
19809             $marginal_match
19810
19811             # don't align two lines with big gap
19812             || $group_maximum_gap > 12
19813
19814             # or lines with differing number of alignment tokens
19815             # TODO: this could be improved.  It occasionally rejects
19816             # good matches.
19817             || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
19818           )
19819     );
19820
19821     # But try to convert them into a simple comment group if the first line
19822     # a has side comment
19823     my $rfields             = $group_lines[0]->get_rfields();
19824     my $maximum_field_index = $group_lines[0]->get_jmax();
19825     if (   $do_not_align
19826         && ( $maximum_line_index > 0 )
19827         && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
19828     {
19829         combine_fields();
19830         $do_not_align = 0;
19831     }
19832     return $do_not_align;
19833 }
19834
19835 sub adjust_side_comment {
19836
19837     my $do_not_align = shift;
19838
19839     # let's see if we can move the side comment field out a little
19840     # to improve readability (the last field is always a side comment field)
19841     my $have_side_comment       = 0;
19842     my $first_side_comment_line = -1;
19843     my $maximum_field_index     = $group_lines[0]->get_jmax();
19844     for my $i ( 0 .. $maximum_line_index ) {
19845         my $line = $group_lines[$i];
19846
19847         if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
19848             $have_side_comment       = 1;
19849             $first_side_comment_line = $i;
19850             last;
19851         }
19852     }
19853
19854     my $kmax = $maximum_field_index + 1;
19855
19856     if ($have_side_comment) {
19857
19858         my $line = $group_lines[0];
19859
19860         # the maximum space without exceeding the line length:
19861         my $avail = $line->get_available_space_on_right();
19862
19863         # try to use the previous comment column
19864         my $side_comment_column = $line->get_column( $kmax - 2 );
19865         my $move                = $last_comment_column - $side_comment_column;
19866
19867 ##        my $sc_line0 = $side_comment_history[0]->[0];
19868 ##        my $sc_col0  = $side_comment_history[0]->[1];
19869 ##        my $sc_line1 = $side_comment_history[1]->[0];
19870 ##        my $sc_col1  = $side_comment_history[1]->[1];
19871 ##        my $sc_line2 = $side_comment_history[2]->[0];
19872 ##        my $sc_col2  = $side_comment_history[2]->[1];
19873 ##
19874 ##        # FUTURE UPDATES:
19875 ##        # Be sure to ignore 'do not align' and  '} # end comments'
19876 ##        # Find first $move > 0 and $move <= $avail as follows:
19877 ##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
19878 ##        # 2. try sc_col2 if (line-sc_line2) < 12
19879 ##        # 3. try min possible space, plus up to 8,
19880 ##        # 4. try min possible space
19881
19882         if ( $kmax > 0 && !$do_not_align ) {
19883
19884             # but if this doesn't work, give up and use the minimum space
19885             if ( $move > $avail ) {
19886                 $move = $rOpts_minimum_space_to_comment - 1;
19887             }
19888
19889             # but we want some minimum space to the comment
19890             my $min_move = $rOpts_minimum_space_to_comment - 1;
19891             if (   $move >= 0
19892                 && $last_side_comment_length > 0
19893                 && ( $first_side_comment_line == 0 )
19894                 && $group_level == $last_group_level_written )
19895             {
19896                 $min_move = 0;
19897             }
19898
19899             if ( $move < $min_move ) {
19900                 $move = $min_move;
19901             }
19902
19903             # prevously, an upper bound was placed on $move here,
19904             # (maximum_space_to_comment), but it was not helpful
19905
19906             # don't exceed the available space
19907             if ( $move > $avail ) { $move = $avail }
19908
19909             # we can only increase space, never decrease
19910             if ( $move > 0 ) {
19911                 $line->increase_field_width( $maximum_field_index - 1, $move );
19912             }
19913
19914             # remember this column for the next group
19915             $last_comment_column = $line->get_column( $kmax - 2 );
19916         }
19917         else {
19918
19919             # try to at least line up the existing side comment location
19920             if ( $kmax > 0 && $move > 0 && $move < $avail ) {
19921                 $line->increase_field_width( $maximum_field_index - 1, $move );
19922                 $do_not_align = 0;
19923             }
19924
19925             # reset side comment column if we can't align
19926             else {
19927                 forget_side_comment();
19928             }
19929         }
19930     }
19931     return $do_not_align;
19932 }
19933
19934 sub improve_continuation_indentation {
19935     my ( $do_not_align, $group_leader_length ) = @_;
19936
19937     # See if we can increase the continuation indentation
19938     # to move all continuation lines closer to the next field
19939     # (unless it is a comment).
19940     #
19941     # '$min_ci_gap'is the extra indentation that we may need to introduce.
19942     # We will only introduce this to fields which already have some ci.
19943     # Without this variable, we would occasionally get something like this
19944     # (Complex.pm):
19945     #
19946     # use overload '+' => \&plus,
19947     #   '-'            => \&minus,
19948     #   '*'            => \&multiply,
19949     #   ...
19950     #   'tan'          => \&tan,
19951     #   'atan2'        => \&atan2,
19952     #
19953     # Whereas with this variable, we can shift variables over to get this:
19954     #
19955     # use overload '+' => \&plus,
19956     #          '-'     => \&minus,
19957     #          '*'     => \&multiply,
19958     #          ...
19959     #          'tan'   => \&tan,
19960     #          'atan2' => \&atan2,
19961
19962     ## BUB: Deactivated####################
19963     # The trouble with this patch is that it may, for example,
19964     # move in some 'or's  or ':'s, and leave some out, so that the
19965     # left edge alignment suffers.
19966     return 0;
19967     ###########################################
19968
19969     my $maximum_field_index = $group_lines[0]->get_jmax();
19970
19971     my $min_ci_gap = $rOpts_maximum_line_length;
19972     if ( $maximum_field_index > 1 && !$do_not_align ) {
19973
19974         for my $i ( 0 .. $maximum_line_index ) {
19975             my $line                = $group_lines[$i];
19976             my $leading_space_count = $line->get_leading_space_count();
19977             my $rfields             = $line->get_rfields();
19978
19979             my $gap =
19980               $line->get_column(0) -
19981               $leading_space_count -
19982               length( $$rfields[0] );
19983
19984             if ( $leading_space_count > $group_leader_length ) {
19985                 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
19986             }
19987         }
19988
19989         if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
19990             $min_ci_gap = 0;
19991         }
19992     }
19993     else {
19994         $min_ci_gap = 0;
19995     }
19996     return $min_ci_gap;
19997 }
19998
19999 sub write_vertically_aligned_line {
20000
20001     my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
20002         $extra_leading_spaces )
20003       = @_;
20004     my $rfields                   = $line->get_rfields();
20005     my $leading_space_count       = $line->get_leading_space_count();
20006     my $outdent_long_lines        = $line->get_outdent_long_lines();
20007     my $maximum_field_index       = $line->get_jmax();
20008     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
20009
20010     # add any extra spaces
20011     if ( $leading_space_count > $group_leader_length ) {
20012         $leading_space_count += $min_ci_gap;
20013     }
20014
20015     my $str = $$rfields[0];
20016
20017     # loop to concatenate all fields of this line and needed padding
20018     my $total_pad_count = 0;
20019     my ( $j, $pad );
20020     for $j ( 1 .. $maximum_field_index ) {
20021
20022         # skip zero-length side comments
20023         last
20024           if ( ( $j == $maximum_field_index )
20025             && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
20026           );
20027
20028         # compute spaces of padding before this field
20029         my $col = $line->get_column( $j - 1 );
20030         $pad = $col - ( length($str) + $leading_space_count );
20031
20032         if ($do_not_align) {
20033             $pad =
20034               ( $j < $maximum_field_index )
20035               ? 0
20036               : $rOpts_minimum_space_to_comment - 1;
20037         }
20038
20039         # if the -fpsc flag is set, move the side comment to the selected
20040         # column if and only if it is possible, ignoring constraints on
20041         # line length and minimum space to comment
20042         if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
20043         {
20044             my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
20045             if ( $newpad >= 0 ) { $pad = $newpad; }
20046         }
20047
20048         # accumulate the padding
20049         if ( $pad > 0 ) { $total_pad_count += $pad; }
20050
20051         # add this field
20052         if ( !defined $$rfields[$j] ) {
20053             write_diagnostics("UNDEFined field at j=$j\n");
20054         }
20055
20056         # only add padding when we have a finite field;
20057         # this avoids extra terminal spaces if we have empty fields
20058         if ( length( $$rfields[$j] ) > 0 ) {
20059             $str .= ' ' x $total_pad_count;
20060             $total_pad_count = 0;
20061             $str .= $$rfields[$j];
20062         }
20063         else {
20064             $total_pad_count = 0;
20065         }
20066
20067         # update side comment history buffer
20068         if ( $j == $maximum_field_index ) {
20069             my $lineno = $file_writer_object->get_output_line_number();
20070             shift @side_comment_history;
20071             push @side_comment_history, [ $lineno, $col ];
20072         }
20073     }
20074
20075     my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
20076
20077     # ship this line off
20078     write_leader_and_string( $leading_space_count + $extra_leading_spaces,
20079         $str, $side_comment_length, $outdent_long_lines,
20080         $rvertical_tightness_flags );
20081 }
20082
20083 sub get_extra_leading_spaces {
20084
20085     #----------------------------------------------------------
20086     # Define any extra indentation space (for the -lp option).
20087     # Here is why:
20088     # If a list has side comments, sub scan_list must dump the
20089     # list before it sees everything.  When this happens, it sets
20090     # the indentation to the standard scheme, but notes how
20091     # many spaces it would have liked to use.  We may be able
20092     # to recover that space here in the event that that all of the
20093     # lines of a list are back together again.
20094     #----------------------------------------------------------
20095
20096     my $extra_leading_spaces = 0;
20097     if ($extra_indent_ok) {
20098         my $object = $group_lines[0]->get_indentation();
20099         if ( ref($object) ) {
20100             my $extra_indentation_spaces_wanted =
20101               get_RECOVERABLE_SPACES($object);
20102
20103             # all indentation objects must be the same
20104             my $i;
20105             for $i ( 1 .. $maximum_line_index ) {
20106                 if ( $object != $group_lines[$i]->get_indentation() ) {
20107                     $extra_indentation_spaces_wanted = 0;
20108                     last;
20109                 }
20110             }
20111
20112             if ($extra_indentation_spaces_wanted) {
20113
20114                 # the maximum space without exceeding the line length:
20115                 my $avail = $group_lines[0]->get_available_space_on_right();
20116                 $extra_leading_spaces =
20117                   ( $avail > $extra_indentation_spaces_wanted )
20118                   ? $extra_indentation_spaces_wanted
20119                   : $avail;
20120
20121                 # update the indentation object because with -icp the terminal
20122                 # ');' will use the same adjustment.
20123                 $object->permanently_decrease_AVAILABLE_SPACES(
20124                     -$extra_leading_spaces );
20125             }
20126         }
20127     }
20128     return $extra_leading_spaces;
20129 }
20130
20131 sub combine_fields {
20132
20133     # combine all fields except for the comment field  ( sidecmt.t )
20134     # Uses global variables:
20135     #  @group_lines
20136     #  $maximum_line_index
20137     my ( $j, $k );
20138     my $maximum_field_index = $group_lines[0]->get_jmax();
20139     for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
20140         my $line    = $group_lines[$j];
20141         my $rfields = $line->get_rfields();
20142         foreach ( 1 .. $maximum_field_index - 1 ) {
20143             $$rfields[0] .= $$rfields[$_];
20144         }
20145         $$rfields[1] = $$rfields[$maximum_field_index];
20146
20147         $line->set_jmax(1);
20148         $line->set_column( 0, 0 );
20149         $line->set_column( 1, 0 );
20150
20151     }
20152     $maximum_field_index = 1;
20153
20154     for $j ( 0 .. $maximum_line_index ) {
20155         my $line    = $group_lines[$j];
20156         my $rfields = $line->get_rfields();
20157         for $k ( 0 .. $maximum_field_index ) {
20158             my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
20159             if ( $k == 0 ) {
20160                 $pad += $group_lines[$j]->get_leading_space_count();
20161             }
20162
20163             if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
20164
20165         }
20166     }
20167 }
20168
20169 sub get_output_line_number {
20170
20171     # the output line number reported to a caller is the number of items
20172     # written plus the number of items in the buffer
20173     my $self = shift;
20174     1 + $maximum_line_index + $file_writer_object->get_output_line_number();
20175 }
20176
20177 sub write_leader_and_string {
20178
20179     my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
20180         $rvertical_tightness_flags )
20181       = @_;
20182
20183     # handle outdenting of long lines:
20184     if ($outdent_long_lines) {
20185         my $excess =
20186           length($str) -
20187           $side_comment_length +
20188           $leading_space_count -
20189           $rOpts_maximum_line_length;
20190         if ( $excess > 0 ) {
20191             $leading_space_count = 0;
20192             $last_outdented_line_at =
20193               $file_writer_object->get_output_line_number();
20194
20195             unless ($outdented_line_count) {
20196                 $first_outdented_line_at = $last_outdented_line_at;
20197             }
20198             $outdented_line_count++;
20199         }
20200     }
20201
20202     # Make preliminary leading whitespace.  It could get changed
20203     # later by entabbing, so we have to keep track of any changes
20204     # to the leading_space_count from here on.
20205     my $leading_string =
20206       $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
20207
20208     # Unpack any recombination data; it was packed by
20209     # sub send_lines_to_vertical_aligner. Contents:
20210     #
20211     #   [0] type: 1=opening  2=closing  3=opening block brace
20212     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
20213     #             if closing: spaces of padding to use
20214     #   [2] sequence number of container
20215     #   [3] valid flag: do not append if this flag is false
20216     #
20217     my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20218         $seqno_end );
20219     if ($rvertical_tightness_flags) {
20220         (
20221             $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20222             $seqno_end
20223         ) = @{$rvertical_tightness_flags};
20224     }
20225
20226     $seqno_string = $seqno_end;
20227
20228     # handle any cached line ..
20229     # either append this line to it or write it out
20230     if ( length($cached_line_text) ) {
20231
20232         if ( !$cached_line_valid ) {
20233             entab_and_output( $cached_line_text,
20234                 $cached_line_leading_space_count,
20235                 $last_group_level_written );
20236         }
20237
20238         # handle cached line with opening container token
20239         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
20240
20241             my $gap = $leading_space_count - length($cached_line_text);
20242
20243             # handle option of just one tight opening per line:
20244             if ( $cached_line_flag == 1 ) {
20245                 if ( defined($open_or_close) && $open_or_close == 1 ) {
20246                     $gap = -1;
20247                 }
20248             }
20249
20250             if ( $gap >= 0 ) {
20251                 $leading_string      = $cached_line_text . ' ' x $gap;
20252                 $leading_space_count = $cached_line_leading_space_count;
20253                 $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
20254             }
20255             else {
20256                 entab_and_output( $cached_line_text,
20257                     $cached_line_leading_space_count,
20258                     $last_group_level_written );
20259             }
20260         }
20261
20262         # handle cached line to place before this closing container token
20263         else {
20264             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
20265
20266             if ( length($test_line) <= $rOpts_maximum_line_length ) {
20267
20268                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
20269
20270                 # Patch to outdent closing tokens ending # in ');'
20271                 # If we are joining a line like ');' to a previous stacked
20272                 # set of closing tokens, then decide if we may outdent the
20273                 # combined stack to the indentation of the ');'.  Since we
20274                 # should not normally outdent any of the other tokens more than
20275                 # the indentation of the lines that contained them, we will
20276                 # only do this if all of the corresponding opening
20277                 # tokens were on the same line.  This can happen with
20278                 # -sot and -sct.  For example, it is ok here:
20279                 #   __PACKAGE__->load_components( qw(
20280                 #         PK::Auto
20281                 #         Core
20282                 #   ));
20283                 #
20284                 #   But, for example, we do not outdent in this example because
20285                 #   that would put the closing sub brace out farther than the
20286                 #   opening sub brace:
20287                 #
20288                 #   perltidy -sot -sct
20289                 #   $c->Tk::bind(
20290                 #       '<Control-f>' => sub {
20291                 #           my ($c) = @_;
20292                 #           my $e = $c->XEvent;
20293                 #           itemsUnderArea $c;
20294                 #       } );
20295                 #
20296                 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
20297
20298                     # The way to tell this is if the stacked sequence numbers
20299                     # of this output line are the reverse of the stacked
20300                     # sequence numbers of the previous non-blank line of
20301                     # sequence numbers.  So we can join if the previous
20302                     # nonblank string of tokens is the mirror image.  For
20303                     # example if stack )}] is 13:8:6 then we are looking for a
20304                     # leading stack like [{( which is 6:8:13 We only need to
20305                     # check the two ends, because the intermediate tokens must
20306                     # fall in order.  Note on speed: having to split on colons
20307                     # and eliminate multiple colons might appear to be slow,
20308                     # but it's not an issue because we almost never come
20309                     # through here.  In a typical file we don't.
20310                     $seqno_string               =~ s/^:+//;
20311                     $last_nonblank_seqno_string =~ s/^:+//;
20312                     $seqno_string               =~ s/:+/:/g;
20313                     $last_nonblank_seqno_string =~ s/:+/:/g;
20314
20315                     # how many spaces can we outdent?
20316                     my $diff =
20317                       $cached_line_leading_space_count - $leading_space_count;
20318                     if (   $diff > 0
20319                         && length($seqno_string)
20320                         && length($last_nonblank_seqno_string) ==
20321                         length($seqno_string) )
20322                     {
20323                         my @seqno_last =
20324                           ( split ':', $last_nonblank_seqno_string );
20325                         my @seqno_now = ( split ':', $seqno_string );
20326                         if (   $seqno_now[-1] == $seqno_last[0]
20327                             && $seqno_now[0] == $seqno_last[-1] )
20328                         {
20329
20330                             # OK to outdent ..
20331                             # for absolute safety, be sure we only remove
20332                             # whitespace
20333                             my $ws = substr( $test_line, 0, $diff );
20334                             if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
20335
20336                                 $test_line = substr( $test_line, $diff );
20337                                 $cached_line_leading_space_count -= $diff;
20338                             }
20339
20340                             # shouldn't happen, but not critical:
20341                             ##else {
20342                             ## ERROR transferring indentation here
20343                             ##}
20344                         }
20345                     }
20346                 }
20347
20348                 $str                 = $test_line;
20349                 $leading_string      = "";
20350                 $leading_space_count = $cached_line_leading_space_count;
20351             }
20352             else {
20353                 entab_and_output( $cached_line_text,
20354                     $cached_line_leading_space_count,
20355                     $last_group_level_written );
20356             }
20357         }
20358     }
20359     $cached_line_type = 0;
20360     $cached_line_text = "";
20361
20362     # make the line to be written
20363     my $line = $leading_string . $str;
20364
20365     # write or cache this line
20366     if ( !$open_or_close || $side_comment_length > 0 ) {
20367         entab_and_output( $line, $leading_space_count, $group_level );
20368     }
20369     else {
20370         $cached_line_text                = $line;
20371         $cached_line_type                = $open_or_close;
20372         $cached_line_flag                = $tightness_flag;
20373         $cached_seqno                    = $seqno;
20374         $cached_line_valid               = $valid;
20375         $cached_line_leading_space_count = $leading_space_count;
20376         $cached_seqno_string             = $seqno_string;
20377     }
20378
20379     $last_group_level_written = $group_level;
20380     $last_side_comment_length = $side_comment_length;
20381     $extra_indent_ok          = 0;
20382 }
20383
20384 sub entab_and_output {
20385     my ( $line, $leading_space_count, $level ) = @_;
20386
20387     # The line is currently correct if there is no tabbing (recommended!)
20388     # We may have to lop off some leading spaces and replace with tabs.
20389     if ( $leading_space_count > 0 ) {
20390
20391         # Nothing to do if no tabs
20392         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20393             || $rOpts_indent_columns <= 0 )
20394         {
20395
20396             # nothing to do
20397         }
20398
20399         # Handle entab option
20400         elsif ($rOpts_entab_leading_whitespace) {
20401             my $space_count =
20402               $leading_space_count % $rOpts_entab_leading_whitespace;
20403             my $tab_count =
20404               int( $leading_space_count / $rOpts_entab_leading_whitespace );
20405             my $leading_string = "\t" x $tab_count . ' ' x $space_count;
20406             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20407                 substr( $line, 0, $leading_space_count ) = $leading_string;
20408             }
20409             else {
20410
20411                 # REMOVE AFTER TESTING
20412                 # shouldn't happen - program error counting whitespace
20413                 # we'll skip entabbing
20414                 warning(
20415 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20416                 );
20417             }
20418         }
20419
20420         # Handle option of one tab per level
20421         else {
20422             my $leading_string = ( "\t" x $level );
20423             my $space_count =
20424               $leading_space_count - $level * $rOpts_indent_columns;
20425
20426             # shouldn't happen:
20427             if ( $space_count < 0 ) {
20428                 warning(
20429 "Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
20430                 );
20431                 $leading_string = ( ' ' x $leading_space_count );
20432             }
20433             else {
20434                 $leading_string .= ( ' ' x $space_count );
20435             }
20436             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20437                 substr( $line, 0, $leading_space_count ) = $leading_string;
20438             }
20439             else {
20440
20441                 # REMOVE AFTER TESTING
20442                 # shouldn't happen - program error counting whitespace
20443                 # we'll skip entabbing
20444                 warning(
20445 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20446                 );
20447             }
20448         }
20449     }
20450     $file_writer_object->write_code_line( $line . "\n" );
20451     if ($seqno_string) {
20452         $last_nonblank_seqno_string = $seqno_string;
20453     }
20454 }
20455
20456 {    # begin get_leading_string
20457
20458     my @leading_string_cache;
20459
20460     sub get_leading_string {
20461
20462         # define the leading whitespace string for this line..
20463         my $leading_whitespace_count = shift;
20464
20465         # Handle case of zero whitespace, which includes multi-line quotes
20466         # (which may have a finite level; this prevents tab problems)
20467         if ( $leading_whitespace_count <= 0 ) {
20468             return "";
20469         }
20470
20471         # look for previous result
20472         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
20473             return $leading_string_cache[$leading_whitespace_count];
20474         }
20475
20476         # must compute a string for this number of spaces
20477         my $leading_string;
20478
20479         # Handle simple case of no tabs
20480         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20481             || $rOpts_indent_columns <= 0 )
20482         {
20483             $leading_string = ( ' ' x $leading_whitespace_count );
20484         }
20485
20486         # Handle entab option
20487         elsif ($rOpts_entab_leading_whitespace) {
20488             my $space_count =
20489               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
20490             my $tab_count = int(
20491                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
20492             $leading_string = "\t" x $tab_count . ' ' x $space_count;
20493         }
20494
20495         # Handle option of one tab per level
20496         else {
20497             $leading_string = ( "\t" x $group_level );
20498             my $space_count =
20499               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
20500
20501             # shouldn't happen:
20502             if ( $space_count < 0 ) {
20503                 warning(
20504 "Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
20505                 );
20506                 $leading_string = ( ' ' x $leading_whitespace_count );
20507             }
20508             else {
20509                 $leading_string .= ( ' ' x $space_count );
20510             }
20511         }
20512         $leading_string_cache[$leading_whitespace_count] = $leading_string;
20513         return $leading_string;
20514     }
20515 }    # end get_leading_string
20516
20517 sub report_anything_unusual {
20518     my $self = shift;
20519     if ( $outdented_line_count > 0 ) {
20520         write_logfile_entry(
20521             "$outdented_line_count long lines were outdented:\n");
20522         write_logfile_entry(
20523             "  First at output line $first_outdented_line_at\n");
20524
20525         if ( $outdented_line_count > 1 ) {
20526             write_logfile_entry(
20527                 "   Last at output line $last_outdented_line_at\n");
20528         }
20529         write_logfile_entry(
20530             "  use -noll to prevent outdenting, -l=n to increase line length\n"
20531         );
20532         write_logfile_entry("\n");
20533     }
20534 }
20535
20536 #####################################################################
20537 #
20538 # the Perl::Tidy::FileWriter class writes the output file
20539 #
20540 #####################################################################
20541
20542 package Perl::Tidy::FileWriter;
20543
20544 # Maximum number of little messages; probably need not be changed.
20545 use constant MAX_NAG_MESSAGES => 6;
20546
20547 sub write_logfile_entry {
20548     my $self          = shift;
20549     my $logger_object = $self->{_logger_object};
20550     if ($logger_object) {
20551         $logger_object->write_logfile_entry(@_);
20552     }
20553 }
20554
20555 sub new {
20556     my $class = shift;
20557     my ( $line_sink_object, $rOpts, $logger_object ) = @_;
20558
20559     bless {
20560         _line_sink_object           => $line_sink_object,
20561         _logger_object              => $logger_object,
20562         _rOpts                      => $rOpts,
20563         _output_line_number         => 1,
20564         _consecutive_blank_lines    => 0,
20565         _consecutive_nonblank_lines => 0,
20566         _first_line_length_error    => 0,
20567         _max_line_length_error      => 0,
20568         _last_line_length_error     => 0,
20569         _first_line_length_error_at => 0,
20570         _max_line_length_error_at   => 0,
20571         _last_line_length_error_at  => 0,
20572         _line_length_error_count    => 0,
20573         _max_output_line_length     => 0,
20574         _max_output_line_length_at  => 0,
20575     }, $class;
20576 }
20577
20578 sub tee_on {
20579     my $self = shift;
20580     $self->{_line_sink_object}->tee_on();
20581 }
20582
20583 sub tee_off {
20584     my $self = shift;
20585     $self->{_line_sink_object}->tee_off();
20586 }
20587
20588 sub get_output_line_number {
20589     my $self = shift;
20590     return $self->{_output_line_number};
20591 }
20592
20593 sub decrement_output_line_number {
20594     my $self = shift;
20595     $self->{_output_line_number}--;
20596 }
20597
20598 sub get_consecutive_nonblank_lines {
20599     my $self = shift;
20600     return $self->{_consecutive_nonblank_lines};
20601 }
20602
20603 sub reset_consecutive_blank_lines {
20604     my $self = shift;
20605     $self->{_consecutive_blank_lines} = 0;
20606 }
20607
20608 sub want_blank_line {
20609     my $self = shift;
20610     unless ( $self->{_consecutive_blank_lines} ) {
20611         $self->write_blank_code_line();
20612     }
20613 }
20614
20615 sub write_blank_code_line {
20616     my $self   = shift;
20617     my $forced = shift;
20618     my $rOpts  = $self->{_rOpts};
20619     return
20620       if (!$forced
20621         && $self->{_consecutive_blank_lines} >=
20622         $rOpts->{'maximum-consecutive-blank-lines'} );
20623     $self->{_consecutive_blank_lines}++;
20624     $self->{_consecutive_nonblank_lines} = 0;
20625     $self->write_line("\n");
20626 }
20627
20628 sub write_code_line {
20629     my $self = shift;
20630     my $a    = shift;
20631
20632     if ( $a =~ /^\s*$/ ) {
20633         my $rOpts = $self->{_rOpts};
20634         return
20635           if ( $self->{_consecutive_blank_lines} >=
20636             $rOpts->{'maximum-consecutive-blank-lines'} );
20637         $self->{_consecutive_blank_lines}++;
20638         $self->{_consecutive_nonblank_lines} = 0;
20639     }
20640     else {
20641         $self->{_consecutive_blank_lines} = 0;
20642         $self->{_consecutive_nonblank_lines}++;
20643     }
20644     $self->write_line($a);
20645 }
20646
20647 sub write_line {
20648     my $self = shift;
20649     my $a    = shift;
20650
20651     # TODO: go through and see if the test is necessary here
20652     if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
20653
20654     $self->{_line_sink_object}->write_line($a);
20655
20656     # This calculation of excess line length ignores any internal tabs
20657     my $rOpts  = $self->{_rOpts};
20658     my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
20659     if ( $a =~ /^\t+/g ) {
20660         $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
20661     }
20662
20663     # Note that we just incremented output line number to future value
20664     # so we must subtract 1 for current line number
20665     if ( length($a) > 1 + $self->{_max_output_line_length} ) {
20666         $self->{_max_output_line_length}    = length($a) - 1;
20667         $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
20668     }
20669
20670     if ( $exceed > 0 ) {
20671         my $output_line_number = $self->{_output_line_number};
20672         $self->{_last_line_length_error}    = $exceed;
20673         $self->{_last_line_length_error_at} = $output_line_number - 1;
20674         if ( $self->{_line_length_error_count} == 0 ) {
20675             $self->{_first_line_length_error}    = $exceed;
20676             $self->{_first_line_length_error_at} = $output_line_number - 1;
20677         }
20678
20679         if (
20680             $self->{_last_line_length_error} > $self->{_max_line_length_error} )
20681         {
20682             $self->{_max_line_length_error}    = $exceed;
20683             $self->{_max_line_length_error_at} = $output_line_number - 1;
20684         }
20685
20686         if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
20687             $self->write_logfile_entry(
20688                 "Line length exceeded by $exceed characters\n");
20689         }
20690         $self->{_line_length_error_count}++;
20691     }
20692
20693 }
20694
20695 sub report_line_length_errors {
20696     my $self                    = shift;
20697     my $rOpts                   = $self->{_rOpts};
20698     my $line_length_error_count = $self->{_line_length_error_count};
20699     if ( $line_length_error_count == 0 ) {
20700         $self->write_logfile_entry(
20701             "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
20702         my $max_output_line_length    = $self->{_max_output_line_length};
20703         my $max_output_line_length_at = $self->{_max_output_line_length_at};
20704         $self->write_logfile_entry(
20705 "  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
20706         );
20707
20708     }
20709     else {
20710
20711         my $word = ( $line_length_error_count > 1 ) ? "s" : "";
20712         $self->write_logfile_entry(
20713 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
20714         );
20715
20716         $word = ( $line_length_error_count > 1 ) ? "First" : "";
20717         my $first_line_length_error    = $self->{_first_line_length_error};
20718         my $first_line_length_error_at = $self->{_first_line_length_error_at};
20719         $self->write_logfile_entry(
20720 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
20721         );
20722
20723         if ( $line_length_error_count > 1 ) {
20724             my $max_line_length_error     = $self->{_max_line_length_error};
20725             my $max_line_length_error_at  = $self->{_max_line_length_error_at};
20726             my $last_line_length_error    = $self->{_last_line_length_error};
20727             my $last_line_length_error_at = $self->{_last_line_length_error_at};
20728             $self->write_logfile_entry(
20729 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
20730             );
20731             $self->write_logfile_entry(
20732 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
20733             );
20734         }
20735     }
20736 }
20737
20738 #####################################################################
20739 #
20740 # The Perl::Tidy::Debugger class shows line tokenization
20741 #
20742 #####################################################################
20743
20744 package Perl::Tidy::Debugger;
20745
20746 sub new {
20747
20748     my ( $class, $filename ) = @_;
20749
20750     bless {
20751         _debug_file        => $filename,
20752         _debug_file_opened => 0,
20753         _fh                => undef,
20754     }, $class;
20755 }
20756
20757 sub really_open_debug_file {
20758
20759     my $self       = shift;
20760     my $debug_file = $self->{_debug_file};
20761     my $fh;
20762     unless ( $fh = IO::File->new("> $debug_file") ) {
20763         warn("can't open $debug_file: $!\n");
20764     }
20765     $self->{_debug_file_opened} = 1;
20766     $self->{_fh}                = $fh;
20767     print $fh
20768       "Use -dump-token-types (-dtt) to get a list of token type codes\n";
20769 }
20770
20771 sub close_debug_file {
20772
20773     my $self = shift;
20774     my $fh   = $self->{_fh};
20775     if ( $self->{_debug_file_opened} ) {
20776
20777         eval { $self->{_fh}->close() };
20778     }
20779 }
20780
20781 sub write_debug_entry {
20782
20783     # This is a debug dump routine which may be modified as necessary
20784     # to dump tokens on a line-by-line basis.  The output will be written
20785     # to the .DEBUG file when the -D flag is entered.
20786     my $self           = shift;
20787     my $line_of_tokens = shift;
20788
20789     my $input_line        = $line_of_tokens->{_line_text};
20790     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
20791     my $rtokens           = $line_of_tokens->{_rtokens};
20792     my $rlevels           = $line_of_tokens->{_rlevels};
20793     my $rslevels          = $line_of_tokens->{_rslevels};
20794     my $rblock_type       = $line_of_tokens->{_rblock_type};
20795     my $input_line_number = $line_of_tokens->{_line_number};
20796     my $line_type         = $line_of_tokens->{_line_type};
20797
20798     my ( $j, $num );
20799
20800     my $token_str              = "$input_line_number: ";
20801     my $reconstructed_original = "$input_line_number: ";
20802     my $block_str              = "$input_line_number: ";
20803
20804     #$token_str .= "$line_type: ";
20805     #$reconstructed_original .= "$line_type: ";
20806
20807     my $pattern   = "";
20808     my @next_char = ( '"', '"' );
20809     my $i_next    = 0;
20810     unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
20811     my $fh = $self->{_fh};
20812
20813     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
20814
20815         # testing patterns
20816         if ( $$rtoken_type[$j] eq 'k' ) {
20817             $pattern .= $$rtokens[$j];
20818         }
20819         else {
20820             $pattern .= $$rtoken_type[$j];
20821         }
20822         $reconstructed_original .= $$rtokens[$j];
20823         $block_str .= "($$rblock_type[$j])";
20824         $num = length( $$rtokens[$j] );
20825         my $type_str = $$rtoken_type[$j];
20826
20827         # be sure there are no blank tokens (shouldn't happen)
20828         # This can only happen if a programming error has been made
20829         # because all valid tokens are non-blank
20830         if ( $type_str eq ' ' ) {
20831             print $fh "BLANK TOKEN on the next line\n";
20832             $type_str = $next_char[$i_next];
20833             $i_next   = 1 - $i_next;
20834         }
20835
20836         if ( length($type_str) == 1 ) {
20837             $type_str = $type_str x $num;
20838         }
20839         $token_str .= $type_str;
20840     }
20841
20842     # Write what you want here ...
20843     # print $fh "$input_line\n";
20844     # print $fh "$pattern\n";
20845     print $fh "$reconstructed_original\n";
20846     print $fh "$token_str\n";
20847
20848     #print $fh "$block_str\n";
20849 }
20850
20851 #####################################################################
20852 #
20853 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
20854 # method for returning the next line to be parsed, as well as a
20855 # 'peek_ahead()' method
20856 #
20857 # The input parameter is an object with a 'get_line()' method
20858 # which returns the next line to be parsed
20859 #
20860 #####################################################################
20861
20862 package Perl::Tidy::LineBuffer;
20863
20864 sub new {
20865
20866     my $class              = shift;
20867     my $line_source_object = shift;
20868
20869     return bless {
20870         _line_source_object => $line_source_object,
20871         _rlookahead_buffer  => [],
20872     }, $class;
20873 }
20874
20875 sub peek_ahead {
20876     my $self               = shift;
20877     my $buffer_index       = shift;
20878     my $line               = undef;
20879     my $line_source_object = $self->{_line_source_object};
20880     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
20881     if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
20882         $line = $$rlookahead_buffer[$buffer_index];
20883     }
20884     else {
20885         $line = $line_source_object->get_line();
20886         push( @$rlookahead_buffer, $line );
20887     }
20888     return $line;
20889 }
20890
20891 sub get_line {
20892     my $self               = shift;
20893     my $line               = undef;
20894     my $line_source_object = $self->{_line_source_object};
20895     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
20896
20897     if ( scalar(@$rlookahead_buffer) ) {
20898         $line = shift @$rlookahead_buffer;
20899     }
20900     else {
20901         $line = $line_source_object->get_line();
20902     }
20903     return $line;
20904 }
20905
20906 ########################################################################
20907 #
20908 # the Perl::Tidy::Tokenizer package is essentially a filter which
20909 # reads lines of perl source code from a source object and provides
20910 # corresponding tokenized lines through its get_line() method.  Lines
20911 # flow from the source_object to the caller like this:
20912 #
20913 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
20914 #   get_line()         get_line()           get_line()     line_of_tokens
20915 #
20916 # The source object can be any object with a get_line() method which
20917 # supplies one line (a character string) perl call.
20918 # The LineBuffer object is created by the Tokenizer.
20919 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
20920 # containing one tokenized line for each call to its get_line() method.
20921 #
20922 # WARNING: This is not a real class yet.  Only one tokenizer my be used.
20923 #
20924 ########################################################################
20925
20926 package Perl::Tidy::Tokenizer;
20927
20928 BEGIN {
20929
20930     # Caution: these debug flags produce a lot of output
20931     # They should all be 0 except when debugging small scripts
20932
20933     use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
20934     use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
20935     use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
20936     use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
20937     use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
20938
20939     my $debug_warning = sub {
20940         print "TOKENIZER_DEBUGGING with key $_[0]\n";
20941     };
20942
20943     TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
20944     TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
20945     TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
20946     TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
20947     TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
20948
20949 }
20950
20951 use Carp;
20952
20953 # PACKAGE VARIABLES for for processing an entire FILE.
20954 use vars qw{
20955   $tokenizer_self
20956
20957   $last_nonblank_token
20958   $last_nonblank_type
20959   $last_nonblank_block_type
20960   $statement_type
20961   $in_attribute_list
20962   $current_package
20963   $context
20964
20965   %is_constant
20966   %is_user_function
20967   %user_function_prototype
20968   %is_block_function
20969   %is_block_list_function
20970   %saw_function_definition
20971
20972   $brace_depth
20973   $paren_depth
20974   $square_bracket_depth
20975
20976   @current_depth
20977   @total_depth
20978   $total_depth
20979   @nesting_sequence_number
20980   @current_sequence_number
20981   @paren_type
20982   @paren_semicolon_count
20983   @paren_structural_type
20984   @brace_type
20985   @brace_structural_type
20986   @brace_statement_type
20987   @brace_context
20988   @brace_package
20989   @square_bracket_type
20990   @square_bracket_structural_type
20991   @depth_array
20992   @nested_ternary_flag
20993   @starting_line_of_current_depth
20994 };
20995
20996 # GLOBAL CONSTANTS for routines in this package
20997 use vars qw{
20998   %is_indirect_object_taker
20999   %is_block_operator
21000   %expecting_operator_token
21001   %expecting_operator_types
21002   %expecting_term_types
21003   %expecting_term_token
21004   %is_digraph
21005   %is_file_test_operator
21006   %is_trigraph
21007   %is_valid_token_type
21008   %is_keyword
21009   %is_code_block_token
21010   %really_want_term
21011   @opening_brace_names
21012   @closing_brace_names
21013   %is_keyword_taking_list
21014   %is_q_qq_qw_qx_qr_s_y_tr_m
21015 };
21016
21017 # possible values of operator_expected()
21018 use constant TERM     => -1;
21019 use constant UNKNOWN  => 0;
21020 use constant OPERATOR => 1;
21021
21022 # possible values of context
21023 use constant SCALAR_CONTEXT  => -1;
21024 use constant UNKNOWN_CONTEXT => 0;
21025 use constant LIST_CONTEXT    => 1;
21026
21027 # Maximum number of little messages; probably need not be changed.
21028 use constant MAX_NAG_MESSAGES => 6;
21029
21030 {
21031
21032     # methods to count instances
21033     my $_count = 0;
21034     sub get_count        { $_count; }
21035     sub _increment_count { ++$_count }
21036     sub _decrement_count { --$_count }
21037 }
21038
21039 sub DESTROY {
21040     $_[0]->_decrement_count();
21041 }
21042
21043 sub new {
21044
21045     my $class = shift;
21046
21047     # Note: 'tabs' and 'indent_columns' are temporary and should be
21048     # removed asap
21049     my %defaults = (
21050         source_object        => undef,
21051         debugger_object      => undef,
21052         diagnostics_object   => undef,
21053         logger_object        => undef,
21054         starting_level       => undef,
21055         indent_columns       => 4,
21056         tabs                 => 0,
21057         entab_leading_space  => undef,
21058         look_for_hash_bang   => 0,
21059         trim_qw              => 1,
21060         look_for_autoloader  => 1,
21061         look_for_selfloader  => 1,
21062         starting_line_number => 1,
21063     );
21064     my %args = ( %defaults, @_ );
21065
21066     # we are given an object with a get_line() method to supply source lines
21067     my $source_object = $args{source_object};
21068
21069     # we create another object with a get_line() and peek_ahead() method
21070     my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
21071
21072     # Tokenizer state data is as follows:
21073     # _rhere_target_list    reference to list of here-doc targets
21074     # _here_doc_target      the target string for a here document
21075     # _here_quote_character the type of here-doc quoting (" ' ` or none)
21076     #                       to determine if interpolation is done
21077     # _quote_target         character we seek if chasing a quote
21078     # _line_start_quote     line where we started looking for a long quote
21079     # _in_here_doc          flag indicating if we are in a here-doc
21080     # _in_pod               flag set if we are in pod documentation
21081     # _in_error             flag set if we saw severe error (binary in script)
21082     # _in_data              flag set if we are in __DATA__ section
21083     # _in_end               flag set if we are in __END__ section
21084     # _in_format            flag set if we are in a format description
21085     # _in_attribute_list    flag telling if we are looking for attributes
21086     # _in_quote             flag telling if we are chasing a quote
21087     # _starting_level       indentation level of first line
21088     # _input_tabstr         string denoting one indentation level of input file
21089     # _know_input_tabstr    flag indicating if we know _input_tabstr
21090     # _line_buffer_object   object with get_line() method to supply source code
21091     # _diagnostics_object   place to write debugging information
21092     # _unexpected_error_count  error count used to limit output
21093     # _lower_case_labels_at  line numbers where lower case labels seen
21094     $tokenizer_self = {
21095         _rhere_target_list                  => [],
21096         _in_here_doc                        => 0,
21097         _here_doc_target                    => "",
21098         _here_quote_character               => "",
21099         _in_data                            => 0,
21100         _in_end                             => 0,
21101         _in_format                          => 0,
21102         _in_error                           => 0,
21103         _in_pod                             => 0,
21104         _in_attribute_list                  => 0,
21105         _in_quote                           => 0,
21106         _quote_target                       => "",
21107         _line_start_quote                   => -1,
21108         _starting_level                     => $args{starting_level},
21109         _know_starting_level                => defined( $args{starting_level} ),
21110         _tabs                               => $args{tabs},
21111         _entab_leading_space                => $args{entab_leading_space},
21112         _indent_columns                     => $args{indent_columns},
21113         _look_for_hash_bang                 => $args{look_for_hash_bang},
21114         _trim_qw                            => $args{trim_qw},
21115         _input_tabstr                       => "",
21116         _know_input_tabstr                  => -1,
21117         _last_line_number                   => $args{starting_line_number} - 1,
21118         _saw_perl_dash_P                    => 0,
21119         _saw_perl_dash_w                    => 0,
21120         _saw_use_strict                     => 0,
21121         _saw_v_string                       => 0,
21122         _look_for_autoloader                => $args{look_for_autoloader},
21123         _look_for_selfloader                => $args{look_for_selfloader},
21124         _saw_autoloader                     => 0,
21125         _saw_selfloader                     => 0,
21126         _saw_hash_bang                      => 0,
21127         _saw_end                            => 0,
21128         _saw_data                           => 0,
21129         _saw_negative_indentation           => 0,
21130         _started_tokenizing                 => 0,
21131         _line_buffer_object                 => $line_buffer_object,
21132         _debugger_object                    => $args{debugger_object},
21133         _diagnostics_object                 => $args{diagnostics_object},
21134         _logger_object                      => $args{logger_object},
21135         _unexpected_error_count             => 0,
21136         _started_looking_for_here_target_at => 0,
21137         _nearly_matched_here_target_at      => undef,
21138         _line_text                          => "",
21139         _rlower_case_labels_at              => undef,
21140     };
21141
21142     prepare_for_a_new_file();
21143     find_starting_indentation_level();
21144
21145     bless $tokenizer_self, $class;
21146
21147     # This is not a full class yet, so die if an attempt is made to
21148     # create more than one object.
21149
21150     if ( _increment_count() > 1 ) {
21151         confess
21152 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
21153     }
21154
21155     return $tokenizer_self;
21156
21157 }
21158
21159 # interface to Perl::Tidy::Logger routines
21160 sub warning {
21161     my $logger_object = $tokenizer_self->{_logger_object};
21162     if ($logger_object) {
21163         $logger_object->warning(@_);
21164     }
21165 }
21166
21167 sub complain {
21168     my $logger_object = $tokenizer_self->{_logger_object};
21169     if ($logger_object) {
21170         $logger_object->complain(@_);
21171     }
21172 }
21173
21174 sub write_logfile_entry {
21175     my $logger_object = $tokenizer_self->{_logger_object};
21176     if ($logger_object) {
21177         $logger_object->write_logfile_entry(@_);
21178     }
21179 }
21180
21181 sub interrupt_logfile {
21182     my $logger_object = $tokenizer_self->{_logger_object};
21183     if ($logger_object) {
21184         $logger_object->interrupt_logfile();
21185     }
21186 }
21187
21188 sub resume_logfile {
21189     my $logger_object = $tokenizer_self->{_logger_object};
21190     if ($logger_object) {
21191         $logger_object->resume_logfile();
21192     }
21193 }
21194
21195 sub increment_brace_error {
21196     my $logger_object = $tokenizer_self->{_logger_object};
21197     if ($logger_object) {
21198         $logger_object->increment_brace_error();
21199     }
21200 }
21201
21202 sub report_definite_bug {
21203     my $logger_object = $tokenizer_self->{_logger_object};
21204     if ($logger_object) {
21205         $logger_object->report_definite_bug();
21206     }
21207 }
21208
21209 sub brace_warning {
21210     my $logger_object = $tokenizer_self->{_logger_object};
21211     if ($logger_object) {
21212         $logger_object->brace_warning(@_);
21213     }
21214 }
21215
21216 sub get_saw_brace_error {
21217     my $logger_object = $tokenizer_self->{_logger_object};
21218     if ($logger_object) {
21219         $logger_object->get_saw_brace_error();
21220     }
21221     else {
21222         0;
21223     }
21224 }
21225
21226 # interface to Perl::Tidy::Diagnostics routines
21227 sub write_diagnostics {
21228     if ( $tokenizer_self->{_diagnostics_object} ) {
21229         $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
21230     }
21231 }
21232
21233 sub report_tokenization_errors {
21234
21235     my $self = shift;
21236
21237     my $level = get_indentation_level();
21238     if ( $level != $tokenizer_self->{_starting_level} ) {
21239         warning("final indentation level: $level\n");
21240     }
21241
21242     check_final_nesting_depths();
21243
21244     if ( $tokenizer_self->{_look_for_hash_bang}
21245         && !$tokenizer_self->{_saw_hash_bang} )
21246     {
21247         warning(
21248             "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
21249     }
21250
21251     if ( $tokenizer_self->{_in_format} ) {
21252         warning("hit EOF while in format description\n");
21253     }
21254
21255     if ( $tokenizer_self->{_in_pod} ) {
21256
21257         # Just write log entry if this is after __END__ or __DATA__
21258         # because this happens to often, and it is not likely to be
21259         # a parsing error.
21260         if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
21261             write_logfile_entry(
21262 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
21263             );
21264         }
21265
21266         else {
21267             complain(
21268 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
21269             );
21270         }
21271
21272     }
21273
21274     if ( $tokenizer_self->{_in_here_doc} ) {
21275         my $here_doc_target = $tokenizer_self->{_here_doc_target};
21276         my $started_looking_for_here_target_at =
21277           $tokenizer_self->{_started_looking_for_here_target_at};
21278         if ($here_doc_target) {
21279             warning(
21280 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
21281             );
21282         }
21283         else {
21284             warning(
21285 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
21286             );
21287         }
21288         my $nearly_matched_here_target_at =
21289           $tokenizer_self->{_nearly_matched_here_target_at};
21290         if ($nearly_matched_here_target_at) {
21291             warning(
21292 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
21293             );
21294         }
21295     }
21296
21297     if ( $tokenizer_self->{_in_quote} ) {
21298         my $line_start_quote = $tokenizer_self->{_line_start_quote};
21299         my $quote_target     = $tokenizer_self->{_quote_target};
21300         my $what =
21301           ( $tokenizer_self->{_in_attribute_list} )
21302           ? "attribute list"
21303           : "quote/pattern";
21304         warning(
21305 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
21306         );
21307     }
21308
21309     unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
21310         if ( $] < 5.006 ) {
21311             write_logfile_entry("Suggest including '-w parameter'\n");
21312         }
21313         else {
21314             write_logfile_entry("Suggest including 'use warnings;'\n");
21315         }
21316     }
21317
21318     if ( $tokenizer_self->{_saw_perl_dash_P} ) {
21319         write_logfile_entry("Use of -P parameter for defines is discouraged\n");
21320     }
21321
21322     unless ( $tokenizer_self->{_saw_use_strict} ) {
21323         write_logfile_entry("Suggest including 'use strict;'\n");
21324     }
21325
21326     # it is suggested that lables have at least one upper case character
21327     # for legibility and to avoid code breakage as new keywords are introduced
21328     if ( $tokenizer_self->{_rlower_case_labels_at} ) {
21329         my @lower_case_labels_at =
21330           @{ $tokenizer_self->{_rlower_case_labels_at} };
21331         write_logfile_entry(
21332             "Suggest using upper case characters in label(s)\n");
21333         local $" = ')(';
21334         write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
21335     }
21336 }
21337
21338 sub report_v_string {
21339
21340     # warn if this version can't handle v-strings
21341     my $tok = shift;
21342     unless ( $tokenizer_self->{_saw_v_string} ) {
21343         $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
21344     }
21345     if ( $] < 5.006 ) {
21346         warning(
21347 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
21348         );
21349     }
21350 }
21351
21352 sub get_input_line_number {
21353     return $tokenizer_self->{_last_line_number};
21354 }
21355
21356 # returns the next tokenized line
21357 sub get_line {
21358
21359     my $self = shift;
21360
21361     # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
21362     # $square_bracket_depth, $paren_depth
21363
21364     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
21365     $tokenizer_self->{_line_text} = $input_line;
21366
21367     return undef unless ($input_line);
21368
21369     my $input_line_number = ++$tokenizer_self->{_last_line_number};
21370
21371     # Find and remove what characters terminate this line, including any
21372     # control r
21373     my $input_line_separator = "";
21374     if ( chomp($input_line) ) { $input_line_separator = $/ }
21375
21376     # TODO: what other characters should be included here?
21377     if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
21378         $input_line_separator = $2 . $input_line_separator;
21379     }
21380
21381     # for backwards compatability we keep the line text terminated with
21382     # a newline character
21383     $input_line .= "\n";
21384     $tokenizer_self->{_line_text} = $input_line;    # update
21385
21386     # create a data structure describing this line which will be
21387     # returned to the caller.
21388
21389     # _line_type codes are:
21390     #   SYSTEM         - system-specific code before hash-bang line
21391     #   CODE           - line of perl code (including comments)
21392     #   POD_START      - line starting pod, such as '=head'
21393     #   POD            - pod documentation text
21394     #   POD_END        - last line of pod section, '=cut'
21395     #   HERE           - text of here-document
21396     #   HERE_END       - last line of here-doc (target word)
21397     #   FORMAT         - format section
21398     #   FORMAT_END     - last line of format section, '.'
21399     #   DATA_START     - __DATA__ line
21400     #   DATA           - unidentified text following __DATA__
21401     #   END_START      - __END__ line
21402     #   END            - unidentified text following __END__
21403     #   ERROR          - we are in big trouble, probably not a perl script
21404
21405     # Other variables:
21406     #   _curly_brace_depth     - depth of curly braces at start of line
21407     #   _square_bracket_depth  - depth of square brackets at start of line
21408     #   _paren_depth           - depth of parens at start of line
21409     #   _starting_in_quote     - this line continues a multi-line quote
21410     #                            (so don't trim leading blanks!)
21411     #   _ending_in_quote       - this line ends in a multi-line quote
21412     #                            (so don't trim trailing blanks!)
21413     my $line_of_tokens = {
21414         _line_type                => 'EOF',
21415         _line_text                => $input_line,
21416         _line_number              => $input_line_number,
21417         _rtoken_type              => undef,
21418         _rtokens                  => undef,
21419         _rlevels                  => undef,
21420         _rslevels                 => undef,
21421         _rblock_type              => undef,
21422         _rcontainer_type          => undef,
21423         _rcontainer_environment   => undef,
21424         _rtype_sequence           => undef,
21425         _rnesting_tokens          => undef,
21426         _rci_levels               => undef,
21427         _rnesting_blocks          => undef,
21428         _python_indentation_level => -1,                   ## 0,
21429         _starting_in_quote    => 0,                    # to be set by subroutine
21430         _ending_in_quote      => 0,
21431         _curly_brace_depth    => $brace_depth,
21432         _square_bracket_depth => $square_bracket_depth,
21433         _paren_depth          => $paren_depth,
21434         _quote_character      => '',
21435     };
21436
21437     # must print line unchanged if we are in a here document
21438     if ( $tokenizer_self->{_in_here_doc} ) {
21439
21440         $line_of_tokens->{_line_type} = 'HERE';
21441         my $here_doc_target      = $tokenizer_self->{_here_doc_target};
21442         my $here_quote_character = $tokenizer_self->{_here_quote_character};
21443         my $candidate_target     = $input_line;
21444         chomp $candidate_target;
21445         if ( $candidate_target eq $here_doc_target ) {
21446             $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21447             $line_of_tokens->{_line_type}                     = 'HERE_END';
21448             write_logfile_entry("Exiting HERE document $here_doc_target\n");
21449
21450             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21451             if (@$rhere_target_list) {    # there can be multiple here targets
21452                 ( $here_doc_target, $here_quote_character ) =
21453                   @{ shift @$rhere_target_list };
21454                 $tokenizer_self->{_here_doc_target} = $here_doc_target;
21455                 $tokenizer_self->{_here_quote_character} =
21456                   $here_quote_character;
21457                 write_logfile_entry(
21458                     "Entering HERE document $here_doc_target\n");
21459                 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21460                 $tokenizer_self->{_started_looking_for_here_target_at} =
21461                   $input_line_number;
21462             }
21463             else {
21464                 $tokenizer_self->{_in_here_doc}          = 0;
21465                 $tokenizer_self->{_here_doc_target}      = "";
21466                 $tokenizer_self->{_here_quote_character} = "";
21467             }
21468         }
21469
21470         # check for error of extra whitespace
21471         # note for PERL6: leading whitespace is allowed
21472         else {
21473             $candidate_target =~ s/\s*$//;
21474             $candidate_target =~ s/^\s*//;
21475             if ( $candidate_target eq $here_doc_target ) {
21476                 $tokenizer_self->{_nearly_matched_here_target_at} =
21477                   $input_line_number;
21478             }
21479         }
21480         return $line_of_tokens;
21481     }
21482
21483     # must print line unchanged if we are in a format section
21484     elsif ( $tokenizer_self->{_in_format} ) {
21485
21486         if ( $input_line =~ /^\.[\s#]*$/ ) {
21487             write_logfile_entry("Exiting format section\n");
21488             $tokenizer_self->{_in_format} = 0;
21489             $line_of_tokens->{_line_type} = 'FORMAT_END';
21490         }
21491         else {
21492             $line_of_tokens->{_line_type} = 'FORMAT';
21493         }
21494         return $line_of_tokens;
21495     }
21496
21497     # must print line unchanged if we are in pod documentation
21498     elsif ( $tokenizer_self->{_in_pod} ) {
21499
21500         $line_of_tokens->{_line_type} = 'POD';
21501         if ( $input_line =~ /^=cut/ ) {
21502             $line_of_tokens->{_line_type} = 'POD_END';
21503             write_logfile_entry("Exiting POD section\n");
21504             $tokenizer_self->{_in_pod} = 0;
21505         }
21506         if ( $input_line =~ /^\#\!.*perl\b/ ) {
21507             warning(
21508                 "Hash-bang in pod can cause older versions of perl to fail! \n"
21509             );
21510         }
21511
21512         return $line_of_tokens;
21513     }
21514
21515     # must print line unchanged if we have seen a severe error (i.e., we
21516     # are seeing illegal tokens and connot continue.  Syntax errors do
21517     # not pass this route).  Calling routine can decide what to do, but
21518     # the default can be to just pass all lines as if they were after __END__
21519     elsif ( $tokenizer_self->{_in_error} ) {
21520         $line_of_tokens->{_line_type} = 'ERROR';
21521         return $line_of_tokens;
21522     }
21523
21524     # print line unchanged if we are __DATA__ section
21525     elsif ( $tokenizer_self->{_in_data} ) {
21526
21527         # ...but look for POD
21528         # Note that the _in_data and _in_end flags remain set
21529         # so that we return to that state after seeing the
21530         # end of a pod section
21531         if ( $input_line =~ /^=(?!cut)/ ) {
21532             $line_of_tokens->{_line_type} = 'POD_START';
21533             write_logfile_entry("Entering POD section\n");
21534             $tokenizer_self->{_in_pod} = 1;
21535             return $line_of_tokens;
21536         }
21537         else {
21538             $line_of_tokens->{_line_type} = 'DATA';
21539             return $line_of_tokens;
21540         }
21541     }
21542
21543     # print line unchanged if we are in __END__ section
21544     elsif ( $tokenizer_self->{_in_end} ) {
21545
21546         # ...but look for POD
21547         # Note that the _in_data and _in_end flags remain set
21548         # so that we return to that state after seeing the
21549         # end of a pod section
21550         if ( $input_line =~ /^=(?!cut)/ ) {
21551             $line_of_tokens->{_line_type} = 'POD_START';
21552             write_logfile_entry("Entering POD section\n");
21553             $tokenizer_self->{_in_pod} = 1;
21554             return $line_of_tokens;
21555         }
21556         else {
21557             $line_of_tokens->{_line_type} = 'END';
21558             return $line_of_tokens;
21559         }
21560     }
21561
21562     # check for a hash-bang line if we haven't seen one
21563     if ( !$tokenizer_self->{_saw_hash_bang} ) {
21564         if ( $input_line =~ /^\#\!.*perl\b/ ) {
21565             $tokenizer_self->{_saw_hash_bang} = $input_line_number;
21566
21567             # check for -w and -P flags
21568             if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
21569                 $tokenizer_self->{_saw_perl_dash_P} = 1;
21570             }
21571
21572             if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
21573                 $tokenizer_self->{_saw_perl_dash_w} = 1;
21574             }
21575
21576             if (   ( $input_line_number > 1 )
21577                 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
21578             {
21579
21580                 # this is helpful for VMS systems; we may have accidentally
21581                 # tokenized some DCL commands
21582                 if ( $tokenizer_self->{_started_tokenizing} ) {
21583                     warning(
21584 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
21585                     );
21586                 }
21587                 else {
21588                     complain("Useless hash-bang after line 1\n");
21589                 }
21590             }
21591
21592             # Report the leading hash-bang as a system line
21593             # This will prevent -dac from deleting it
21594             else {
21595                 $line_of_tokens->{_line_type} = 'SYSTEM';
21596                 return $line_of_tokens;
21597             }
21598         }
21599     }
21600
21601     # wait for a hash-bang before parsing if the user invoked us with -x
21602     if ( $tokenizer_self->{_look_for_hash_bang}
21603         && !$tokenizer_self->{_saw_hash_bang} )
21604     {
21605         $line_of_tokens->{_line_type} = 'SYSTEM';
21606         return $line_of_tokens;
21607     }
21608
21609     # a first line of the form ': #' will be marked as SYSTEM
21610     # since lines of this form may be used by tcsh
21611     if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
21612         $line_of_tokens->{_line_type} = 'SYSTEM';
21613         return $line_of_tokens;
21614     }
21615
21616     # now we know that it is ok to tokenize the line...
21617     # the line tokenizer will modify any of these private variables:
21618     #        _rhere_target_list
21619     #        _in_data
21620     #        _in_end
21621     #        _in_format
21622     #        _in_error
21623     #        _in_pod
21624     #        _in_quote
21625     my $ending_in_quote_last = $tokenizer_self->{_in_quote};
21626     tokenize_this_line($line_of_tokens);
21627
21628     # Now finish defining the return structure and return it
21629     $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
21630
21631     # handle severe error (binary data in script)
21632     if ( $tokenizer_self->{_in_error} ) {
21633         $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
21634         warning("Giving up after error\n");
21635         $line_of_tokens->{_line_type} = 'ERROR';
21636         reset_indentation_level(0);          # avoid error messages
21637         return $line_of_tokens;
21638     }
21639
21640     # handle start of pod documentation
21641     if ( $tokenizer_self->{_in_pod} ) {
21642
21643         # This gets tricky..above a __DATA__ or __END__ section, perl
21644         # accepts '=cut' as the start of pod section. But afterwards,
21645         # only pod utilities see it and they may ignore an =cut without
21646         # leading =head.  In any case, this isn't good.
21647         if ( $input_line =~ /^=cut\b/ ) {
21648             if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
21649                 complain("=cut while not in pod ignored\n");
21650                 $tokenizer_self->{_in_pod}    = 0;
21651                 $line_of_tokens->{_line_type} = 'POD_END';
21652             }
21653             else {
21654                 $line_of_tokens->{_line_type} = 'POD_START';
21655                 complain(
21656 "=cut starts a pod section .. this can fool pod utilities.\n"
21657                 );
21658                 write_logfile_entry("Entering POD section\n");
21659             }
21660         }
21661
21662         else {
21663             $line_of_tokens->{_line_type} = 'POD_START';
21664             write_logfile_entry("Entering POD section\n");
21665         }
21666
21667         return $line_of_tokens;
21668     }
21669
21670     # update indentation levels for log messages
21671     if ( $input_line !~ /^\s*$/ ) {
21672         my $rlevels                      = $line_of_tokens->{_rlevels};
21673         my $structural_indentation_level = $$rlevels[0];
21674         my ( $python_indentation_level, $msg ) =
21675           find_indentation_level( $input_line, $structural_indentation_level );
21676         if ($msg) { write_logfile_entry("$msg") }
21677         if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
21678             $line_of_tokens->{_python_indentation_level} =
21679               $python_indentation_level;
21680         }
21681     }
21682
21683     # see if this line contains here doc targets
21684     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21685     if (@$rhere_target_list) {
21686
21687         my ( $here_doc_target, $here_quote_character ) =
21688           @{ shift @$rhere_target_list };
21689         $tokenizer_self->{_in_here_doc}          = 1;
21690         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
21691         $tokenizer_self->{_here_quote_character} = $here_quote_character;
21692         write_logfile_entry("Entering HERE document $here_doc_target\n");
21693         $tokenizer_self->{_started_looking_for_here_target_at} =
21694           $input_line_number;
21695     }
21696
21697     # NOTE: __END__ and __DATA__ statements are written unformatted
21698     # because they can theoretically contain additional characters
21699     # which are not tokenized (and cannot be read with <DATA> either!).
21700     if ( $tokenizer_self->{_in_data} ) {
21701         $line_of_tokens->{_line_type} = 'DATA_START';
21702         write_logfile_entry("Starting __DATA__ section\n");
21703         $tokenizer_self->{_saw_data} = 1;
21704
21705         # keep parsing after __DATA__ if use SelfLoader was seen
21706         if ( $tokenizer_self->{_saw_selfloader} ) {
21707             $tokenizer_self->{_in_data} = 0;
21708             write_logfile_entry(
21709                 "SelfLoader seen, continuing; -nlsl deactivates\n");
21710         }
21711
21712         return $line_of_tokens;
21713     }
21714
21715     elsif ( $tokenizer_self->{_in_end} ) {
21716         $line_of_tokens->{_line_type} = 'END_START';
21717         write_logfile_entry("Starting __END__ section\n");
21718         $tokenizer_self->{_saw_end} = 1;
21719
21720         # keep parsing after __END__ if use AutoLoader was seen
21721         if ( $tokenizer_self->{_saw_autoloader} ) {
21722             $tokenizer_self->{_in_end} = 0;
21723             write_logfile_entry(
21724                 "AutoLoader seen, continuing; -nlal deactivates\n");
21725         }
21726         return $line_of_tokens;
21727     }
21728
21729     # now, finally, we know that this line is type 'CODE'
21730     $line_of_tokens->{_line_type} = 'CODE';
21731
21732     # remember if we have seen any real code
21733     if (  !$tokenizer_self->{_started_tokenizing}
21734         && $input_line !~ /^\s*$/
21735         && $input_line !~ /^\s*#/ )
21736     {
21737         $tokenizer_self->{_started_tokenizing} = 1;
21738     }
21739
21740     if ( $tokenizer_self->{_debugger_object} ) {
21741         $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
21742     }
21743
21744     # Note: if keyword 'format' occurs in this line code, it is still CODE
21745     # (keyword 'format' need not start a line)
21746     if ( $tokenizer_self->{_in_format} ) {
21747         write_logfile_entry("Entering format section\n");
21748     }
21749
21750     if ( $tokenizer_self->{_in_quote}
21751         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
21752     {
21753
21754         #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
21755         if (
21756             ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
21757         {
21758             $tokenizer_self->{_line_start_quote} = $input_line_number;
21759             write_logfile_entry(
21760                 "Start multi-line quote or pattern ending in $quote_target\n");
21761         }
21762     }
21763     elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
21764         and !$tokenizer_self->{_in_quote} )
21765     {
21766         $tokenizer_self->{_line_start_quote} = -1;
21767         write_logfile_entry("End of multi-line quote or pattern\n");
21768     }
21769
21770     # we are returning a line of CODE
21771     return $line_of_tokens;
21772 }
21773
21774 sub find_starting_indentation_level {
21775
21776     # USES GLOBAL VARIABLES: $tokenizer_self
21777     my $starting_level    = 0;
21778     my $know_input_tabstr = -1;    # flag for find_indentation_level
21779
21780     # use value if given as parameter
21781     if ( $tokenizer_self->{_know_starting_level} ) {
21782         $starting_level = $tokenizer_self->{_starting_level};
21783     }
21784
21785     # if we know there is a hash_bang line, the level must be zero
21786     elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
21787         $tokenizer_self->{_know_starting_level} = 1;
21788     }
21789
21790     # otherwise figure it out from the input file
21791     else {
21792         my $line;
21793         my $i                            = 0;
21794         my $structural_indentation_level = -1; # flag for find_indentation_level
21795
21796         # keep looking at lines until we find a hash bang or piece of code
21797         my $msg = "";
21798         while ( $line =
21799             $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
21800         {
21801
21802             # if first line is #! then assume starting level is zero
21803             if ( $i == 1 && $line =~ /^\#\!/ ) {
21804                 $starting_level = 0;
21805                 last;
21806             }
21807             next if ( $line =~ /^\s*#/ );    # skip past comments
21808             next if ( $line =~ /^\s*$/ );    # skip past blank lines
21809             ( $starting_level, $msg ) =
21810               find_indentation_level( $line, $structural_indentation_level );
21811             if ($msg) { write_logfile_entry("$msg") }
21812             last;
21813         }
21814         $msg = "Line $i implies starting-indentation-level = $starting_level\n";
21815
21816         if ( $starting_level > 0 ) {
21817
21818             my $input_tabstr = $tokenizer_self->{_input_tabstr};
21819             if ( $input_tabstr eq "\t" ) {
21820                 $msg .= "by guessing input tabbing uses 1 tab per level\n";
21821             }
21822             else {
21823                 my $cols = length($input_tabstr);
21824                 $msg .=
21825                   "by guessing input tabbing uses $cols blanks per level\n";
21826             }
21827         }
21828         write_logfile_entry("$msg");
21829     }
21830     $tokenizer_self->{_starting_level} = $starting_level;
21831     reset_indentation_level($starting_level);
21832 }
21833
21834 # Find indentation level given a input line.  At the same time, try to
21835 # figure out the input tabbing scheme.
21836 #
21837 # There are two types of calls:
21838 #
21839 # Type 1: $structural_indentation_level < 0
21840 #  In this case we have to guess $input_tabstr to figure out the level.
21841 #
21842 # Type 2: $structural_indentation_level >= 0
21843 #  In this case the level of this line is known, and this routine can
21844 #  update the tabbing string, if still unknown, to make the level correct.
21845
21846 sub find_indentation_level {
21847     my ( $line, $structural_indentation_level ) = @_;
21848
21849     # USES GLOBAL VARIABLES: $tokenizer_self
21850     my $level = 0;
21851     my $msg   = "";
21852
21853     my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
21854     my $input_tabstr      = $tokenizer_self->{_input_tabstr};
21855
21856     # find leading whitespace
21857     my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
21858
21859     # make first guess at input tabbing scheme if necessary
21860     if ( $know_input_tabstr < 0 ) {
21861
21862         $know_input_tabstr = 0;
21863
21864         # When -et=n is used for the output formatting, we will assume that
21865         # tabs in the input formatting were also produced with -et=n.  This may
21866         # not be true, but it is the best guess because it will keep leading
21867         # whitespace unchanged on repeated formatting on small pieces of code
21868         # when -et=n is used.  Thanks to Sam Kington for this patch.
21869         if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) {
21870             $leading_whitespace =~ s{^ (\t*) }
21871            { " " x (length($1) * $tabsize) }xe;
21872             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
21873         }
21874         elsif ( $tokenizer_self->{_tabs} ) {
21875             $input_tabstr = "\t";
21876             if ( length($leading_whitespace) > 0 ) {
21877                 if ( $leading_whitespace !~ /\t/ ) {
21878
21879                     my $cols = $tokenizer_self->{_indent_columns};
21880
21881                     if ( length($leading_whitespace) < $cols ) {
21882                         $cols = length($leading_whitespace);
21883                     }
21884                     $input_tabstr = " " x $cols;
21885                 }
21886             }
21887         }
21888         else {
21889             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
21890
21891             if ( length($leading_whitespace) > 0 ) {
21892                 if ( $leading_whitespace =~ /^\t/ ) {
21893                     $input_tabstr = "\t";
21894                 }
21895             }
21896         }
21897         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21898         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
21899     }
21900
21901     # determine the input tabbing scheme if possible
21902     if (   ( $know_input_tabstr == 0 )
21903         && ( length($leading_whitespace) > 0 )
21904         && ( $structural_indentation_level > 0 ) )
21905     {
21906         my $saved_input_tabstr = $input_tabstr;
21907
21908         # check for common case of one tab per indentation level
21909         if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21910             if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21911                 $input_tabstr = "\t";
21912                 $msg          = "Guessing old indentation was tab character\n";
21913             }
21914         }
21915
21916         else {
21917
21918             # detab any tabs based on 8 blanks per tab
21919             my $entabbed = "";
21920             if ( $leading_whitespace =~ s/^\t+/        /g ) {
21921                 $entabbed = "entabbed";
21922             }
21923
21924             # now compute tabbing from number of spaces
21925             my $columns =
21926               length($leading_whitespace) / $structural_indentation_level;
21927             if ( $columns == int $columns ) {
21928                 $msg =
21929                   "Guessing old indentation was $columns $entabbed spaces\n";
21930             }
21931             else {
21932                 $columns = int $columns;
21933                 $msg =
21934 "old indentation is unclear, using $columns $entabbed spaces\n";
21935             }
21936             $input_tabstr = " " x $columns;
21937         }
21938         $know_input_tabstr                    = 1;
21939         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21940         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
21941
21942         # see if mistakes were made
21943         if ( ( $tokenizer_self->{_starting_level} > 0 )
21944             && !$tokenizer_self->{_know_starting_level} )
21945         {
21946
21947             if ( $input_tabstr ne $saved_input_tabstr ) {
21948                 complain(
21949 "I made a bad starting level guess; rerun with a value for -sil \n"
21950                 );
21951             }
21952         }
21953     }
21954
21955     # use current guess at input tabbing to get input indentation level
21956     #
21957     # Patch to handle a common case of entabbed leading whitespace
21958     # If the leading whitespace equals 4 spaces and we also have
21959     # tabs, detab the input whitespace assuming 8 spaces per tab.
21960     if ( length($input_tabstr) == 4 ) {
21961         $leading_whitespace =~ s/^\t+/        /g;
21962     }
21963
21964     if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
21965         my $pos = 0;
21966
21967         while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
21968         {
21969             $pos += $len_tab;
21970             $level++;
21971         }
21972     }
21973     return ( $level, $msg );
21974 }
21975
21976 # This is a currently unused debug routine
21977 sub dump_functions {
21978
21979     my $fh = *STDOUT;
21980     my ( $pkg, $sub );
21981     foreach $pkg ( keys %is_user_function ) {
21982         print $fh "\nnon-constant subs in package $pkg\n";
21983
21984         foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
21985             my $msg = "";
21986             if ( $is_block_list_function{$pkg}{$sub} ) {
21987                 $msg = 'block_list';
21988             }
21989
21990             if ( $is_block_function{$pkg}{$sub} ) {
21991                 $msg = 'block';
21992             }
21993             print $fh "$sub $msg\n";
21994         }
21995     }
21996
21997     foreach $pkg ( keys %is_constant ) {
21998         print $fh "\nconstants and constant subs in package $pkg\n";
21999
22000         foreach $sub ( keys %{ $is_constant{$pkg} } ) {
22001             print $fh "$sub\n";
22002         }
22003     }
22004 }
22005
22006 sub ones_count {
22007
22008     # count number of 1's in a string of 1's and 0's
22009     # example: ones_count("010101010101") gives 6
22010     return ( my $cis = $_[0] ) =~ tr/1/0/;
22011 }
22012
22013 sub prepare_for_a_new_file {
22014
22015     # previous tokens needed to determine what to expect next
22016     $last_nonblank_token      = ';';    # the only possible starting state which
22017     $last_nonblank_type       = ';';    # will make a leading brace a code block
22018     $last_nonblank_block_type = '';
22019
22020     # scalars for remembering statement types across multiple lines
22021     $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
22022     $in_attribute_list = 0;
22023
22024     # scalars for remembering where we are in the file
22025     $current_package = "main";
22026     $context         = UNKNOWN_CONTEXT;
22027
22028     # hashes used to remember function information
22029     %is_constant             = ();      # user-defined constants
22030     %is_user_function        = ();      # user-defined functions
22031     %user_function_prototype = ();      # their prototypes
22032     %is_block_function       = ();
22033     %is_block_list_function  = ();
22034     %saw_function_definition = ();
22035
22036     # variables used to track depths of various containers
22037     # and report nesting errors
22038     $paren_depth          = 0;
22039     $brace_depth          = 0;
22040     $square_bracket_depth = 0;
22041     @current_depth[ 0 .. $#closing_brace_names ] =
22042       (0) x scalar @closing_brace_names;
22043     $total_depth = 0;
22044     @total_depth = ();
22045     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
22046       ( 0 .. $#closing_brace_names );
22047     @current_sequence_number             = ();
22048     $paren_type[$paren_depth]            = '';
22049     $paren_semicolon_count[$paren_depth] = 0;
22050     $paren_structural_type[$brace_depth] = '';
22051     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
22052     $brace_structural_type[$brace_depth]                   = '';
22053     $brace_statement_type[$brace_depth]                    = "";
22054     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
22055     $brace_package[$paren_depth]                           = $current_package;
22056     $square_bracket_type[$square_bracket_depth]            = '';
22057     $square_bracket_structural_type[$square_bracket_depth] = '';
22058
22059     initialize_tokenizer_state();
22060 }
22061
22062 {                                       # begin tokenize_this_line
22063
22064     use constant BRACE          => 0;
22065     use constant SQUARE_BRACKET => 1;
22066     use constant PAREN          => 2;
22067     use constant QUESTION_COLON => 3;
22068
22069     # TV1: scalars for processing one LINE.
22070     # Re-initialized on each entry to sub tokenize_this_line.
22071     my (
22072         $block_type,        $container_type,    $expecting,
22073         $i,                 $i_tok,             $input_line,
22074         $input_line_number, $last_nonblank_i,   $max_token_index,
22075         $next_tok,          $next_type,         $peeked_ahead,
22076         $prototype,         $rhere_target_list, $rtoken_map,
22077         $rtoken_type,       $rtokens,           $tok,
22078         $type,              $type_sequence,     $indent_flag,
22079     );
22080
22081     # TV2: refs to ARRAYS for processing one LINE
22082     # Re-initialized on each call.
22083     my $routput_token_list     = [];    # stack of output token indexes
22084     my $routput_token_type     = [];    # token types
22085     my $routput_block_type     = [];    # types of code block
22086     my $routput_container_type = [];    # paren types, such as if, elsif, ..
22087     my $routput_type_sequence  = [];    # nesting sequential number
22088     my $routput_indent_flag    = [];    #
22089
22090     # TV3: SCALARS for quote variables.  These are initialized with a
22091     # subroutine call and continually updated as lines are processed.
22092     my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22093         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
22094
22095     # TV4: SCALARS for multi-line identifiers and
22096     # statements. These are initialized with a subroutine call
22097     # and continually updated as lines are processed.
22098     my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
22099
22100     # TV5: SCALARS for tracking indentation level.
22101     # Initialized once and continually updated as lines are
22102     # processed.
22103     my (
22104         $nesting_token_string,      $nesting_type_string,
22105         $nesting_block_string,      $nesting_block_flag,
22106         $nesting_list_string,       $nesting_list_flag,
22107         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22108         $in_statement_continuation, $level_in_tokenizer,
22109         $slevel_in_tokenizer,       $rslevel_stack,
22110     );
22111
22112     # TV6: SCALARS for remembering several previous
22113     # tokens. Initialized once and continually updated as
22114     # lines are processed.
22115     my (
22116         $last_nonblank_container_type,     $last_nonblank_type_sequence,
22117         $last_last_nonblank_token,         $last_last_nonblank_type,
22118         $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
22119         $last_last_nonblank_type_sequence, $last_nonblank_prototype,
22120     );
22121
22122     # ----------------------------------------------------------------
22123     # beginning of tokenizer variable access and manipulation routines
22124     # ----------------------------------------------------------------
22125
22126     sub initialize_tokenizer_state {
22127
22128         # TV1: initialized on each call
22129         # TV2: initialized on each call
22130         # TV3:
22131         $in_quote                = 0;
22132         $quote_type              = 'Q';
22133         $quote_character         = "";
22134         $quote_pos               = 0;
22135         $quote_depth             = 0;
22136         $quoted_string_1         = "";
22137         $quoted_string_2         = "";
22138         $allowed_quote_modifiers = "";
22139
22140         # TV4:
22141         $id_scan_state     = '';
22142         $identifier        = '';
22143         $want_paren        = "";
22144         $indented_if_level = 0;
22145
22146         # TV5:
22147         $nesting_token_string             = "";
22148         $nesting_type_string              = "";
22149         $nesting_block_string             = '1';    # initially in a block
22150         $nesting_block_flag               = 1;
22151         $nesting_list_string              = '0';    # initially not in a list
22152         $nesting_list_flag                = 0;      # initially not in a list
22153         $ci_string_in_tokenizer           = "";
22154         $continuation_string_in_tokenizer = "0";
22155         $in_statement_continuation        = 0;
22156         $level_in_tokenizer               = 0;
22157         $slevel_in_tokenizer              = 0;
22158         $rslevel_stack                    = [];
22159
22160         # TV6:
22161         $last_nonblank_container_type      = '';
22162         $last_nonblank_type_sequence       = '';
22163         $last_last_nonblank_token          = ';';
22164         $last_last_nonblank_type           = ';';
22165         $last_last_nonblank_block_type     = '';
22166         $last_last_nonblank_container_type = '';
22167         $last_last_nonblank_type_sequence  = '';
22168         $last_nonblank_prototype           = "";
22169     }
22170
22171     sub save_tokenizer_state {
22172
22173         my $rTV1 = [
22174             $block_type,        $container_type,    $expecting,
22175             $i,                 $i_tok,             $input_line,
22176             $input_line_number, $last_nonblank_i,   $max_token_index,
22177             $next_tok,          $next_type,         $peeked_ahead,
22178             $prototype,         $rhere_target_list, $rtoken_map,
22179             $rtoken_type,       $rtokens,           $tok,
22180             $type,              $type_sequence,     $indent_flag,
22181         ];
22182
22183         my $rTV2 = [
22184             $routput_token_list,    $routput_token_type,
22185             $routput_block_type,    $routput_container_type,
22186             $routput_type_sequence, $routput_indent_flag,
22187         ];
22188
22189         my $rTV3 = [
22190             $in_quote,        $quote_type,
22191             $quote_character, $quote_pos,
22192             $quote_depth,     $quoted_string_1,
22193             $quoted_string_2, $allowed_quote_modifiers,
22194         ];
22195
22196         my $rTV4 =
22197           [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
22198
22199         my $rTV5 = [
22200             $nesting_token_string,      $nesting_type_string,
22201             $nesting_block_string,      $nesting_block_flag,
22202             $nesting_list_string,       $nesting_list_flag,
22203             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22204             $in_statement_continuation, $level_in_tokenizer,
22205             $slevel_in_tokenizer,       $rslevel_stack,
22206         ];
22207
22208         my $rTV6 = [
22209             $last_nonblank_container_type,
22210             $last_nonblank_type_sequence,
22211             $last_last_nonblank_token,
22212             $last_last_nonblank_type,
22213             $last_last_nonblank_block_type,
22214             $last_last_nonblank_container_type,
22215             $last_last_nonblank_type_sequence,
22216             $last_nonblank_prototype,
22217         ];
22218         return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
22219     }
22220
22221     sub restore_tokenizer_state {
22222         my ($rstate) = @_;
22223         my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
22224         (
22225             $block_type,        $container_type,    $expecting,
22226             $i,                 $i_tok,             $input_line,
22227             $input_line_number, $last_nonblank_i,   $max_token_index,
22228             $next_tok,          $next_type,         $peeked_ahead,
22229             $prototype,         $rhere_target_list, $rtoken_map,
22230             $rtoken_type,       $rtokens,           $tok,
22231             $type,              $type_sequence,     $indent_flag,
22232         ) = @{$rTV1};
22233
22234         (
22235             $routput_token_list,    $routput_token_type,
22236             $routput_block_type,    $routput_container_type,
22237             $routput_type_sequence, $routput_type_sequence,
22238         ) = @{$rTV2};
22239
22240         (
22241             $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22242             $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
22243         ) = @{$rTV3};
22244
22245         ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
22246           @{$rTV4};
22247
22248         (
22249             $nesting_token_string,      $nesting_type_string,
22250             $nesting_block_string,      $nesting_block_flag,
22251             $nesting_list_string,       $nesting_list_flag,
22252             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22253             $in_statement_continuation, $level_in_tokenizer,
22254             $slevel_in_tokenizer,       $rslevel_stack,
22255         ) = @{$rTV5};
22256
22257         (
22258             $last_nonblank_container_type,
22259             $last_nonblank_type_sequence,
22260             $last_last_nonblank_token,
22261             $last_last_nonblank_type,
22262             $last_last_nonblank_block_type,
22263             $last_last_nonblank_container_type,
22264             $last_last_nonblank_type_sequence,
22265             $last_nonblank_prototype,
22266         ) = @{$rTV6};
22267     }
22268
22269     sub get_indentation_level {
22270
22271         # patch to avoid reporting error if indented if is not terminated
22272         if ($indented_if_level) { return $level_in_tokenizer - 1 }
22273         return $level_in_tokenizer;
22274     }
22275
22276     sub reset_indentation_level {
22277         $level_in_tokenizer  = $_[0];
22278         $slevel_in_tokenizer = $_[0];
22279         push @{$rslevel_stack}, $slevel_in_tokenizer;
22280     }
22281
22282     sub peeked_ahead {
22283         $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
22284     }
22285
22286     # ------------------------------------------------------------
22287     # end of tokenizer variable access and manipulation routines
22288     # ------------------------------------------------------------
22289
22290     # ------------------------------------------------------------
22291     # beginning of various scanner interface routines
22292     # ------------------------------------------------------------
22293     sub scan_replacement_text {
22294
22295         # check for here-docs in replacement text invoked by
22296         # a substitution operator with executable modifier 'e'.
22297         #
22298         # given:
22299         #  $replacement_text
22300         # return:
22301         #  $rht = reference to any here-doc targets
22302         my ($replacement_text) = @_;
22303
22304         # quick check
22305         return undef unless ( $replacement_text =~ /<</ );
22306
22307         write_logfile_entry("scanning replacement text for here-doc targets\n");
22308
22309         # save the logger object for error messages
22310         my $logger_object = $tokenizer_self->{_logger_object};
22311
22312         # localize all package variables
22313         local (
22314             $tokenizer_self,          $last_nonblank_token,
22315             $last_nonblank_type,      $last_nonblank_block_type,
22316             $statement_type,          $in_attribute_list,
22317             $current_package,         $context,
22318             %is_constant,             %is_user_function,
22319             %user_function_prototype, %is_block_function,
22320             %is_block_list_function,  %saw_function_definition,
22321             $brace_depth,             $paren_depth,
22322             $square_bracket_depth,    @current_depth,
22323             @total_depth,             $total_depth,
22324             @nesting_sequence_number, @current_sequence_number,
22325             @paren_type,              @paren_semicolon_count,
22326             @paren_structural_type,   @brace_type,
22327             @brace_structural_type,   @brace_statement_type,
22328             @brace_context,           @brace_package,
22329             @square_bracket_type,     @square_bracket_structural_type,
22330             @depth_array,             @starting_line_of_current_depth,
22331             @nested_ternary_flag,
22332         );
22333
22334         # save all lexical variables
22335         my $rstate = save_tokenizer_state();
22336         _decrement_count();    # avoid error check for multiple tokenizers
22337
22338         # make a new tokenizer
22339         my $rOpts = {};
22340         my $rpending_logfile_message;
22341         my $source_object =
22342           Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
22343             $rpending_logfile_message );
22344         my $tokenizer = Perl::Tidy::Tokenizer->new(
22345             source_object        => $source_object,
22346             logger_object        => $logger_object,
22347             starting_line_number => $input_line_number,
22348         );
22349
22350         # scan the replacement text
22351         1 while ( $tokenizer->get_line() );
22352
22353         # remove any here doc targets
22354         my $rht = undef;
22355         if ( $tokenizer_self->{_in_here_doc} ) {
22356             $rht = [];
22357             push @{$rht},
22358               [
22359                 $tokenizer_self->{_here_doc_target},
22360                 $tokenizer_self->{_here_quote_character}
22361               ];
22362             if ( $tokenizer_self->{_rhere_target_list} ) {
22363                 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
22364                 $tokenizer_self->{_rhere_target_list} = undef;
22365             }
22366             $tokenizer_self->{_in_here_doc} = undef;
22367         }
22368
22369         # now its safe to report errors
22370         $tokenizer->report_tokenization_errors();
22371
22372         # restore all tokenizer lexical variables
22373         restore_tokenizer_state($rstate);
22374
22375         # return the here doc targets
22376         return $rht;
22377     }
22378
22379     sub scan_bare_identifier {
22380         ( $i, $tok, $type, $prototype ) =
22381           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
22382             $rtoken_map, $max_token_index );
22383     }
22384
22385     sub scan_identifier {
22386         ( $i, $tok, $type, $id_scan_state, $identifier ) =
22387           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
22388             $max_token_index, $expecting );
22389     }
22390
22391     sub scan_id {
22392         ( $i, $tok, $type, $id_scan_state ) =
22393           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
22394             $id_scan_state, $max_token_index );
22395     }
22396
22397     sub scan_number {
22398         my $number;
22399         ( $i, $type, $number ) =
22400           scan_number_do( $input_line, $i, $rtoken_map, $type,
22401             $max_token_index );
22402         return $number;
22403     }
22404
22405     # a sub to warn if token found where term expected
22406     sub error_if_expecting_TERM {
22407         if ( $expecting == TERM ) {
22408             if ( $really_want_term{$last_nonblank_type} ) {
22409                 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
22410                     $rtoken_type, $input_line );
22411                 1;
22412             }
22413         }
22414     }
22415
22416     # a sub to warn if token found where operator expected
22417     sub error_if_expecting_OPERATOR {
22418         if ( $expecting == OPERATOR ) {
22419             my $thing = defined $_[0] ? $_[0] : $tok;
22420             unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
22421                 $rtoken_map, $rtoken_type, $input_line );
22422             if ( $i_tok == 0 ) {
22423                 interrupt_logfile();
22424                 warning("Missing ';' above?\n");
22425                 resume_logfile();
22426             }
22427             1;
22428         }
22429     }
22430
22431     # ------------------------------------------------------------
22432     # end scanner interfaces
22433     # ------------------------------------------------------------
22434
22435     my %is_for_foreach;
22436     @_ = qw(for foreach);
22437     @is_for_foreach{@_} = (1) x scalar(@_);
22438
22439     my %is_my_our;
22440     @_ = qw(my our);
22441     @is_my_our{@_} = (1) x scalar(@_);
22442
22443     # These keywords may introduce blocks after parenthesized expressions,
22444     # in the form:
22445     # keyword ( .... ) { BLOCK }
22446     # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
22447     my %is_blocktype_with_paren;
22448     @_ = qw(if elsif unless while until for foreach switch case given when);
22449     @is_blocktype_with_paren{@_} = (1) x scalar(@_);
22450
22451     # ------------------------------------------------------------
22452     # begin hash of code for handling most token types
22453     # ------------------------------------------------------------
22454     my $tokenization_code = {
22455
22456         # no special code for these types yet, but syntax checks
22457         # could be added
22458
22459 ##      '!'   => undef,
22460 ##      '!='  => undef,
22461 ##      '!~'  => undef,
22462 ##      '%='  => undef,
22463 ##      '&&=' => undef,
22464 ##      '&='  => undef,
22465 ##      '+='  => undef,
22466 ##      '-='  => undef,
22467 ##      '..'  => undef,
22468 ##      '..'  => undef,
22469 ##      '...' => undef,
22470 ##      '.='  => undef,
22471 ##      '<<=' => undef,
22472 ##      '<='  => undef,
22473 ##      '<=>' => undef,
22474 ##      '<>'  => undef,
22475 ##      '='   => undef,
22476 ##      '=='  => undef,
22477 ##      '=~'  => undef,
22478 ##      '>='  => undef,
22479 ##      '>>'  => undef,
22480 ##      '>>=' => undef,
22481 ##      '\\'  => undef,
22482 ##      '^='  => undef,
22483 ##      '|='  => undef,
22484 ##      '||=' => undef,
22485 ##      '//=' => undef,
22486 ##      '~'   => undef,
22487 ##      '~~'  => undef,
22488 ##      '!~~'  => undef,
22489
22490         '>' => sub {
22491             error_if_expecting_TERM()
22492               if ( $expecting == TERM );
22493         },
22494         '|' => sub {
22495             error_if_expecting_TERM()
22496               if ( $expecting == TERM );
22497         },
22498         '$' => sub {
22499
22500             # start looking for a scalar
22501             error_if_expecting_OPERATOR("Scalar")
22502               if ( $expecting == OPERATOR );
22503             scan_identifier();
22504
22505             if ( $identifier eq '$^W' ) {
22506                 $tokenizer_self->{_saw_perl_dash_w} = 1;
22507             }
22508
22509             # Check for indentifier in indirect object slot
22510             # (vorboard.pl, sort.t).  Something like:
22511             #   /^(print|printf|sort|exec|system)$/
22512             if (
22513                 $is_indirect_object_taker{$last_nonblank_token}
22514
22515                 || ( ( $last_nonblank_token eq '(' )
22516                     && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
22517                 || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
22518               )
22519             {
22520                 $type = 'Z';
22521             }
22522         },
22523         '(' => sub {
22524
22525             ++$paren_depth;
22526             $paren_semicolon_count[$paren_depth] = 0;
22527             if ($want_paren) {
22528                 $container_type = $want_paren;
22529                 $want_paren     = "";
22530             }
22531             else {
22532                 $container_type = $last_nonblank_token;
22533
22534                 # We can check for a syntax error here of unexpected '(',
22535                 # but this is going to get messy...
22536                 if (
22537                     $expecting == OPERATOR
22538
22539                     # be sure this is not a method call of the form
22540                     # &method(...), $method->(..), &{method}(...),
22541                     # $ref[2](list) is ok & short for $ref[2]->(list)
22542                     # NOTE: at present, braces in something like &{ xxx }
22543                     # are not marked as a block, we might have a method call
22544                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
22545
22546                   )
22547                 {
22548
22549                     # ref: camel 3 p 703.
22550                     if ( $last_last_nonblank_token eq 'do' ) {
22551                         complain(
22552 "do SUBROUTINE is deprecated; consider & or -> notation\n"
22553                         );
22554                     }
22555                     else {
22556
22557                         # if this is an empty list, (), then it is not an
22558                         # error; for example, we might have a constant pi and
22559                         # invoke it with pi() or just pi;
22560                         my ( $next_nonblank_token, $i_next ) =
22561                           find_next_nonblank_token( $i, $rtokens,
22562                             $max_token_index );
22563                         if ( $next_nonblank_token ne ')' ) {
22564                             my $hint;
22565                             error_if_expecting_OPERATOR('(');
22566
22567                             if ( $last_nonblank_type eq 'C' ) {
22568                                 $hint =
22569                                   "$last_nonblank_token has a void prototype\n";
22570                             }
22571                             elsif ( $last_nonblank_type eq 'i' ) {
22572                                 if (   $i_tok > 0
22573                                     && $last_nonblank_token =~ /^\$/ )
22574                                 {
22575                                     $hint =
22576 "Do you mean '$last_nonblank_token->(' ?\n";
22577                                 }
22578                             }
22579                             if ($hint) {
22580                                 interrupt_logfile();
22581                                 warning($hint);
22582                                 resume_logfile();
22583                             }
22584                         } ## end if ( $next_nonblank_token...
22585                     } ## end else [ if ( $last_last_nonblank_token...
22586                 } ## end if ( $expecting == OPERATOR...
22587             }
22588             $paren_type[$paren_depth] = $container_type;
22589             ( $type_sequence, $indent_flag ) =
22590               increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22591
22592             # propagate types down through nested parens
22593             # for example: the second paren in 'if ((' would be structural
22594             # since the first is.
22595
22596             if ( $last_nonblank_token eq '(' ) {
22597                 $type = $last_nonblank_type;
22598             }
22599
22600             #     We exclude parens as structural after a ',' because it
22601             #     causes subtle problems with continuation indentation for
22602             #     something like this, where the first 'or' will not get
22603             #     indented.
22604             #
22605             #         assert(
22606             #             __LINE__,
22607             #             ( not defined $check )
22608             #               or ref $check
22609             #               or $check eq "new"
22610             #               or $check eq "old",
22611             #         );
22612             #
22613             #     Likewise, we exclude parens where a statement can start
22614             #     because of problems with continuation indentation, like
22615             #     these:
22616             #
22617             #         ($firstline =~ /^#\!.*perl/)
22618             #         and (print $File::Find::name, "\n")
22619             #           and (return 1);
22620             #
22621             #         (ref($usage_fref) =~ /CODE/)
22622             #         ? &$usage_fref
22623             #           : (&blast_usage, &blast_params, &blast_general_params);
22624
22625             else {
22626                 $type = '{';
22627             }
22628
22629             if ( $last_nonblank_type eq ')' ) {
22630                 warning(
22631                     "Syntax error? found token '$last_nonblank_type' then '('\n"
22632                 );
22633             }
22634             $paren_structural_type[$paren_depth] = $type;
22635
22636         },
22637         ')' => sub {
22638             ( $type_sequence, $indent_flag ) =
22639               decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22640
22641             if ( $paren_structural_type[$paren_depth] eq '{' ) {
22642                 $type = '}';
22643             }
22644
22645             $container_type = $paren_type[$paren_depth];
22646
22647             #    /^(for|foreach)$/
22648             if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
22649                 my $num_sc = $paren_semicolon_count[$paren_depth];
22650                 if ( $num_sc > 0 && $num_sc != 2 ) {
22651                     warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
22652                 }
22653             }
22654
22655             if ( $paren_depth > 0 ) { $paren_depth-- }
22656         },
22657         ',' => sub {
22658             if ( $last_nonblank_type eq ',' ) {
22659                 complain("Repeated ','s \n");
22660             }
22661
22662             # patch for operator_expected: note if we are in the list (use.t)
22663             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
22664 ##                FIXME: need to move this elsewhere, perhaps check after a '('
22665 ##                elsif ($last_nonblank_token eq '(') {
22666 ##                    warning("Leading ','s illegal in some versions of perl\n");
22667 ##                }
22668         },
22669         ';' => sub {
22670             $context        = UNKNOWN_CONTEXT;
22671             $statement_type = '';
22672
22673             #    /^(for|foreach)$/
22674             if ( $is_for_foreach{ $paren_type[$paren_depth] } )
22675             {    # mark ; in for loop
22676
22677                 # Be careful: we do not want a semicolon such as the
22678                 # following to be included:
22679                 #
22680                 #    for (sort {strcoll($a,$b);} keys %investments) {
22681
22682                 if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
22683                     && $square_bracket_depth ==
22684                     $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
22685                 {
22686
22687                     $type = 'f';
22688                     $paren_semicolon_count[$paren_depth]++;
22689                 }
22690             }
22691
22692         },
22693         '"' => sub {
22694             error_if_expecting_OPERATOR("String")
22695               if ( $expecting == OPERATOR );
22696             $in_quote                = 1;
22697             $type                    = 'Q';
22698             $allowed_quote_modifiers = "";
22699         },
22700         "'" => sub {
22701             error_if_expecting_OPERATOR("String")
22702               if ( $expecting == OPERATOR );
22703             $in_quote                = 1;
22704             $type                    = 'Q';
22705             $allowed_quote_modifiers = "";
22706         },
22707         '`' => sub {
22708             error_if_expecting_OPERATOR("String")
22709               if ( $expecting == OPERATOR );
22710             $in_quote                = 1;
22711             $type                    = 'Q';
22712             $allowed_quote_modifiers = "";
22713         },
22714         '/' => sub {
22715             my $is_pattern;
22716
22717             if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
22718                 my $msg;
22719                 ( $is_pattern, $msg ) =
22720                   guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
22721                     $max_token_index );
22722
22723                 if ($msg) {
22724                     write_diagnostics("DIVIDE:$msg\n");
22725                     write_logfile_entry($msg);
22726                 }
22727             }
22728             else { $is_pattern = ( $expecting == TERM ) }
22729
22730             if ($is_pattern) {
22731                 $in_quote                = 1;
22732                 $type                    = 'Q';
22733                 $allowed_quote_modifiers = '[cgimosxp]';
22734             }
22735             else {    # not a pattern; check for a /= token
22736
22737                 if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
22738                     $i++;
22739                     $tok  = '/=';
22740                     $type = $tok;
22741                 }
22742
22743               #DEBUG - collecting info on what tokens follow a divide
22744               # for development of guessing algorithm
22745               #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
22746               #    #write_diagnostics( "DIVIDE? $input_line\n" );
22747               #}
22748             }
22749         },
22750         '{' => sub {
22751
22752             # if we just saw a ')', we will label this block with
22753             # its type.  We need to do this to allow sub
22754             # code_block_type to determine if this brace starts a
22755             # code block or anonymous hash.  (The type of a paren
22756             # pair is the preceding token, such as 'if', 'else',
22757             # etc).
22758             $container_type = "";
22759
22760             # ATTRS: for a '{' following an attribute list, reset
22761             # things to look like we just saw the sub name
22762             if ( $statement_type =~ /^sub/ ) {
22763                 $last_nonblank_token = $statement_type;
22764                 $last_nonblank_type  = 'i';
22765                 $statement_type      = "";
22766             }
22767
22768             # patch for SWITCH/CASE: hide these keywords from an immediately
22769             # following opening brace
22770             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
22771                 && $statement_type eq $last_nonblank_token )
22772             {
22773                 $last_nonblank_token = ";";
22774             }
22775
22776             elsif ( $last_nonblank_token eq ')' ) {
22777                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
22778
22779                 # defensive move in case of a nesting error (pbug.t)
22780                 # in which this ')' had no previous '('
22781                 # this nesting error will have been caught
22782                 if ( !defined($last_nonblank_token) ) {
22783                     $last_nonblank_token = 'if';
22784                 }
22785
22786                 # check for syntax error here;
22787                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
22788                     my $list = join( ' ', sort keys %is_blocktype_with_paren );
22789                     warning(
22790                         "syntax error at ') {', didn't see one of: $list\n");
22791                 }
22792             }
22793
22794             # patch for paren-less for/foreach glitch, part 2.
22795             # see note below under 'qw'
22796             elsif ($last_nonblank_token eq 'qw'
22797                 && $is_for_foreach{$want_paren} )
22798             {
22799                 $last_nonblank_token = $want_paren;
22800                 if ( $last_last_nonblank_token eq $want_paren ) {
22801                     warning(
22802 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
22803                     );
22804
22805                 }
22806                 $want_paren = "";
22807             }
22808
22809             # now identify which of the three possible types of
22810             # curly braces we have: hash index container, anonymous
22811             # hash reference, or code block.
22812
22813             # non-structural (hash index) curly brace pair
22814             # get marked 'L' and 'R'
22815             if ( is_non_structural_brace() ) {
22816                 $type = 'L';
22817
22818                 # patch for SWITCH/CASE:
22819                 # allow paren-less identifier after 'when'
22820                 # if the brace is preceded by a space
22821                 if (   $statement_type eq 'when'
22822                     && $last_nonblank_type      eq 'i'
22823                     && $last_last_nonblank_type eq 'k'
22824                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
22825                 {
22826                     $type       = '{';
22827                     $block_type = $statement_type;
22828                 }
22829             }
22830
22831             # code and anonymous hash have the same type, '{', but are
22832             # distinguished by 'block_type',
22833             # which will be blank for an anonymous hash
22834             else {
22835
22836                 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
22837                     $max_token_index );
22838
22839                 # patch to promote bareword type to function taking block
22840                 if (   $block_type
22841                     && $last_nonblank_type eq 'w'
22842                     && $last_nonblank_i >= 0 )
22843                 {
22844                     if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
22845                         $routput_token_type->[$last_nonblank_i] = 'G';
22846                     }
22847                 }
22848
22849                 # patch for SWITCH/CASE: if we find a stray opening block brace
22850                 # where we might accept a 'case' or 'when' block, then take it
22851                 if (   $statement_type eq 'case'
22852                     || $statement_type eq 'when' )
22853                 {
22854                     if ( !$block_type || $block_type eq '}' ) {
22855                         $block_type = $statement_type;
22856                     }
22857                 }
22858             }
22859             $brace_type[ ++$brace_depth ] = $block_type;
22860             $brace_package[$brace_depth] = $current_package;
22861             ( $type_sequence, $indent_flag ) =
22862               increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22863             $brace_structural_type[$brace_depth] = $type;
22864             $brace_context[$brace_depth]         = $context;
22865             $brace_statement_type[$brace_depth]  = $statement_type;
22866         },
22867         '}' => sub {
22868             $block_type = $brace_type[$brace_depth];
22869             if ($block_type) { $statement_type = '' }
22870             if ( defined( $brace_package[$brace_depth] ) ) {
22871                 $current_package = $brace_package[$brace_depth];
22872             }
22873
22874             # can happen on brace error (caught elsewhere)
22875             else {
22876             }
22877             ( $type_sequence, $indent_flag ) =
22878               decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22879
22880             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
22881                 $type = 'R';
22882             }
22883
22884             # propagate type information for 'do' and 'eval' blocks.
22885             # This is necessary to enable us to know if an operator
22886             # or term is expected next
22887             if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
22888                 $tok = $brace_type[$brace_depth];
22889             }
22890
22891             $context        = $brace_context[$brace_depth];
22892             $statement_type = $brace_statement_type[$brace_depth];
22893             if ( $brace_depth > 0 ) { $brace_depth--; }
22894         },
22895         '&' => sub {    # maybe sub call? start looking
22896
22897             # We have to check for sub call unless we are sure we
22898             # are expecting an operator.  This example from s2p
22899             # got mistaken as a q operator in an early version:
22900             #   print BODY &q(<<'EOT');
22901             if ( $expecting != OPERATOR ) {
22902                 scan_identifier();
22903             }
22904             else {
22905             }
22906         },
22907         '<' => sub {    # angle operator or less than?
22908
22909             if ( $expecting != OPERATOR ) {
22910                 ( $i, $type ) =
22911                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
22912                     $expecting, $max_token_index );
22913
22914                 if ( $type eq '<' && $expecting == TERM ) {
22915                     error_if_expecting_TERM();
22916                     interrupt_logfile();
22917                     warning("Unterminated <> operator?\n");
22918                     resume_logfile();
22919                 }
22920             }
22921             else {
22922             }
22923         },
22924         '?' => sub {    # ?: conditional or starting pattern?
22925
22926             my $is_pattern;
22927
22928             if ( $expecting == UNKNOWN ) {
22929
22930                 my $msg;
22931                 ( $is_pattern, $msg ) =
22932                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
22933                     $max_token_index );
22934
22935                 if ($msg) { write_logfile_entry($msg) }
22936             }
22937             else { $is_pattern = ( $expecting == TERM ) }
22938
22939             if ($is_pattern) {
22940                 $in_quote                = 1;
22941                 $type                    = 'Q';
22942                 $allowed_quote_modifiers = '[cgimosxp]';
22943             }
22944             else {
22945                 ( $type_sequence, $indent_flag ) =
22946                   increase_nesting_depth( QUESTION_COLON,
22947                     $$rtoken_map[$i_tok] );
22948             }
22949         },
22950         '*' => sub {    # typeglob, or multiply?
22951
22952             if ( $expecting == TERM ) {
22953                 scan_identifier();
22954             }
22955             else {
22956
22957                 if ( $$rtokens[ $i + 1 ] eq '=' ) {
22958                     $tok  = '*=';
22959                     $type = $tok;
22960                     $i++;
22961                 }
22962                 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
22963                     $tok  = '**';
22964                     $type = $tok;
22965                     $i++;
22966                     if ( $$rtokens[ $i + 1 ] eq '=' ) {
22967                         $tok  = '**=';
22968                         $type = $tok;
22969                         $i++;
22970                     }
22971                 }
22972             }
22973         },
22974         '.' => sub {    # what kind of . ?
22975
22976             if ( $expecting != OPERATOR ) {
22977                 scan_number();
22978                 if ( $type eq '.' ) {
22979                     error_if_expecting_TERM()
22980                       if ( $expecting == TERM );
22981                 }
22982             }
22983             else {
22984             }
22985         },
22986         ':' => sub {
22987
22988             # if this is the first nonblank character, call it a label
22989             # since perl seems to just swallow it
22990             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
22991                 $type = 'J';
22992             }
22993
22994             # ATTRS: check for a ':' which introduces an attribute list
22995             # (this might eventually get its own token type)
22996             elsif ( $statement_type =~ /^sub/ ) {
22997                 $type              = 'A';
22998                 $in_attribute_list = 1;
22999             }
23000
23001             # check for scalar attribute, such as
23002             # my $foo : shared = 1;
23003             elsif ($is_my_our{$statement_type}
23004                 && $current_depth[QUESTION_COLON] == 0 )
23005             {
23006                 $type              = 'A';
23007                 $in_attribute_list = 1;
23008             }
23009
23010             # otherwise, it should be part of a ?/: operator
23011             else {
23012                 ( $type_sequence, $indent_flag ) =
23013                   decrease_nesting_depth( QUESTION_COLON,
23014                     $$rtoken_map[$i_tok] );
23015                 if ( $last_nonblank_token eq '?' ) {
23016                     warning("Syntax error near ? :\n");
23017                 }
23018             }
23019         },
23020         '+' => sub {    # what kind of plus?
23021
23022             if ( $expecting == TERM ) {
23023                 my $number = scan_number();
23024
23025                 # unary plus is safest assumption if not a number
23026                 if ( !defined($number) ) { $type = 'p'; }
23027             }
23028             elsif ( $expecting == OPERATOR ) {
23029             }
23030             else {
23031                 if ( $next_type eq 'w' ) { $type = 'p' }
23032             }
23033         },
23034         '@' => sub {
23035
23036             error_if_expecting_OPERATOR("Array")
23037               if ( $expecting == OPERATOR );
23038             scan_identifier();
23039         },
23040         '%' => sub {    # hash or modulo?
23041
23042             # first guess is hash if no following blank
23043             if ( $expecting == UNKNOWN ) {
23044                 if ( $next_type ne 'b' ) { $expecting = TERM }
23045             }
23046             if ( $expecting == TERM ) {
23047                 scan_identifier();
23048             }
23049         },
23050         '[' => sub {
23051             $square_bracket_type[ ++$square_bracket_depth ] =
23052               $last_nonblank_token;
23053             ( $type_sequence, $indent_flag ) =
23054               increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23055
23056             # It may seem odd, but structural square brackets have
23057             # type '{' and '}'.  This simplifies the indentation logic.
23058             if ( !is_non_structural_brace() ) {
23059                 $type = '{';
23060             }
23061             $square_bracket_structural_type[$square_bracket_depth] = $type;
23062         },
23063         ']' => sub {
23064             ( $type_sequence, $indent_flag ) =
23065               decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23066
23067             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
23068             {
23069                 $type = '}';
23070             }
23071             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
23072         },
23073         '-' => sub {    # what kind of minus?
23074
23075             if ( ( $expecting != OPERATOR )
23076                 && $is_file_test_operator{$next_tok} )
23077             {
23078                 my ( $next_nonblank_token, $i_next ) =
23079                   find_next_nonblank_token( $i + 1, $rtokens,
23080                     $max_token_index );
23081
23082                 # check for a quoted word like "-w=>xx";
23083                 # it is sufficient to just check for a following '='
23084                 if ( $next_nonblank_token eq '=' ) {
23085                     $type = 'm';
23086                 }
23087                 else {
23088                     $i++;
23089                     $tok .= $next_tok;
23090                     $type = 'F';
23091                 }
23092             }
23093             elsif ( $expecting == TERM ) {
23094                 my $number = scan_number();
23095
23096                 # maybe part of bareword token? unary is safest
23097                 if ( !defined($number) ) { $type = 'm'; }
23098
23099             }
23100             elsif ( $expecting == OPERATOR ) {
23101             }
23102             else {
23103
23104                 if ( $next_type eq 'w' ) {
23105                     $type = 'm';
23106                 }
23107             }
23108         },
23109
23110         '^' => sub {
23111
23112             # check for special variables like ${^WARNING_BITS}
23113             if ( $expecting == TERM ) {
23114
23115                 # FIXME: this should work but will not catch errors
23116                 # because we also have to be sure that previous token is
23117                 # a type character ($,@,%).
23118                 if ( $last_nonblank_token eq '{'
23119                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
23120                 {
23121
23122                     if ( $next_tok eq 'W' ) {
23123                         $tokenizer_self->{_saw_perl_dash_w} = 1;
23124                     }
23125                     $tok  = $tok . $next_tok;
23126                     $i    = $i + 1;
23127                     $type = 'w';
23128                 }
23129
23130                 else {
23131                     unless ( error_if_expecting_TERM() ) {
23132
23133                         # Something like this is valid but strange:
23134                         # undef ^I;
23135                         complain("The '^' seems unusual here\n");
23136                     }
23137                 }
23138             }
23139         },
23140
23141         '::' => sub {    # probably a sub call
23142             scan_bare_identifier();
23143         },
23144         '<<' => sub {    # maybe a here-doc?
23145             return
23146               unless ( $i < $max_token_index )
23147               ;          # here-doc not possible if end of line
23148
23149             if ( $expecting != OPERATOR ) {
23150                 my ( $found_target, $here_doc_target, $here_quote_character,
23151                     $saw_error );
23152                 (
23153                     $found_target, $here_doc_target, $here_quote_character, $i,
23154                     $saw_error
23155                   )
23156                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
23157                     $max_token_index );
23158
23159                 if ($found_target) {
23160                     push @{$rhere_target_list},
23161                       [ $here_doc_target, $here_quote_character ];
23162                     $type = 'h';
23163                     if ( length($here_doc_target) > 80 ) {
23164                         my $truncated = substr( $here_doc_target, 0, 80 );
23165                         complain("Long here-target: '$truncated' ...\n");
23166                     }
23167                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
23168                         complain(
23169                             "Unconventional here-target: '$here_doc_target'\n"
23170                         );
23171                     }
23172                 }
23173                 elsif ( $expecting == TERM ) {
23174                     unless ($saw_error) {
23175
23176                         # shouldn't happen..
23177                         warning("Program bug; didn't find here doc target\n");
23178                         report_definite_bug();
23179                     }
23180                 }
23181             }
23182             else {
23183             }
23184         },
23185         '->' => sub {
23186
23187             # if -> points to a bare word, we must scan for an identifier,
23188             # otherwise something like ->y would look like the y operator
23189             scan_identifier();
23190         },
23191
23192         # type = 'pp' for pre-increment, '++' for post-increment
23193         '++' => sub {
23194             if ( $expecting == TERM ) { $type = 'pp' }
23195             elsif ( $expecting == UNKNOWN ) {
23196                 my ( $next_nonblank_token, $i_next ) =
23197                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23198                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
23199             }
23200         },
23201
23202         '=>' => sub {
23203             if ( $last_nonblank_type eq $tok ) {
23204                 complain("Repeated '=>'s \n");
23205             }
23206
23207             # patch for operator_expected: note if we are in the list (use.t)
23208             # TODO: make version numbers a new token type
23209             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
23210         },
23211
23212         # type = 'mm' for pre-decrement, '--' for post-decrement
23213         '--' => sub {
23214
23215             if ( $expecting == TERM ) { $type = 'mm' }
23216             elsif ( $expecting == UNKNOWN ) {
23217                 my ( $next_nonblank_token, $i_next ) =
23218                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23219                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
23220             }
23221         },
23222
23223         '&&' => sub {
23224             error_if_expecting_TERM()
23225               if ( $expecting == TERM );
23226         },
23227
23228         '||' => sub {
23229             error_if_expecting_TERM()
23230               if ( $expecting == TERM );
23231         },
23232
23233         '//' => sub {
23234             error_if_expecting_TERM()
23235               if ( $expecting == TERM );
23236         },
23237     };
23238
23239     # ------------------------------------------------------------
23240     # end hash of code for handling individual token types
23241     # ------------------------------------------------------------
23242
23243     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
23244
23245     # These block types terminate statements and do not need a trailing
23246     # semicolon
23247     # patched for SWITCH/CASE/
23248     my %is_zero_continuation_block_type;
23249     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
23250       if elsif else unless while until for foreach switch case given when);
23251     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
23252
23253     my %is_not_zero_continuation_block_type;
23254     @_ = qw(sort grep map do eval);
23255     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
23256
23257     my %is_logical_container;
23258     @_ = qw(if elsif unless while and or err not && !  || for foreach);
23259     @is_logical_container{@_} = (1) x scalar(@_);
23260
23261     my %is_binary_type;
23262     @_ = qw(|| &&);
23263     @is_binary_type{@_} = (1) x scalar(@_);
23264
23265     my %is_binary_keyword;
23266     @_ = qw(and or err eq ne cmp);
23267     @is_binary_keyword{@_} = (1) x scalar(@_);
23268
23269     # 'L' is token for opening { at hash key
23270     my %is_opening_type;
23271     @_ = qw" L { ( [ ";
23272     @is_opening_type{@_} = (1) x scalar(@_);
23273
23274     # 'R' is token for closing } at hash key
23275     my %is_closing_type;
23276     @_ = qw" R } ) ] ";
23277     @is_closing_type{@_} = (1) x scalar(@_);
23278
23279     my %is_redo_last_next_goto;
23280     @_ = qw(redo last next goto);
23281     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
23282
23283     my %is_use_require;
23284     @_ = qw(use require);
23285     @is_use_require{@_} = (1) x scalar(@_);
23286
23287     my %is_sub_package;
23288     @_ = qw(sub package);
23289     @is_sub_package{@_} = (1) x scalar(@_);
23290
23291     # This hash holds the hash key in $tokenizer_self for these keywords:
23292     my %is_format_END_DATA = (
23293         'format'   => '_in_format',
23294         '__END__'  => '_in_end',
23295         '__DATA__' => '_in_data',
23296     );
23297
23298     # ref: camel 3 p 147,
23299     # but perl may accept undocumented flags
23300     # perl 5.10 adds 'p' (preserve)
23301     my %quote_modifiers = (
23302         's'  => '[cegimosxp]',
23303         'y'  => '[cds]',
23304         'tr' => '[cds]',
23305         'm'  => '[cgimosxp]',
23306         'qr' => '[imosxp]',
23307         'q'  => "",
23308         'qq' => "",
23309         'qw' => "",
23310         'qx' => "",
23311     );
23312
23313     # table showing how many quoted things to look for after quote operator..
23314     # s, y, tr have 2 (pattern and replacement)
23315     # others have 1 (pattern only)
23316     my %quote_items = (
23317         's'  => 2,
23318         'y'  => 2,
23319         'tr' => 2,
23320         'm'  => 1,
23321         'qr' => 1,
23322         'q'  => 1,
23323         'qq' => 1,
23324         'qw' => 1,
23325         'qx' => 1,
23326     );
23327
23328     sub tokenize_this_line {
23329
23330   # This routine breaks a line of perl code into tokens which are of use in
23331   # indentation and reformatting.  One of my goals has been to define tokens
23332   # such that a newline may be inserted between any pair of tokens without
23333   # changing or invalidating the program. This version comes close to this,
23334   # although there are necessarily a few exceptions which must be caught by
23335   # the formatter.  Many of these involve the treatment of bare words.
23336   #
23337   # The tokens and their types are returned in arrays.  See previous
23338   # routine for their names.
23339   #
23340   # See also the array "valid_token_types" in the BEGIN section for an
23341   # up-to-date list.
23342   #
23343   # To simplify things, token types are either a single character, or they
23344   # are identical to the tokens themselves.
23345   #
23346   # As a debugging aid, the -D flag creates a file containing a side-by-side
23347   # comparison of the input string and its tokenization for each line of a file.
23348   # This is an invaluable debugging aid.
23349   #
23350   # In addition to tokens, and some associated quantities, the tokenizer
23351   # also returns flags indication any special line types.  These include
23352   # quotes, here_docs, formats.
23353   #
23354   # -----------------------------------------------------------------------
23355   #
23356   # How to add NEW_TOKENS:
23357   #
23358   # New token types will undoubtedly be needed in the future both to keep up
23359   # with changes in perl and to help adapt the tokenizer to other applications.
23360   #
23361   # Here are some notes on the minimal steps.  I wrote these notes while
23362   # adding the 'v' token type for v-strings, which are things like version
23363   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
23364   # can use your editor to search for the string "NEW_TOKENS" to find the
23365   # appropriate sections to change):
23366   #
23367   # *. Try to talk somebody else into doing it!  If not, ..
23368   #
23369   # *. Make a backup of your current version in case things don't work out!
23370   #
23371   # *. Think of a new, unused character for the token type, and add to
23372   # the array @valid_token_types in the BEGIN section of this package.
23373   # For example, I used 'v' for v-strings.
23374   #
23375   # *. Implement coding to recognize the $type of the token in this routine.
23376   # This is the hardest part, and is best done by immitating or modifying
23377   # some of the existing coding.  For example, to recognize v-strings, I
23378   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
23379   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
23380   #
23381   # *. Update sub operator_expected.  This update is critically important but
23382   # the coding is trivial.  Look at the comments in that routine for help.
23383   # For v-strings, which should behave like numbers, I just added 'v' to the
23384   # regex used to handle numbers and strings (types 'n' and 'Q').
23385   #
23386   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
23387   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
23388   # skip this step and take the default at first, then adjust later to get
23389   # desired results.  For adding type 'v', I looked at sub bond_strength and
23390   # saw that number type 'n' was using default strengths, so I didn't do
23391   # anything.  I may tune it up someday if I don't like the way line
23392   # breaks with v-strings look.
23393   #
23394   # *. Implement a 'whitespace' rule in sub set_white_space_flag in
23395   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
23396   # and saw that type 'n' used spaces on both sides, so I just added 'v'
23397   # to the array @spaces_both_sides.
23398   #
23399   # *. Update HtmlWriter package so that users can colorize the token as
23400   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
23401   # that package.  For v-strings, I initially chose to use a default color
23402   # equal to the default for numbers, but it might be nice to change that
23403   # eventually.
23404   #
23405   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
23406   #
23407   # *. Run lots and lots of debug tests.  Start with special files designed
23408   # to test the new token type.  Run with the -D flag to create a .DEBUG
23409   # file which shows the tokenization.  When these work ok, test as many old
23410   # scripts as possible.  Start with all of the '.t' files in the 'test'
23411   # directory of the distribution file.  Compare .tdy output with previous
23412   # version and updated version to see the differences.  Then include as
23413   # many more files as possible. My own technique has been to collect a huge
23414   # number of perl scripts (thousands!) into one directory and run perltidy
23415   # *, then run diff between the output of the previous version and the
23416   # current version.
23417   #
23418   # *. For another example, search for the smartmatch operator '~~'
23419   # with your editor to see where updates were made for it.
23420   #
23421   # -----------------------------------------------------------------------
23422
23423         my $line_of_tokens = shift;
23424         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
23425
23426         # patch while coding change is underway
23427         # make callers private data to allow access
23428         # $tokenizer_self = $caller_tokenizer_self;
23429
23430         # extract line number for use in error messages
23431         $input_line_number = $line_of_tokens->{_line_number};
23432
23433         # reinitialize for multi-line quote
23434         $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
23435
23436         # check for pod documentation
23437         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
23438
23439             # must not be in multi-line quote
23440             # and must not be in an eqn
23441             if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
23442             {
23443                 $tokenizer_self->{_in_pod} = 1;
23444                 return;
23445             }
23446         }
23447
23448         $input_line = $untrimmed_input_line;
23449
23450         chomp $input_line;
23451
23452         # trim start of this line unless we are continuing a quoted line
23453         # do not trim end because we might end in a quote (test: deken4.pl)
23454         # Perl::Tidy::Formatter will delete needless trailing blanks
23455         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
23456             $input_line =~ s/^\s*//;    # trim left end
23457         }
23458
23459         # update the copy of the line for use in error messages
23460         # This must be exactly what we give the pre_tokenizer
23461         $tokenizer_self->{_line_text} = $input_line;
23462
23463         # re-initialize for the main loop
23464         $routput_token_list     = [];    # stack of output token indexes
23465         $routput_token_type     = [];    # token types
23466         $routput_block_type     = [];    # types of code block
23467         $routput_container_type = [];    # paren types, such as if, elsif, ..
23468         $routput_type_sequence  = [];    # nesting sequential number
23469
23470         $rhere_target_list = [];
23471
23472         $tok             = $last_nonblank_token;
23473         $type            = $last_nonblank_type;
23474         $prototype       = $last_nonblank_prototype;
23475         $last_nonblank_i = -1;
23476         $block_type      = $last_nonblank_block_type;
23477         $container_type  = $last_nonblank_container_type;
23478         $type_sequence   = $last_nonblank_type_sequence;
23479         $indent_flag     = 0;
23480         $peeked_ahead    = 0;
23481
23482         # tokenization is done in two stages..
23483         # stage 1 is a very simple pre-tokenization
23484         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
23485
23486         # a little optimization for a full-line comment
23487         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
23488             $max_tokens_wanted = 1    # no use tokenizing a comment
23489         }
23490
23491         # start by breaking the line into pre-tokens
23492         ( $rtokens, $rtoken_map, $rtoken_type ) =
23493           pre_tokenize( $input_line, $max_tokens_wanted );
23494
23495         $max_token_index = scalar(@$rtokens) - 1;
23496         push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
23497         push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
23498         push( @$rtoken_type, 'b', 'b', 'b' );
23499
23500         # initialize for main loop
23501         for $i ( 0 .. $max_token_index + 3 ) {
23502             $routput_token_type->[$i]     = "";
23503             $routput_block_type->[$i]     = "";
23504             $routput_container_type->[$i] = "";
23505             $routput_type_sequence->[$i]  = "";
23506             $routput_indent_flag->[$i]    = 0;
23507         }
23508         $i     = -1;
23509         $i_tok = -1;
23510
23511         # ------------------------------------------------------------
23512         # begin main tokenization loop
23513         # ------------------------------------------------------------
23514
23515         # we are looking at each pre-token of one line and combining them
23516         # into tokens
23517         while ( ++$i <= $max_token_index ) {
23518
23519             if ($in_quote) {    # continue looking for end of a quote
23520                 $type = $quote_type;
23521
23522                 unless ( @{$routput_token_list} )
23523                 {               # initialize if continuation line
23524                     push( @{$routput_token_list}, $i );
23525                     $routput_token_type->[$i] = $type;
23526
23527                 }
23528                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
23529
23530                 # scan for the end of the quote or pattern
23531                 (
23532                     $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23533                     $quoted_string_1, $quoted_string_2
23534                   )
23535                   = do_quote(
23536                     $i,               $in_quote,    $quote_character,
23537                     $quote_pos,       $quote_depth, $quoted_string_1,
23538                     $quoted_string_2, $rtokens,     $rtoken_map,
23539                     $max_token_index
23540                   );
23541
23542                 # all done if we didn't find it
23543                 last if ($in_quote);
23544
23545                 # save pattern and replacement text for rescanning
23546                 my $qs1 = $quoted_string_1;
23547                 my $qs2 = $quoted_string_2;
23548
23549                 # re-initialize for next search
23550                 $quote_character = '';
23551                 $quote_pos       = 0;
23552                 $quote_type      = 'Q';
23553                 $quoted_string_1 = "";
23554                 $quoted_string_2 = "";
23555                 last if ( ++$i > $max_token_index );
23556
23557                 # look for any modifiers
23558                 if ($allowed_quote_modifiers) {
23559
23560                     # check for exact quote modifiers
23561                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
23562                         my $str = $$rtokens[$i];
23563                         my $saw_modifier_e;
23564                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
23565                             my $pos = pos($str);
23566                             my $char = substr( $str, $pos - 1, 1 );
23567                             $saw_modifier_e ||= ( $char eq 'e' );
23568                         }
23569
23570                         # For an 'e' quote modifier we must scan the replacement
23571                         # text for here-doc targets.
23572                         if ($saw_modifier_e) {
23573
23574                             my $rht = scan_replacement_text($qs1);
23575
23576                             # Change type from 'Q' to 'h' for quotes with
23577                             # here-doc targets so that the formatter (see sub
23578                             # print_line_of_tokens) will not make any line
23579                             # breaks after this point.
23580                             if ($rht) {
23581                                 push @{$rhere_target_list}, @{$rht};
23582                                 $type = 'h';
23583                                 if ( $i_tok < 0 ) {
23584                                     my $ilast = $routput_token_list->[-1];
23585                                     $routput_token_type->[$ilast] = $type;
23586                                 }
23587                             }
23588                         }
23589
23590                         if ( defined( pos($str) ) ) {
23591
23592                             # matched
23593                             if ( pos($str) == length($str) ) {
23594                                 last if ( ++$i > $max_token_index );
23595                             }
23596
23597                             # Looks like a joined quote modifier
23598                             # and keyword, maybe something like
23599                             # s/xxx/yyy/gefor @k=...
23600                             # Example is "galgen.pl".  Would have to split
23601                             # the word and insert a new token in the
23602                             # pre-token list.  This is so rare that I haven't
23603                             # done it.  Will just issue a warning citation.
23604
23605                             # This error might also be triggered if my quote
23606                             # modifier characters are incomplete
23607                             else {
23608                                 warning(<<EOM);
23609
23610 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
23611 Please put a space between quote modifiers and trailing keywords.
23612 EOM
23613
23614                            # print "token $$rtokens[$i]\n";
23615                            # my $num = length($str) - pos($str);
23616                            # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
23617                            # print "continuing with new token $$rtokens[$i]\n";
23618
23619                                 # skipping past this token does least damage
23620                                 last if ( ++$i > $max_token_index );
23621                             }
23622                         }
23623                         else {
23624
23625                             # example file: rokicki4.pl
23626                             # This error might also be triggered if my quote
23627                             # modifier characters are incomplete
23628                             write_logfile_entry(
23629 "Note: found word $str at quote modifier location\n"
23630                             );
23631                         }
23632                     }
23633
23634                     # re-initialize
23635                     $allowed_quote_modifiers = "";
23636                 }
23637             }
23638
23639             unless ( $tok =~ /^\s*$/ ) {
23640
23641                 # try to catch some common errors
23642                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
23643
23644                     if ( $last_nonblank_token eq 'eq' ) {
23645                         complain("Should 'eq' be '==' here ?\n");
23646                     }
23647                     elsif ( $last_nonblank_token eq 'ne' ) {
23648                         complain("Should 'ne' be '!=' here ?\n");
23649                     }
23650                 }
23651
23652                 $last_last_nonblank_token      = $last_nonblank_token;
23653                 $last_last_nonblank_type       = $last_nonblank_type;
23654                 $last_last_nonblank_block_type = $last_nonblank_block_type;
23655                 $last_last_nonblank_container_type =
23656                   $last_nonblank_container_type;
23657                 $last_last_nonblank_type_sequence =
23658                   $last_nonblank_type_sequence;
23659                 $last_nonblank_token          = $tok;
23660                 $last_nonblank_type           = $type;
23661                 $last_nonblank_prototype      = $prototype;
23662                 $last_nonblank_block_type     = $block_type;
23663                 $last_nonblank_container_type = $container_type;
23664                 $last_nonblank_type_sequence  = $type_sequence;
23665                 $last_nonblank_i              = $i_tok;
23666             }
23667
23668             # store previous token type
23669             if ( $i_tok >= 0 ) {
23670                 $routput_token_type->[$i_tok]     = $type;
23671                 $routput_block_type->[$i_tok]     = $block_type;
23672                 $routput_container_type->[$i_tok] = $container_type;
23673                 $routput_type_sequence->[$i_tok]  = $type_sequence;
23674                 $routput_indent_flag->[$i_tok]    = $indent_flag;
23675             }
23676             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
23677             my $pre_type = $$rtoken_type[$i];    # and type
23678             $tok  = $pre_tok;
23679             $type = $pre_type;                   # to be modified as necessary
23680             $block_type = "";    # blank for all tokens except code block braces
23681             $container_type = "";    # blank for all tokens except some parens
23682             $type_sequence  = "";    # blank for all tokens except ?/:
23683             $indent_flag    = 0;
23684             $prototype = "";    # blank for all tokens except user defined subs
23685             $i_tok     = $i;
23686
23687             # this pre-token will start an output token
23688             push( @{$routput_token_list}, $i_tok );
23689
23690             # continue gathering identifier if necessary
23691             # but do not start on blanks and comments
23692             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
23693
23694                 if ( $id_scan_state =~ /^(sub|package)/ ) {
23695                     scan_id();
23696                 }
23697                 else {
23698                     scan_identifier();
23699                 }
23700
23701                 last if ($id_scan_state);
23702                 next if ( ( $i > 0 ) || $type );
23703
23704                 # didn't find any token; start over
23705                 $type = $pre_type;
23706                 $tok  = $pre_tok;
23707             }
23708
23709             # handle whitespace tokens..
23710             next if ( $type eq 'b' );
23711             my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
23712             my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
23713
23714             # Build larger tokens where possible, since we are not in a quote.
23715             #
23716             # First try to assemble digraphs.  The following tokens are
23717             # excluded and handled specially:
23718             # '/=' is excluded because the / might start a pattern.
23719             # 'x=' is excluded since it might be $x=, with $ on previous line
23720             # '**' and *= might be typeglobs of punctuation variables
23721             # I have allowed tokens starting with <, such as <=,
23722             # because I don't think these could be valid angle operators.
23723             # test file: storrs4.pl
23724             my $test_tok   = $tok . $$rtokens[ $i + 1 ];
23725             my $combine_ok = $is_digraph{$test_tok};
23726
23727             # check for special cases which cannot be combined
23728             if ($combine_ok) {
23729
23730                 # '//' must be defined_or operator if an operator is expected.
23731                 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
23732                 # could be migrated here for clarity
23733                 if ( $test_tok eq '//' ) {
23734                     my $next_type = $$rtokens[ $i + 1 ];
23735                     my $expecting =
23736                       operator_expected( $prev_type, $tok, $next_type );
23737                     $combine_ok = 0 unless ( $expecting == OPERATOR );
23738                 }
23739             }
23740
23741             if (
23742                 $combine_ok
23743                 && ( $test_tok ne '/=' )    # might be pattern
23744                 && ( $test_tok ne 'x=' )    # might be $x
23745                 && ( $test_tok ne '**' )    # typeglob?
23746                 && ( $test_tok ne '*=' )    # typeglob?
23747               )
23748             {
23749                 $tok = $test_tok;
23750                 $i++;
23751
23752                 # Now try to assemble trigraphs.  Note that all possible
23753                 # perl trigraphs can be constructed by appending a character
23754                 # to a digraph.
23755                 $test_tok = $tok . $$rtokens[ $i + 1 ];
23756
23757                 if ( $is_trigraph{$test_tok} ) {
23758                     $tok = $test_tok;
23759                     $i++;
23760                 }
23761             }
23762
23763             $type      = $tok;
23764             $next_tok  = $$rtokens[ $i + 1 ];
23765             $next_type = $$rtoken_type[ $i + 1 ];
23766
23767             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
23768                 local $" = ')(';
23769                 my @debug_list = (
23770                     $last_nonblank_token,      $tok,
23771                     $next_tok,                 $brace_depth,
23772                     $brace_type[$brace_depth], $paren_depth,
23773                     $paren_type[$paren_depth]
23774                 );
23775                 print "TOKENIZE:(@debug_list)\n";
23776             };
23777
23778             # turn off attribute list on first non-blank, non-bareword
23779             if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
23780
23781             ###############################################################
23782             # We have the next token, $tok.
23783             # Now we have to examine this token and decide what it is
23784             # and define its $type
23785             #
23786             # section 1: bare words
23787             ###############################################################
23788
23789             if ( $pre_type eq 'w' ) {
23790                 $expecting = operator_expected( $prev_type, $tok, $next_type );
23791                 my ( $next_nonblank_token, $i_next ) =
23792                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23793
23794                 # ATTRS: handle sub and variable attributes
23795                 if ($in_attribute_list) {
23796
23797                     # treat bare word followed by open paren like qw(
23798                     if ( $next_nonblank_token eq '(' ) {
23799                         $in_quote                = $quote_items{'q'};
23800                         $allowed_quote_modifiers = $quote_modifiers{'q'};
23801                         $type                    = 'q';
23802                         $quote_type              = 'q';
23803                         next;
23804                     }
23805
23806                     # handle bareword not followed by open paren
23807                     else {
23808                         $type = 'w';
23809                         next;
23810                     }
23811                 }
23812
23813                 # quote a word followed by => operator
23814                 if ( $next_nonblank_token eq '=' ) {
23815
23816                     if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
23817                         if ( $is_constant{$current_package}{$tok} ) {
23818                             $type = 'C';
23819                         }
23820                         elsif ( $is_user_function{$current_package}{$tok} ) {
23821                             $type = 'U';
23822                             $prototype =
23823                               $user_function_prototype{$current_package}{$tok};
23824                         }
23825                         elsif ( $tok =~ /^v\d+$/ ) {
23826                             $type = 'v';
23827                             report_v_string($tok);
23828                         }
23829                         else { $type = 'w' }
23830
23831                         next;
23832                     }
23833                 }
23834
23835      # quote a bare word within braces..like xxx->{s}; note that we
23836      # must be sure this is not a structural brace, to avoid
23837      # mistaking {s} in the following for a quoted bare word:
23838      #     for(@[){s}bla}BLA}
23839      # Also treat q in something like var{-q} as a bare word, not qoute operator
23840                 ##if (   ( $last_nonblank_type eq 'L' )
23841                 ##    && ( $next_nonblank_token eq '}' ) )
23842                 if (
23843                     $next_nonblank_token eq '}'
23844                     && (
23845                         $last_nonblank_type eq 'L'
23846                         || (   $last_nonblank_type eq 'm'
23847                             && $last_last_nonblank_type eq 'L' )
23848                     )
23849                   )
23850                 {
23851                     $type = 'w';
23852                     next;
23853                 }
23854
23855                 # a bare word immediately followed by :: is not a keyword;
23856                 # use $tok_kw when testing for keywords to avoid a mistake
23857                 my $tok_kw = $tok;
23858                 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
23859                 {
23860                     $tok_kw .= '::';
23861                 }
23862
23863                 # handle operator x (now we know it isn't $x=)
23864                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
23865                     if ( $tok eq 'x' ) {
23866
23867                         if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
23868                             $tok  = 'x=';
23869                             $type = $tok;
23870                             $i++;
23871                         }
23872                         else {
23873                             $type = 'x';
23874                         }
23875                     }
23876
23877                     # FIXME: Patch: mark something like x4 as an integer for now
23878                     # It gets fixed downstream.  This is easier than
23879                     # splitting the pretoken.
23880                     else {
23881                         $type = 'n';
23882                     }
23883                 }
23884
23885                 elsif ( ( $tok eq 'strict' )
23886                     and ( $last_nonblank_token eq 'use' ) )
23887                 {
23888                     $tokenizer_self->{_saw_use_strict} = 1;
23889                     scan_bare_identifier();
23890                 }
23891
23892                 elsif ( ( $tok eq 'warnings' )
23893                     and ( $last_nonblank_token eq 'use' ) )
23894                 {
23895                     $tokenizer_self->{_saw_perl_dash_w} = 1;
23896
23897                     # scan as identifier, so that we pick up something like:
23898                     # use warnings::register
23899                     scan_bare_identifier();
23900                 }
23901
23902                 elsif (
23903                        $tok eq 'AutoLoader'
23904                     && $tokenizer_self->{_look_for_autoloader}
23905                     && (
23906                         $last_nonblank_token eq 'use'
23907
23908                         # these regexes are from AutoSplit.pm, which we want
23909                         # to mimic
23910                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
23911                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
23912                     )
23913                   )
23914                 {
23915                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
23916                     $tokenizer_self->{_saw_autoloader}      = 1;
23917                     $tokenizer_self->{_look_for_autoloader} = 0;
23918                     scan_bare_identifier();
23919                 }
23920
23921                 elsif (
23922                        $tok eq 'SelfLoader'
23923                     && $tokenizer_self->{_look_for_selfloader}
23924                     && (   $last_nonblank_token eq 'use'
23925                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
23926                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
23927                   )
23928                 {
23929                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
23930                     $tokenizer_self->{_saw_selfloader}      = 1;
23931                     $tokenizer_self->{_look_for_selfloader} = 0;
23932                     scan_bare_identifier();
23933                 }
23934
23935                 elsif ( ( $tok eq 'constant' )
23936                     and ( $last_nonblank_token eq 'use' ) )
23937                 {
23938                     scan_bare_identifier();
23939                     my ( $next_nonblank_token, $i_next ) =
23940                       find_next_nonblank_token( $i, $rtokens,
23941                         $max_token_index );
23942
23943                     if ($next_nonblank_token) {
23944
23945                         if ( $is_keyword{$next_nonblank_token} ) {
23946                             warning(
23947 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
23948                             );
23949                         }
23950
23951                         # FIXME: could check for error in which next token is
23952                         # not a word (number, punctuation, ..)
23953                         else {
23954                             $is_constant{$current_package}
23955                               {$next_nonblank_token} = 1;
23956                         }
23957                     }
23958                 }
23959
23960                 # various quote operators
23961                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
23962                     if ( $expecting == OPERATOR ) {
23963
23964                         # patch for paren-less for/foreach glitch, part 1
23965                         # perl will accept this construct as valid:
23966                         #
23967                         #    foreach my $key qw\Uno Due Tres Quadro\ {
23968                         #        print "Set $key\n";
23969                         #    }
23970                         unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
23971                         {
23972                             error_if_expecting_OPERATOR();
23973                         }
23974                     }
23975                     $in_quote                = $quote_items{$tok};
23976                     $allowed_quote_modifiers = $quote_modifiers{$tok};
23977
23978                    # All quote types are 'Q' except possibly qw quotes.
23979                    # qw quotes are special in that they may generally be trimmed
23980                    # of leading and trailing whitespace.  So they are given a
23981                    # separate type, 'q', unless requested otherwise.
23982                     $type =
23983                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
23984                       ? 'q'
23985                       : 'Q';
23986                     $quote_type = $type;
23987                 }
23988
23989                 # check for a statement label
23990                 elsif (
23991                        ( $next_nonblank_token eq ':' )
23992                     && ( $$rtokens[ $i_next + 1 ] ne ':' )
23993                     && ( $i_next <= $max_token_index )    # colon on same line
23994                     && label_ok()
23995                   )
23996                 {
23997                     if ( $tok !~ /[A-Z]/ ) {
23998                         push @{ $tokenizer_self->{_rlower_case_labels_at} },
23999                           $input_line_number;
24000                     }
24001                     $type = 'J';
24002                     $tok .= ':';
24003                     $i = $i_next;
24004                     next;
24005                 }
24006
24007                 #      'sub' || 'package'
24008                 elsif ( $is_sub_package{$tok_kw} ) {
24009                     error_if_expecting_OPERATOR()
24010                       if ( $expecting == OPERATOR );
24011                     scan_id();
24012                 }
24013
24014                 # Note on token types for format, __DATA__, __END__:
24015                 # It simplifies things to give these type ';', so that when we
24016                 # start rescanning we will be expecting a token of type TERM.
24017                 # We will switch to type 'k' before outputting the tokens.
24018                 elsif ( $is_format_END_DATA{$tok_kw} ) {
24019                     $type = ';';    # make tokenizer look for TERM next
24020                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
24021                     last;
24022                 }
24023
24024                 elsif ( $is_keyword{$tok_kw} ) {
24025                     $type = 'k';
24026
24027                     # Since for and foreach may not be followed immediately
24028                     # by an opening paren, we have to remember which keyword
24029                     # is associated with the next '('
24030                     if ( $is_for_foreach{$tok} ) {
24031                         if ( new_statement_ok() ) {
24032                             $want_paren = $tok;
24033                         }
24034                     }
24035
24036                     # recognize 'use' statements, which are special
24037                     elsif ( $is_use_require{$tok} ) {
24038                         $statement_type = $tok;
24039                         error_if_expecting_OPERATOR()
24040                           if ( $expecting == OPERATOR );
24041                     }
24042
24043                     # remember my and our to check for trailing ": shared"
24044                     elsif ( $is_my_our{$tok} ) {
24045                         $statement_type = $tok;
24046                     }
24047
24048                     # Check for misplaced 'elsif' and 'else', but allow isolated
24049                     # else or elsif blocks to be formatted.  This is indicated
24050                     # by a last noblank token of ';'
24051                     elsif ( $tok eq 'elsif' ) {
24052                         if (   $last_nonblank_token ne ';'
24053                             && $last_nonblank_block_type !~
24054                             /^(if|elsif|unless)$/ )
24055                         {
24056                             warning(
24057 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
24058                             );
24059                         }
24060                     }
24061                     elsif ( $tok eq 'else' ) {
24062
24063                         # patched for SWITCH/CASE
24064                         if (   $last_nonblank_token ne ';'
24065                             && $last_nonblank_block_type !~
24066                             /^(if|elsif|unless|case|when)$/ )
24067                         {
24068                             warning(
24069 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
24070                             );
24071                         }
24072                     }
24073                     elsif ( $tok eq 'continue' ) {
24074                         if (   $last_nonblank_token ne ';'
24075                             && $last_nonblank_block_type !~
24076                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
24077                         {
24078
24079                             # note: ';' '{' and '}' in list above
24080                             # because continues can follow bare blocks;
24081                             # ':' is labeled block
24082                             #
24083                             ############################################
24084                             # NOTE: This check has been deactivated because
24085                             # continue has an alternative usage for given/when
24086                             # blocks in perl 5.10
24087                             ## warning("'$tok' should follow a block\n");
24088                             ############################################
24089                         }
24090                     }
24091
24092                     # patch for SWITCH/CASE if 'case' and 'when are
24093                     # treated as keywords.
24094                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
24095                         $statement_type = $tok;    # next '{' is block
24096                     }
24097
24098                     # indent trailing if/unless/while/until
24099                     # outdenting will be handled by later indentation loop
24100                     if (   $tok =~ /^(if|unless|while|until)$/
24101                         && $next_nonblank_token ne '(' )
24102                     {
24103                         $indent_flag = 1;
24104                     }
24105                 }
24106
24107                 # check for inline label following
24108                 #         /^(redo|last|next|goto)$/
24109                 elsif (( $last_nonblank_type eq 'k' )
24110                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
24111                 {
24112                     $type = 'j';
24113                     next;
24114                 }
24115
24116                 # something else --
24117                 else {
24118
24119                     scan_bare_identifier();
24120                     if ( $type eq 'w' ) {
24121
24122                         if ( $expecting == OPERATOR ) {
24123
24124                             # don't complain about possible indirect object
24125                             # notation.
24126                             # For example:
24127                             #   package main;
24128                             #   sub new($) { ... }
24129                             #   $b = new A::;  # calls A::new
24130                             #   $c = new A;    # same thing but suspicious
24131                             # This will call A::new but we have a 'new' in
24132                             # main:: which looks like a constant.
24133                             #
24134                             if ( $last_nonblank_type eq 'C' ) {
24135                                 if ( $tok !~ /::$/ ) {
24136                                     complain(<<EOM);
24137 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
24138        Maybe indirectet object notation?
24139 EOM
24140                                 }
24141                             }
24142                             else {
24143                                 error_if_expecting_OPERATOR("bareword");
24144                             }
24145                         }
24146
24147                         # mark bare words immediately followed by a paren as
24148                         # functions
24149                         $next_tok = $$rtokens[ $i + 1 ];
24150                         if ( $next_tok eq '(' ) {
24151                             $type = 'U';
24152                         }
24153
24154                         # underscore after file test operator is file handle
24155                         if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
24156                             $type = 'Z';
24157                         }
24158
24159                         # patch for SWITCH/CASE if 'case' and 'when are
24160                         # not treated as keywords:
24161                         if (
24162                             (
24163                                    $tok eq 'case'
24164                                 && $brace_type[$brace_depth] eq 'switch'
24165                             )
24166                             || (   $tok eq 'when'
24167                                 && $brace_type[$brace_depth] eq 'given' )
24168                           )
24169                         {
24170                             $statement_type = $tok;    # next '{' is block
24171                             $type = 'k';    # for keyword syntax coloring
24172                         }
24173
24174                         # patch for SWITCH/CASE if switch and given not keywords
24175                         # Switch is not a perl 5 keyword, but we will gamble
24176                         # and mark switch followed by paren as a keyword.  This
24177                         # is only necessary to get html syntax coloring nice,
24178                         # and does not commit this as being a switch/case.
24179                         if ( $next_nonblank_token eq '('
24180                             && ( $tok eq 'switch' || $tok eq 'given' ) )
24181                         {
24182                             $type = 'k';    # for keyword syntax coloring
24183                         }
24184                     }
24185                 }
24186             }
24187
24188             ###############################################################
24189             # section 2: strings of digits
24190             ###############################################################
24191             elsif ( $pre_type eq 'd' ) {
24192                 $expecting = operator_expected( $prev_type, $tok, $next_type );
24193                 error_if_expecting_OPERATOR("Number")
24194                   if ( $expecting == OPERATOR );
24195                 my $number = scan_number();
24196                 if ( !defined($number) ) {
24197
24198                     # shouldn't happen - we should always get a number
24199                     warning("non-number beginning with digit--program bug\n");
24200                     report_definite_bug();
24201                 }
24202             }
24203
24204             ###############################################################
24205             # section 3: all other tokens
24206             ###############################################################
24207
24208             else {
24209                 last if ( $tok eq '#' );
24210                 my $code = $tokenization_code->{$tok};
24211                 if ($code) {
24212                     $expecting =
24213                       operator_expected( $prev_type, $tok, $next_type );
24214                     $code->();
24215                     redo if $in_quote;
24216                 }
24217             }
24218         }
24219
24220         # -----------------------------
24221         # end of main tokenization loop
24222         # -----------------------------
24223
24224         if ( $i_tok >= 0 ) {
24225             $routput_token_type->[$i_tok]     = $type;
24226             $routput_block_type->[$i_tok]     = $block_type;
24227             $routput_container_type->[$i_tok] = $container_type;
24228             $routput_type_sequence->[$i_tok]  = $type_sequence;
24229             $routput_indent_flag->[$i_tok]    = $indent_flag;
24230         }
24231
24232         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
24233             $last_last_nonblank_token          = $last_nonblank_token;
24234             $last_last_nonblank_type           = $last_nonblank_type;
24235             $last_last_nonblank_block_type     = $last_nonblank_block_type;
24236             $last_last_nonblank_container_type = $last_nonblank_container_type;
24237             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
24238             $last_nonblank_token               = $tok;
24239             $last_nonblank_type                = $type;
24240             $last_nonblank_block_type          = $block_type;
24241             $last_nonblank_container_type      = $container_type;
24242             $last_nonblank_type_sequence       = $type_sequence;
24243             $last_nonblank_prototype           = $prototype;
24244         }
24245
24246         # reset indentation level if necessary at a sub or package
24247         # in an attempt to recover from a nesting error
24248         if ( $level_in_tokenizer < 0 ) {
24249             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
24250                 reset_indentation_level(0);
24251                 brace_warning("resetting level to 0 at $1 $2\n");
24252             }
24253         }
24254
24255         # all done tokenizing this line ...
24256         # now prepare the final list of tokens and types
24257
24258         my @token_type     = ();   # stack of output token types
24259         my @block_type     = ();   # stack of output code block types
24260         my @container_type = ();   # stack of output code container types
24261         my @type_sequence  = ();   # stack of output type sequence numbers
24262         my @tokens         = ();   # output tokens
24263         my @levels         = ();   # structural brace levels of output tokens
24264         my @slevels        = ();   # secondary nesting levels of output tokens
24265         my @nesting_tokens = ();   # string of tokens leading to this depth
24266         my @nesting_types  = ();   # string of token types leading to this depth
24267         my @nesting_blocks = ();   # string of block types leading to this depth
24268         my @nesting_lists  = ();   # string of list types leading to this depth
24269         my @ci_string = ();  # string needed to compute continuation indentation
24270         my @container_environment = ();    # BLOCK or LIST
24271         my $container_environment = '';
24272         my $im                    = -1;    # previous $i value
24273         my $num;
24274         my $ci_string_sum = ones_count($ci_string_in_tokenizer);
24275
24276 # Computing Token Indentation
24277 #
24278 #     The final section of the tokenizer forms tokens and also computes
24279 #     parameters needed to find indentation.  It is much easier to do it
24280 #     in the tokenizer than elsewhere.  Here is a brief description of how
24281 #     indentation is computed.  Perl::Tidy computes indentation as the sum
24282 #     of 2 terms:
24283 #
24284 #     (1) structural indentation, such as if/else/elsif blocks
24285 #     (2) continuation indentation, such as long parameter call lists.
24286 #
24287 #     These are occasionally called primary and secondary indentation.
24288 #
24289 #     Structural indentation is introduced by tokens of type '{', although
24290 #     the actual tokens might be '{', '(', or '['.  Structural indentation
24291 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
24292 #     is 4 characters if the standard indentation scheme is used.
24293 #
24294 #     Continuation indentation is introduced whenever a line at BLOCK level
24295 #     is broken before its termination.  Default continuation indentation
24296 #     is 2 characters in the standard indentation scheme.
24297 #
24298 #     Both types of indentation may be nested arbitrarily deep and
24299 #     interlaced.  The distinction between the two is somewhat arbitrary.
24300 #
24301 #     For each token, we will define two variables which would apply if
24302 #     the current statement were broken just before that token, so that
24303 #     that token started a new line:
24304 #
24305 #     $level = the structural indentation level,
24306 #     $ci_level = the continuation indentation level
24307 #
24308 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
24309 #     assuming defaults.  However, in some special cases it is customary
24310 #     to modify $ci_level from this strict value.
24311 #
24312 #     The total structural indentation is easy to compute by adding and
24313 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
24314 #     running value of this variable is $level_in_tokenizer.
24315 #
24316 #     The total continuation is much more difficult to compute, and requires
24317 #     several variables.  These veriables are:
24318 #
24319 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
24320 #       each indentation level, if there are intervening open secondary
24321 #       structures just prior to that level.
24322 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
24323 #       if the last token at that level is "continued", meaning that it
24324 #       is not the first token of an expression.
24325 #     $nesting_block_string = a string of 1's and 0's indicating, for each
24326 #       indentation level, if the level is of type BLOCK or not.
24327 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
24328 #     $nesting_list_string = a string of 1's and 0's indicating, for each
24329 #       indentation level, if it is is appropriate for list formatting.
24330 #       If so, continuation indentation is used to indent long list items.
24331 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
24332 #     @{$rslevel_stack} = a stack of total nesting depths at each
24333 #       structural indentation level, where "total nesting depth" means
24334 #       the nesting depth that would occur if every nesting token -- '{', '[',
24335 #       and '(' -- , regardless of context, is used to compute a nesting
24336 #       depth.
24337
24338         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
24339         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
24340
24341         my ( $ci_string_i, $level_i, $nesting_block_string_i,
24342             $nesting_list_string_i, $nesting_token_string_i,
24343             $nesting_type_string_i, );
24344
24345         foreach $i ( @{$routput_token_list} )
24346         {    # scan the list of pre-tokens indexes
24347
24348             # self-checking for valid token types
24349             my $type                    = $routput_token_type->[$i];
24350             my $forced_indentation_flag = $routput_indent_flag->[$i];
24351
24352             # See if we should undo the $forced_indentation_flag.
24353             # Forced indentation after 'if', 'unless', 'while' and 'until'
24354             # expressions without trailing parens is optional and doesn't
24355             # always look good.  It is usually okay for a trailing logical
24356             # expression, but if the expression is a function call, code block,
24357             # or some kind of list it puts in an unwanted extra indentation
24358             # level which is hard to remove.
24359             #
24360             # Example where extra indentation looks ok:
24361             # return 1
24362             #   if $det_a < 0 and $det_b > 0
24363             #       or $det_a > 0 and $det_b < 0;
24364             #
24365             # Example where extra indentation is not needed because
24366             # the eval brace also provides indentation:
24367             # print "not " if defined eval {
24368             #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
24369             # };
24370             #
24371             # The following rule works fairly well:
24372             #   Undo the flag if the end of this line, or start of the next
24373             #   line, is an opening container token or a comma.
24374             # This almost always works, but if not after another pass it will
24375             # be stable.
24376             if ( $forced_indentation_flag && $type eq 'k' ) {
24377                 my $ixlast  = -1;
24378                 my $ilast   = $routput_token_list->[$ixlast];
24379                 my $toklast = $routput_token_type->[$ilast];
24380                 if ( $toklast eq '#' ) {
24381                     $ixlast--;
24382                     $ilast   = $routput_token_list->[$ixlast];
24383                     $toklast = $routput_token_type->[$ilast];
24384                 }
24385                 if ( $toklast eq 'b' ) {
24386                     $ixlast--;
24387                     $ilast   = $routput_token_list->[$ixlast];
24388                     $toklast = $routput_token_type->[$ilast];
24389                 }
24390                 if ( $toklast =~ /^[\{,]$/ ) {
24391                     $forced_indentation_flag = 0;
24392                 }
24393                 else {
24394                     ( $toklast, my $i_next ) =
24395                       find_next_nonblank_token( $max_token_index, $rtokens,
24396                         $max_token_index );
24397                     if ( $toklast =~ /^[\{,]$/ ) {
24398                         $forced_indentation_flag = 0;
24399                     }
24400                 }
24401             }
24402
24403             # if we are already in an indented if, see if we should outdent
24404             if ($indented_if_level) {
24405
24406                 # don't try to nest trailing if's - shouldn't happen
24407                 if ( $type eq 'k' ) {
24408                     $forced_indentation_flag = 0;
24409                 }
24410
24411                 # check for the normal case - outdenting at next ';'
24412                 elsif ( $type eq ';' ) {
24413                     if ( $level_in_tokenizer == $indented_if_level ) {
24414                         $forced_indentation_flag = -1;
24415                         $indented_if_level       = 0;
24416                     }
24417                 }
24418
24419                 # handle case of missing semicolon
24420                 elsif ( $type eq '}' ) {
24421                     if ( $level_in_tokenizer == $indented_if_level ) {
24422                         $indented_if_level = 0;
24423
24424                         # TBD: This could be a subroutine call
24425                         $level_in_tokenizer--;
24426                         if ( @{$rslevel_stack} > 1 ) {
24427                             pop( @{$rslevel_stack} );
24428                         }
24429                         if ( length($nesting_block_string) > 1 )
24430                         {    # true for valid script
24431                             chop $nesting_block_string;
24432                             chop $nesting_list_string;
24433                         }
24434
24435                     }
24436                 }
24437             }
24438
24439             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
24440             $level_i = $level_in_tokenizer;
24441
24442             # This can happen by running perltidy on non-scripts
24443             # although it could also be bug introduced by programming change.
24444             # Perl silently accepts a 032 (^Z) and takes it as the end
24445             if ( !$is_valid_token_type{$type} ) {
24446                 my $val = ord($type);
24447                 warning(
24448                     "unexpected character decimal $val ($type) in script\n");
24449                 $tokenizer_self->{_in_error} = 1;
24450             }
24451
24452             # ----------------------------------------------------------------
24453             # TOKEN TYPE PATCHES
24454             #  output __END__, __DATA__, and format as type 'k' instead of ';'
24455             # to make html colors correct, etc.
24456             my $fix_type = $type;
24457             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
24458
24459             # output anonymous 'sub' as keyword
24460             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
24461
24462             # -----------------------------------------------------------------
24463
24464             $nesting_token_string_i = $nesting_token_string;
24465             $nesting_type_string_i  = $nesting_type_string;
24466             $nesting_block_string_i = $nesting_block_string;
24467             $nesting_list_string_i  = $nesting_list_string;
24468
24469             # set primary indentation levels based on structural braces
24470             # Note: these are set so that the leading braces have a HIGHER
24471             # level than their CONTENTS, which is convenient for indentation
24472             # Also, define continuation indentation for each token.
24473             if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
24474             {
24475
24476                 # use environment before updating
24477                 $container_environment =
24478                     $nesting_block_flag ? 'BLOCK'
24479                   : $nesting_list_flag  ? 'LIST'
24480                   :                       "";
24481
24482                 # if the difference between total nesting levels is not 1,
24483                 # there are intervening non-structural nesting types between
24484                 # this '{' and the previous unclosed '{'
24485                 my $intervening_secondary_structure = 0;
24486                 if ( @{$rslevel_stack} ) {
24487                     $intervening_secondary_structure =
24488                       $slevel_in_tokenizer - $rslevel_stack->[-1];
24489                 }
24490
24491      # Continuation Indentation
24492      #
24493      # Having tried setting continuation indentation both in the formatter and
24494      # in the tokenizer, I can say that setting it in the tokenizer is much,
24495      # much easier.  The formatter already has too much to do, and can't
24496      # make decisions on line breaks without knowing what 'ci' will be at
24497      # arbitrary locations.
24498      #
24499      # But a problem with setting the continuation indentation (ci) here
24500      # in the tokenizer is that we do not know where line breaks will actually
24501      # be.  As a result, we don't know if we should propagate continuation
24502      # indentation to higher levels of structure.
24503      #
24504      # For nesting of only structural indentation, we never need to do this.
24505      # For example, in a long if statement, like this
24506      #
24507      #   if ( !$output_block_type[$i]
24508      #     && ($in_statement_continuation) )
24509      #   {           <--outdented
24510      #       do_something();
24511      #   }
24512      #
24513      # the second line has ci but we do normally give the lines within the BLOCK
24514      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
24515      #
24516      # But consider something like this, where we have created a break after
24517      # an opening paren on line 1, and the paren is not (currently) a
24518      # structural indentation token:
24519      #
24520      # my $file = $menubar->Menubutton(
24521      #   qw/-text File -underline 0 -menuitems/ => [
24522      #       [
24523      #           Cascade    => '~View',
24524      #           -menuitems => [
24525      #           ...
24526      #
24527      # The second line has ci, so it would seem reasonable to propagate it
24528      # down, giving the third line 1 ci + 1 indentation.  This suggests the
24529      # following rule, which is currently used to propagating ci down: if there
24530      # are any non-structural opening parens (or brackets, or braces), before
24531      # an opening structural brace, then ci is propagated down, and otherwise
24532      # not.  The variable $intervening_secondary_structure contains this
24533      # information for the current token, and the string
24534      # "$ci_string_in_tokenizer" is a stack of previous values of this
24535      # variable.
24536
24537                 # save the current states
24538                 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
24539                 $level_in_tokenizer++;
24540
24541                 if ($forced_indentation_flag) {
24542
24543                     # break BEFORE '?' when there is forced indentation
24544                     if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
24545                     if ( $type eq 'k' ) {
24546                         $indented_if_level = $level_in_tokenizer;
24547                     }
24548                 }
24549
24550                 if ( $routput_block_type->[$i] ) {
24551                     $nesting_block_flag = 1;
24552                     $nesting_block_string .= '1';
24553                 }
24554                 else {
24555                     $nesting_block_flag = 0;
24556                     $nesting_block_string .= '0';
24557                 }
24558
24559                 # we will use continuation indentation within containers
24560                 # which are not blocks and not logical expressions
24561                 my $bit = 0;
24562                 if ( !$routput_block_type->[$i] ) {
24563
24564                     # propagate flag down at nested open parens
24565                     if ( $routput_container_type->[$i] eq '(' ) {
24566                         $bit = 1 if $nesting_list_flag;
24567                     }
24568
24569                   # use list continuation if not a logical grouping
24570                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
24571                     else {
24572                         $bit = 1
24573                           unless
24574                             $is_logical_container{ $routput_container_type->[$i]
24575                               };
24576                     }
24577                 }
24578                 $nesting_list_string .= $bit;
24579                 $nesting_list_flag = $bit;
24580
24581                 $ci_string_in_tokenizer .=
24582                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
24583                 $ci_string_sum = ones_count($ci_string_in_tokenizer);
24584                 $continuation_string_in_tokenizer .=
24585                   ( $in_statement_continuation > 0 ) ? '1' : '0';
24586
24587    #  Sometimes we want to give an opening brace continuation indentation,
24588    #  and sometimes not.  For code blocks, we don't do it, so that the leading
24589    #  '{' gets outdented, like this:
24590    #
24591    #   if ( !$output_block_type[$i]
24592    #     && ($in_statement_continuation) )
24593    #   {           <--outdented
24594    #
24595    #  For other types, we will give them continuation indentation.  For example,
24596    #  here is how a list looks with the opening paren indented:
24597    #
24598    #     @LoL =
24599    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
24600    #         [ "homer", "marge", "bart" ], );
24601    #
24602    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
24603
24604                 my $total_ci = $ci_string_sum;
24605                 if (
24606                     !$routput_block_type->[$i]    # patch: skip for BLOCK
24607                     && ($in_statement_continuation)
24608                     && !( $forced_indentation_flag && $type eq ':' )
24609                   )
24610                 {
24611                     $total_ci += $in_statement_continuation
24612                       unless ( $ci_string_in_tokenizer =~ /1$/ );
24613                 }
24614
24615                 $ci_string_i               = $total_ci;
24616                 $in_statement_continuation = 0;
24617             }
24618
24619             elsif ($type eq '}'
24620                 || $type eq 'R'
24621                 || $forced_indentation_flag < 0 )
24622             {
24623
24624                 # only a nesting error in the script would prevent popping here
24625                 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
24626
24627                 $level_i = --$level_in_tokenizer;
24628
24629                 # restore previous level values
24630                 if ( length($nesting_block_string) > 1 )
24631                 {    # true for valid script
24632                     chop $nesting_block_string;
24633                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
24634                     chop $nesting_list_string;
24635                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
24636
24637                     chop $ci_string_in_tokenizer;
24638                     $ci_string_sum = ones_count($ci_string_in_tokenizer);
24639
24640                     $in_statement_continuation =
24641                       chop $continuation_string_in_tokenizer;
24642
24643                     # zero continuation flag at terminal BLOCK '}' which
24644                     # ends a statement.
24645                     if ( $routput_block_type->[$i] ) {
24646
24647                         # ...These include non-anonymous subs
24648                         # note: could be sub ::abc { or sub 'abc
24649                         if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
24650
24651                          # note: older versions of perl require the /gc modifier
24652                          # here or else the \G does not work.
24653                             if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
24654                             {
24655                                 $in_statement_continuation = 0;
24656                             }
24657                         }
24658
24659 # ...and include all block types except user subs with
24660 # block prototypes and these: (sort|grep|map|do|eval)
24661 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
24662                         elsif (
24663                             $is_zero_continuation_block_type{
24664                                 $routput_block_type->[$i] } )
24665                         {
24666                             $in_statement_continuation = 0;
24667                         }
24668
24669                         # ..but these are not terminal types:
24670                         #     /^(sort|grep|map|do|eval)$/ )
24671                         elsif (
24672                             $is_not_zero_continuation_block_type{
24673                                 $routput_block_type->[$i] } )
24674                         {
24675                         }
24676
24677                         # ..and a block introduced by a label
24678                         # /^\w+\s*:$/gc ) {
24679                         elsif ( $routput_block_type->[$i] =~ /:$/ ) {
24680                             $in_statement_continuation = 0;
24681                         }
24682
24683                         # user function with block prototype
24684                         else {
24685                             $in_statement_continuation = 0;
24686                         }
24687                     }
24688
24689                     # If we are in a list, then
24690                     # we must set continuatoin indentation at the closing
24691                     # paren of something like this (paren after $check):
24692                     #     assert(
24693                     #         __LINE__,
24694                     #         ( not defined $check )
24695                     #           or ref $check
24696                     #           or $check eq "new"
24697                     #           or $check eq "old",
24698                     #     );
24699                     elsif ( $tok eq ')' ) {
24700                         $in_statement_continuation = 1
24701                           if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
24702                     }
24703
24704                     elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
24705                 }
24706
24707                 # use environment after updating
24708                 $container_environment =
24709                     $nesting_block_flag ? 'BLOCK'
24710                   : $nesting_list_flag  ? 'LIST'
24711                   :                       "";
24712                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
24713                 $nesting_block_string_i = $nesting_block_string;
24714                 $nesting_list_string_i  = $nesting_list_string;
24715             }
24716
24717             # not a structural indentation type..
24718             else {
24719
24720                 $container_environment =
24721                     $nesting_block_flag ? 'BLOCK'
24722                   : $nesting_list_flag  ? 'LIST'
24723                   :                       "";
24724
24725                 # zero the continuation indentation at certain tokens so
24726                 # that they will be at the same level as its container.  For
24727                 # commas, this simplifies the -lp indentation logic, which
24728                 # counts commas.  For ?: it makes them stand out.
24729                 if ($nesting_list_flag) {
24730                     if ( $type =~ /^[,\?\:]$/ ) {
24731                         $in_statement_continuation = 0;
24732                     }
24733                 }
24734
24735                 # be sure binary operators get continuation indentation
24736                 if (
24737                     $container_environment
24738                     && (   $type eq 'k' && $is_binary_keyword{$tok}
24739                         || $is_binary_type{$type} )
24740                   )
24741                 {
24742                     $in_statement_continuation = 1;
24743                 }
24744
24745                 # continuation indentation is sum of any open ci from previous
24746                 # levels plus the current level
24747                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
24748
24749                 # update continuation flag ...
24750                 # if this isn't a blank or comment..
24751                 if ( $type ne 'b' && $type ne '#' ) {
24752
24753                     # and we are in a BLOCK
24754                     if ($nesting_block_flag) {
24755
24756                         # the next token after a ';' and label starts a new stmt
24757                         if ( $type eq ';' || $type eq 'J' ) {
24758                             $in_statement_continuation = 0;
24759                         }
24760
24761                         # otherwise, we are continuing the current statement
24762                         else {
24763                             $in_statement_continuation = 1;
24764                         }
24765                     }
24766
24767                     # if we are not in a BLOCK..
24768                     else {
24769
24770                         # do not use continuation indentation if not list
24771                         # environment (could be within if/elsif clause)
24772                         if ( !$nesting_list_flag ) {
24773                             $in_statement_continuation = 0;
24774                         }
24775
24776                        # otherwise, the next token after a ',' starts a new term
24777                         elsif ( $type eq ',' ) {
24778                             $in_statement_continuation = 0;
24779                         }
24780
24781                         # otherwise, we are continuing the current term
24782                         else {
24783                             $in_statement_continuation = 1;
24784                         }
24785                     }
24786                 }
24787             }
24788
24789             if ( $level_in_tokenizer < 0 ) {
24790                 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
24791                     $tokenizer_self->{_saw_negative_indentation} = 1;
24792                     warning("Starting negative indentation\n");
24793                 }
24794             }
24795
24796             # set secondary nesting levels based on all continment token types
24797             # Note: these are set so that the nesting depth is the depth
24798             # of the PREVIOUS TOKEN, which is convenient for setting
24799             # the stength of token bonds
24800             my $slevel_i = $slevel_in_tokenizer;
24801
24802             #    /^[L\{\(\[]$/
24803             if ( $is_opening_type{$type} ) {
24804                 $slevel_in_tokenizer++;
24805                 $nesting_token_string .= $tok;
24806                 $nesting_type_string  .= $type;
24807             }
24808
24809             #       /^[R\}\)\]]$/
24810             elsif ( $is_closing_type{$type} ) {
24811                 $slevel_in_tokenizer--;
24812                 my $char = chop $nesting_token_string;
24813
24814                 if ( $char ne $matching_start_token{$tok} ) {
24815                     $nesting_token_string .= $char . $tok;
24816                     $nesting_type_string  .= $type;
24817                 }
24818                 else {
24819                     chop $nesting_type_string;
24820                 }
24821             }
24822
24823             push( @block_type,            $routput_block_type->[$i] );
24824             push( @ci_string,             $ci_string_i );
24825             push( @container_environment, $container_environment );
24826             push( @container_type,        $routput_container_type->[$i] );
24827             push( @levels,                $level_i );
24828             push( @nesting_tokens,        $nesting_token_string_i );
24829             push( @nesting_types,         $nesting_type_string_i );
24830             push( @slevels,               $slevel_i );
24831             push( @token_type,            $fix_type );
24832             push( @type_sequence,         $routput_type_sequence->[$i] );
24833             push( @nesting_blocks,        $nesting_block_string );
24834             push( @nesting_lists,         $nesting_list_string );
24835
24836             # now form the previous token
24837             if ( $im >= 0 ) {
24838                 $num =
24839                   $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
24840
24841                 if ( $num > 0 ) {
24842                     push( @tokens,
24843                         substr( $input_line, $$rtoken_map[$im], $num ) );
24844                 }
24845             }
24846             $im = $i;
24847         }
24848
24849         $num = length($input_line) - $$rtoken_map[$im];    # make the last token
24850         if ( $num > 0 ) {
24851             push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
24852         }
24853
24854         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
24855         $tokenizer_self->{_in_quote}          = $in_quote;
24856         $tokenizer_self->{_quote_target} =
24857           $in_quote ? matching_end_token($quote_character) : "";
24858         $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
24859
24860         $line_of_tokens->{_rtoken_type}            = \@token_type;
24861         $line_of_tokens->{_rtokens}                = \@tokens;
24862         $line_of_tokens->{_rblock_type}            = \@block_type;
24863         $line_of_tokens->{_rcontainer_type}        = \@container_type;
24864         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
24865         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
24866         $line_of_tokens->{_rlevels}                = \@levels;
24867         $line_of_tokens->{_rslevels}               = \@slevels;
24868         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
24869         $line_of_tokens->{_rci_levels}             = \@ci_string;
24870         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
24871
24872         return;
24873     }
24874 }    # end tokenize_this_line
24875
24876 #########i#############################################################
24877 # Tokenizer routines which assist in identifying token types
24878 #######################################################################
24879
24880 sub operator_expected {
24881
24882     # Many perl symbols have two or more meanings.  For example, '<<'
24883     # can be a shift operator or a here-doc operator.  The
24884     # interpretation of these symbols depends on the current state of
24885     # the tokenizer, which may either be expecting a term or an
24886     # operator.  For this example, a << would be a shift if an operator
24887     # is expected, and a here-doc if a term is expected.  This routine
24888     # is called to make this decision for any current token.  It returns
24889     # one of three possible values:
24890     #
24891     #     OPERATOR - operator expected (or at least, not a term)
24892     #     UNKNOWN  - can't tell
24893     #     TERM     - a term is expected (or at least, not an operator)
24894     #
24895     # The decision is based on what has been seen so far.  This
24896     # information is stored in the "$last_nonblank_type" and
24897     # "$last_nonblank_token" variables.  For example, if the
24898     # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
24899     # if $last_nonblank_type is 'n' (numeric), we are expecting an
24900     # OPERATOR.
24901     #
24902     # If a UNKNOWN is returned, the calling routine must guess. A major
24903     # goal of this tokenizer is to minimize the possiblity of returning
24904     # UNKNOWN, because a wrong guess can spoil the formatting of a
24905     # script.
24906     #
24907     # adding NEW_TOKENS: it is critically important that this routine be
24908     # updated to allow it to determine if an operator or term is to be
24909     # expected after the new token.  Doing this simply involves adding
24910     # the new token character to one of the regexes in this routine or
24911     # to one of the hash lists
24912     # that it uses, which are initialized in the BEGIN section.
24913     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
24914     # $statement_type
24915
24916     my ( $prev_type, $tok, $next_type ) = @_;
24917
24918     my $op_expected = UNKNOWN;
24919
24920 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
24921
24922 # Note: function prototype is available for token type 'U' for future
24923 # program development.  It contains the leading and trailing parens,
24924 # and no blanks.  It might be used to eliminate token type 'C', for
24925 # example (prototype = '()'). Thus:
24926 # if ($last_nonblank_type eq 'U') {
24927 #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
24928 # }
24929
24930     # A possible filehandle (or object) requires some care...
24931     if ( $last_nonblank_type eq 'Z' ) {
24932
24933         # angle.t
24934         if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
24935             $op_expected = UNKNOWN;
24936         }
24937
24938         # For possible file handle like "$a", Perl uses weird parsing rules.
24939         # For example:
24940         # print $a/2,"/hi";   - division
24941         # print $a / 2,"/hi"; - division
24942         # print $a/ 2,"/hi";  - division
24943         # print $a /2,"/hi";  - pattern (and error)!
24944         elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
24945             $op_expected = TERM;
24946         }
24947
24948         # Note when an operation is being done where a
24949         # filehandle might be expected, since a change in whitespace
24950         # could change the interpretation of the statement.
24951         else {
24952             if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
24953                 complain("operator in print statement not recommended\n");
24954                 $op_expected = OPERATOR;
24955             }
24956         }
24957     }
24958
24959     # handle something after 'do' and 'eval'
24960     elsif ( $is_block_operator{$last_nonblank_token} ) {
24961
24962         # something like $a = eval "expression";
24963         #                          ^
24964         if ( $last_nonblank_type eq 'k' ) {
24965             $op_expected = TERM;    # expression or list mode following keyword
24966         }
24967
24968         # something like $a = do { BLOCK } / 2;
24969         #                                  ^
24970         else {
24971             $op_expected = OPERATOR;    # block mode following }
24972         }
24973     }
24974
24975     # handle bare word..
24976     elsif ( $last_nonblank_type eq 'w' ) {
24977
24978         # unfortunately, we can't tell what type of token to expect next
24979         # after most bare words
24980         $op_expected = UNKNOWN;
24981     }
24982
24983     # operator, but not term possible after these types
24984     # Note: moved ')' from type to token because parens in list context
24985     # get marked as '{' '}' now.  This is a minor glitch in the following:
24986     #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
24987     #
24988     elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
24989         || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
24990     {
24991         $op_expected = OPERATOR;
24992
24993         # in a 'use' statement, numbers and v-strings are not true
24994         # numbers, so to avoid incorrect error messages, we will
24995         # mark them as unknown for now (use.t)
24996         # TODO: it would be much nicer to create a new token V for VERSION
24997         # number in a use statement.  Then this could be a check on type V
24998         # and related patches which change $statement_type for '=>'
24999         # and ',' could be removed.  Further, it would clean things up to
25000         # scan the 'use' statement with a separate subroutine.
25001         if (   ( $statement_type eq 'use' )
25002             && ( $last_nonblank_type =~ /^[nv]$/ ) )
25003         {
25004             $op_expected = UNKNOWN;
25005         }
25006     }
25007
25008     # no operator after many keywords, such as "die", "warn", etc
25009     elsif ( $expecting_term_token{$last_nonblank_token} ) {
25010
25011         # patch for dor.t (defined or).
25012         # perl functions which may be unary operators
25013         # TODO: This list is incomplete, and these should be put
25014         # into a hash.
25015         if (   $tok eq '/'
25016             && $next_type          eq '/'
25017             && $last_nonblank_type eq 'k'
25018             && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
25019         {
25020             $op_expected = OPERATOR;
25021         }
25022         else {
25023             $op_expected = TERM;
25024         }
25025     }
25026
25027     # no operator after things like + - **  (i.e., other operators)
25028     elsif ( $expecting_term_types{$last_nonblank_type} ) {
25029         $op_expected = TERM;
25030     }
25031
25032     # a few operators, like "time", have an empty prototype () and so
25033     # take no parameters but produce a value to operate on
25034     elsif ( $expecting_operator_token{$last_nonblank_token} ) {
25035         $op_expected = OPERATOR;
25036     }
25037
25038     # post-increment and decrement produce values to be operated on
25039     elsif ( $expecting_operator_types{$last_nonblank_type} ) {
25040         $op_expected = OPERATOR;
25041     }
25042
25043     # no value to operate on after sub block
25044     elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
25045
25046     # a right brace here indicates the end of a simple block.
25047     # all non-structural right braces have type 'R'
25048     # all braces associated with block operator keywords have been given those
25049     # keywords as "last_nonblank_token" and caught above.
25050     # (This statement is order dependent, and must come after checking
25051     # $last_nonblank_token).
25052     elsif ( $last_nonblank_type eq '}' ) {
25053
25054         # patch for dor.t (defined or).
25055         if (   $tok eq '/'
25056             && $next_type eq '/'
25057             && $last_nonblank_token eq ']' )
25058         {
25059             $op_expected = OPERATOR;
25060         }
25061         else {
25062             $op_expected = TERM;
25063         }
25064     }
25065
25066     # something else..what did I forget?
25067     else {
25068
25069         # collecting diagnostics on unknown operator types..see what was missed
25070         $op_expected = UNKNOWN;
25071         write_diagnostics(
25072 "OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
25073         );
25074     }
25075
25076     TOKENIZER_DEBUG_FLAG_EXPECT && do {
25077         print
25078 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
25079     };
25080     return $op_expected;
25081 }
25082
25083 sub new_statement_ok {
25084
25085     # return true if the current token can start a new statement
25086     # USES GLOBAL VARIABLES: $last_nonblank_type
25087
25088     return label_ok()    # a label would be ok here
25089
25090       || $last_nonblank_type eq 'J';    # or we follow a label
25091
25092 }
25093
25094 sub label_ok {
25095
25096     # Decide if a bare word followed by a colon here is a label
25097     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25098     # $brace_depth, @brace_type
25099
25100     # if it follows an opening or closing code block curly brace..
25101     if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
25102         && $last_nonblank_type eq $last_nonblank_token )
25103     {
25104
25105         # it is a label if and only if the curly encloses a code block
25106         return $brace_type[$brace_depth];
25107     }
25108
25109     # otherwise, it is a label if and only if it follows a ';'
25110     # (real or fake)
25111     else {
25112         return ( $last_nonblank_type eq ';' );
25113     }
25114 }
25115
25116 sub code_block_type {
25117
25118     # Decide if this is a block of code, and its type.
25119     # Must be called only when $type = $token = '{'
25120     # The problem is to distinguish between the start of a block of code
25121     # and the start of an anonymous hash reference
25122     # Returns "" if not code block, otherwise returns 'last_nonblank_token'
25123     # to indicate the type of code block.  (For example, 'last_nonblank_token'
25124     # might be 'if' for an if block, 'else' for an else block, etc).
25125     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
25126     # $last_nonblank_block_type, $brace_depth, @brace_type
25127
25128     # handle case of multiple '{'s
25129
25130 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
25131
25132     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25133     if (   $last_nonblank_token eq '{'
25134         && $last_nonblank_type eq $last_nonblank_token )
25135     {
25136
25137         # opening brace where a statement may appear is probably
25138         # a code block but might be and anonymous hash reference
25139         if ( $brace_type[$brace_depth] ) {
25140             return decide_if_code_block( $i, $rtokens, $rtoken_type,
25141                 $max_token_index );
25142         }
25143
25144         # cannot start a code block within an anonymous hash
25145         else {
25146             return "";
25147         }
25148     }
25149
25150     elsif ( $last_nonblank_token eq ';' ) {
25151
25152         # an opening brace where a statement may appear is probably
25153         # a code block but might be and anonymous hash reference
25154         return decide_if_code_block( $i, $rtokens, $rtoken_type,
25155             $max_token_index );
25156     }
25157
25158     # handle case of '}{'
25159     elsif ($last_nonblank_token eq '}'
25160         && $last_nonblank_type eq $last_nonblank_token )
25161     {
25162
25163         # a } { situation ...
25164         # could be hash reference after code block..(blktype1.t)
25165         if ($last_nonblank_block_type) {
25166             return decide_if_code_block( $i, $rtokens, $rtoken_type,
25167                 $max_token_index );
25168         }
25169
25170         # must be a block if it follows a closing hash reference
25171         else {
25172             return $last_nonblank_token;
25173         }
25174     }
25175
25176     # NOTE: braces after type characters start code blocks, but for
25177     # simplicity these are not identified as such.  See also
25178     # sub is_non_structural_brace.
25179     # elsif ( $last_nonblank_type eq 't' ) {
25180     #    return $last_nonblank_token;
25181     # }
25182
25183     # brace after label:
25184     elsif ( $last_nonblank_type eq 'J' ) {
25185         return $last_nonblank_token;
25186     }
25187
25188 # otherwise, look at previous token.  This must be a code block if
25189 # it follows any of these:
25190 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
25191     elsif ( $is_code_block_token{$last_nonblank_token} ) {
25192
25193         # Bug Patch: Note that the opening brace after the 'if' in the following
25194         # snippet is an anonymous hash ref and not a code block!
25195         #   print 'hi' if { x => 1, }->{x};
25196         # We can identify this situation because the last nonblank type
25197         # will be a keyword (instead of a closing peren)
25198         if (   $last_nonblank_token =~ /^(if|unless)$/
25199             && $last_nonblank_type eq 'k' )
25200         {
25201             return "";
25202         }
25203         else {
25204             return $last_nonblank_token;
25205         }
25206     }
25207
25208     # or a sub definition
25209     elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
25210         && $last_nonblank_token =~ /^sub\b/ )
25211     {
25212         return $last_nonblank_token;
25213     }
25214
25215     # user-defined subs with block parameters (like grep/map/eval)
25216     elsif ( $last_nonblank_type eq 'G' ) {
25217         return $last_nonblank_token;
25218     }
25219
25220     # check bareword
25221     elsif ( $last_nonblank_type eq 'w' ) {
25222         return decide_if_code_block( $i, $rtokens, $rtoken_type,
25223             $max_token_index );
25224     }
25225
25226     # anything else must be anonymous hash reference
25227     else {
25228         return "";
25229     }
25230 }
25231
25232 sub decide_if_code_block {
25233
25234     # USES GLOBAL VARIABLES: $last_nonblank_token
25235     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25236     my ( $next_nonblank_token, $i_next ) =
25237       find_next_nonblank_token( $i, $rtokens, $max_token_index );
25238
25239     # we are at a '{' where a statement may appear.
25240     # We must decide if this brace starts an anonymous hash or a code
25241     # block.
25242     # return "" if anonymous hash, and $last_nonblank_token otherwise
25243
25244     # initialize to be code BLOCK
25245     my $code_block_type = $last_nonblank_token;
25246
25247     # Check for the common case of an empty anonymous hash reference:
25248     # Maybe something like sub { { } }
25249     if ( $next_nonblank_token eq '}' ) {
25250         $code_block_type = "";
25251     }
25252
25253     else {
25254
25255         # To guess if this '{' is an anonymous hash reference, look ahead
25256         # and test as follows:
25257         #
25258         # it is a hash reference if next come:
25259         #   - a string or digit followed by a comma or =>
25260         #   - bareword followed by =>
25261         # otherwise it is a code block
25262         #
25263         # Examples of anonymous hash ref:
25264         # {'aa',};
25265         # {1,2}
25266         #
25267         # Examples of code blocks:
25268         # {1; print "hello\n", 1;}
25269         # {$a,1};
25270
25271         # We are only going to look ahead one more (nonblank/comment) line.
25272         # Strange formatting could cause a bad guess, but that's unlikely.
25273         my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
25274         my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
25275         my ( $rpre_tokens, $rpre_types ) =
25276           peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
25277                                                        # generous, and prevents
25278                                                        # wasting lots of
25279                                                        # time in mangled files
25280         if ( defined($rpre_types) && @$rpre_types ) {
25281             push @pre_types,  @$rpre_types;
25282             push @pre_tokens, @$rpre_tokens;
25283         }
25284
25285         # put a sentinal token to simplify stopping the search
25286         push @pre_types, '}';
25287
25288         my $jbeg = 0;
25289         $jbeg = 1 if $pre_types[0] eq 'b';
25290
25291         # first look for one of these
25292         #  - bareword
25293         #  - bareword with leading -
25294         #  - digit
25295         #  - quoted string
25296         my $j = $jbeg;
25297         if ( $pre_types[$j] =~ /^[\'\"]/ ) {
25298
25299             # find the closing quote; don't worry about escapes
25300             my $quote_mark = $pre_types[$j];
25301             for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
25302                 if ( $pre_types[$k] eq $quote_mark ) {
25303                     $j = $k + 1;
25304                     my $next = $pre_types[$j];
25305                     last;
25306                 }
25307             }
25308         }
25309         elsif ( $pre_types[$j] eq 'd' ) {
25310             $j++;
25311         }
25312         elsif ( $pre_types[$j] eq 'w' ) {
25313             unless ( $is_keyword{ $pre_tokens[$j] } ) {
25314                 $j++;
25315             }
25316         }
25317         elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
25318             $j++;
25319         }
25320         if ( $j > $jbeg ) {
25321
25322             $j++ if $pre_types[$j] eq 'b';
25323
25324             # it's a hash ref if a comma or => follow next
25325             if ( $pre_types[$j] eq ','
25326                 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
25327             {
25328                 $code_block_type = "";
25329             }
25330         }
25331     }
25332
25333     return $code_block_type;
25334 }
25335
25336 sub unexpected {
25337
25338     # report unexpected token type and show where it is
25339     # USES GLOBAL VARIABLES: $tokenizer_self
25340     my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
25341         $rpretoken_type, $input_line )
25342       = @_;
25343
25344     if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
25345         my $msg = "found $found where $expecting expected";
25346         my $pos = $$rpretoken_map[$i_tok];
25347         interrupt_logfile();
25348         my $input_line_number = $tokenizer_self->{_last_line_number};
25349         my ( $offset, $numbered_line, $underline ) =
25350           make_numbered_line( $input_line_number, $input_line, $pos );
25351         $underline = write_on_underline( $underline, $pos - $offset, '^' );
25352
25353         my $trailer = "";
25354         if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
25355             my $pos_prev = $$rpretoken_map[$last_nonblank_i];
25356             my $num;
25357             if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
25358                 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
25359             }
25360             else {
25361                 $num = $pos - $pos_prev;
25362             }
25363             if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
25364
25365             $underline =
25366               write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
25367             $trailer = " (previous token underlined)";
25368         }
25369         warning( $numbered_line . "\n" );
25370         warning( $underline . "\n" );
25371         warning( $msg . $trailer . "\n" );
25372         resume_logfile();
25373     }
25374 }
25375
25376 sub is_non_structural_brace {
25377
25378     # Decide if a brace or bracket is structural or non-structural
25379     # by looking at the previous token and type
25380     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
25381
25382     # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
25383     # Tentatively deactivated because it caused the wrong operator expectation
25384     # for this code:
25385     #      $user = @vars[1] / 100;
25386     # Must update sub operator_expected before re-implementing.
25387     # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
25388     #    return 0;
25389     # }
25390
25391     # NOTE: braces after type characters start code blocks, but for
25392     # simplicity these are not identified as such.  See also
25393     # sub code_block_type
25394     # if ($last_nonblank_type eq 't') {return 0}
25395
25396     # otherwise, it is non-structural if it is decorated
25397     # by type information.
25398     # For example, the '{' here is non-structural:   ${xxx}
25399     (
25400         $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
25401
25402           # or if we follow a hash or array closing curly brace or bracket
25403           # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
25404           # because the first '}' would have been given type 'R'
25405           || $last_nonblank_type =~ /^([R\]])$/
25406     );
25407 }
25408
25409 #########i#############################################################
25410 # Tokenizer routines for tracking container nesting depths
25411 #######################################################################
25412
25413 # The following routines keep track of nesting depths of the nesting
25414 # types, ( [ { and ?.  This is necessary for determining the indentation
25415 # level, and also for debugging programs.  Not only do they keep track of
25416 # nesting depths of the individual brace types, but they check that each
25417 # of the other brace types is balanced within matching pairs.  For
25418 # example, if the program sees this sequence:
25419 #
25420 #         {  ( ( ) }
25421 #
25422 # then it can determine that there is an extra left paren somewhere
25423 # between the { and the }.  And so on with every other possible
25424 # combination of outer and inner brace types.  For another
25425 # example:
25426 #
25427 #         ( [ ..... ]  ] )
25428 #
25429 # which has an extra ] within the parens.
25430 #
25431 # The brace types have indexes 0 .. 3 which are indexes into
25432 # the matrices.
25433 #
25434 # The pair ? : are treated as just another nesting type, with ? acting
25435 # as the opening brace and : acting as the closing brace.
25436 #
25437 # The matrix
25438 #
25439 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
25440 #
25441 # saves the nesting depth of brace type $b (where $b is either of the other
25442 # nesting types) when brace type $a enters a new depth.  When this depth
25443 # decreases, a check is made that the current depth of brace types $b is
25444 # unchanged, or otherwise there must have been an error.  This can
25445 # be very useful for localizing errors, particularly when perl runs to
25446 # the end of a large file (such as this one) and announces that there
25447 # is a problem somewhere.
25448 #
25449 # A numerical sequence number is maintained for every nesting type,
25450 # so that each matching pair can be uniquely identified in a simple
25451 # way.
25452
25453 sub increase_nesting_depth {
25454     my ( $aa, $pos ) = @_;
25455
25456     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
25457     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
25458     my $bb;
25459     $current_depth[$aa]++;
25460     $total_depth++;
25461     $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
25462     my $input_line_number = $tokenizer_self->{_last_line_number};
25463     my $input_line        = $tokenizer_self->{_line_text};
25464
25465     # Sequence numbers increment by number of items.  This keeps
25466     # a unique set of numbers but still allows the relative location
25467     # of any type to be determined.
25468     $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
25469     my $seqno = $nesting_sequence_number[$aa];
25470     $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
25471
25472     $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
25473       [ $input_line_number, $input_line, $pos ];
25474
25475     for $bb ( 0 .. $#closing_brace_names ) {
25476         next if ( $bb == $aa );
25477         $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
25478     }
25479
25480     # set a flag for indenting a nested ternary statement
25481     my $indent = 0;
25482     if ( $aa == QUESTION_COLON ) {
25483         $nested_ternary_flag[ $current_depth[$aa] ] = 0;
25484         if ( $current_depth[$aa] > 1 ) {
25485             if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
25486                 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
25487                 if ( $pdepth == $total_depth - 1 ) {
25488                     $indent = 1;
25489                     $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
25490                 }
25491             }
25492         }
25493     }
25494     return ( $seqno, $indent );
25495 }
25496
25497 sub decrease_nesting_depth {
25498
25499     my ( $aa, $pos ) = @_;
25500
25501     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
25502     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
25503     my $bb;
25504     my $seqno             = 0;
25505     my $input_line_number = $tokenizer_self->{_last_line_number};
25506     my $input_line        = $tokenizer_self->{_line_text};
25507
25508     my $outdent = 0;
25509     $total_depth--;
25510     if ( $current_depth[$aa] > 0 ) {
25511
25512         # set a flag for un-indenting after seeing a nested ternary statement
25513         $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
25514         if ( $aa == QUESTION_COLON ) {
25515             $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
25516         }
25517
25518         # check that any brace types $bb contained within are balanced
25519         for $bb ( 0 .. $#closing_brace_names ) {
25520             next if ( $bb == $aa );
25521
25522             unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
25523                 $current_depth[$bb] )
25524             {
25525                 my $diff =
25526                   $current_depth[$bb] -
25527                   $depth_array[$aa][$bb][ $current_depth[$aa] ];
25528
25529                 # don't whine too many times
25530                 my $saw_brace_error = get_saw_brace_error();
25531                 if (
25532                     $saw_brace_error <= MAX_NAG_MESSAGES
25533
25534                     # if too many closing types have occured, we probably
25535                     # already caught this error
25536                     && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
25537                   )
25538                 {
25539                     interrupt_logfile();
25540                     my $rsl =
25541                       $starting_line_of_current_depth[$aa]
25542                       [ $current_depth[$aa] ];
25543                     my $sl  = $$rsl[0];
25544                     my $rel = [ $input_line_number, $input_line, $pos ];
25545                     my $el  = $$rel[0];
25546                     my ($ess);
25547
25548                     if ( $diff == 1 || $diff == -1 ) {
25549                         $ess = '';
25550                     }
25551                     else {
25552                         $ess = 's';
25553                     }
25554                     my $bname =
25555                       ( $diff > 0 )
25556                       ? $opening_brace_names[$bb]
25557                       : $closing_brace_names[$bb];
25558                     write_error_indicator_pair( @$rsl, '^' );
25559                     my $msg = <<"EOM";
25560 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
25561 EOM
25562
25563                     if ( $diff > 0 ) {
25564                         my $rml =
25565                           $starting_line_of_current_depth[$bb]
25566                           [ $current_depth[$bb] ];
25567                         my $ml = $$rml[0];
25568                         $msg .=
25569 "    The most recent un-matched $bname is on line $ml\n";
25570                         write_error_indicator_pair( @$rml, '^' );
25571                     }
25572                     write_error_indicator_pair( @$rel, '^' );
25573                     warning($msg);
25574                     resume_logfile();
25575                 }
25576                 increment_brace_error();
25577             }
25578         }
25579         $current_depth[$aa]--;
25580     }
25581     else {
25582
25583         my $saw_brace_error = get_saw_brace_error();
25584         if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
25585             my $msg = <<"EOM";
25586 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
25587 EOM
25588             indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
25589         }
25590         increment_brace_error();
25591     }
25592     return ( $seqno, $outdent );
25593 }
25594
25595 sub check_final_nesting_depths {
25596     my ($aa);
25597
25598     # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
25599
25600     for $aa ( 0 .. $#closing_brace_names ) {
25601
25602         if ( $current_depth[$aa] ) {
25603             my $rsl =
25604               $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
25605             my $sl  = $$rsl[0];
25606             my $msg = <<"EOM";
25607 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
25608 The most recent un-matched $opening_brace_names[$aa] is on line $sl
25609 EOM
25610             indicate_error( $msg, @$rsl, '^' );
25611             increment_brace_error();
25612         }
25613     }
25614 }
25615
25616 #########i#############################################################
25617 # Tokenizer routines for looking ahead in input stream
25618 #######################################################################
25619
25620 sub peek_ahead_for_n_nonblank_pre_tokens {
25621
25622     # returns next n pretokens if they exist
25623     # returns undef's if hits eof without seeing any pretokens
25624     # USES GLOBAL VARIABLES: $tokenizer_self
25625     my $max_pretokens = shift;
25626     my $line;
25627     my $i = 0;
25628     my ( $rpre_tokens, $rmap, $rpre_types );
25629
25630     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
25631     {
25632         $line =~ s/^\s*//;    # trim leading blanks
25633         next if ( length($line) <= 0 );    # skip blank
25634         next if ( $line =~ /^#/ );         # skip comment
25635         ( $rpre_tokens, $rmap, $rpre_types ) =
25636           pre_tokenize( $line, $max_pretokens );
25637         last;
25638     }
25639     return ( $rpre_tokens, $rpre_types );
25640 }
25641
25642 # look ahead for next non-blank, non-comment line of code
25643 sub peek_ahead_for_nonblank_token {
25644
25645     # USES GLOBAL VARIABLES: $tokenizer_self
25646     my ( $rtokens, $max_token_index ) = @_;
25647     my $line;
25648     my $i = 0;
25649
25650     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
25651     {
25652         $line =~ s/^\s*//;    # trim leading blanks
25653         next if ( length($line) <= 0 );    # skip blank
25654         next if ( $line =~ /^#/ );         # skip comment
25655         my ( $rtok, $rmap, $rtype ) =
25656           pre_tokenize( $line, 2 );        # only need 2 pre-tokens
25657         my $j = $max_token_index + 1;
25658         my $tok;
25659
25660         foreach $tok (@$rtok) {
25661             last if ( $tok =~ "\n" );
25662             $$rtokens[ ++$j ] = $tok;
25663         }
25664         last;
25665     }
25666     return $rtokens;
25667 }
25668
25669 #########i#############################################################
25670 # Tokenizer guessing routines for ambiguous situations
25671 #######################################################################
25672
25673 sub guess_if_pattern_or_conditional {
25674
25675     # this routine is called when we have encountered a ? following an
25676     # unknown bareword, and we must decide if it starts a pattern or not
25677     # input parameters:
25678     #   $i - token index of the ? starting possible pattern
25679     # output parameters:
25680     #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
25681     #   msg = a warning or diagnostic message
25682     # USES GLOBAL VARIABLES: $last_nonblank_token
25683     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25684     my $is_pattern = 0;
25685     my $msg        = "guessing that ? after $last_nonblank_token starts a ";
25686
25687     if ( $i >= $max_token_index ) {
25688         $msg .= "conditional (no end to pattern found on the line)\n";
25689     }
25690     else {
25691         my $ibeg = $i;
25692         $i = $ibeg + 1;
25693         my $next_token = $$rtokens[$i];    # first token after ?
25694
25695         # look for a possible ending ? on this line..
25696         my $in_quote        = 1;
25697         my $quote_depth     = 0;
25698         my $quote_character = '';
25699         my $quote_pos       = 0;
25700         my $quoted_string;
25701         (
25702             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25703             $quoted_string
25704           )
25705           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25706             $quote_pos, $quote_depth, $max_token_index );
25707
25708         if ($in_quote) {
25709
25710             # we didn't find an ending ? on this line,
25711             # so we bias towards conditional
25712             $is_pattern = 0;
25713             $msg .= "conditional (no ending ? on this line)\n";
25714
25715             # we found an ending ?, so we bias towards a pattern
25716         }
25717         else {
25718
25719             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
25720                 $is_pattern = 1;
25721                 $msg .= "pattern (found ending ? and pattern expected)\n";
25722             }
25723             else {
25724                 $msg .= "pattern (uncertain, but found ending ?)\n";
25725             }
25726         }
25727     }
25728     return ( $is_pattern, $msg );
25729 }
25730
25731 sub guess_if_pattern_or_division {
25732
25733     # this routine is called when we have encountered a / following an
25734     # unknown bareword, and we must decide if it starts a pattern or is a
25735     # division
25736     # input parameters:
25737     #   $i - token index of the / starting possible pattern
25738     # output parameters:
25739     #   $is_pattern = 0 if probably division,  =1 if probably a pattern
25740     #   msg = a warning or diagnostic message
25741     # USES GLOBAL VARIABLES: $last_nonblank_token
25742     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25743     my $is_pattern = 0;
25744     my $msg        = "guessing that / after $last_nonblank_token starts a ";
25745
25746     if ( $i >= $max_token_index ) {
25747         "division (no end to pattern found on the line)\n";
25748     }
25749     else {
25750         my $ibeg = $i;
25751         my $divide_expected =
25752           numerator_expected( $i, $rtokens, $max_token_index );
25753         $i = $ibeg + 1;
25754         my $next_token = $$rtokens[$i];    # first token after slash
25755
25756         # look for a possible ending / on this line..
25757         my $in_quote        = 1;
25758         my $quote_depth     = 0;
25759         my $quote_character = '';
25760         my $quote_pos       = 0;
25761         my $quoted_string;
25762         (
25763             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25764             $quoted_string
25765           )
25766           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25767             $quote_pos, $quote_depth, $max_token_index );
25768
25769         if ($in_quote) {
25770
25771             # we didn't find an ending / on this line,
25772             # so we bias towards division
25773             if ( $divide_expected >= 0 ) {
25774                 $is_pattern = 0;
25775                 $msg .= "division (no ending / on this line)\n";
25776             }
25777             else {
25778                 $msg        = "multi-line pattern (division not possible)\n";
25779                 $is_pattern = 1;
25780             }
25781
25782         }
25783
25784         # we found an ending /, so we bias towards a pattern
25785         else {
25786
25787             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
25788
25789                 if ( $divide_expected >= 0 ) {
25790
25791                     if ( $i - $ibeg > 60 ) {
25792                         $msg .= "division (matching / too distant)\n";
25793                         $is_pattern = 0;
25794                     }
25795                     else {
25796                         $msg .= "pattern (but division possible too)\n";
25797                         $is_pattern = 1;
25798                     }
25799                 }
25800                 else {
25801                     $is_pattern = 1;
25802                     $msg .= "pattern (division not possible)\n";
25803                 }
25804             }
25805             else {
25806
25807                 if ( $divide_expected >= 0 ) {
25808                     $is_pattern = 0;
25809                     $msg .= "division (pattern not possible)\n";
25810                 }
25811                 else {
25812                     $is_pattern = 1;
25813                     $msg .=
25814                       "pattern (uncertain, but division would not work here)\n";
25815                 }
25816             }
25817         }
25818     }
25819     return ( $is_pattern, $msg );
25820 }
25821
25822 # try to resolve here-doc vs. shift by looking ahead for
25823 # non-code or the end token (currently only looks for end token)
25824 # returns 1 if it is probably a here doc, 0 if not
25825 sub guess_if_here_doc {
25826
25827     # This is how many lines we will search for a target as part of the
25828     # guessing strategy.  It is a constant because there is probably
25829     # little reason to change it.
25830     # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
25831     # %is_constant,
25832     use constant HERE_DOC_WINDOW => 40;
25833
25834     my $next_token        = shift;
25835     my $here_doc_expected = 0;
25836     my $line;
25837     my $k   = 0;
25838     my $msg = "checking <<";
25839
25840     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
25841     {
25842         chomp $line;
25843
25844         if ( $line =~ /^$next_token$/ ) {
25845             $msg .= " -- found target $next_token ahead $k lines\n";
25846             $here_doc_expected = 1;    # got it
25847             last;
25848         }
25849         last if ( $k >= HERE_DOC_WINDOW );
25850     }
25851
25852     unless ($here_doc_expected) {
25853
25854         if ( !defined($line) ) {
25855             $here_doc_expected = -1;    # hit eof without seeing target
25856             $msg .= " -- must be shift; target $next_token not in file\n";
25857
25858         }
25859         else {                          # still unsure..taking a wild guess
25860
25861             if ( !$is_constant{$current_package}{$next_token} ) {
25862                 $here_doc_expected = 1;
25863                 $msg .=
25864                   " -- guessing it's a here-doc ($next_token not a constant)\n";
25865             }
25866             else {
25867                 $msg .=
25868                   " -- guessing it's a shift ($next_token is a constant)\n";
25869             }
25870         }
25871     }
25872     write_logfile_entry($msg);
25873     return $here_doc_expected;
25874 }
25875
25876 #########i#############################################################
25877 # Tokenizer Routines for scanning identifiers and related items
25878 #######################################################################
25879
25880 sub scan_bare_identifier_do {
25881
25882     # this routine is called to scan a token starting with an alphanumeric
25883     # variable or package separator, :: or '.
25884     # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
25885     # $last_nonblank_type,@paren_type, $paren_depth
25886
25887     my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
25888         $max_token_index )
25889       = @_;
25890     my $i_begin = $i;
25891     my $package = undef;
25892
25893     my $i_beg = $i;
25894
25895     # we have to back up one pretoken at a :: since each : is one pretoken
25896     if ( $tok eq '::' ) { $i_beg-- }
25897     if ( $tok eq '->' ) { $i_beg-- }
25898     my $pos_beg = $$rtoken_map[$i_beg];
25899     pos($input_line) = $pos_beg;
25900
25901     #  Examples:
25902     #   A::B::C
25903     #   A::
25904     #   ::A
25905     #   A'B
25906     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
25907
25908         my $pos  = pos($input_line);
25909         my $numc = $pos - $pos_beg;
25910         $tok = substr( $input_line, $pos_beg, $numc );
25911
25912         # type 'w' includes anything without leading type info
25913         # ($,%,@,*) including something like abc::def::ghi
25914         $type = 'w';
25915
25916         my $sub_name = "";
25917         if ( defined($2) ) { $sub_name = $2; }
25918         if ( defined($1) ) {
25919             $package = $1;
25920
25921             # patch: don't allow isolated package name which just ends
25922             # in the old style package separator (single quote).  Example:
25923             #   use CGI':all';
25924             if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
25925                 $pos--;
25926             }
25927
25928             $package =~ s/\'/::/g;
25929             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
25930             $package =~ s/::$//;
25931         }
25932         else {
25933             $package = $current_package;
25934
25935             if ( $is_keyword{$tok} ) {
25936                 $type = 'k';
25937             }
25938         }
25939
25940         # if it is a bareword..
25941         if ( $type eq 'w' ) {
25942
25943             # check for v-string with leading 'v' type character
25944             # (This seems to have presidence over filehandle, type 'Y')
25945             if ( $tok =~ /^v\d[_\d]*$/ ) {
25946
25947                 # we only have the first part - something like 'v101' -
25948                 # look for more
25949                 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
25950                     $pos  = pos($input_line);
25951                     $numc = $pos - $pos_beg;
25952                     $tok  = substr( $input_line, $pos_beg, $numc );
25953                 }
25954                 $type = 'v';
25955
25956                 # warn if this version can't handle v-strings
25957                 report_v_string($tok);
25958             }
25959
25960             elsif ( $is_constant{$package}{$sub_name} ) {
25961                 $type = 'C';
25962             }
25963
25964             # bareword after sort has implied empty prototype; for example:
25965             # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
25966             # This has priority over whatever the user has specified.
25967             elsif ($last_nonblank_token eq 'sort'
25968                 && $last_nonblank_type eq 'k' )
25969             {
25970                 $type = 'Z';
25971             }
25972
25973             # Note: strangely, perl does not seem to really let you create
25974             # functions which act like eval and do, in the sense that eval
25975             # and do may have operators following the final }, but any operators
25976             # that you create with prototype (&) apparently do not allow
25977             # trailing operators, only terms.  This seems strange.
25978             # If this ever changes, here is the update
25979             # to make perltidy behave accordingly:
25980
25981             # elsif ( $is_block_function{$package}{$tok} ) {
25982             #    $tok='eval'; # patch to do braces like eval  - doesn't work
25983             #    $type = 'k';
25984             #}
25985             # FIXME: This could become a separate type to allow for different
25986             # future behavior:
25987             elsif ( $is_block_function{$package}{$sub_name} ) {
25988                 $type = 'G';
25989             }
25990
25991             elsif ( $is_block_list_function{$package}{$sub_name} ) {
25992                 $type = 'G';
25993             }
25994             elsif ( $is_user_function{$package}{$sub_name} ) {
25995                 $type      = 'U';
25996                 $prototype = $user_function_prototype{$package}{$sub_name};
25997             }
25998
25999             # check for indirect object
26000             elsif (
26001
26002                 # added 2001-03-27: must not be followed immediately by '('
26003                 # see fhandle.t
26004                 ( $input_line !~ m/\G\(/gc )
26005
26006                 # and
26007                 && (
26008
26009                     # preceded by keyword like 'print', 'printf' and friends
26010                     $is_indirect_object_taker{$last_nonblank_token}
26011
26012                     # or preceded by something like 'print(' or 'printf('
26013                     || (
26014                         ( $last_nonblank_token eq '(' )
26015                         && $is_indirect_object_taker{ $paren_type[$paren_depth]
26016                         }
26017
26018                     )
26019                 )
26020               )
26021             {
26022
26023                 # may not be indirect object unless followed by a space
26024                 if ( $input_line =~ m/\G\s+/gc ) {
26025                     $type = 'Y';
26026
26027                     # Abandon Hope ...
26028                     # Perl's indirect object notation is a very bad
26029                     # thing and can cause subtle bugs, especially for
26030                     # beginning programmers.  And I haven't even been
26031                     # able to figure out a sane warning scheme which
26032                     # doesn't get in the way of good scripts.
26033
26034                     # Complain if a filehandle has any lower case
26035                     # letters.  This is suggested good practice.
26036                     # Use 'sub_name' because something like
26037                     # main::MYHANDLE is ok for filehandle
26038                     if ( $sub_name =~ /[a-z]/ ) {
26039
26040                         # could be bug caused by older perltidy if
26041                         # followed by '('
26042                         if ( $input_line =~ m/\G\s*\(/gc ) {
26043                             complain(
26044 "Caution: unknown word '$tok' in indirect object slot\n"
26045                             );
26046                         }
26047                     }
26048                 }
26049
26050                 # bareword not followed by a space -- may not be filehandle
26051                 # (may be function call defined in a 'use' statement)
26052                 else {
26053                     $type = 'Z';
26054                 }
26055             }
26056         }
26057
26058         # Now we must convert back from character position
26059         # to pre_token index.
26060         # I don't think an error flag can occur here ..but who knows
26061         my $error;
26062         ( $i, $error ) =
26063           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26064         if ($error) {
26065             warning("scan_bare_identifier: Possibly invalid tokenization\n");
26066         }
26067     }
26068
26069     # no match but line not blank - could be syntax error
26070     # perl will take '::' alone without complaint
26071     else {
26072         $type = 'w';
26073
26074         # change this warning to log message if it becomes annoying
26075         warning("didn't find identifier after leading ::\n");
26076     }
26077     return ( $i, $tok, $type, $prototype );
26078 }
26079
26080 sub scan_id_do {
26081
26082 # This is the new scanner and will eventually replace scan_identifier.
26083 # Only type 'sub' and 'package' are implemented.
26084 # Token types $ * % @ & -> are not yet implemented.
26085 #
26086 # Scan identifier following a type token.
26087 # The type of call depends on $id_scan_state: $id_scan_state = ''
26088 # for starting call, in which case $tok must be the token defining
26089 # the type.
26090 #
26091 # If the type token is the last nonblank token on the line, a value
26092 # of $id_scan_state = $tok is returned, indicating that further
26093 # calls must be made to get the identifier.  If the type token is
26094 # not the last nonblank token on the line, the identifier is
26095 # scanned and handled and a value of '' is returned.
26096 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
26097 # $statement_type, $tokenizer_self
26098
26099     my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
26100         $max_token_index )
26101       = @_;
26102     my $type = '';
26103     my ( $i_beg, $pos_beg );
26104
26105     #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
26106     #my ($a,$b,$c) = caller;
26107     #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
26108
26109     # on re-entry, start scanning at first token on the line
26110     if ($id_scan_state) {
26111         $i_beg = $i;
26112         $type  = '';
26113     }
26114
26115     # on initial entry, start scanning just after type token
26116     else {
26117         $i_beg         = $i + 1;
26118         $id_scan_state = $tok;
26119         $type          = 't';
26120     }
26121
26122     # find $i_beg = index of next nonblank token,
26123     # and handle empty lines
26124     my $blank_line          = 0;
26125     my $next_nonblank_token = $$rtokens[$i_beg];
26126     if ( $i_beg > $max_token_index ) {
26127         $blank_line = 1;
26128     }
26129     else {
26130
26131         # only a '#' immediately after a '$' is not a comment
26132         if ( $next_nonblank_token eq '#' ) {
26133             unless ( $tok eq '$' ) {
26134                 $blank_line = 1;
26135             }
26136         }
26137
26138         if ( $next_nonblank_token =~ /^\s/ ) {
26139             ( $next_nonblank_token, $i_beg ) =
26140               find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
26141                 $max_token_index );
26142             if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
26143                 $blank_line = 1;
26144             }
26145         }
26146     }
26147
26148     # handle non-blank line; identifier, if any, must follow
26149     unless ($blank_line) {
26150
26151         if ( $id_scan_state eq 'sub' ) {
26152             ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
26153                 $input_line, $i,             $i_beg,
26154                 $tok,        $type,          $rtokens,
26155                 $rtoken_map, $id_scan_state, $max_token_index
26156             );
26157         }
26158
26159         elsif ( $id_scan_state eq 'package' ) {
26160             ( $i, $tok, $type ) =
26161               do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
26162                 $rtoken_map, $max_token_index );
26163             $id_scan_state = '';
26164         }
26165
26166         else {
26167             warning("invalid token in scan_id: $tok\n");
26168             $id_scan_state = '';
26169         }
26170     }
26171
26172     if ( $id_scan_state && ( !defined($type) || !$type ) ) {
26173
26174         # shouldn't happen:
26175         warning(
26176 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
26177         );
26178         report_definite_bug();
26179     }
26180
26181     TOKENIZER_DEBUG_FLAG_NSCAN && do {
26182         print
26183           "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
26184     };
26185     return ( $i, $tok, $type, $id_scan_state );
26186 }
26187
26188 sub check_prototype {
26189     my ( $proto, $package, $subname ) = @_;
26190     return unless ( defined($package) && defined($subname) );
26191     if ( defined($proto) ) {
26192         $proto =~ s/^\s*\(\s*//;
26193         $proto =~ s/\s*\)$//;
26194         if ($proto) {
26195             $is_user_function{$package}{$subname}        = 1;
26196             $user_function_prototype{$package}{$subname} = "($proto)";
26197
26198             # prototypes containing '&' must be treated specially..
26199             if ( $proto =~ /\&/ ) {
26200
26201                 # right curly braces of prototypes ending in
26202                 # '&' may be followed by an operator
26203                 if ( $proto =~ /\&$/ ) {
26204                     $is_block_function{$package}{$subname} = 1;
26205                 }
26206
26207                 # right curly braces of prototypes NOT ending in
26208                 # '&' may NOT be followed by an operator
26209                 elsif ( $proto !~ /\&$/ ) {
26210                     $is_block_list_function{$package}{$subname} = 1;
26211                 }
26212             }
26213         }
26214         else {
26215             $is_constant{$package}{$subname} = 1;
26216         }
26217     }
26218     else {
26219         $is_user_function{$package}{$subname} = 1;
26220     }
26221 }
26222
26223 sub do_scan_package {
26224
26225     # do_scan_package parses a package name
26226     # it is called with $i_beg equal to the index of the first nonblank
26227     # token following a 'package' token.
26228     # USES GLOBAL VARIABLES: $current_package,
26229
26230     my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
26231         $max_token_index )
26232       = @_;
26233     my $package = undef;
26234     my $pos_beg = $$rtoken_map[$i_beg];
26235     pos($input_line) = $pos_beg;
26236
26237     # handle non-blank line; package name, if any, must follow
26238     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
26239         $package = $1;
26240         $package = ( defined($1) && $1 ) ? $1 : 'main';
26241         $package =~ s/\'/::/g;
26242         if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26243         $package =~ s/::$//;
26244         my $pos  = pos($input_line);
26245         my $numc = $pos - $pos_beg;
26246         $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
26247         $type = 'i';
26248
26249         # Now we must convert back from character position
26250         # to pre_token index.
26251         # I don't think an error flag can occur here ..but ?
26252         my $error;
26253         ( $i, $error ) =
26254           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26255         if ($error) { warning("Possibly invalid package\n") }
26256         $current_package = $package;
26257
26258         # check for error
26259         my ( $next_nonblank_token, $i_next ) =
26260           find_next_nonblank_token( $i, $rtokens, $max_token_index );
26261         if ( $next_nonblank_token !~ /^[;\}]$/ ) {
26262             warning(
26263                 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
26264             );
26265         }
26266     }
26267
26268     # no match but line not blank --
26269     # could be a label with name package, like package:  , for example.
26270     else {
26271         $type = 'k';
26272     }
26273
26274     return ( $i, $tok, $type );
26275 }
26276
26277 sub scan_identifier_do {
26278
26279     # This routine assembles tokens into identifiers.  It maintains a
26280     # scan state, id_scan_state.  It updates id_scan_state based upon
26281     # current id_scan_state and token, and returns an updated
26282     # id_scan_state and the next index after the identifier.
26283     # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
26284     # $last_nonblank_type
26285
26286     my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
26287         $expecting )
26288       = @_;
26289     my $i_begin   = $i;
26290     my $type      = '';
26291     my $tok_begin = $$rtokens[$i_begin];
26292     if ( $tok_begin eq ':' ) { $tok_begin = '::' }
26293     my $id_scan_state_begin = $id_scan_state;
26294     my $identifier_begin    = $identifier;
26295     my $tok                 = $tok_begin;
26296     my $message             = "";
26297
26298     # these flags will be used to help figure out the type:
26299     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
26300     my $saw_type;
26301
26302     # allow old package separator (') except in 'use' statement
26303     my $allow_tick = ( $last_nonblank_token ne 'use' );
26304
26305     # get started by defining a type and a state if necessary
26306     unless ($id_scan_state) {
26307         $context = UNKNOWN_CONTEXT;
26308
26309         # fixup for digraph
26310         if ( $tok eq '>' ) {
26311             $tok       = '->';
26312             $tok_begin = $tok;
26313         }
26314         $identifier = $tok;
26315
26316         if ( $tok eq '$' || $tok eq '*' ) {
26317             $id_scan_state = '$';
26318             $context       = SCALAR_CONTEXT;
26319         }
26320         elsif ( $tok eq '%' || $tok eq '@' ) {
26321             $id_scan_state = '$';
26322             $context       = LIST_CONTEXT;
26323         }
26324         elsif ( $tok eq '&' ) {
26325             $id_scan_state = '&';
26326         }
26327         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
26328             $saw_alpha     = 0;     # 'sub' is considered type info here
26329             $id_scan_state = '$';
26330             $identifier .= ' ';     # need a space to separate sub from sub name
26331         }
26332         elsif ( $tok eq '::' ) {
26333             $id_scan_state = 'A';
26334         }
26335         elsif ( $tok =~ /^[A-Za-z_]/ ) {
26336             $id_scan_state = ':';
26337         }
26338         elsif ( $tok eq '->' ) {
26339             $id_scan_state = '$';
26340         }
26341         else {
26342
26343             # shouldn't happen
26344             my ( $a, $b, $c ) = caller;
26345             warning("Program Bug: scan_identifier given bad token = $tok \n");
26346             warning("   called from sub $a  line: $c\n");
26347             report_definite_bug();
26348         }
26349         $saw_type = !$saw_alpha;
26350     }
26351     else {
26352         $i--;
26353         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
26354     }
26355
26356     # now loop to gather the identifier
26357     my $i_save = $i;
26358
26359     while ( $i < $max_token_index ) {
26360         $i_save = $i unless ( $tok =~ /^\s*$/ );
26361         $tok = $$rtokens[ ++$i ];
26362
26363         if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
26364             $tok = '::';
26365             $i++;
26366         }
26367
26368         if ( $id_scan_state eq '$' ) {    # starting variable name
26369
26370             if ( $tok eq '$' ) {
26371
26372                 $identifier .= $tok;
26373
26374                 # we've got a punctuation variable if end of line (punct.t)
26375                 if ( $i == $max_token_index ) {
26376                     $type          = 'i';
26377                     $id_scan_state = '';
26378                     last;
26379                 }
26380             }
26381             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
26382                 $saw_alpha     = 1;
26383                 $id_scan_state = ':';           # now need ::
26384                 $identifier .= $tok;
26385             }
26386             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
26387                 $saw_alpha     = 1;
26388                 $id_scan_state = ':';                 # now need ::
26389                 $identifier .= $tok;
26390
26391                 # Perl will accept leading digits in identifiers,
26392                 # although they may not always produce useful results.
26393                 # Something like $main::0 is ok.  But this also works:
26394                 #
26395                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
26396                 #  howdy::123::bubba();
26397                 #
26398             }
26399             elsif ( $tok =~ /^[0-9]/ ) {              # numeric
26400                 $saw_alpha     = 1;
26401                 $id_scan_state = ':';                 # now need ::
26402                 $identifier .= $tok;
26403             }
26404             elsif ( $tok eq '::' ) {
26405                 $id_scan_state = 'A';
26406                 $identifier .= $tok;
26407             }
26408             elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
26409                 $identifier .= $tok;    # keep same state, a $ could follow
26410             }
26411             elsif ( $tok eq '{' ) {
26412
26413                 # check for something like ${#} or ${©}
26414                 if (   $identifier eq '$'
26415                     && $i + 2 <= $max_token_index
26416                     && $$rtokens[ $i + 2 ] eq '}'
26417                     && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
26418                 {
26419                     my $next2 = $$rtokens[ $i + 2 ];
26420                     my $next1 = $$rtokens[ $i + 1 ];
26421                     $identifier .= $tok . $next1 . $next2;
26422                     $i += 2;
26423                     $id_scan_state = '';
26424                     last;
26425                 }
26426
26427                 # skip something like ${xxx} or ->{
26428                 $id_scan_state = '';
26429
26430                 # if this is the first token of a line, any tokens for this
26431                 # identifier have already been accumulated
26432                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
26433                 $i = $i_save;
26434                 last;
26435             }
26436
26437             # space ok after leading $ % * & @
26438             elsif ( $tok =~ /^\s*$/ ) {
26439
26440                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
26441
26442                     if ( length($identifier) > 1 ) {
26443                         $id_scan_state = '';
26444                         $i             = $i_save;
26445                         $type          = 'i';    # probably punctuation variable
26446                         last;
26447                     }
26448                     else {
26449
26450                         # spaces after $'s are common, and space after @
26451                         # is harmless, so only complain about space
26452                         # after other type characters. Space after $ and
26453                         # @ will be removed in formatting.  Report space
26454                         # after % and * because they might indicate a
26455                         # parsing error.  In other words '% ' might be a
26456                         # modulo operator.  Delete this warning if it
26457                         # gets annoying.
26458                         if ( $identifier !~ /^[\@\$]$/ ) {
26459                             $message =
26460                               "Space in identifier, following $identifier\n";
26461                         }
26462                     }
26463                 }
26464
26465                 # else:
26466                 # space after '->' is ok
26467             }
26468             elsif ( $tok eq '^' ) {
26469
26470                 # check for some special variables like $^W
26471                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26472                     $identifier .= $tok;
26473                     $id_scan_state = 'A';
26474
26475                     # Perl accepts '$^]' or '@^]', but
26476                     # there must not be a space before the ']'.
26477                     my $next1 = $$rtokens[ $i + 1 ];
26478                     if ( $next1 eq ']' ) {
26479                         $i++;
26480                         $identifier .= $next1;
26481                         $id_scan_state = "";
26482                         last;
26483                     }
26484                 }
26485                 else {
26486                     $id_scan_state = '';
26487                 }
26488             }
26489             else {    # something else
26490
26491                 # check for various punctuation variables
26492                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26493                     $identifier .= $tok;
26494                 }
26495
26496                 elsif ( $identifier eq '$#' ) {
26497
26498                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
26499
26500                     # perl seems to allow just these: $#: $#- $#+
26501                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
26502                         $type = 'i';
26503                         $identifier .= $tok;
26504                     }
26505                     else {
26506                         $i = $i_save;
26507                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
26508                     }
26509                 }
26510                 elsif ( $identifier eq '$$' ) {
26511
26512                     # perl does not allow references to punctuation
26513                     # variables without braces.  For example, this
26514                     # won't work:
26515                     #  $:=\4;
26516                     #  $a = $$:;
26517                     # You would have to use
26518                     #  $a = ${$:};
26519
26520                     $i = $i_save;
26521                     if   ( $tok eq '{' ) { $type = 't' }
26522                     else                 { $type = 'i' }
26523                 }
26524                 elsif ( $identifier eq '->' ) {
26525                     $i = $i_save;
26526                 }
26527                 else {
26528                     $i = $i_save;
26529                     if ( length($identifier) == 1 ) { $identifier = ''; }
26530                 }
26531                 $id_scan_state = '';
26532                 last;
26533             }
26534         }
26535         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
26536
26537             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
26538                 $id_scan_state = ':';          # now need ::
26539                 $saw_alpha     = 1;
26540                 $identifier .= $tok;
26541             }
26542             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
26543                 $id_scan_state = ':';                 # now need ::
26544                 $saw_alpha     = 1;
26545                 $identifier .= $tok;
26546             }
26547             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
26548                 $id_scan_state = ':';       # now need ::
26549                 $saw_alpha     = 1;
26550                 $identifier .= $tok;
26551             }
26552             elsif ( $tok =~ /^\s*$/ ) {     # allow space
26553             }
26554             elsif ( $tok eq '::' ) {        # leading ::
26555                 $id_scan_state = 'A';       # accept alpha next
26556                 $identifier .= $tok;
26557             }
26558             elsif ( $tok eq '{' ) {
26559                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
26560                 $i             = $i_save;
26561                 $id_scan_state = '';
26562                 last;
26563             }
26564             else {
26565
26566                 # punctuation variable?
26567                 # testfile: cunningham4.pl
26568                 #
26569                 # We have to be careful here.  If we are in an unknown state,
26570                 # we will reject the punctuation variable.  In the following
26571                 # example the '&' is a binary opeator but we are in an unknown
26572                 # state because there is no sigil on 'Prima', so we don't
26573                 # know what it is.  But it is a bad guess that
26574                 # '&~' is a punction variable.
26575                 # $self->{text}->{colorMap}->[
26576                 #   Prima::PodView::COLOR_CODE_FOREGROUND
26577                 #   & ~tb::COLOR_INDEX ] =
26578                 #   $sec->{ColorCode}
26579                 if ( $identifier eq '&' && $expecting ) {
26580                     $identifier .= $tok;
26581                 }
26582                 else {
26583                     $identifier = '';
26584                     $i          = $i_save;
26585                     $type       = '&';
26586                 }
26587                 $id_scan_state = '';
26588                 last;
26589             }
26590         }
26591         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
26592
26593             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
26594                 $identifier .= $tok;
26595                 $id_scan_state = ':';        # now need ::
26596                 $saw_alpha     = 1;
26597             }
26598             elsif ( $tok eq "'" && $allow_tick ) {
26599                 $identifier .= $tok;
26600                 $id_scan_state = ':';        # now need ::
26601                 $saw_alpha     = 1;
26602             }
26603             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
26604                 $identifier .= $tok;
26605                 $id_scan_state = ':';        # now need ::
26606                 $saw_alpha     = 1;
26607             }
26608             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26609                 $id_scan_state = '(';
26610                 $identifier .= $tok;
26611             }
26612             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26613                 $id_scan_state = ')';
26614                 $identifier .= $tok;
26615             }
26616             else {
26617                 $id_scan_state = '';
26618                 $i             = $i_save;
26619                 last;
26620             }
26621         }
26622         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
26623
26624             if ( $tok eq '::' ) {            # got it
26625                 $identifier .= $tok;
26626                 $id_scan_state = 'A';        # now require alpha
26627             }
26628             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
26629                 $identifier .= $tok;
26630                 $id_scan_state = ':';           # now need ::
26631                 $saw_alpha     = 1;
26632             }
26633             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
26634                 $identifier .= $tok;
26635                 $id_scan_state = ':';           # now need ::
26636                 $saw_alpha     = 1;
26637             }
26638             elsif ( $tok eq "'" && $allow_tick ) {    # tick
26639
26640                 if ( $is_keyword{$identifier} ) {
26641                     $id_scan_state = '';              # that's all
26642                     $i             = $i_save;
26643                 }
26644                 else {
26645                     $identifier .= $tok;
26646                 }
26647             }
26648             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26649                 $id_scan_state = '(';
26650                 $identifier .= $tok;
26651             }
26652             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26653                 $id_scan_state = ')';
26654                 $identifier .= $tok;
26655             }
26656             else {
26657                 $id_scan_state = '';        # that's all
26658                 $i             = $i_save;
26659                 last;
26660             }
26661         }
26662         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
26663
26664             if ( $tok eq '(' ) {             # got it
26665                 $identifier .= $tok;
26666                 $id_scan_state = ')';        # now find the end of it
26667             }
26668             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
26669                 $identifier .= $tok;
26670             }
26671             else {
26672                 $id_scan_state = '';         # that's all - no prototype
26673                 $i             = $i_save;
26674                 last;
26675             }
26676         }
26677         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
26678
26679             if ( $tok eq ')' ) {             # got it
26680                 $identifier .= $tok;
26681                 $id_scan_state = '';         # all done
26682                 last;
26683             }
26684             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
26685                 $identifier .= $tok;
26686             }
26687             else {    # probable error in script, but keep going
26688                 warning("Unexpected '$tok' while seeking end of prototype\n");
26689                 $identifier .= $tok;
26690             }
26691         }
26692         else {        # can get here due to error in initialization
26693             $id_scan_state = '';
26694             $i             = $i_save;
26695             last;
26696         }
26697     }
26698
26699     if ( $id_scan_state eq ')' ) {
26700         warning("Hit end of line while seeking ) to end prototype\n");
26701     }
26702
26703     # once we enter the actual identifier, it may not extend beyond
26704     # the end of the current line
26705     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
26706         $id_scan_state = '';
26707     }
26708     if ( $i < 0 ) { $i = 0 }
26709
26710     unless ($type) {
26711
26712         if ($saw_type) {
26713
26714             if ($saw_alpha) {
26715                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
26716                     $type = 'w';
26717                 }
26718                 else { $type = 'i' }
26719             }
26720             elsif ( $identifier eq '->' ) {
26721                 $type = '->';
26722             }
26723             elsif (
26724                 ( length($identifier) > 1 )
26725
26726                 # In something like '@$=' we have an identifier '@$'
26727                 # In something like '$${' we have type '$$' (and only
26728                 # part of an identifier)
26729                 && !( $identifier =~ /\$$/ && $tok eq '{' )
26730                 && ( $identifier !~ /^(sub |package )$/ )
26731               )
26732             {
26733                 $type = 'i';
26734             }
26735             else { $type = 't' }
26736         }
26737         elsif ($saw_alpha) {
26738
26739             # type 'w' includes anything without leading type info
26740             # ($,%,@,*) including something like abc::def::ghi
26741             $type = 'w';
26742         }
26743         else {
26744             $type = '';
26745         }    # this can happen on a restart
26746     }
26747
26748     if ($identifier) {
26749         $tok = $identifier;
26750         if ($message) { write_logfile_entry($message) }
26751     }
26752     else {
26753         $tok = $tok_begin;
26754         $i   = $i_begin;
26755     }
26756
26757     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
26758         my ( $a, $b, $c ) = caller;
26759         print
26760 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
26761         print
26762 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
26763     };
26764     return ( $i, $tok, $type, $id_scan_state, $identifier );
26765 }
26766
26767 {
26768
26769     # saved package and subnames in case prototype is on separate line
26770     my ( $package_saved, $subname_saved );
26771
26772     sub do_scan_sub {
26773
26774         # do_scan_sub parses a sub name and prototype
26775         # it is called with $i_beg equal to the index of the first nonblank
26776         # token following a 'sub' token.
26777
26778         # TODO: add future error checks to be sure we have a valid
26779         # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
26780         # a name is given if and only if a non-anonymous sub is
26781         # appropriate.
26782         # USES GLOBAL VARS: $current_package, $last_nonblank_token,
26783         # $in_attribute_list, %saw_function_definition,
26784         # $statement_type
26785
26786         my (
26787             $input_line, $i,             $i_beg,
26788             $tok,        $type,          $rtokens,
26789             $rtoken_map, $id_scan_state, $max_token_index
26790         ) = @_;
26791         $id_scan_state = "";    # normally we get everything in one call
26792         my $subname = undef;
26793         my $package = undef;
26794         my $proto   = undef;
26795         my $attrs   = undef;
26796         my $match;
26797
26798         my $pos_beg = $$rtoken_map[$i_beg];
26799         pos($input_line) = $pos_beg;
26800
26801         # sub NAME PROTO ATTRS
26802         if (
26803             $input_line =~ m/\G\s*
26804         ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
26805         (\w+)               # NAME    - required
26806         (\s*\([^){]*\))?    # PROTO   - something in parens
26807         (\s*:)?             # ATTRS   - leading : of attribute list
26808         /gcx
26809           )
26810         {
26811             $match   = 1;
26812             $subname = $2;
26813             $proto   = $3;
26814             $attrs   = $4;
26815
26816             $package = ( defined($1) && $1 ) ? $1 : $current_package;
26817             $package =~ s/\'/::/g;
26818             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26819             $package =~ s/::$//;
26820             my $pos  = pos($input_line);
26821             my $numc = $pos - $pos_beg;
26822             $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
26823             $type = 'i';
26824         }
26825
26826         # Look for prototype/attributes not preceded on this line by subname;
26827         # This might be an anonymous sub with attributes,
26828         # or a prototype on a separate line from its sub name
26829         elsif (
26830             $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
26831             (\s*:)?                              # ATTRS leading ':'
26832             /gcx
26833             && ( $1 || $2 )
26834           )
26835         {
26836             $match = 1;
26837             $proto = $1;
26838             $attrs = $2;
26839
26840             # Handle prototype on separate line from subname
26841             if ($subname_saved) {
26842                 $package = $package_saved;
26843                 $subname = $subname_saved;
26844                 $tok     = $last_nonblank_token;
26845             }
26846             $type = 'i';
26847         }
26848
26849         if ($match) {
26850
26851             # ATTRS: if there are attributes, back up and let the ':' be
26852             # found later by the scanner.
26853             my $pos = pos($input_line);
26854             if ($attrs) {
26855                 $pos -= length($attrs);
26856             }
26857
26858             my $next_nonblank_token = $tok;
26859
26860             # catch case of line with leading ATTR ':' after anonymous sub
26861             if ( $pos == $pos_beg && $tok eq ':' ) {
26862                 $type              = 'A';
26863                 $in_attribute_list = 1;
26864             }
26865
26866             # We must convert back from character position
26867             # to pre_token index.
26868             else {
26869
26870                 # I don't think an error flag can occur here ..but ?
26871                 my $error;
26872                 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
26873                     $max_token_index );
26874                 if ($error) { warning("Possibly invalid sub\n") }
26875
26876                 # check for multiple definitions of a sub
26877                 ( $next_nonblank_token, my $i_next ) =
26878                   find_next_nonblank_token_on_this_line( $i, $rtokens,
26879                     $max_token_index );
26880             }
26881
26882             if ( $next_nonblank_token =~ /^(\s*|#)$/ )
26883             {    # skip blank or side comment
26884                 my ( $rpre_tokens, $rpre_types ) =
26885                   peek_ahead_for_n_nonblank_pre_tokens(1);
26886                 if ( defined($rpre_tokens) && @$rpre_tokens ) {
26887                     $next_nonblank_token = $rpre_tokens->[0];
26888                 }
26889                 else {
26890                     $next_nonblank_token = '}';
26891                 }
26892             }
26893             $package_saved = "";
26894             $subname_saved = "";
26895             if ( $next_nonblank_token eq '{' ) {
26896                 if ($subname) {
26897
26898                     # Check for multiple definitions of a sub, but
26899                     # it is ok to have multiple sub BEGIN, etc,
26900                     # so we do not complain if name is all caps
26901                     if (   $saw_function_definition{$package}{$subname}
26902                         && $subname !~ /^[A-Z]+$/ )
26903                     {
26904                         my $lno = $saw_function_definition{$package}{$subname};
26905                         warning(
26906 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
26907                         );
26908                     }
26909                     $saw_function_definition{$package}{$subname} =
26910                       $tokenizer_self->{_last_line_number};
26911                 }
26912             }
26913             elsif ( $next_nonblank_token eq ';' ) {
26914             }
26915             elsif ( $next_nonblank_token eq '}' ) {
26916             }
26917
26918             # ATTRS - if an attribute list follows, remember the name
26919             # of the sub so the next opening brace can be labeled.
26920             # Setting 'statement_type' causes any ':'s to introduce
26921             # attributes.
26922             elsif ( $next_nonblank_token eq ':' ) {
26923                 $statement_type = $tok;
26924             }
26925
26926             # see if PROTO follows on another line:
26927             elsif ( $next_nonblank_token eq '(' ) {
26928                 if ( $attrs || $proto ) {
26929                     warning(
26930 "unexpected '(' after definition or declaration of sub '$subname'\n"
26931                     );
26932                 }
26933                 else {
26934                     $id_scan_state  = 'sub';    # we must come back to get proto
26935                     $statement_type = $tok;
26936                     $package_saved  = $package;
26937                     $subname_saved  = $subname;
26938                 }
26939             }
26940             elsif ($next_nonblank_token) {      # EOF technically ok
26941                 warning(
26942 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
26943                 );
26944             }
26945             check_prototype( $proto, $package, $subname );
26946         }
26947
26948         # no match but line not blank
26949         else {
26950         }
26951         return ( $i, $tok, $type, $id_scan_state );
26952     }
26953 }
26954
26955 #########i###############################################################
26956 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
26957 #########################################################################
26958
26959 sub find_next_nonblank_token {
26960     my ( $i, $rtokens, $max_token_index ) = @_;
26961
26962     if ( $i >= $max_token_index ) {
26963         if ( !peeked_ahead() ) {
26964             peeked_ahead(1);
26965             $rtokens =
26966               peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
26967         }
26968     }
26969     my $next_nonblank_token = $$rtokens[ ++$i ];
26970
26971     if ( $next_nonblank_token =~ /^\s*$/ ) {
26972         $next_nonblank_token = $$rtokens[ ++$i ];
26973     }
26974     return ( $next_nonblank_token, $i );
26975 }
26976
26977 sub numerator_expected {
26978
26979     # this is a filter for a possible numerator, in support of guessing
26980     # for the / pattern delimiter token.
26981     # returns -
26982     #   1 - yes
26983     #   0 - can't tell
26984     #  -1 - no
26985     # Note: I am using the convention that variables ending in
26986     # _expected have these 3 possible values.
26987     my ( $i, $rtokens, $max_token_index ) = @_;
26988     my $next_token = $$rtokens[ $i + 1 ];
26989     if ( $next_token eq '=' ) { $i++; }    # handle /=
26990     my ( $next_nonblank_token, $i_next ) =
26991       find_next_nonblank_token( $i, $rtokens, $max_token_index );
26992
26993     if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
26994         1;
26995     }
26996     else {
26997
26998         if ( $next_nonblank_token =~ /^\s*$/ ) {
26999             0;
27000         }
27001         else {
27002             -1;
27003         }
27004     }
27005 }
27006
27007 sub pattern_expected {
27008
27009     # This is the start of a filter for a possible pattern.
27010     # It looks at the token after a possbible pattern and tries to
27011     # determine if that token could end a pattern.
27012     # returns -
27013     #   1 - yes
27014     #   0 - can't tell
27015     #  -1 - no
27016     my ( $i, $rtokens, $max_token_index ) = @_;
27017     my $next_token = $$rtokens[ $i + 1 ];
27018     if ( $next_token =~ /^[cgimosxp]/ ) { $i++; }    # skip possible modifier
27019     my ( $next_nonblank_token, $i_next ) =
27020       find_next_nonblank_token( $i, $rtokens, $max_token_index );
27021
27022     # list of tokens which may follow a pattern
27023     # (can probably be expanded)
27024     if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
27025     {
27026         1;
27027     }
27028     else {
27029
27030         if ( $next_nonblank_token =~ /^\s*$/ ) {
27031             0;
27032         }
27033         else {
27034             -1;
27035         }
27036     }
27037 }
27038
27039 sub find_next_nonblank_token_on_this_line {
27040     my ( $i, $rtokens, $max_token_index ) = @_;
27041     my $next_nonblank_token;
27042
27043     if ( $i < $max_token_index ) {
27044         $next_nonblank_token = $$rtokens[ ++$i ];
27045
27046         if ( $next_nonblank_token =~ /^\s*$/ ) {
27047
27048             if ( $i < $max_token_index ) {
27049                 $next_nonblank_token = $$rtokens[ ++$i ];
27050             }
27051         }
27052     }
27053     else {
27054         $next_nonblank_token = "";
27055     }
27056     return ( $next_nonblank_token, $i );
27057 }
27058
27059 sub find_angle_operator_termination {
27060
27061     # We are looking at a '<' and want to know if it is an angle operator.
27062     # We are to return:
27063     #   $i = pretoken index of ending '>' if found, current $i otherwise
27064     #   $type = 'Q' if found, '>' otherwise
27065     my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
27066     my $i    = $i_beg;
27067     my $type = '<';
27068     pos($input_line) = 1 + $$rtoken_map[$i];
27069
27070     my $filter;
27071
27072     # we just have to find the next '>' if a term is expected
27073     if ( $expecting == TERM ) { $filter = '[\>]' }
27074
27075     # we have to guess if we don't know what is expected
27076     elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
27077
27078     # shouldn't happen - we shouldn't be here if operator is expected
27079     else { warning("Program Bug in find_angle_operator_termination\n") }
27080
27081     # To illustrate what we might be looking at, in case we are
27082     # guessing, here are some examples of valid angle operators
27083     # (or file globs):
27084     #  <tmp_imp/*>
27085     #  <FH>
27086     #  <$fh>
27087     #  <*.c *.h>
27088     #  <_>
27089     #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
27090     #  <${PREFIX}*img*.$IMAGE_TYPE>
27091     #  <img*.$IMAGE_TYPE>
27092     #  <Timg*.$IMAGE_TYPE>
27093     #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
27094     #
27095     # Here are some examples of lines which do not have angle operators:
27096     #  return undef unless $self->[2]++ < $#{$self->[1]};
27097     #  < 2  || @$t >
27098     #
27099     # the following line from dlister.pl caused trouble:
27100     #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
27101     #
27102     # If the '<' starts an angle operator, it must end on this line and
27103     # it must not have certain characters like ';' and '=' in it.  I use
27104     # this to limit the testing.  This filter should be improved if
27105     # possible.
27106
27107     if ( $input_line =~ /($filter)/g ) {
27108
27109         if ( $1 eq '>' ) {
27110
27111             # We MAY have found an angle operator termination if we get
27112             # here, but we need to do more to be sure we haven't been
27113             # fooled.
27114             my $pos = pos($input_line);
27115
27116             my $pos_beg = $$rtoken_map[$i];
27117             my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
27118
27119             # Reject if the closing '>' follows a '-' as in:
27120             # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
27121             if ( $expecting eq UNKNOWN ) {
27122                 my $check = substr( $input_line, $pos - 2, 1 );
27123                 if ( $check eq '-' ) {
27124                     return ( $i, $type );
27125                 }
27126             }
27127
27128             ######################################debug#####
27129             #write_diagnostics( "ANGLE? :$str\n");
27130             #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
27131             ######################################debug#####
27132             $type = 'Q';
27133             my $error;
27134             ( $i, $error ) =
27135               inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27136
27137             # It may be possible that a quote ends midway in a pretoken.
27138             # If this happens, it may be necessary to split the pretoken.
27139             if ($error) {
27140                 warning(
27141                     "Possible tokinization error..please check this line\n");
27142                 report_possible_bug();
27143             }
27144
27145             # Now let's see where we stand....
27146             # OK if math op not possible
27147             if ( $expecting == TERM ) {
27148             }
27149
27150             # OK if there are no more than 2 pre-tokens inside
27151             # (not possible to write 2 token math between < and >)
27152             # This catches most common cases
27153             elsif ( $i <= $i_beg + 3 ) {
27154                 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
27155             }
27156
27157             # Not sure..
27158             else {
27159
27160                 # Let's try a Brace Test: any braces inside must balance
27161                 my $br = 0;
27162                 while ( $str =~ /\{/g ) { $br++ }
27163                 while ( $str =~ /\}/g ) { $br-- }
27164                 my $sb = 0;
27165                 while ( $str =~ /\[/g ) { $sb++ }
27166                 while ( $str =~ /\]/g ) { $sb-- }
27167                 my $pr = 0;
27168                 while ( $str =~ /\(/g ) { $pr++ }
27169                 while ( $str =~ /\)/g ) { $pr-- }
27170
27171                 # if braces do not balance - not angle operator
27172                 if ( $br || $sb || $pr ) {
27173                     $i    = $i_beg;
27174                     $type = '<';
27175                     write_diagnostics(
27176                         "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
27177                 }
27178
27179                 # we should keep doing more checks here...to be continued
27180                 # Tentatively accepting this as a valid angle operator.
27181                 # There are lots more things that can be checked.
27182                 else {
27183                     write_diagnostics(
27184                         "ANGLE-Guessing yes: $str expecting=$expecting\n");
27185                     write_logfile_entry("Guessing angle operator here: $str\n");
27186                 }
27187             }
27188         }
27189
27190         # didn't find ending >
27191         else {
27192             if ( $expecting == TERM ) {
27193                 warning("No ending > for angle operator\n");
27194             }
27195         }
27196     }
27197     return ( $i, $type );
27198 }
27199
27200 sub scan_number_do {
27201
27202     #  scan a number in any of the formats that Perl accepts
27203     #  Underbars (_) are allowed in decimal numbers.
27204     #  input parameters -
27205     #      $input_line  - the string to scan
27206     #      $i           - pre_token index to start scanning
27207     #    $rtoken_map    - reference to the pre_token map giving starting
27208     #                    character position in $input_line of token $i
27209     #  output parameters -
27210     #    $i            - last pre_token index of the number just scanned
27211     #    number        - the number (characters); or undef if not a number
27212
27213     my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
27214     my $pos_beg = $$rtoken_map[$i];
27215     my $pos;
27216     my $i_begin = $i;
27217     my $number  = undef;
27218     my $type    = $input_type;
27219
27220     my $first_char = substr( $input_line, $pos_beg, 1 );
27221
27222     # Look for bad starting characters; Shouldn't happen..
27223     if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
27224         warning("Program bug - scan_number given character $first_char\n");
27225         report_definite_bug();
27226         return ( $i, $type, $number );
27227     }
27228
27229     # handle v-string without leading 'v' character ('Two Dot' rule)
27230     # (vstring.t)
27231     # TODO: v-strings may contain underscores
27232     pos($input_line) = $pos_beg;
27233     if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
27234         $pos = pos($input_line);
27235         my $numc = $pos - $pos_beg;
27236         $number = substr( $input_line, $pos_beg, $numc );
27237         $type = 'v';
27238         report_v_string($number);
27239     }
27240
27241     # handle octal, hex, binary
27242     if ( !defined($number) ) {
27243         pos($input_line) = $pos_beg;
27244         if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
27245         {
27246             $pos = pos($input_line);
27247             my $numc = $pos - $pos_beg;
27248             $number = substr( $input_line, $pos_beg, $numc );
27249             $type = 'n';
27250         }
27251     }
27252
27253     # handle decimal
27254     if ( !defined($number) ) {
27255         pos($input_line) = $pos_beg;
27256
27257         if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
27258             $pos = pos($input_line);
27259
27260             # watch out for things like 0..40 which would give 0. by this;
27261             if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
27262                 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
27263             {
27264                 $pos--;
27265             }
27266             my $numc = $pos - $pos_beg;
27267             $number = substr( $input_line, $pos_beg, $numc );
27268             $type = 'n';
27269         }
27270     }
27271
27272     # filter out non-numbers like e + - . e2  .e3 +e6
27273     # the rule: at least one digit, and any 'e' must be preceded by a digit
27274     if (
27275         $number !~ /\d/    # no digits
27276         || (   $number =~ /^(.*)[eE]/
27277             && $1 !~ /\d/ )    # or no digits before the 'e'
27278       )
27279     {
27280         $number = undef;
27281         $type   = $input_type;
27282         return ( $i, $type, $number );
27283     }
27284
27285     # Found a number; now we must convert back from character position
27286     # to pre_token index. An error here implies user syntax error.
27287     # An example would be an invalid octal number like '009'.
27288     my $error;
27289     ( $i, $error ) =
27290       inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27291     if ($error) { warning("Possibly invalid number\n") }
27292
27293     return ( $i, $type, $number );
27294 }
27295
27296 sub inverse_pretoken_map {
27297
27298     # Starting with the current pre_token index $i, scan forward until
27299     # finding the index of the next pre_token whose position is $pos.
27300     my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
27301     my $error = 0;
27302
27303     while ( ++$i <= $max_token_index ) {
27304
27305         if ( $pos <= $$rtoken_map[$i] ) {
27306
27307             # Let the calling routine handle errors in which we do not
27308             # land on a pre-token boundary.  It can happen by running
27309             # perltidy on some non-perl scripts, for example.
27310             if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
27311             $i--;
27312             last;
27313         }
27314     }
27315     return ( $i, $error );
27316 }
27317
27318 sub find_here_doc {
27319
27320     # find the target of a here document, if any
27321     # input parameters:
27322     #   $i - token index of the second < of <<
27323     #   ($i must be less than the last token index if this is called)
27324     # output parameters:
27325     #   $found_target = 0 didn't find target; =1 found target
27326     #   HERE_TARGET - the target string (may be empty string)
27327     #   $i - unchanged if not here doc,
27328     #    or index of the last token of the here target
27329     #   $saw_error - flag noting unbalanced quote on here target
27330     my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27331     my $ibeg                 = $i;
27332     my $found_target         = 0;
27333     my $here_doc_target      = '';
27334     my $here_quote_character = '';
27335     my $saw_error            = 0;
27336     my ( $next_nonblank_token, $i_next_nonblank, $next_token );
27337     $next_token = $$rtokens[ $i + 1 ];
27338
27339     # perl allows a backslash before the target string (heredoc.t)
27340     my $backslash = 0;
27341     if ( $next_token eq '\\' ) {
27342         $backslash  = 1;
27343         $next_token = $$rtokens[ $i + 2 ];
27344     }
27345
27346     ( $next_nonblank_token, $i_next_nonblank ) =
27347       find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
27348
27349     if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
27350
27351         my $in_quote    = 1;
27352         my $quote_depth = 0;
27353         my $quote_pos   = 0;
27354         my $quoted_string;
27355
27356         (
27357             $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
27358             $quoted_string
27359           )
27360           = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
27361             $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
27362
27363         if ($in_quote) {    # didn't find end of quote, so no target found
27364             $i = $ibeg;
27365             if ( $expecting == TERM ) {
27366                 warning(
27367 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
27368                 );
27369                 $saw_error = 1;
27370             }
27371         }
27372         else {              # found ending quote
27373             my $j;
27374             $found_target = 1;
27375
27376             my $tokj;
27377             for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
27378                 $tokj = $$rtokens[$j];
27379
27380                 # we have to remove any backslash before the quote character
27381                 # so that the here-doc-target exactly matches this string
27382                 next
27383                   if ( $tokj eq "\\"
27384                     && $j < $i - 1
27385                     && $$rtokens[ $j + 1 ] eq $here_quote_character );
27386                 $here_doc_target .= $tokj;
27387             }
27388         }
27389     }
27390
27391     elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
27392         $found_target = 1;
27393         write_logfile_entry(
27394             "found blank here-target after <<; suggest using \"\"\n");
27395         $i = $ibeg;
27396     }
27397     elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
27398
27399         my $here_doc_expected;
27400         if ( $expecting == UNKNOWN ) {
27401             $here_doc_expected = guess_if_here_doc($next_token);
27402         }
27403         else {
27404             $here_doc_expected = 1;
27405         }
27406
27407         if ($here_doc_expected) {
27408             $found_target    = 1;
27409             $here_doc_target = $next_token;
27410             $i               = $ibeg + 1;
27411         }
27412
27413     }
27414     else {
27415
27416         if ( $expecting == TERM ) {
27417             $found_target = 1;
27418             write_logfile_entry("Note: bare here-doc operator <<\n");
27419         }
27420         else {
27421             $i = $ibeg;
27422         }
27423     }
27424
27425     # patch to neglect any prepended backslash
27426     if ( $found_target && $backslash ) { $i++ }
27427
27428     return ( $found_target, $here_doc_target, $here_quote_character, $i,
27429         $saw_error );
27430 }
27431
27432 sub do_quote {
27433
27434     # follow (or continue following) quoted string(s)
27435     # $in_quote return code:
27436     #   0 - ok, found end
27437     #   1 - still must find end of quote whose target is $quote_character
27438     #   2 - still looking for end of first of two quotes
27439     #
27440     # Returns updated strings:
27441     #  $quoted_string_1 = quoted string seen while in_quote=1
27442     #  $quoted_string_2 = quoted string seen while in_quote=2
27443     my (
27444         $i,               $in_quote,    $quote_character,
27445         $quote_pos,       $quote_depth, $quoted_string_1,
27446         $quoted_string_2, $rtokens,     $rtoken_map,
27447         $max_token_index
27448     ) = @_;
27449
27450     my $in_quote_starting = $in_quote;
27451
27452     my $quoted_string;
27453     if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
27454         my $ibeg = $i;
27455         (
27456             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27457             $quoted_string
27458           )
27459           = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
27460             $quote_pos, $quote_depth, $max_token_index );
27461         $quoted_string_2 .= $quoted_string;
27462         if ( $in_quote == 1 ) {
27463             if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
27464             $quote_character = '';
27465         }
27466         else {
27467             $quoted_string_2 .= "\n";
27468         }
27469     }
27470
27471     if ( $in_quote == 1 ) {    # one (more) quote to follow
27472         my $ibeg = $i;
27473         (
27474             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27475             $quoted_string
27476           )
27477           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27478             $quote_pos, $quote_depth, $max_token_index );
27479         $quoted_string_1 .= $quoted_string;
27480         if ( $in_quote == 1 ) {
27481             $quoted_string_1 .= "\n";
27482         }
27483     }
27484     return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27485         $quoted_string_1, $quoted_string_2 );
27486 }
27487
27488 sub follow_quoted_string {
27489
27490     # scan for a specific token, skipping escaped characters
27491     # if the quote character is blank, use the first non-blank character
27492     # input parameters:
27493     #   $rtokens = reference to the array of tokens
27494     #   $i = the token index of the first character to search
27495     #   $in_quote = number of quoted strings being followed
27496     #   $beginning_tok = the starting quote character
27497     #   $quote_pos = index to check next for alphanumeric delimiter
27498     # output parameters:
27499     #   $i = the token index of the ending quote character
27500     #   $in_quote = decremented if found end, unchanged if not
27501     #   $beginning_tok = the starting quote character
27502     #   $quote_pos = index to check next for alphanumeric delimiter
27503     #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
27504     #   $quoted_string = the text of the quote (without quotation tokens)
27505     my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
27506         $max_token_index )
27507       = @_;
27508     my ( $tok, $end_tok );
27509     my $i             = $i_beg - 1;
27510     my $quoted_string = "";
27511
27512     TOKENIZER_DEBUG_FLAG_QUOTE && do {
27513         print
27514 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
27515     };
27516
27517     # get the corresponding end token
27518     if ( $beginning_tok !~ /^\s*$/ ) {
27519         $end_tok = matching_end_token($beginning_tok);
27520     }
27521
27522     # a blank token means we must find and use the first non-blank one
27523     else {
27524         my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
27525
27526         while ( $i < $max_token_index ) {
27527             $tok = $$rtokens[ ++$i ];
27528
27529             if ( $tok !~ /^\s*$/ ) {
27530
27531                 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
27532                     $i = $max_token_index;
27533                 }
27534                 else {
27535
27536                     if ( length($tok) > 1 ) {
27537                         if ( $quote_pos <= 0 ) { $quote_pos = 1 }
27538                         $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
27539                     }
27540                     else {
27541                         $beginning_tok = $tok;
27542                         $quote_pos     = 0;
27543                     }
27544                     $end_tok     = matching_end_token($beginning_tok);
27545                     $quote_depth = 1;
27546                     last;
27547                 }
27548             }
27549             else {
27550                 $allow_quote_comments = 1;
27551             }
27552         }
27553     }
27554
27555     # There are two different loops which search for the ending quote
27556     # character.  In the rare case of an alphanumeric quote delimiter, we
27557     # have to look through alphanumeric tokens character-by-character, since
27558     # the pre-tokenization process combines multiple alphanumeric
27559     # characters, whereas for a non-alphanumeric delimiter, only tokens of
27560     # length 1 can match.
27561
27562     ###################################################################
27563     # Case 1 (rare): loop for case of alphanumeric quote delimiter..
27564     # "quote_pos" is the position the current word to begin searching
27565     ###################################################################
27566     if ( $beginning_tok =~ /\w/ ) {
27567
27568         # Note this because it is not recommended practice except
27569         # for obfuscated perl contests
27570         if ( $in_quote == 1 ) {
27571             write_logfile_entry(
27572                 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
27573         }
27574
27575         while ( $i < $max_token_index ) {
27576
27577             if ( $quote_pos == 0 || ( $i < 0 ) ) {
27578                 $tok = $$rtokens[ ++$i ];
27579
27580                 if ( $tok eq '\\' ) {
27581
27582                     # retain backslash unless it hides the end token
27583                     $quoted_string .= $tok
27584                       unless $$rtokens[ $i + 1 ] eq $end_tok;
27585                     $quote_pos++;
27586                     last if ( $i >= $max_token_index );
27587                     $tok = $$rtokens[ ++$i ];
27588                 }
27589             }
27590             my $old_pos = $quote_pos;
27591
27592             unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
27593             {
27594
27595             }
27596             $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
27597
27598             if ( $quote_pos > 0 ) {
27599
27600                 $quoted_string .=
27601                   substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
27602
27603                 $quote_depth--;
27604
27605                 if ( $quote_depth == 0 ) {
27606                     $in_quote--;
27607                     last;
27608                 }
27609             }
27610             else {
27611                 $quoted_string .= substr( $tok, $old_pos );
27612             }
27613         }
27614     }
27615
27616     ########################################################################
27617     # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
27618     ########################################################################
27619     else {
27620
27621         while ( $i < $max_token_index ) {
27622             $tok = $$rtokens[ ++$i ];
27623
27624             if ( $tok eq $end_tok ) {
27625                 $quote_depth--;
27626
27627                 if ( $quote_depth == 0 ) {
27628                     $in_quote--;
27629                     last;
27630                 }
27631             }
27632             elsif ( $tok eq $beginning_tok ) {
27633                 $quote_depth++;
27634             }
27635             elsif ( $tok eq '\\' ) {
27636
27637                 # retain backslash unless it hides the beginning or end token
27638                 $tok = $$rtokens[ ++$i ];
27639                 $quoted_string .= '\\'
27640                   unless ( $tok eq $end_tok || $tok eq $beginning_tok );
27641             }
27642             $quoted_string .= $tok;
27643         }
27644     }
27645     if ( $i > $max_token_index ) { $i = $max_token_index }
27646     return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
27647         $quoted_string );
27648 }
27649
27650 sub indicate_error {
27651     my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
27652     interrupt_logfile();
27653     warning($msg);
27654     write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
27655     resume_logfile();
27656 }
27657
27658 sub write_error_indicator_pair {
27659     my ( $line_number, $input_line, $pos, $carrat ) = @_;
27660     my ( $offset, $numbered_line, $underline ) =
27661       make_numbered_line( $line_number, $input_line, $pos );
27662     $underline = write_on_underline( $underline, $pos - $offset, $carrat );
27663     warning( $numbered_line . "\n" );
27664     $underline =~ s/\s*$//;
27665     warning( $underline . "\n" );
27666 }
27667
27668 sub make_numbered_line {
27669
27670     #  Given an input line, its line number, and a character position of
27671     #  interest, create a string not longer than 80 characters of the form
27672     #     $lineno: sub_string
27673     #  such that the sub_string of $str contains the position of interest
27674     #
27675     #  Here is an example of what we want, in this case we add trailing
27676     #  '...' because the line is long.
27677     #
27678     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27679     #
27680     #  Here is another example, this time in which we used leading '...'
27681     #  because of excessive length:
27682     #
27683     # 2: ... er of the World Wide Web Consortium's
27684     #
27685     #  input parameters are:
27686     #   $lineno = line number
27687     #   $str = the text of the line
27688     #   $pos = position of interest (the error) : 0 = first character
27689     #
27690     #   We return :
27691     #     - $offset = an offset which corrects the position in case we only
27692     #       display part of a line, such that $pos-$offset is the effective
27693     #       position from the start of the displayed line.
27694     #     - $numbered_line = the numbered line as above,
27695     #     - $underline = a blank 'underline' which is all spaces with the same
27696     #       number of characters as the numbered line.
27697
27698     my ( $lineno, $str, $pos ) = @_;
27699     my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
27700     my $excess = length($str) - $offset - 68;
27701     my $numc   = ( $excess > 0 ) ? 68 : undef;
27702
27703     if ( defined($numc) ) {
27704         if ( $offset == 0 ) {
27705             $str = substr( $str, $offset, $numc - 4 ) . " ...";
27706         }
27707         else {
27708             $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
27709         }
27710     }
27711     else {
27712
27713         if ( $offset == 0 ) {
27714         }
27715         else {
27716             $str = "... " . substr( $str, $offset + 4 );
27717         }
27718     }
27719
27720     my $numbered_line = sprintf( "%d: ", $lineno );
27721     $offset -= length($numbered_line);
27722     $numbered_line .= $str;
27723     my $underline = " " x length($numbered_line);
27724     return ( $offset, $numbered_line, $underline );
27725 }
27726
27727 sub write_on_underline {
27728
27729     # The "underline" is a string that shows where an error is; it starts
27730     # out as a string of blanks with the same length as the numbered line of
27731     # code above it, and we have to add marking to show where an error is.
27732     # In the example below, we want to write the string '--^' just below
27733     # the line of bad code:
27734     #
27735     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27736     #                 ---^
27737     # We are given the current underline string, plus a position and a
27738     # string to write on it.
27739     #
27740     # In the above example, there will be 2 calls to do this:
27741     # First call:  $pos=19, pos_chr=^
27742     # Second call: $pos=16, pos_chr=---
27743     #
27744     # This is a trivial thing to do with substr, but there is some
27745     # checking to do.
27746
27747     my ( $underline, $pos, $pos_chr ) = @_;
27748
27749     # check for error..shouldn't happen
27750     unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
27751         return $underline;
27752     }
27753     my $excess = length($pos_chr) + $pos - length($underline);
27754     if ( $excess > 0 ) {
27755         $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
27756     }
27757     substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
27758     return ($underline);
27759 }
27760
27761 sub pre_tokenize {
27762
27763     # Break a string, $str, into a sequence of preliminary tokens.  We
27764     # are interested in these types of tokens:
27765     #   words       (type='w'),            example: 'max_tokens_wanted'
27766     #   digits      (type = 'd'),          example: '0755'
27767     #   whitespace  (type = 'b'),          example: '   '
27768     #   any other single character (i.e. punct; type = the character itself).
27769     # We cannot do better than this yet because we might be in a quoted
27770     # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
27771     # tokens.
27772     my ( $str, $max_tokens_wanted ) = @_;
27773
27774     # we return references to these 3 arrays:
27775     my @tokens    = ();     # array of the tokens themselves
27776     my @token_map = (0);    # string position of start of each token
27777     my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
27778
27779     do {
27780
27781         # whitespace
27782         if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
27783
27784         # numbers
27785         # note that this must come before words!
27786         elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
27787
27788         # words
27789         elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
27790
27791         # single-character punctuation
27792         elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
27793
27794         # that's all..
27795         else {
27796             return ( \@tokens, \@token_map, \@type );
27797         }
27798
27799         push @tokens,    $1;
27800         push @token_map, pos($str);
27801
27802     } while ( --$max_tokens_wanted != 0 );
27803
27804     return ( \@tokens, \@token_map, \@type );
27805 }
27806
27807 sub show_tokens {
27808
27809     # this is an old debug routine
27810     my ( $rtokens, $rtoken_map ) = @_;
27811     my $num = scalar(@$rtokens);
27812     my $i;
27813
27814     for ( $i = 0 ; $i < $num ; $i++ ) {
27815         my $len = length( $$rtokens[$i] );
27816         print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
27817     }
27818 }
27819
27820 sub matching_end_token {
27821
27822     # find closing character for a pattern
27823     my $beginning_token = shift;
27824
27825     if ( $beginning_token eq '{' ) {
27826         '}';
27827     }
27828     elsif ( $beginning_token eq '[' ) {
27829         ']';
27830     }
27831     elsif ( $beginning_token eq '<' ) {
27832         '>';
27833     }
27834     elsif ( $beginning_token eq '(' ) {
27835         ')';
27836     }
27837     else {
27838         $beginning_token;
27839     }
27840 }
27841
27842 sub dump_token_types {
27843     my $class = shift;
27844     my $fh    = shift;
27845
27846     # This should be the latest list of token types in use
27847     # adding NEW_TOKENS: add a comment here
27848     print $fh <<'END_OF_LIST';
27849
27850 Here is a list of the token types currently used for lines of type 'CODE'.
27851 For the following tokens, the "type" of a token is just the token itself.
27852
27853 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27854 ( ) <= >= == =~ !~ != ++ -- /= x=
27855 ... **= <<= >>= &&= ||= //= <=>
27856 , + - / * | % ! x ~ = \ ? : . < > ^ &
27857
27858 The following additional token types are defined:
27859
27860  type    meaning
27861     b    blank (white space)
27862     {    indent: opening structural curly brace or square bracket or paren
27863          (code block, anonymous hash reference, or anonymous array reference)
27864     }    outdent: right structural curly brace or square bracket or paren
27865     [    left non-structural square bracket (enclosing an array index)
27866     ]    right non-structural square bracket
27867     (    left non-structural paren (all but a list right of an =)
27868     )    right non-structural parena
27869     L    left non-structural curly brace (enclosing a key)
27870     R    right non-structural curly brace
27871     ;    terminal semicolon
27872     f    indicates a semicolon in a "for" statement
27873     h    here_doc operator <<
27874     #    a comment
27875     Q    indicates a quote or pattern
27876     q    indicates a qw quote block
27877     k    a perl keyword
27878     C    user-defined constant or constant function (with void prototype = ())
27879     U    user-defined function taking parameters
27880     G    user-defined function taking block parameter (like grep/map/eval)
27881     M    (unused, but reserved for subroutine definition name)
27882     P    (unused, but -html uses it to label pod text)
27883     t    type indicater such as %,$,@,*,&,sub
27884     w    bare word (perhaps a subroutine call)
27885     i    identifier of some type (with leading %, $, @, *, &, sub, -> )
27886     n    a number
27887     v    a v-string
27888     F    a file test operator (like -e)
27889     Y    File handle
27890     Z    identifier in indirect object slot: may be file handle, object
27891     J    LABEL:  code block label
27892     j    LABEL after next, last, redo, goto
27893     p    unary +
27894     m    unary -
27895     pp   pre-increment operator ++
27896     mm   pre-decrement operator --
27897     A    : used as attribute separator
27898
27899     Here are the '_line_type' codes used internally:
27900     SYSTEM         - system-specific code before hash-bang line
27901     CODE           - line of perl code (including comments)
27902     POD_START      - line starting pod, such as '=head'
27903     POD            - pod documentation text
27904     POD_END        - last line of pod section, '=cut'
27905     HERE           - text of here-document
27906     HERE_END       - last line of here-doc (target word)
27907     FORMAT         - format section
27908     FORMAT_END     - last line of format section, '.'
27909     DATA_START     - __DATA__ line
27910     DATA           - unidentified text following __DATA__
27911     END_START      - __END__ line
27912     END            - unidentified text following __END__
27913     ERROR          - we are in big trouble, probably not a perl script
27914 END_OF_LIST
27915 }
27916
27917 BEGIN {
27918
27919     # These names are used in error messages
27920     @opening_brace_names = qw# '{' '[' '(' '?' #;
27921     @closing_brace_names = qw# '}' ']' ')' ':' #;
27922
27923     my @digraphs = qw(
27924       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27925       <= >= == =~ !~ != ++ -- /= x= ~~
27926     );
27927     @is_digraph{@digraphs} = (1) x scalar(@digraphs);
27928
27929     my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
27930     @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
27931
27932     # make a hash of all valid token types for self-checking the tokenizer
27933     # (adding NEW_TOKENS : select a new character and add to this list)
27934     my @valid_token_types = qw#
27935       A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
27936       { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
27937       #;
27938     push( @valid_token_types, @digraphs );
27939     push( @valid_token_types, @trigraphs );
27940     push( @valid_token_types, '#' );
27941     push( @valid_token_types, ',' );
27942     @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
27943
27944     # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
27945     my @file_test_operators =
27946       qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
27947     @is_file_test_operator{@file_test_operators} =
27948       (1) x scalar(@file_test_operators);
27949
27950     # these functions have prototypes of the form (&), so when they are
27951     # followed by a block, that block MAY BE followed by an operator.
27952     @_ = qw( do eval );
27953     @is_block_operator{@_} = (1) x scalar(@_);
27954
27955     # these functions allow an identifier in the indirect object slot
27956     @_ = qw( print printf sort exec system say);
27957     @is_indirect_object_taker{@_} = (1) x scalar(@_);
27958
27959     # These tokens may precede a code block
27960     # patched for SWITCH/CASE
27961     @_ =
27962       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
27963       unless do while until eval for foreach map grep sort
27964       switch case given when);
27965     @is_code_block_token{@_} = (1) x scalar(@_);
27966
27967     # I'll build the list of keywords incrementally
27968     my @Keywords = ();
27969
27970     # keywords and tokens after which a value or pattern is expected,
27971     # but not an operator.  In other words, these should consume terms
27972     # to their right, or at least they are not expected to be followed
27973     # immediately by operators.
27974     my @value_requestor = qw(
27975       AUTOLOAD
27976       BEGIN
27977       CHECK
27978       DESTROY
27979       END
27980       EQ
27981       GE
27982       GT
27983       INIT
27984       LE
27985       LT
27986       NE
27987       UNITCHECK
27988       abs
27989       accept
27990       alarm
27991       and
27992       atan2
27993       bind
27994       binmode
27995       bless
27996       break
27997       caller
27998       chdir
27999       chmod
28000       chomp
28001       chop
28002       chown
28003       chr
28004       chroot
28005       close
28006       closedir
28007       cmp
28008       connect
28009       continue
28010       cos
28011       crypt
28012       dbmclose
28013       dbmopen
28014       defined
28015       delete
28016       die
28017       dump
28018       each
28019       else
28020       elsif
28021       eof
28022       eq
28023       exec
28024       exists
28025       exit
28026       exp
28027       fcntl
28028       fileno
28029       flock
28030       for
28031       foreach
28032       formline
28033       ge
28034       getc
28035       getgrgid
28036       getgrnam
28037       gethostbyaddr
28038       gethostbyname
28039       getnetbyaddr
28040       getnetbyname
28041       getpeername
28042       getpgrp
28043       getpriority
28044       getprotobyname
28045       getprotobynumber
28046       getpwnam
28047       getpwuid
28048       getservbyname
28049       getservbyport
28050       getsockname
28051       getsockopt
28052       glob
28053       gmtime
28054       goto
28055       grep
28056       gt
28057       hex
28058       if
28059       index
28060       int
28061       ioctl
28062       join
28063       keys
28064       kill
28065       last
28066       lc
28067       lcfirst
28068       le
28069       length
28070       link
28071       listen
28072       local
28073       localtime
28074       lock
28075       log
28076       lstat
28077       lt
28078       map
28079       mkdir
28080       msgctl
28081       msgget
28082       msgrcv
28083       msgsnd
28084       my
28085       ne
28086       next
28087       no
28088       not
28089       oct
28090       open
28091       opendir
28092       or
28093       ord
28094       our
28095       pack
28096       pipe
28097       pop
28098       pos
28099       print
28100       printf
28101       prototype
28102       push
28103       quotemeta
28104       rand
28105       read
28106       readdir
28107       readlink
28108       readline
28109       readpipe
28110       recv
28111       redo
28112       ref
28113       rename
28114       require
28115       reset
28116       return
28117       reverse
28118       rewinddir
28119       rindex
28120       rmdir
28121       scalar
28122       seek
28123       seekdir
28124       select
28125       semctl
28126       semget
28127       semop
28128       send
28129       sethostent
28130       setnetent
28131       setpgrp
28132       setpriority
28133       setprotoent
28134       setservent
28135       setsockopt
28136       shift
28137       shmctl
28138       shmget
28139       shmread
28140       shmwrite
28141       shutdown
28142       sin
28143       sleep
28144       socket
28145       socketpair
28146       sort
28147       splice
28148       split
28149       sprintf
28150       sqrt
28151       srand
28152       stat
28153       study
28154       substr
28155       symlink
28156       syscall
28157       sysopen
28158       sysread
28159       sysseek
28160       system
28161       syswrite
28162       tell
28163       telldir
28164       tie
28165       tied
28166       truncate
28167       uc
28168       ucfirst
28169       umask
28170       undef
28171       unless
28172       unlink
28173       unpack
28174       unshift
28175       untie
28176       until
28177       use
28178       utime
28179       values
28180       vec
28181       waitpid
28182       warn
28183       while
28184       write
28185       xor
28186
28187       switch
28188       case
28189       given
28190       when
28191       err
28192       say
28193     );
28194
28195     # patched above for SWITCH/CASE given/when err say
28196     # 'err' is a fairly safe addition.
28197     # TODO: 'default' still needed if appropriate
28198     # 'use feature' seen, but perltidy works ok without it.
28199     # Concerned that 'default' could break code.
28200     push( @Keywords, @value_requestor );
28201
28202     # These are treated the same but are not keywords:
28203     my @extra_vr = qw(
28204       constant
28205       vars
28206     );
28207     push( @value_requestor, @extra_vr );
28208
28209     @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
28210
28211     # this list contains keywords which do not look for arguments,
28212     # so that they might be followed by an operator, or at least
28213     # not a term.
28214     my @operator_requestor = qw(
28215       endgrent
28216       endhostent
28217       endnetent
28218       endprotoent
28219       endpwent
28220       endservent
28221       fork
28222       getgrent
28223       gethostent
28224       getlogin
28225       getnetent
28226       getppid
28227       getprotoent
28228       getpwent
28229       getservent
28230       setgrent
28231       setpwent
28232       time
28233       times
28234       wait
28235       wantarray
28236     );
28237
28238     push( @Keywords, @operator_requestor );
28239
28240     # These are treated the same but are not considered keywords:
28241     my @extra_or = qw(
28242       STDERR
28243       STDIN
28244       STDOUT
28245     );
28246
28247     push( @operator_requestor, @extra_or );
28248
28249     @expecting_operator_token{@operator_requestor} =
28250       (1) x scalar(@operator_requestor);
28251
28252     # these token TYPES expect trailing operator but not a term
28253     # note: ++ and -- are post-increment and decrement, 'C' = constant
28254     my @operator_requestor_types = qw( ++ -- C <> q );
28255     @expecting_operator_types{@operator_requestor_types} =
28256       (1) x scalar(@operator_requestor_types);
28257
28258     # these token TYPES consume values (terms)
28259     # note: pp and mm are pre-increment and decrement
28260     # f=semicolon in for,  F=file test operator
28261     my @value_requestor_type = qw#
28262       L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
28263       **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
28264       <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
28265       f F pp mm Y p m U J G j >> << ^ t
28266       #;
28267     push( @value_requestor_type, ',' )
28268       ;    # (perl doesn't like a ',' in a qw block)
28269     @expecting_term_types{@value_requestor_type} =
28270       (1) x scalar(@value_requestor_type);
28271
28272     # Note: the following valid token types are not assigned here to
28273     # hashes requesting to be followed by values or terms, but are
28274     # instead currently hard-coded into sub operator_expected:
28275     # ) -> :: Q R Z ] b h i k n v w } #
28276
28277     # For simple syntax checking, it is nice to have a list of operators which
28278     # will really be unhappy if not followed by a term.  This includes most
28279     # of the above...
28280     %really_want_term = %expecting_term_types;
28281
28282     # with these exceptions...
28283     delete $really_want_term{'U'}; # user sub, depends on prototype
28284     delete $really_want_term{'F'}; # file test works on $_ if no following term
28285     delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
28286                                    # let perl do it
28287
28288     @_ = qw(q qq qw qx qr s y tr m);
28289     @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
28290
28291     # These keywords are handled specially in the tokenizer code:
28292     my @special_keywords = qw(
28293       do
28294       eval
28295       format
28296       m
28297       package
28298       q
28299       qq
28300       qr
28301       qw
28302       qx
28303       s
28304       sub
28305       tr
28306       y
28307     );
28308     push( @Keywords, @special_keywords );
28309
28310     # Keywords after which list formatting may be used
28311     # WARNING: do not include |map|grep|eval or perl may die on
28312     # syntax errors (map1.t).
28313     my @keyword_taking_list = qw(
28314       and
28315       chmod
28316       chomp
28317       chop
28318       chown
28319       dbmopen
28320       die
28321       elsif
28322       exec
28323       fcntl
28324       for
28325       foreach
28326       formline
28327       getsockopt
28328       if
28329       index
28330       ioctl
28331       join
28332       kill
28333       local
28334       msgctl
28335       msgrcv
28336       msgsnd
28337       my
28338       open
28339       or
28340       our
28341       pack
28342       print
28343       printf
28344       push
28345       read
28346       readpipe
28347       recv
28348       return
28349       reverse
28350       rindex
28351       seek
28352       select
28353       semctl
28354       semget
28355       send
28356       setpriority
28357       setsockopt
28358       shmctl
28359       shmget
28360       shmread
28361       shmwrite
28362       socket
28363       socketpair
28364       sort
28365       splice
28366       split
28367       sprintf
28368       substr
28369       syscall
28370       sysopen
28371       sysread
28372       sysseek
28373       system
28374       syswrite
28375       tie
28376       unless
28377       unlink
28378       unpack
28379       unshift
28380       until
28381       vec
28382       warn
28383       while
28384     );
28385     @is_keyword_taking_list{@keyword_taking_list} =
28386       (1) x scalar(@keyword_taking_list);
28387
28388     # These are not used in any way yet
28389     #    my @unused_keywords = qw(
28390     #      CORE
28391     #     __FILE__
28392     #     __LINE__
28393     #     __PACKAGE__
28394     #     );
28395
28396     #  The list of keywords was extracted from function 'keyword' in
28397     #  perl file toke.c version 5.005.03, using this utility, plus a
28398     #  little editing: (file getkwd.pl):
28399     #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
28400     #  Add 'get' prefix where necessary, then split into the above lists.
28401     #  This list should be updated as necessary.
28402     #  The list should not contain these special variables:
28403     #  ARGV DATA ENV SIG STDERR STDIN STDOUT
28404     #  __DATA__ __END__
28405
28406     @is_keyword{@Keywords} = (1) x scalar(@Keywords);
28407 }
28408 1;
28409 __END__
28410
28411 =head1 NAME
28412
28413 Perl::Tidy - Parses and beautifies perl source
28414
28415 =head1 SYNOPSIS
28416
28417     use Perl::Tidy;
28418
28419     Perl::Tidy::perltidy(
28420         source            => $source,
28421         destination       => $destination,
28422         stderr            => $stderr,
28423         argv              => $argv,
28424         perltidyrc        => $perltidyrc,
28425         logfile           => $logfile,
28426         errorfile         => $errorfile,
28427         formatter         => $formatter,           # callback object (see below)
28428         dump_options      => $dump_options,
28429         dump_options_type => $dump_options_type,
28430         prefilter         => $prefilter_coderef,
28431         postfilter        => $postfilter_coderef,
28432     );
28433
28434 =head1 DESCRIPTION
28435
28436 This module makes the functionality of the perltidy utility available to perl
28437 scripts.  Any or all of the input parameters may be omitted, in which case the
28438 @ARGV array will be used to provide input parameters as described
28439 in the perltidy(1) man page.
28440
28441 For example, the perltidy script is basically just this:
28442
28443     use Perl::Tidy;
28444     Perl::Tidy::perltidy();
28445
28446 The module accepts input and output streams by a variety of methods.
28447 The following list of parameters may be any of a the following: a
28448 filename, an ARRAY reference, a SCALAR reference, or an object with
28449 either a B<getline> or B<print> method, as appropriate.
28450
28451         source            - the source of the script to be formatted
28452         destination       - the destination of the formatted output
28453         stderr            - standard error output
28454         perltidyrc        - the .perltidyrc file
28455         logfile           - the .LOG file stream, if any
28456         errorfile         - the .ERR file stream, if any
28457         dump_options      - ref to a hash to receive parameters (see below),
28458         dump_options_type - controls contents of dump_options
28459         dump_getopt_flags - ref to a hash to receive Getopt flags
28460         dump_options_category - ref to a hash giving category of options
28461         dump_abbreviations    - ref to a hash giving all abbreviations
28462
28463 The following chart illustrates the logic used to decide how to
28464 treat a parameter.
28465
28466    ref($param)  $param is assumed to be:
28467    -----------  ---------------------
28468    undef        a filename
28469    SCALAR       ref to string
28470    ARRAY        ref to array
28471    (other)      object with getline (if source) or print method
28472
28473 If the parameter is an object, and the object has a B<close> method, that
28474 close method will be called at the end of the stream.
28475
28476 =over 4
28477
28478 =item source
28479
28480 If the B<source> parameter is given, it defines the source of the
28481 input stream.
28482
28483 =item destination
28484
28485 If the B<destination> parameter is given, it will be used to define the
28486 file or memory location to receive output of perltidy.
28487
28488 =item stderr
28489
28490 The B<stderr> parameter allows the calling program to capture the output
28491 to what would otherwise go to the standard error output device.
28492
28493 =item perltidyrc
28494
28495 If the B<perltidyrc> file is given, it will be used instead of any
28496 F<.perltidyrc> configuration file that would otherwise be used.
28497
28498 =item argv
28499
28500 If the B<argv> parameter is given, it will be used instead of the
28501 B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
28502 string, or a reference to an array.  If it is a string or reference to a
28503 string, it will be parsed into an array of items just as if it were a
28504 command line string.
28505
28506 =item dump_options
28507
28508 If the B<dump_options> parameter is given, it must be the reference to a hash.
28509 In this case, the parameters contained in any perltidyrc configuration file
28510 will be placed in this hash and perltidy will return immediately.  This is
28511 equivalent to running perltidy with --dump-options, except that the perameters
28512 are returned in a hash rather than dumped to standard output.  Also, by default
28513 only the parameters in the perltidyrc file are returned, but this can be
28514 changed (see the next parameter).  This parameter provides a convenient method
28515 for external programs to read a perltidyrc file.  An example program using
28516 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
28517
28518 Any combination of the B<dump_> parameters may be used together.
28519
28520 =item dump_options_type
28521
28522 This parameter is a string which can be used to control the parameters placed
28523 in the hash reference supplied by B<dump_options>.  The possible values are
28524 'perltidyrc' (default) and 'full'.  The 'full' parameter causes both the
28525 default options plus any options found in a perltidyrc file to be returned.
28526
28527 =item dump_getopt_flags
28528
28529 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
28530 hash.  This hash will receive all of the parameters that perltidy understands
28531 and flags that are passed to Getopt::Long.  This parameter may be
28532 used alone or with the B<dump_options> flag.  Perltidy will
28533 exit immediately after filling this hash.  See the demo program
28534 F<perltidyrc_dump.pl> for example usage.
28535
28536 =item dump_options_category
28537
28538 If the B<dump_options_category> parameter is given, it must be the reference to a
28539 hash.  This hash will receive a hash with keys equal to all long parameter names
28540 and values equal to the title of the corresponding section of the perltidy manual.
28541 See the demo program F<perltidyrc_dump.pl> for example usage.
28542
28543 =item dump_abbreviations
28544
28545 If the B<dump_abbreviations> parameter is given, it must be the reference to a
28546 hash.  This hash will receive all abbreviations used by Perl::Tidy.  See the
28547 demo program F<perltidyrc_dump.pl> for example usage.
28548
28549 =item prefilter
28550
28551 A code reference that will be applied to the source before tidying. It is
28552 expected to take the full content as a string in its input, and output the
28553 transformed content.
28554
28555 =item postfilter
28556
28557 A code reference that will be applied to the tidied result before outputting.
28558 It is expected to take the full content as a string in its input, and output
28559 the transformed content.
28560
28561 Note: A convenient way to check the function of your custom prefilter and
28562 postfilter code is to use the --notidy option, first with just the prefilter
28563 and then with both the prefilter and postfilter.  See also the file
28564 B<filter_example.pl> in the perltidy distribution.
28565
28566 =back
28567
28568 =head1 EXAMPLE
28569
28570 The following example passes perltidy a snippet as a reference
28571 to a string and receives the result back in a reference to
28572 an array.
28573
28574  use Perl::Tidy;
28575
28576  # some messy source code to format
28577  my $source = <<'EOM';
28578  use strict;
28579  my @editors=('Emacs', 'Vi   '); my $rand = rand();
28580  print "A poll of 10 random programmers gave these results:\n";
28581  foreach(0..10) {
28582  my $i=int ($rand+rand());
28583  print " $editors[$i] users are from Venus" . ", " .
28584  "$editors[1-$i] users are from Mars" .
28585  "\n";
28586  }
28587  EOM
28588
28589  # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
28590  my @dest;
28591  perltidy( source => \$source, destination => \@dest );
28592  foreach (@dest) {print}
28593
28594 =head1 Using the B<formatter> Callback Object
28595
28596 The B<formatter> parameter is an optional callback object which allows
28597 the calling program to receive tokenized lines directly from perltidy for
28598 further specialized processing.  When this parameter is used, the two
28599 formatting options which are built into perltidy (beautification or
28600 html) are ignored.  The following diagram illustrates the logical flow:
28601
28602                     |-- (normal route)   -> code beautification
28603   caller->perltidy->|-- (-html flag )    -> create html
28604                     |-- (formatter given)-> callback to write_line
28605
28606 This can be useful for processing perl scripts in some way.  The
28607 parameter C<$formatter> in the perltidy call,
28608
28609         formatter   => $formatter,
28610
28611 is an object created by the caller with a C<write_line> method which
28612 will accept and process tokenized lines, one line per call.  Here is
28613 a simple example of a C<write_line> which merely prints the line number,
28614 the line type (as determined by perltidy), and the text of the line:
28615
28616  sub write_line {
28617
28618      # This is called from perltidy line-by-line
28619      my $self              = shift;
28620      my $line_of_tokens    = shift;
28621      my $line_type         = $line_of_tokens->{_line_type};
28622      my $input_line_number = $line_of_tokens->{_line_number};
28623      my $input_line        = $line_of_tokens->{_line_text};
28624      print "$input_line_number:$line_type:$input_line";
28625  }
28626
28627 The complete program, B<perllinetype>, is contained in the examples section of
28628 the source distribution.  As this example shows, the callback method
28629 receives a parameter B<$line_of_tokens>, which is a reference to a hash
28630 of other useful information.  This example uses these hash entries:
28631
28632  $line_of_tokens->{_line_number} - the line number (1,2,...)
28633  $line_of_tokens->{_line_text}   - the text of the line
28634  $line_of_tokens->{_line_type}   - the type of the line, one of:
28635
28636     SYSTEM         - system-specific code before hash-bang line
28637     CODE           - line of perl code (including comments)
28638     POD_START      - line starting pod, such as '=head'
28639     POD            - pod documentation text
28640     POD_END        - last line of pod section, '=cut'
28641     HERE           - text of here-document
28642     HERE_END       - last line of here-doc (target word)
28643     FORMAT         - format section
28644     FORMAT_END     - last line of format section, '.'
28645     DATA_START     - __DATA__ line
28646     DATA           - unidentified text following __DATA__
28647     END_START      - __END__ line
28648     END            - unidentified text following __END__
28649     ERROR          - we are in big trouble, probably not a perl script
28650
28651 Most applications will be only interested in lines of type B<CODE>.  For
28652 another example, let's write a program which checks for one of the
28653 so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
28654 can slow down processing.  Here is a B<write_line>, from the example
28655 program B<find_naughty.pl>, which does that:
28656
28657  sub write_line {
28658
28659      # This is called back from perltidy line-by-line
28660      # We're looking for $`, $&, and $'
28661      my ( $self, $line_of_tokens ) = @_;
28662
28663      # pull out some stuff we might need
28664      my $line_type         = $line_of_tokens->{_line_type};
28665      my $input_line_number = $line_of_tokens->{_line_number};
28666      my $input_line        = $line_of_tokens->{_line_text};
28667      my $rtoken_type       = $line_of_tokens->{_rtoken_type};
28668      my $rtokens           = $line_of_tokens->{_rtokens};
28669      chomp $input_line;
28670
28671      # skip comments, pod, etc
28672      return if ( $line_type ne 'CODE' );
28673
28674      # loop over tokens looking for $`, $&, and $'
28675      for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
28676
28677          # we only want to examine token types 'i' (identifier)
28678          next unless $$rtoken_type[$j] eq 'i';
28679
28680          # pull out the actual token text
28681          my $token = $$rtokens[$j];
28682
28683          # and check it
28684          if ( $token =~ /^\$[\`\&\']$/ ) {
28685              print STDERR
28686                "$input_line_number: $token\n";
28687          }
28688      }
28689  }
28690
28691 This example pulls out these tokenization variables from the $line_of_tokens
28692 hash reference:
28693
28694      $rtoken_type = $line_of_tokens->{_rtoken_type};
28695      $rtokens     = $line_of_tokens->{_rtokens};
28696
28697 The variable C<$rtoken_type> is a reference to an array of token type codes,
28698 and C<$rtokens> is a reference to a corresponding array of token text.
28699 These are obviously only defined for lines of type B<CODE>.
28700 Perltidy classifies tokens into types, and has a brief code for each type.
28701 You can get a complete list at any time by running perltidy from the
28702 command line with
28703
28704      perltidy --dump-token-types
28705
28706 In the present example, we are only looking for tokens of type B<i>
28707 (identifiers), so the for loop skips past all other types.  When an
28708 identifier is found, its actual text is checked to see if it is one
28709 being sought.  If so, the above write_line prints the token and its
28710 line number.
28711
28712 The B<formatter> feature is relatively new in perltidy, and further
28713 documentation needs to be written to complete its description.  However,
28714 several example programs have been written and can be found in the
28715 B<examples> section of the source distribution.  Probably the best way
28716 to get started is to find one of the examples which most closely matches
28717 your application and start modifying it.
28718
28719 For help with perltidy's pecular way of breaking lines into tokens, you
28720 might run, from the command line,
28721
28722  perltidy -D filename
28723
28724 where F<filename> is a short script of interest.  This will produce
28725 F<filename.DEBUG> with interleaved lines of text and their token types.
28726 The B<-D> flag has been in perltidy from the beginning for this purpose.
28727 If you want to see the code which creates this file, it is
28728 C<write_debug_entry> in Tidy.pm.
28729
28730 =head1 EXPORT
28731
28732   &perltidy
28733
28734 =head1 CREDITS
28735
28736 Thanks to Hugh Myers who developed the initial modular interface
28737 to perltidy.
28738
28739 =head1 VERSION
28740
28741 This man page documents Perl::Tidy version 20101217.
28742
28743 =head1 AUTHOR
28744
28745  Steve Hancock
28746  perltidy at users.sourceforge.net
28747
28748 =head1 SEE ALSO
28749
28750 The perltidy(1) man page describes all of the features of perltidy.  It
28751 can be found at http://perltidy.sourceforge.net.
28752
28753 =cut