lib/Perl/Tidy.pm

   1 #
   2 ############################################################
   3 #
   4 #    perltidy - a perl script indenter and formatter
   5 #
   6 #    Copyright (c) 2000-2009 by Steve Hancock
   7 #    Distributed under the GPL license agreement; see file COPYING
   8 #
   9 #    This program is free software; you can redistribute it and/or modify
  10 #    it under the terms of the GNU General Public License as published by
  11 #    the Free Software Foundation; either version 2 of the License, or
  12 #    (at your option) any later version.
  13 #
  14 #    This program is distributed in the hope that it will be useful,
  15 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 #    GNU General Public License for more details.
  18 #
  19 #    You should have received a copy of the GNU General Public License
  20 #    along with this program; if not, write to the Free Software
  21 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22 #
  23 #    For brief instructions, try 'perltidy -h'.
  24 #    For more complete documentation, try 'man perltidy'
  25 #    or visit http://perltidy.sourceforge.net
  26 #
  27 #    This script is an example of the default style.  It was formatted with:
  28 #
  29 #      perltidy Tidy.pm
  30 #
  31 #    Code Contributions: See ChangeLog.html for a complete history.
  32 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  33 #        v-strings.
  34 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  35 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  36 #      Yves Orton supplied coding to help detect Windows versions.
  37 #      Axel Rose supplied a patch for MacPerl.
  38 #      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
  39 #      Dan Tyrell contributed a patch for binary I/O.
  40 #      Ueli Hugenschmidt contributed a patch for -fpsc
  41 #      Sam Kington supplied a patch to identify the initial indentation of
  42 #      entabbed code.
  43 #      jonathan swartz supplied patches for:
  44 #      * .../ pattern, which looks upwards from directory
  45 #      * --notidy, to be used in directories where we want to avoid
  46 #        accidentally tidying
  47 #      * prefilter and postfilter
  48 #      * iterations option
  49 #
  50 #      Many others have supplied key ideas, suggestions, and bug reports;
  51 #        see the CHANGES file.
  52 #
  53 ############################################################
  54
  55 package Perl::Tidy;
  56 use 5.004;    # need IO::File from 5.004 or later
  57 BEGIN { $^W = 1; }    # turn on warnings
  58
  59 use strict;
  60 use Exporter;
  61 use Carp;
  62 $|++;
  63
  64 use vars qw{
  65   $VERSION
  66   @ISA
  67   @EXPORT
  68   $missing_file_spec
  69 };
  70
  71 @ISA    = qw( Exporter );
  72 @EXPORT = qw( &perltidy );
  73
  74 use Cwd;
  75 use IO::File;
  76 use File::Basename;
  77
  78 BEGIN {
  79     ( $VERSION = q($Id: Tidy.pm,v 1.74 2010/12/17 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  80 }
  81
  82 sub streamhandle {
  83
  84     # given filename and mode (r or w), create an object which:
  85     #   has a 'getline' method if mode='r', and
  86     #   has a 'print' method if mode='w'.
  87     # The objects also need a 'close' method.
  88     #
  89     # How the object is made:
  90     #
  91     # if $filename is:     Make object using:
  92     # ----------------     -----------------
  93     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
  94     # string               IO::File
  95     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
  96     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
  97     # object               object
  98     #                      (check for 'print' method for 'w' mode)
  99     #                      (check for 'getline' method for 'r' mode)
 100     my $ref = ref( my $filename = shift );
 101     my $mode = shift;
 102     my $New;
 103     my $fh;
 104
 105     # handle a reference
 106     if ($ref) {
 107         if ( $ref eq 'ARRAY' ) {
 108             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
 109         }
 110         elsif ( $ref eq 'SCALAR' ) {
 111             $New = sub { Perl::Tidy::IOScalar->new(@_) };
 112         }
 113         else {
 114
 115             # Accept an object with a getline method for reading. Note:
 116             # IO::File is built-in and does not respond to the defined
 117             # operator.  If this causes trouble, the check can be
 118             # skipped and we can just let it crash if there is no
 119             # getline.
 120             if ( $mode =~ /[rR]/ ) {
 121                 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 122                     $New = sub { $filename };
 123                 }
 124                 else {
 125                     $New = sub { undef };
 126                     confess <<EOM;
 127 ------------------------------------------------------------------------
 128 No 'getline' method is defined for object of class $ref
 129 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 130 ------------------------------------------------------------------------
 131 EOM
 132                 }
 133             }
 134
 135             # Accept an object with a print method for writing.
 136             # See note above about IO::File
 137             if ( $mode =~ /[wW]/ ) {
 138                 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 139                     $New = sub { $filename };
 140                 }
 141                 else {
 142                     $New = sub { undef };
 143                     confess <<EOM;
 144 ------------------------------------------------------------------------
 145 No 'print' method is defined for object of class $ref
 146 Please check your call to Perl::Tidy::perltidy. Trace follows.
 147 ------------------------------------------------------------------------
 148 EOM
 149                 }
 150             }
 151         }
 152     }
 153
 154     # handle a string
 155     else {
 156         if ( $filename eq '-' ) {
 157             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 158         }
 159         else {
 160             $New = sub { IO::File->new(@_) };
 161         }
 162     }
 163     $fh = $New->( $filename, $mode )
 164       or warn "Couldn't open file:$filename in mode:$mode : $!\n";
 165     return $fh, ( $ref or $filename );
 166 }
 167
 168 sub find_input_line_ending {
 169
 170     # Peek at a file and return first line ending character.
 171     # Quietly return undef in case of any trouble.
 172     my ($input_file) = @_;
 173     my $ending;
 174
 175     # silently ignore input from object or stdin
 176     if ( ref($input_file) || $input_file eq '-' ) {
 177         return $ending;
 178     }
 179     open( INFILE, $input_file ) || return $ending;
 180
 181     binmode INFILE;
 182     my $buf;
 183     read( INFILE, $buf, 1024 );
 184     close INFILE;
 185     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 186         my $test = $1;
 187
 188         # dos
 189         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 190
 191         # mac
 192         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 193
 194         # unix
 195         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 196
 197         # unknown
 198         else { }
 199     }
 200
 201     # no ending seen
 202     else { }
 203
 204     return $ending;
 205 }
 206
 207 sub catfile {
 208
 209     # concatenate a path and file basename
 210     # returns undef in case of error
 211
 212     BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 213
 214     # use File::Spec if we can
 215     unless ($missing_file_spec) {
 216         return File::Spec->catfile(@_);
 217     }
 218
 219     # Perl 5.004 systems may not have File::Spec so we'll make
 220     # a simple try.  We assume File::Basename is available.
 221     # return undef if not successful.
 222     my $name      = pop @_;
 223     my $path      = join '/', @_;
 224     my $test_file = $path . $name;
 225     my ( $test_name, $test_path ) = fileparse($test_file);
 226     return $test_file if ( $test_name eq $name );
 227     return undef if ( $^O eq 'VMS' );
 228
 229     # this should work at least for Windows and Unix:
 230     $test_file = $path . '/' . $name;
 231     ( $test_name, $test_path ) = fileparse($test_file);
 232     return $test_file if ( $test_name eq $name );
 233     return undef;
 234 }
 235
 236 sub make_temporary_filename {
 237
 238     # Make a temporary filename.
 239     #
 240     # The POSIX tmpnam() function tends to be unreliable for non-unix
 241     # systems (at least for the win32 systems that I've tested), so use
 242     # a pre-defined name.  A slight disadvantage of this is that two
 243     # perltidy runs in the same working directory may conflict.
 244     # However, the chance of that is small and managable by the user.
 245     # An alternative would be to check for the file's existance and use,
 246     # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
 247     # keep it simple.
 248     my $name = "perltidy.TMP";
 249     if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
 250         return $name;
 251     }
 252     eval "use POSIX qw(tmpnam)";
 253     if ($@) { return $name }
 254     use IO::File;
 255
 256     # just make a couple of tries before giving up and using the default
 257     for ( 0 .. 1 ) {
 258         my $tmpname = tmpnam();
 259         my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
 260         if ($fh) {
 261             $fh->close();
 262             return ($tmpname);
 263             last;
 264         }
 265     }
 266     return ($name);
 267 }
 268
 269 # Here is a map of the flow of data from the input source to the output
 270 # line sink:
 271 #
 272 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 273 #       input                         groups                 output
 274 #       lines   tokens      lines       of          lines    lines
 275 #                                      lines
 276 #
 277 # The names correspond to the package names responsible for the unit processes.
 278 #
 279 # The overall process is controlled by the "main" package.
 280 #
 281 # LineSource is the stream of input lines
 282 #
 283 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 284 # if necessary.  A token is any section of the input line which should be
 285 # manipulated as a single entity during formatting.  For example, a single
 286 # ',' character is a token, and so is an entire side comment.  It handles
 287 # the complexities of Perl syntax, such as distinguishing between '<<' as
 288 # a shift operator and as a here-document, or distinguishing between '/'
 289 # as a divide symbol and as a pattern delimiter.
 290 #
 291 # Formatter inserts and deletes whitespace between tokens, and breaks
 292 # sequences of tokens at appropriate points as output lines.  It bases its
 293 # decisions on the default rules as modified by any command-line options.
 294 #
 295 # VerticalAligner collects groups of lines together and tries to line up
 296 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 297 #
 298 # FileWriter simply writes lines to the output stream.
 299 #
 300 # The Logger package, not shown, records significant events and warning
 301 # messages.  It writes a .LOG file, which may be saved with a
 302 # '-log' or a '-g' flag.
 303
 304 {
 305
 306     # variables needed by interrupt handler:
 307     my $tokenizer;
 308     my $input_file;
 309
 310     # this routine may be called to give a status report if interrupted.  If a
 311     # parameter is given, it will call exit with that parameter.  This is no
 312     # longer used because it works under Unix but not under Windows.
 313     sub interrupt_handler {
 314
 315         my $exit_flag = shift;
 316         print STDERR "perltidy interrupted";
 317         if ($tokenizer) {
 318             my $input_line_number =
 319               Perl::Tidy::Tokenizer::get_input_line_number();
 320             print STDERR " at line $input_line_number";
 321         }
 322         if ($input_file) {
 323
 324             if   ( ref $input_file ) { print STDERR " of reference to:" }
 325             else                     { print STDERR " of file:" }
 326             print STDERR " $input_file";
 327         }
 328         print STDERR "\n";
 329         exit $exit_flag if defined($exit_flag);
 330     }
 331
 332     sub perltidy {
 333
 334         my %defaults = (
 335             argv                  => undef,
 336             destination           => undef,
 337             formatter             => undef,
 338             logfile               => undef,
 339             errorfile             => undef,
 340             perltidyrc            => undef,
 341             source                => undef,
 342             stderr                => undef,
 343             dump_options          => undef,
 344             dump_options_type     => undef,
 345             dump_getopt_flags     => undef,
 346             dump_options_category => undef,
 347             dump_options_range    => undef,
 348             dump_abbreviations    => undef,
 349             prefilter             => undef,
 350             postfilter            => undef,
 351         );
 352
 353         # don't overwrite callers ARGV
 354         local @ARGV = @ARGV;
 355
 356         my %input_hash = @_;
 357
 358         if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 359             local $" = ')(';
 360             my @good_keys = sort keys %defaults;
 361             @bad_keys = sort @bad_keys;
 362             confess <<EOM;
 363 ------------------------------------------------------------------------
 364 Unknown perltidy parameter : (@bad_keys)
 365 perltidy only understands : (@good_keys)
 366 ------------------------------------------------------------------------
 367
 368 EOM
 369         }
 370
 371         my $get_hash_ref = sub {
 372             my ($key) = @_;
 373             my $hash_ref = $input_hash{$key};
 374             if ( defined($hash_ref) ) {
 375                 unless ( ref($hash_ref) eq 'HASH' ) {
 376                     my $what = ref($hash_ref);
 377                     my $but_is =
 378                       $what ? "but is ref to $what" : "but is not a reference";
 379                     croak <<EOM;
 380 ------------------------------------------------------------------------
 381 error in call to perltidy:
 382 -$key must be reference to HASH $but_is
 383 ------------------------------------------------------------------------
 384 EOM
 385                 }
 386             }
 387             return $hash_ref;
 388         };
 389
 390         %input_hash = ( %defaults, %input_hash );
 391         my $argv               = $input_hash{'argv'};
 392         my $destination_stream = $input_hash{'destination'};
 393         my $errorfile_stream   = $input_hash{'errorfile'};
 394         my $logfile_stream     = $input_hash{'logfile'};
 395         my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 396         my $source_stream      = $input_hash{'source'};
 397         my $stderr_stream      = $input_hash{'stderr'};
 398         my $user_formatter     = $input_hash{'formatter'};
 399         my $prefilter          = $input_hash{'prefilter'};
 400         my $postfilter         = $input_hash{'postfilter'};
 401
 402         # various dump parameters
 403         my $dump_options_type     = $input_hash{'dump_options_type'};
 404         my $dump_options          = $get_hash_ref->('dump_options');
 405         my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
 406         my $dump_options_category = $get_hash_ref->('dump_options_category');
 407         my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
 408         my $dump_options_range    = $get_hash_ref->('dump_options_range');
 409
 410         # validate dump_options_type
 411         if ( defined($dump_options) ) {
 412             unless ( defined($dump_options_type) ) {
 413                 $dump_options_type = 'perltidyrc';
 414             }
 415             unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
 416                 croak <<EOM;
 417 ------------------------------------------------------------------------
 418 Please check value of -dump_options_type in call to perltidy;
 419 saw: '$dump_options_type'
 420 expecting: 'perltidyrc' or 'full'
 421 ------------------------------------------------------------------------
 422 EOM
 423
 424             }
 425         }
 426         else {
 427             $dump_options_type = "";
 428         }
 429
 430         if ($user_formatter) {
 431
 432             # if the user defines a formatter, there is no output stream,
 433             # but we need a null stream to keep coding simple
 434             $destination_stream = Perl::Tidy::DevNull->new();
 435         }
 436
 437         # see if ARGV is overridden
 438         if ( defined($argv) ) {
 439
 440             my $rargv = ref $argv;
 441             if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
 442
 443             # ref to ARRAY
 444             if ($rargv) {
 445                 if ( $rargv eq 'ARRAY' ) {
 446                     @ARGV = @$argv;
 447                 }
 448                 else {
 449                     croak <<EOM;
 450 ------------------------------------------------------------------------
 451 Please check value of -argv in call to perltidy;
 452 it must be a string or ref to ARRAY but is: $rargv
 453 ------------------------------------------------------------------------
 454 EOM
 455                 }
 456             }
 457
 458             # string
 459             else {
 460                 my ( $rargv, $msg ) = parse_args($argv);
 461                 if ($msg) {
 462                     die <<EOM;
 463 Error parsing this string passed to to perltidy with 'argv':
 464 $msg
 465 EOM
 466                 }
 467                 @ARGV = @{$rargv};
 468             }
 469         }
 470
 471         # redirect STDERR if requested
 472         if ($stderr_stream) {
 473             my ( $fh_stderr, $stderr_file ) =
 474               Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 475             if ($fh_stderr) { *STDERR = $fh_stderr }
 476             else {
 477                 croak <<EOM;
 478 ------------------------------------------------------------------------
 479 Unable to redirect STDERR to $stderr_stream
 480 Please check value of -stderr in call to perltidy
 481 ------------------------------------------------------------------------
 482 EOM
 483             }
 484         }
 485
 486         my $rpending_complaint;
 487         $$rpending_complaint = "";
 488         my $rpending_logfile_message;
 489         $$rpending_logfile_message = "";
 490
 491         my ( $is_Windows, $Windows_type ) =
 492           look_for_Windows($rpending_complaint);
 493
 494         # VMS file names are restricted to a 40.40 format, so we append _tdy
 495         # instead of .tdy, etc. (but see also sub check_vms_filename)
 496         my $dot;
 497         my $dot_pattern;
 498         if ( $^O eq 'VMS' ) {
 499             $dot         = '_';
 500             $dot_pattern = '_';
 501         }
 502         else {
 503             $dot         = '.';
 504             $dot_pattern = '\.';    # must escape for use in regex
 505         }
 506
 507         # handle command line options
 508         my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
 509             $rexpansion, $roption_category, $roption_range )
 510           = process_command_line(
 511             $perltidyrc_stream,  $is_Windows, $Windows_type,
 512             $rpending_complaint, $dump_options_type,
 513           );
 514
 515         # return or exit immediately after all dumps
 516         my $quit_now = 0;
 517
 518         # Getopt parameters and their flags
 519         if ( defined($dump_getopt_flags) ) {
 520             $quit_now = 1;
 521             foreach my $op ( @{$roption_string} ) {
 522                 my $opt  = $op;
 523                 my $flag = "";
 524
 525                 # Examples:
 526                 #  some-option=s
 527                 #  some-option=i
 528                 #  some-option:i
 529                 #  some-option!
 530                 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
 531                     $opt  = $1;
 532                     $flag = $2;
 533                 }
 534                 $dump_getopt_flags->{$opt} = $flag;
 535             }
 536         }
 537
 538         if ( defined($dump_options_category) ) {
 539             $quit_now = 1;
 540             %{$dump_options_category} = %{$roption_category};
 541         }
 542
 543         if ( defined($dump_options_range) ) {
 544             $quit_now = 1;
 545             %{$dump_options_range} = %{$roption_range};
 546         }
 547
 548         if ( defined($dump_abbreviations) ) {
 549             $quit_now = 1;
 550             %{$dump_abbreviations} = %{$rexpansion};
 551         }
 552
 553         if ( defined($dump_options) ) {
 554             $quit_now = 1;
 555             %{$dump_options} = %{$rOpts};
 556         }
 557
 558         return if ($quit_now);
 559
 560         # make printable string of options for this run as possible diagnostic
 561         my $readable_options = readable_options( $rOpts, $roption_string );
 562
 563         # dump from command line
 564         if ( $rOpts->{'dump-options'} ) {
 565             print STDOUT $readable_options;
 566             exit 1;
 567         }
 568
 569         check_options( $rOpts, $is_Windows, $Windows_type,
 570             $rpending_complaint );
 571
 572         if ($user_formatter) {
 573             $rOpts->{'format'} = 'user';
 574         }
 575
 576         # there must be one entry here for every possible format
 577         my %default_file_extension = (
 578             tidy => 'tdy',
 579             html => 'html',
 580             user => '',
 581         );
 582
 583         # be sure we have a valid output format
 584         unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 585             my $formats = join ' ',
 586               sort map { "'" . $_ . "'" } keys %default_file_extension;
 587             my $fmt = $rOpts->{'format'};
 588             die "-format='$fmt' but must be one of: $formats\n";
 589         }
 590
 591         my $output_extension =
 592           make_extension( $rOpts->{'output-file-extension'},
 593             $default_file_extension{ $rOpts->{'format'} }, $dot );
 594
 595         my $backup_extension =
 596           make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 597
 598         my $html_toc_extension =
 599           make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 600
 601         my $html_src_extension =
 602           make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 603
 604         # check for -b option;
 605         my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 606           && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
 607           && @ARGV > 0;    # silently ignore if standard input;
 608                            # this allows -b to be in a .perltidyrc file
 609                            # without error messages when running from an editor
 610
 611         # turn off -b with warnings in case of conflicts with other options
 612         if ($in_place_modify) {
 613             if ( $rOpts->{'standard-output'} ) {
 614                 warn "Ignoring -b; you may not use -b and -st together\n";
 615                 $in_place_modify = 0;
 616             }
 617             if ($destination_stream) {
 618                 warn
 619 "Ignoring -b; you may not specify a destination array and -b together\n";
 620                 $in_place_modify = 0;
 621             }
 622             if ($source_stream) {
 623                 warn
 624 "Ignoring -b; you may not specify a source array and -b together\n";
 625                 $in_place_modify = 0;
 626             }
 627             if ( $rOpts->{'outfile'} ) {
 628                 warn "Ignoring -b; you may not use -b and -o together\n";
 629                 $in_place_modify = 0;
 630             }
 631             if ( defined( $rOpts->{'output-path'} ) ) {
 632                 warn "Ignoring -b; you may not use -b and -opath together\n";
 633                 $in_place_modify = 0;
 634             }
 635         }
 636
 637         Perl::Tidy::Formatter::check_options($rOpts);
 638         if ( $rOpts->{'format'} eq 'html' ) {
 639             Perl::Tidy::HtmlWriter->check_options($rOpts);
 640         }
 641
 642         # make the pattern of file extensions that we shouldn't touch
 643         my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 644         if ($output_extension) {
 645             my $ext = quotemeta($output_extension);
 646             $forbidden_file_extensions .= "|$ext";
 647         }
 648         if ( $in_place_modify && $backup_extension ) {
 649             my $ext = quotemeta($backup_extension);
 650             $forbidden_file_extensions .= "|$ext";
 651         }
 652         $forbidden_file_extensions .= ')$';
 653
 654         # Create a diagnostics object if requested;
 655         # This is only useful for code development
 656         my $diagnostics_object = undef;
 657         if ( $rOpts->{'DIAGNOSTICS'} ) {
 658             $diagnostics_object = Perl::Tidy::Diagnostics->new();
 659         }
 660
 661         # no filenames should be given if input is from an array
 662         if ($source_stream) {
 663             if ( @ARGV > 0 ) {
 664                 die
 665 "You may not specify any filenames when a source array is given\n";
 666             }
 667
 668             # we'll stuff the source array into ARGV
 669             unshift( @ARGV, $source_stream );
 670
 671             # No special treatment for source stream which is a filename.
 672             # This will enable checks for binary files and other bad stuff.
 673             $source_stream = undef unless ref($source_stream);
 674         }
 675
 676         # use stdin by default if no source array and no args
 677         else {
 678             unshift( @ARGV, '-' ) unless @ARGV;
 679         }
 680
 681         # loop to process all files in argument list
 682         my $number_of_files = @ARGV;
 683         my $formatter       = undef;
 684         $tokenizer = undef;
 685         while ( $input_file = shift @ARGV ) {
 686             my $fileroot;
 687             my $input_file_permissions;
 688
 689             #---------------------------------------------------------------
 690             # determine the input file name
 691             #---------------------------------------------------------------
 692             if ($source_stream) {
 693                 $fileroot = "perltidy";
 694             }
 695             elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 696                 $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
 697                 $in_place_modify = 0;
 698             }
 699             else {
 700                 $fileroot = $input_file;
 701                 unless ( -e $input_file ) {
 702
 703                     # file doesn't exist - check for a file glob
 704                     if ( $input_file =~ /([\?\*\[\{])/ ) {
 705
 706                         # Windows shell may not remove quotes, so do it
 707                         my $input_file = $input_file;
 708                         if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 709                         if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 710                         my $pattern = fileglob_to_re($input_file);
 711                         ##eval "/$pattern/";
 712                         if ( !$@ && opendir( DIR, './' ) ) {
 713                             my @files =
 714                               grep { /$pattern/ && !-d $_ } readdir(DIR);
 715                             closedir(DIR);
 716                             if (@files) {
 717                                 unshift @ARGV, @files;
 718                                 next;
 719                             }
 720                         }
 721                     }
 722                     print "skipping file: '$input_file': no matches found\n";
 723                     next;
 724                 }
 725
 726                 unless ( -f $input_file ) {
 727                     print "skipping file: $input_file: not a regular file\n";
 728                     next;
 729                 }
 730
 731                 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 732                     print
 733 "skipping file: $input_file: Non-text (override with -f)\n";
 734                     next;
 735                 }
 736
 737                 # we should have a valid filename now
 738                 $fileroot               = $input_file;
 739                 $input_file_permissions = ( stat $input_file )[2] & 07777;
 740
 741                 if ( $^O eq 'VMS' ) {
 742                     ( $fileroot, $dot ) = check_vms_filename($fileroot);
 743                 }
 744
 745                 # add option to change path here
 746                 if ( defined( $rOpts->{'output-path'} ) ) {
 747
 748                     my ( $base, $old_path ) = fileparse($fileroot);
 749                     my $new_path = $rOpts->{'output-path'};
 750                     unless ( -d $new_path ) {
 751                         unless ( mkdir $new_path, 0777 ) {
 752                             die "unable to create directory $new_path: $!\n";
 753                         }
 754                     }
 755                     my $path = $new_path;
 756                     $fileroot = catfile( $path, $base );
 757                     unless ($fileroot) {
 758                         die <<EOM;
 759 ------------------------------------------------------------------------
 760 Problem combining $new_path and $base to make a filename; check -opath
 761 ------------------------------------------------------------------------
 762 EOM
 763                     }
 764                 }
 765             }
 766
 767             # Skip files with same extension as the output files because
 768             # this can lead to a messy situation with files like
 769             # script.tdy.tdy.tdy ... or worse problems ...  when you
 770             # rerun perltidy over and over with wildcard input.
 771             if (
 772                 !$source_stream
 773                 && (   $input_file =~ /$forbidden_file_extensions/o
 774                     || $input_file eq 'DIAGNOSTICS' )
 775               )
 776             {
 777                 print "skipping file: $input_file: wrong extension\n";
 778                 next;
 779             }
 780
 781             # the 'source_object' supplies a method to read the input file
 782             my $source_object =
 783               Perl::Tidy::LineSource->new( $input_file, $rOpts,
 784                 $rpending_logfile_message );
 785             next unless ($source_object);
 786
 787             # Prefilters and postfilters: The prefilter is a code reference
 788             # that will be applied to the source before tidying, and the
 789             # postfilter is a code reference to the result before outputting.
 790             if ($prefilter) {
 791                 my $buf = '';
 792                 while ( my $line = $source_object->get_line() ) {
 793                     $buf .= $line;
 794                 }
 795                 $buf = $prefilter->($buf);
 796
 797                 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
 798                     $rpending_logfile_message );
 799             }
 800
 801             # register this file name with the Diagnostics package
 802             $diagnostics_object->set_input_file($input_file)
 803               if $diagnostics_object;
 804
 805             #---------------------------------------------------------------
 806             # determine the output file name
 807             #---------------------------------------------------------------
 808             my $output_file = undef;
 809             my $actual_output_extension;
 810
 811             if ( $rOpts->{'outfile'} ) {
 812
 813                 if ( $number_of_files <= 1 ) {
 814
 815                     if ( $rOpts->{'standard-output'} ) {
 816                         die "You may not use -o and -st together\n";
 817                     }
 818                     elsif ($destination_stream) {
 819                         die
 820 "You may not specify a destination array and -o together\n";
 821                     }
 822                     elsif ( defined( $rOpts->{'output-path'} ) ) {
 823                         die "You may not specify -o and -opath together\n";
 824                     }
 825                     elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 826                         die "You may not specify -o and -oext together\n";
 827                     }
 828                     $output_file = $rOpts->{outfile};
 829
 830                     # make sure user gives a file name after -o
 831                     if ( $output_file =~ /^-/ ) {
 832                         die "You must specify a valid filename after -o\n";
 833                     }
 834
 835                     # do not overwrite input file with -o
 836                     if ( defined($input_file_permissions)
 837                         && ( $output_file eq $input_file ) )
 838                     {
 839                         die
 840                           "Use 'perltidy -b $input_file' to modify in-place\n";
 841                     }
 842                 }
 843                 else {
 844                     die "You may not use -o with more than one input file\n";
 845                 }
 846             }
 847             elsif ( $rOpts->{'standard-output'} ) {
 848                 if ($destination_stream) {
 849                     die
 850 "You may not specify a destination array and -st together\n";
 851                 }
 852                 $output_file = '-';
 853
 854                 if ( $number_of_files <= 1 ) {
 855                 }
 856                 else {
 857                     die "You may not use -st with more than one input file\n";
 858                 }
 859             }
 860             elsif ($destination_stream) {
 861                 $output_file = $destination_stream;
 862             }
 863             elsif ($source_stream) {  # source but no destination goes to stdout
 864                 $output_file = '-';
 865             }
 866             elsif ( $input_file eq '-' ) {
 867                 $output_file = '-';
 868             }
 869             else {
 870                 if ($in_place_modify) {
 871                     $output_file = IO::File->new_tmpfile()
 872                       or die "cannot open temp file for -b option: $!\n";
 873                 }
 874                 else {
 875                     $actual_output_extension = $output_extension;
 876                     $output_file             = $fileroot . $output_extension;
 877                 }
 878             }
 879
 880             # the 'sink_object' knows how to write the output file
 881             my $tee_file = $fileroot . $dot . "TEE";
 882
 883             my $line_separator = $rOpts->{'output-line-ending'};
 884             if ( $rOpts->{'preserve-line-endings'} ) {
 885                 $line_separator = find_input_line_ending($input_file);
 886             }
 887
 888             # Eventually all I/O may be done with binmode, but for now it is
 889             # only done when a user requests a particular line separator
 890             # through the -ple or -ole flags
 891             my $binmode = 0;
 892             if   ( defined($line_separator) ) { $binmode        = 1 }
 893             else                              { $line_separator = "\n" }
 894
 895             my ( $sink_object, $postfilter_buffer );
 896             if ($postfilter) {
 897                 $sink_object =
 898                   Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
 899                     $line_separator, $rOpts, $rpending_logfile_message,
 900                     $binmode );
 901             }
 902             else {
 903                 $sink_object =
 904                   Perl::Tidy::LineSink->new( $output_file, $tee_file,
 905                     $line_separator, $rOpts, $rpending_logfile_message,
 906                     $binmode );
 907             }
 908
 909             #---------------------------------------------------------------
 910             # initialize the error logger
 911             #---------------------------------------------------------------
 912             my $warning_file = $fileroot . $dot . "ERR";
 913             if ($errorfile_stream) { $warning_file = $errorfile_stream }
 914             my $log_file = $fileroot . $dot . "LOG";
 915             if ($logfile_stream) { $log_file = $logfile_stream }
 916
 917             my $logger_object =
 918               Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 919                 $saw_extrude );
 920             write_logfile_header(
 921                 $rOpts,        $logger_object, $config_file,
 922                 $rraw_options, $Windows_type,  $readable_options,
 923             );
 924             if ($$rpending_logfile_message) {
 925                 $logger_object->write_logfile_entry($$rpending_logfile_message);
 926             }
 927             if ($$rpending_complaint) {
 928                 $logger_object->complain($$rpending_complaint);
 929             }
 930
 931             #---------------------------------------------------------------
 932             # initialize the debug object, if any
 933             #---------------------------------------------------------------
 934             my $debugger_object = undef;
 935             if ( $rOpts->{DEBUG} ) {
 936                 $debugger_object =
 937                   Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 938             }
 939
 940             # loop over iterations
 941             my $max_iterations    = $rOpts->{'iterations'};
 942             my $sink_object_final = $sink_object;
 943             for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
 944                 my $temp_buffer;
 945
 946                 # local copies of some debugging objects which get deleted
 947                 # after first iteration, but will reappear after this loop
 948                 my $debugger_object    = $debugger_object;
 949                 my $logger_object      = $logger_object;
 950                 my $diagnostics_object = $diagnostics_object;
 951
 952                 # output to temp buffer until last iteration
 953                 if ( $iter < $max_iterations ) {
 954                     $sink_object =
 955                       Perl::Tidy::LineSink->new( \$temp_buffer, $tee_file,
 956                         $line_separator, $rOpts, $rpending_logfile_message,
 957                         $binmode );
 958                 }
 959                 else {
 960                     $sink_object = $sink_object_final;
 961
 962                     # terminate some debugging output after first pass
 963                     # to avoid needless output.
 964                     $debugger_object    = undef;
 965                     $logger_object      = undef;
 966                     $diagnostics_object = undef;
 967                 }
 968
 969               #---------------------------------------------------------------
 970               # create a formatter for this file : html writer or pretty printer
 971               #---------------------------------------------------------------
 972
 973                 # we have to delete any old formatter because, for safety,
 974                 # the formatter will check to see that there is only one.
 975                 $formatter = undef;
 976
 977                 if ($user_formatter) {
 978                     $formatter = $user_formatter;
 979                 }
 980                 elsif ( $rOpts->{'format'} eq 'html' ) {
 981                     $formatter =
 982                       Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
 983                         $actual_output_extension, $html_toc_extension,
 984                         $html_src_extension );
 985                 }
 986                 elsif ( $rOpts->{'format'} eq 'tidy' ) {
 987                     $formatter = Perl::Tidy::Formatter->new(
 988                         logger_object      => $logger_object,
 989                         diagnostics_object => $diagnostics_object,
 990                         sink_object        => $sink_object,
 991                     );
 992                 }
 993                 else {
 994                     die "I don't know how to do -format=$rOpts->{'format'}\n";
 995                 }
 996
 997                 unless ($formatter) {
 998                     die
 999                       "Unable to continue with $rOpts->{'format'} formatting\n";
1000                 }
1001
1002                 #---------------------------------------------------------------
1003                 # create the tokenizer for this file
1004                 #---------------------------------------------------------------
1005                 $tokenizer = undef;    # must destroy old tokenizer
1006                 $tokenizer = Perl::Tidy::Tokenizer->new(
1007                     source_object      => $source_object,
1008                     logger_object      => $logger_object,
1009                     debugger_object    => $debugger_object,
1010                     diagnostics_object => $diagnostics_object,
1011                     starting_level => $rOpts->{'starting-indentation-level'},
1012                     tabs           => $rOpts->{'tabs'},
1013                     entab_leading_space => $rOpts->{'entab-leading-whitespace'},
1014                     indent_columns      => $rOpts->{'indent-columns'},
1015                     look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
1016                     look_for_autoloader => $rOpts->{'look-for-autoloader'},
1017                     look_for_selfloader => $rOpts->{'look-for-selfloader'},
1018                     trim_qw             => $rOpts->{'trim-qw'},
1019                 );
1020
1021                 #---------------------------------------------------------------
1022                 # now we can do it
1023                 #---------------------------------------------------------------
1024                 process_this_file( $tokenizer, $formatter );
1025
1026                 #---------------------------------------------------------------
1027                 # close the input source and report errors
1028                 #---------------------------------------------------------------
1029                 $source_object->close_input_file();
1030
1031                 # line source for next iteration (if any) comes from the current
1032                 # temporary buffer
1033                 if ( $iter < $max_iterations ) {
1034                     $source_object =
1035                       Perl::Tidy::LineSource->new( \$temp_buffer, $rOpts,
1036                         $rpending_logfile_message );
1037                 }
1038
1039             }    # end loop over iterations
1040
1041             # get file names to use for syntax check
1042             my $ifname = $source_object->get_input_file_copy_name();
1043             my $ofname = $sink_object->get_output_file_copy();
1044
1045             #---------------------------------------------------------------
1046             # handle the -b option (backup and modify in-place)
1047             #---------------------------------------------------------------
1048             if ($in_place_modify) {
1049                 unless ( -f $input_file ) {
1050
1051                     # oh, oh, no real file to backup ..
1052                     # shouldn't happen because of numerous preliminary checks
1053                     die print
1054 "problem with -b backing up input file '$input_file': not a file\n";
1055                 }
1056                 my $backup_name = $input_file . $backup_extension;
1057                 if ( -f $backup_name ) {
1058                     unlink($backup_name)
1059                       or die
1060 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1061                 }
1062                 rename( $input_file, $backup_name )
1063                   or die
1064 "problem renaming $input_file to $backup_name for -b option: $!\n";
1065                 $ifname = $backup_name;
1066
1067                 seek( $output_file, 0, 0 )
1068                   or die "unable to rewind tmp file for -b option: $!\n";
1069
1070                 my $fout = IO::File->new("> $input_file")
1071                   or die
1072 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
1073                 binmode $fout;
1074                 my $line;
1075                 while ( $line = $output_file->getline() ) {
1076                     $fout->print($line);
1077                 }
1078                 $fout->close();
1079                 $output_file = $input_file;
1080                 $ofname      = $input_file;
1081             }
1082
1083             #---------------------------------------------------------------
1084             # clean up and report errors
1085             #---------------------------------------------------------------
1086             $sink_object->close_output_file()    if $sink_object;
1087             $debugger_object->close_debug_file() if $debugger_object;
1088
1089             if ($postfilter) {
1090                 my $new_sink =
1091                   Perl::Tidy::LineSink->new( $output_file, $tee_file,
1092                     $line_separator, $rOpts, $rpending_logfile_message,
1093                     $binmode );
1094                 my $buf = $postfilter->($postfilter_buffer);
1095                 foreach my $line ( split( "\n", $buf ) ) {
1096                     $new_sink->write_line($line);
1097                 }
1098             }
1099
1100             my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1101             if ($output_file) {
1102
1103                 if ($input_file_permissions) {
1104
1105                     # give output script same permissions as input script, but
1106                     # make it user-writable or else we can't run perltidy again.
1107                     # Thus we retain whatever executable flags were set.
1108                     if ( $rOpts->{'format'} eq 'tidy' ) {
1109                         chmod( $input_file_permissions | 0600, $output_file );
1110                     }
1111
1112                     # else use default permissions for html and any other format
1113
1114                 }
1115                 if ( $logger_object && $rOpts->{'check-syntax'} ) {
1116                     $infile_syntax_ok =
1117                       check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1118                 }
1119             }
1120
1121             $logger_object->finish( $infile_syntax_ok, $formatter )
1122               if $logger_object;
1123         }    # end of loop to process all files
1124     }    # end of main program
1125 }
1126
sub fileglob_to_re {

    # Convert a simple file glob into the text of an anchored regular
    # expression.
    # modified (corrected) from version in find2perl
    #
    # Parameter:
    #   the glob string, in which '*' stands for any run of characters
    #   and '?' for any single character
    # Returns:
    #   a pattern string of the form "^...\z" suitable for interpolation
    #   into a match
    #
    # Besides the original set ( . / ^ $ ( ) ), the characters + | { }
    # are now escaped as well.  Left bare, a name such as 'c++.pl' turns
    # into the possessive quantifier 'c++' and never matches itself, and
    # a '|' splits the anchored pattern into two alternatives.
    # Square brackets and backslashes are still passed through unchanged.
    my $x = shift;
    $x =~ s#([./^\$()+|{}])#\\$1#g;    # escape special characters
    $x =~ s#\*#.*#g;                   # '*' -> '.*'
    $x =~ s#\?#.#g;                    # '?' -> '.'
    return "^$x\\z";                   # match whole word
}
1136
sub make_extension {

    # Make a file extension, including any leading '.' if necessary
    # The '.' may actually be an '_' under VMS
    #
    # Parameters:
    #   $extension - the requested extension; may be undefined or empty
    #   $default   - the extension to use when none was requested
    #   $dot       - the separator to prepend (see note above)
    # Returns:
    #   the extension, with $dot prepended when it begins with an ASCII
    #   letter or digit; otherwise the extension unchanged
    my ( $extension, $default, $dot ) = @_;

    # Use the default if none specified.  The test is for a defined,
    # non-empty string rather than for truth, so that a requested
    # extension of '0' is honored instead of being replaced.
    unless ( defined($extension) && length($extension) ) {
        $extension = $default;
    }

    # Only extensions with these leading characters get a '.'
    # This rule gives the user some freedom
    if ( $extension =~ /^[a-zA-Z0-9]/ ) {
        $extension = $dot . $extension;
    }
    return $extension;
}
1153
sub write_logfile_header {

    # Write the opening entries of a log file through the logger object:
    # the version/platform line, the optional Windows type and
    # configuration file notes, the raw option list and, when DEBUG or
    # show-options is set, the readable dump of the final parameters.
    # Setting either of those flags also switches on $rOpts->{'logfile'}.
    my (
        $rOpts,        $logger_object, $config_file,
        $rraw_options, $Windows_type,  $readable_options
    ) = @_;

    # every entry is handed to the logger's write_logfile_entry method
    my $emit = sub {
        my ($text) = @_;
        $logger_object->write_logfile_entry($text);
    };

    $emit->(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
    );
    $emit->("Windows type is $Windows_type\n") if ($Windows_type);
    $emit->("Found Configuration File >>> $config_file \n") if ($config_file);

    # the raw options, joined with single blanks
    my $raw_option_text = join( ' ', @{$rraw_options} );
    $emit->("Configuration and command line parameters for this run:\n");
    $emit->("$raw_option_text\n");

    my $want_full_dump = $rOpts->{'DEBUG'} || $rOpts->{'show-options'};
    if ($want_full_dump) {
        my $rule = "------------------------------------\n";
        $rOpts->{'logfile'} = 1;    # force logfile to be saved
        $emit->("Final parameter set for this run\n");
        $emit->($rule);
        $emit->($readable_options);
        $emit->($rule);
    }

    $emit->("To find error messages search for 'WARNING' with your editor\n");
}
1190
1191 sub generate_options {
1192
1193     ######################################################################
1194     # Generate and return references to:
1195     #  @option_string - the list of options to be passed to Getopt::Long
1196     #  @defaults - the list of default options
1197     #  %expansion - a hash showing how all abbreviations are expanded
1198     #  %category - a hash giving the general category of each option
1199     #  %option_range - a hash giving the valid ranges of certain options
1200
1201     # Note: a few options are not documented in the man page and usage
1202     # message. This is because these are experimental or debug options and
1203     # may or may not be retained in future versions.
1204     #
1205     # Here are the undocumented flags as far as I know.  Any of them
1206     # may disappear at any time.  They are mainly for fine-tuning
1207     # and debugging.
1208     #
1209     # fll --> fuzzy-line-length           # a trivial parameter which gets
1210     #                                       turned off for the extrude option
1211     #                                       which is mainly for debugging
1212     # chk --> check-multiline-quotes      # check for old bug; to be deleted
1213     # scl --> short-concatenation-item-length   # helps break at '.'
1214     # recombine                           # for debugging line breaks
1215     # valign                              # for debugging vertical alignment
1216     # I   --> DIAGNOSTICS                 # for debugging
1217     ######################################################################
1218
1219     # here is a summary of the Getopt codes:
1220     # <none> does not take an argument
1221     # =s takes a mandatory string
1222     # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1223     # =i takes a mandatory integer
1224     # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1225     # ! does not take an argument and may be negated
1226     #  i.e., -foo and -nofoo are allowed
1227     # a double dash signals the end of the options list
1228     #
1229     #---------------------------------------------------------------
1230     # Define the option string passed to GetOptions.
1231     #---------------------------------------------------------------
1232
1233     my @option_string   = ();
1234     my %expansion       = ();
1235     my %option_category = ();
1236     my %option_range    = ();
1237     my $rexpansion      = \%expansion;
1238
1239     # names of categories in manual
1240     # leading integers will allow sorting
1241     my @category_name = (
1242         '0. I/O control',
1243         '1. Basic formatting options',
1244         '2. Code indentation control',
1245         '3. Whitespace control',
1246         '4. Comment controls',
1247         '5. Linebreak controls',
1248         '6. Controlling list formatting',
1249         '7. Retaining or ignoring existing line breaks',
1250         '8. Blank line control',
1251         '9. Other controls',
1252         '10. HTML options',
1253         '11. pod2html options',
1254         '12. Controlling HTML properties',
1255         '13. Debugging',
1256     );
1257
1258     #  These options are parsed directly by perltidy:
1259     #    help h
1260     #    version v
1261     #  However, they are included in the option set so that they will
1262     #  be seen in the options dump.
1263
1264     # These long option names have no abbreviations or are treated specially
1265     @option_string = qw(
1266       html!
1267       noprofile
1268       no-profile
1269       npro
1270       recombine!
1271       valign!
1272       notidy
1273     );
1274
1275     my $category = 13;    # Debugging
1276     foreach (@option_string) {
1277         my $opt = $_;     # must avoid changing the actual flag
1278         $opt =~ s/!$//;
1279         $option_category{$opt} = $category_name[$category];
1280     }
1281
1282     $category = 11;                                       # HTML
1283     $option_category{html} = $category_name[$category];
1284
1285     # routine to install and check options
1286     my $add_option = sub {
1287         my ( $long_name, $short_name, $flag ) = @_;
1288         push @option_string, $long_name . $flag;
1289         $option_category{$long_name} = $category_name[$category];
1290         if ($short_name) {
1291             if ( $expansion{$short_name} ) {
1292                 my $existing_name = $expansion{$short_name}[0];
1293                 die
1294 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1295             }
1296             $expansion{$short_name} = [$long_name];
1297             if ( $flag eq '!' ) {
1298                 my $nshort_name = 'n' . $short_name;
1299                 my $nolong_name = 'no' . $long_name;
1300                 if ( $expansion{$nshort_name} ) {
1301                     my $existing_name = $expansion{$nshort_name}[0];
1302                     die
1303 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1304                 }
1305                 $expansion{$nshort_name} = [$nolong_name];
1306             }
1307         }
1308     };
1309
1310     # Install long option names which have a simple abbreviation.
1311     # Options with code '!' get standard negation ('no' for long names,
1312     # 'n' for abbreviations).  Categories follow the manual.
1313
1314     ###########################
1315     $category = 0;    # I/O_Control
1316     ###########################
1317     $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1318     $add_option->( 'backup-file-extension',      'bext',  '=s' );
1319     $add_option->( 'force-read-binary',          'f',     '!' );
1320     $add_option->( 'format',                     'fmt',   '=s' );
1321     $add_option->( 'iterations',                 'it',    '=i' );
1322     $add_option->( 'logfile',                    'log',   '!' );
1323     $add_option->( 'logfile-gap',                'g',     ':i' );
1324     $add_option->( 'outfile',                    'o',     '=s' );
1325     $add_option->( 'output-file-extension',      'oext',  '=s' );
1326     $add_option->( 'output-path',                'opath', '=s' );
1327     $add_option->( 'profile',                    'pro',   '=s' );
1328     $add_option->( 'quiet',                      'q',     '!' );
1329     $add_option->( 'standard-error-output',      'se',    '!' );
1330     $add_option->( 'standard-output',            'st',    '!' );
1331     $add_option->( 'warning-output',             'w',     '!' );
1332
1333     # options which are both toggle switches and values moved here
1334     # to hide from tidyview (which does not show category 0 flags):
1335     # -ole moved here from category 1
1336     # -sil moved here from category 2
1337     $add_option->( 'output-line-ending',         'ole', '=s' );
1338     $add_option->( 'starting-indentation-level', 'sil', '=i' );
1339
1340     ########################################
1341     $category = 1;    # Basic formatting options
1342     ########################################
1343     $add_option->( 'check-syntax',             'syn',  '!' );
1344     $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
1345     $add_option->( 'indent-columns',           'i',    '=i' );
1346     $add_option->( 'maximum-line-length',      'l',    '=i' );
1347     $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
1348     $add_option->( 'preserve-line-endings',    'ple',  '!' );
1349     $add_option->( 'tabs',                     't',    '!' );
1350
1351     ########################################
1352     $category = 2;    # Code indentation control
1353     ########################################
1354     $add_option->( 'continuation-indentation',           'ci',   '=i' );
1355     $add_option->( 'line-up-parentheses',                'lp',   '!' );
1356     $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1357     $add_option->( 'outdent-keywords',                   'okw',  '!' );
1358     $add_option->( 'outdent-labels',                     'ola',  '!' );
1359     $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1360     $add_option->( 'indent-closing-brace',               'icb',  '!' );
1361     $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1362     $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1363     $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1364     $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1365     $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1366     $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1367
1368     ########################################
1369     $category = 3;    # Whitespace control
1370     ########################################
1371     $add_option->( 'add-semicolons',                            'asc',   '!' );
1372     $add_option->( 'add-whitespace',                            'aws',   '!' );
1373     $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1374     $add_option->( 'brace-tightness',                           'bt',    '=i' );
1375     $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1376     $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1377     $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1378     $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1379     $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1380     $add_option->( 'paren-tightness',                           'pt',    '=i' );
1381     $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1382     $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1383     $add_option->( 'space-function-paren',                      'sfp',   '!' );
1384     $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1385     $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1386     $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1387     $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1388     $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1389     $add_option->( 'trim-qw',                                   'tqw',   '!' );
1390     $add_option->( 'want-left-space',                           'wls',   '=s' );
1391     $add_option->( 'want-right-space',                          'wrs',   '=s' );
1392
1393     ########################################
1394     $category = 4;    # Comment controls
1395     ########################################
1396     $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1397     $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1398     $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1399     $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1400     $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1401     $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1402     $add_option->( 'closing-side-comments',             'csc',  '!' );
1403     $add_option->( 'closing-side-comments-balanced',    'cscb', '!' );
1404     $add_option->( 'format-skipping',                   'fs',   '!' );
1405     $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1406     $add_option->( 'format-skipping-end',               'fse',  '=s' );
1407     $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1408     $add_option->( 'indent-block-comments',             'ibc',  '!' );
1409     $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1410     $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
1411     $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1412     $add_option->( 'outdent-long-comments',             'olc',  '!' );
1413     $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1414     $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1415     $add_option->( 'static-block-comments',             'sbc',  '!' );
1416     $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1417     $add_option->( 'static-side-comments',              'ssc',  '!' );
1418
1419     ########################################
1420     $category = 5;    # Linebreak controls
1421     ########################################
1422     $add_option->( 'add-newlines',                            'anl',   '!' );
1423     $add_option->( 'block-brace-vertical-tightness',          'bbvt',  '=i' );
1424     $add_option->( 'block-brace-vertical-tightness-list',     'bbvtl', '=s' );
1425     $add_option->( 'brace-vertical-tightness',                'bvt',   '=i' );
1426     $add_option->( 'brace-vertical-tightness-closing',        'bvtc',  '=i' );
1427     $add_option->( 'cuddled-else',                            'ce',    '!' );
1428     $add_option->( 'delete-old-newlines',                     'dnl',   '!' );
1429     $add_option->( 'opening-brace-always-on-right',           'bar',   '!' );
1430     $add_option->( 'opening-brace-on-new-line',               'bl',    '!' );
1431     $add_option->( 'opening-hash-brace-right',                'ohbr',  '!' );
1432     $add_option->( 'opening-paren-right',                     'opr',   '!' );
1433     $add_option->( 'opening-square-bracket-right',            'osbr',  '!' );
1434     $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl',  '!' );
1435     $add_option->( 'opening-sub-brace-on-new-line',           'sbl',   '!' );
1436     $add_option->( 'paren-vertical-tightness',                'pvt',   '=i' );
1437     $add_option->( 'paren-vertical-tightness-closing',        'pvtc',  '=i' );
1438     $add_option->( 'stack-closing-hash-brace',                'schb',  '!' );
1439     $add_option->( 'stack-closing-paren',                     'scp',   '!' );
1440     $add_option->( 'stack-closing-square-bracket',            'scsb',  '!' );
1441     $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
1442     $add_option->( 'stack-opening-paren',                     'sop',   '!' );
1443     $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
1444     $add_option->( 'vertical-tightness',                      'vt',    '=i' );
1445     $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
1446     $add_option->( 'want-break-after',                        'wba',   '=s' );
1447     $add_option->( 'want-break-before',                       'wbb',   '=s' );
1448     $add_option->( 'break-after-all-operators',               'baao',  '!' );
1449     $add_option->( 'break-before-all-operators',              'bbao',  '!' );
1450     $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
1451
1452     ########################################
1453     $category = 6;    # Controlling list formatting
1454     ########################################
1455     $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1456     $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1457     $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1458
1459     ########################################
1460     $category = 7;    # Retaining or ignoring existing line breaks
1461     ########################################
1462     $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1463     $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1464     $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1465     $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
1466
1467     ########################################
1468     $category = 8;    # Blank line control
1469     ########################################
1470     $add_option->( 'blanks-before-blocks',            'bbb', '!' );
1471     $add_option->( 'blanks-before-comments',          'bbc', '!' );
1472     $add_option->( 'blanks-before-subs',              'bbs', '!' );
1473     $add_option->( 'long-block-line-count',           'lbl', '=i' );
1474     $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1475     $add_option->( 'keep-old-blank-lines',            'kbl', '=i' );
1476
1477     ########################################
1478     $category = 9;    # Other controls
1479     ########################################
1480     $add_option->( 'delete-block-comments',        'dbc',  '!' );
1481     $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1482     $add_option->( 'delete-pod',                   'dp',   '!' );
1483     $add_option->( 'delete-side-comments',         'dsc',  '!' );
1484     $add_option->( 'tee-block-comments',           'tbc',  '!' );
1485     $add_option->( 'tee-pod',                      'tp',   '!' );
1486     $add_option->( 'tee-side-comments',            'tsc',  '!' );
1487     $add_option->( 'look-for-autoloader',          'lal',  '!' );
1488     $add_option->( 'look-for-hash-bang',           'x',    '!' );
1489     $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1490     $add_option->( 'pass-version-line',            'pvl',  '!' );
1491
1492     ########################################
1493     $category = 13;    # Debugging
1494     ########################################
1495     $add_option->( 'DEBUG',                           'D',    '!' );
1496     $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1497     $add_option->( 'check-multiline-quotes',          'chk',  '!' );
1498     $add_option->( 'dump-defaults',                   'ddf',  '!' );
1499     $add_option->( 'dump-long-names',                 'dln',  '!' );
1500     $add_option->( 'dump-options',                    'dop',  '!' );
1501     $add_option->( 'dump-profile',                    'dpro', '!' );
1502     $add_option->( 'dump-short-names',                'dsn',  '!' );
1503     $add_option->( 'dump-token-types',                'dtt',  '!' );
1504     $add_option->( 'dump-want-left-space',            'dwls', '!' );
1505     $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1506     $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1507     $add_option->( 'help',                            'h',    '' );
1508     $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1509     $add_option->( 'show-options',                    'opt',  '!' );
1510     $add_option->( 'version',                         'v',    '' );
1511
1512     #---------------------------------------------------------------------
1513
1514     # The Perl::Tidy::HtmlWriter will add its own options to the string
1515     Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1516
1517     ########################################
1518     # Set categories 10, 11, 12
1519     ########################################
1520     # Based on their known order
1521     $category = 12;    # HTML properties
1522     foreach my $opt (@option_string) {
1523         my $long_name = $opt;
1524         $long_name =~ s/(!|=.*|:.*)$//;
1525         unless ( defined( $option_category{$long_name} ) ) {
1526             if ( $long_name =~ /^html-linked/ ) {
1527                 $category = 10;    # HTML options
1528             }
1529             elsif ( $long_name =~ /^pod2html/ ) {
1530                 $category = 11;    # Pod2html
1531             }
1532             $option_category{$long_name} = $category_name[$category];
1533         }
1534     }
1535
1536     #---------------------------------------------------------------
1537     # Assign valid ranges to certain options
1538     #---------------------------------------------------------------
1539     # In the future, these may be used to make preliminary checks
1540     # hash keys are long names
1541     # If key or value is undefined:
1542     #   strings may have any value
1543     #   integer ranges are >=0
1544     # If value is defined:
1545     #   value is [qw(any valid words)] for strings
1546     #   value is [min, max] for integers
1547     #   if min is undefined, there is no lower limit
1548     #   if max is undefined, there is no upper limit
1549     # Parameters not listed here have defaults
1550     %option_range = (
1551         'format'             => [ 'tidy', 'html', 'user' ],
1552         'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
1553
1554         'block-brace-tightness'    => [ 0, 2 ],
1555         'brace-tightness'          => [ 0, 2 ],
1556         'paren-tightness'          => [ 0, 2 ],
1557         'square-bracket-tightness' => [ 0, 2 ],
1558
1559         'block-brace-vertical-tightness'            => [ 0, 2 ],
1560         'brace-vertical-tightness'                  => [ 0, 2 ],
1561         'brace-vertical-tightness-closing'          => [ 0, 2 ],
1562         'paren-vertical-tightness'                  => [ 0, 2 ],
1563         'paren-vertical-tightness-closing'          => [ 0, 2 ],
1564         'square-bracket-vertical-tightness'         => [ 0, 2 ],
1565         'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1566         'vertical-tightness'                        => [ 0, 2 ],
1567         'vertical-tightness-closing'                => [ 0, 2 ],
1568
1569         'closing-brace-indentation'          => [ 0, 3 ],
1570         'closing-paren-indentation'          => [ 0, 3 ],
1571         'closing-square-bracket-indentation' => [ 0, 3 ],
1572         'closing-token-indentation'          => [ 0, 3 ],
1573
1574         'closing-side-comment-else-flag' => [ 0, 2 ],
1575         'comma-arrow-breakpoints'        => [ 0, 3 ],
1576     );
1577
1578     # Note: we could actually allow negative ci if someone really wants it:
1579     # $option_range{'continuation-indentation'} = [ undef, undef ];
1580
1581     #---------------------------------------------------------------
1582     # Assign default values to the above options here, except
1583     # for 'outfile' and 'help'.
1584     # These settings should approximate the perlstyle(1) suggestions.
1585     #---------------------------------------------------------------
1586     my @defaults = qw(
1587       add-newlines
1588       add-semicolons
1589       add-whitespace
1590       blanks-before-blocks
1591       blanks-before-comments
1592       blanks-before-subs
1593       block-brace-tightness=0
1594       block-brace-vertical-tightness=0
1595       brace-tightness=1
1596       brace-vertical-tightness-closing=0
1597       brace-vertical-tightness=0
1598       break-at-old-logical-breakpoints
1599       break-at-old-ternary-breakpoints
1600       break-at-old-keyword-breakpoints
1601       comma-arrow-breakpoints=1
1602       nocheck-syntax
1603       closing-side-comment-interval=6
1604       closing-side-comment-maximum-text=20
1605       closing-side-comment-else-flag=0
1606       closing-side-comments-balanced
1607       closing-paren-indentation=0
1608       closing-brace-indentation=0
1609       closing-square-bracket-indentation=0
1610       continuation-indentation=2
1611       delete-old-newlines
1612       delete-semicolons
1613       fuzzy-line-length
1614       hanging-side-comments
1615       indent-block-comments
1616       indent-columns=4
1617       iterations=1
1618       keep-old-blank-lines=1
1619       long-block-line-count=8
1620       look-for-autoloader
1621       look-for-selfloader
1622       maximum-consecutive-blank-lines=1
1623       maximum-fields-per-table=0
1624       maximum-line-length=80
1625       minimum-space-to-comment=4
1626       nobrace-left-and-indent
1627       nocuddled-else
1628       nodelete-old-whitespace
1629       nohtml
1630       nologfile
1631       noquiet
1632       noshow-options
1633       nostatic-side-comments
1634       notabs
1635       nowarning-output
1636       outdent-labels
1637       outdent-long-quotes
1638       outdent-long-comments
1639       paren-tightness=1
1640       paren-vertical-tightness-closing=0
1641       paren-vertical-tightness=0
1642       pass-version-line
1643       recombine
1644       valign
1645       short-concatenation-item-length=8
1646       space-for-semicolon
1647       square-bracket-tightness=1
1648       square-bracket-vertical-tightness-closing=0
1649       square-bracket-vertical-tightness=0
1650       static-block-comments
1651       trim-qw
1652       format=tidy
1653       backup-file-extension=bak
1654       format-skipping
1655
1656       pod2html
1657       html-table-of-contents
1658       html-entities
1659     );
1660
1661     push @defaults, "perl-syntax-check-flags=-c -T";
1662
1663     #---------------------------------------------------------------
1664     # Define abbreviations which will be expanded into the above primitives.
1665     # These may be defined recursively.
1666     #---------------------------------------------------------------
1667     %expansion = (
1668         %expansion,
1669         'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
1670         'fnl'               => [qw(freeze-newlines)],
1671         'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1672         'fws'               => [qw(freeze-whitespace)],
1673         'freeze-blank-lines' =>
1674           [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1675         'fbl'                => [qw(freeze-blank-lines)],
1676         'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1677         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1678         'nooutdent-long-lines' =>
1679           [qw(nooutdent-long-quotes nooutdent-long-comments)],
1680         'noll' => [qw(nooutdent-long-lines)],
1681         'io'   => [qw(indent-only)],
1682         'delete-all-comments' =>
1683           [qw(delete-block-comments delete-side-comments delete-pod)],
1684         'nodelete-all-comments' =>
1685           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1686         'dac'  => [qw(delete-all-comments)],
1687         'ndac' => [qw(nodelete-all-comments)],
1688         'gnu'  => [qw(gnu-style)],
1689         'pbp'  => [qw(perl-best-practices)],
1690         'tee-all-comments' =>
1691           [qw(tee-block-comments tee-side-comments tee-pod)],
1692         'notee-all-comments' =>
1693           [qw(notee-block-comments notee-side-comments notee-pod)],
1694         'tac'   => [qw(tee-all-comments)],
1695         'ntac'  => [qw(notee-all-comments)],
1696         'html'  => [qw(format=html)],
1697         'nhtml' => [qw(format=tidy)],
1698         'tidy'  => [qw(format=tidy)],
1699
1700         'swallow-optional-blank-lines'   => [qw(kbl=0)],
1701         'noswallow-optional-blank-lines' => [qw(kbl=1)],
1702         'sob'                            => [qw(kbl=0)],
1703         'nsob'                           => [qw(kbl=1)],
1704
1705         'break-after-comma-arrows'   => [qw(cab=0)],
1706         'nobreak-after-comma-arrows' => [qw(cab=1)],
1707         'baa'                        => [qw(cab=0)],
1708         'nbaa'                       => [qw(cab=1)],
1709
1710         'break-at-old-trinary-breakpoints' => [qw(bot)],
1711
1712         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1713         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1714         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1715         'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1716         'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1717
1718         'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1719         'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1720         'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1721         'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1722         'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1723
1724         'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1725         'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1726         'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1727
1728         'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1729         'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1730         'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1731
1732         'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1733         'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1734         'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1735
1736         'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1737         'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1738         'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1739
1740         'otr'                   => [qw(opr ohbr osbr)],
1741         'opening-token-right'   => [qw(opr ohbr osbr)],
1742         'notr'                  => [qw(nopr nohbr nosbr)],
1743         'noopening-token-right' => [qw(nopr nohbr nosbr)],
1744
1745         'sot'                    => [qw(sop sohb sosb)],
1746         'nsot'                   => [qw(nsop nsohb nsosb)],
1747         'stack-opening-tokens'   => [qw(sop sohb sosb)],
1748         'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1749
1750         'sct'                    => [qw(scp schb scsb)],
1751         'stack-closing-tokens'   => => [qw(scp schb scsb)],
1752         'nsct'                   => [qw(nscp nschb nscsb)],
1753         'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1754
1755         # 'mangle' originally deleted pod and comments, but to keep it
1756         # reversible, it no longer does.  But if you really want to
1757         # delete them, just use:
1758         #   -mangle -dac
1759
1760         # An interesting use for 'mangle' is to do this:
1761         #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1762         # which will form as many one-line blocks as possible
1763
1764         'mangle' => [
1765             qw(
1766               check-syntax
1767               keep-old-blank-lines=0
1768               delete-old-newlines
1769               delete-old-whitespace
1770               delete-semicolons
1771               indent-columns=0
1772               maximum-consecutive-blank-lines=0
1773               maximum-line-length=100000
1774               noadd-newlines
1775               noadd-semicolons
1776               noadd-whitespace
1777               noblanks-before-blocks
1778               noblanks-before-subs
1779               notabs
1780               )
1781         ],
1782
1783         # 'extrude' originally deleted pod and comments, but to keep it
1784         # reversible, it no longer does.  But if you really want to
1785         # delete them, just use
1786         #   extrude -dac
1787         #
1788         # An interesting use for 'extrude' is to do this:
1789         #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1790         # which will break up all one-line blocks.
1791
1792         'extrude' => [
1793             qw(
1794               check-syntax
1795               ci=0
1796               delete-old-newlines
1797               delete-old-whitespace
1798               delete-semicolons
1799               indent-columns=0
1800               maximum-consecutive-blank-lines=0
1801               maximum-line-length=1
1802               noadd-semicolons
1803               noadd-whitespace
1804               noblanks-before-blocks
1805               noblanks-before-subs
1806               nofuzzy-line-length
1807               notabs
1808               norecombine
1809               )
1810         ],
1811
1812         # this style tries to follow the GNU Coding Standards (which do
1813         # not really apply to perl but which are followed by some perl
1814         # programmers).
1815         'gnu-style' => [
1816             qw(
1817               lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1818               )
1819         ],
1820
1821         # Style suggested in Damian Conway's Perl Best Practices
1822         'perl-best-practices' => [
1823             qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1824 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
1825         ],
1826
1827         # Additional styles can be added here
1828     );
1829
1830     Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1831
1832     # Uncomment next line to dump all expansions for debugging:
1833     # dump_short_names(\%expansion);
1834     return (
1835         \@option_string,   \@defaults, \%expansion,
1836         \%option_category, \%option_range
1837     );
1838
1839 }    # end of generate_options
1840
# process_command_line
#
# Build the option hash for one run by feeding three sources through
# Getopt::Long, in this order (later sources overwrite earlier ones):
#   1. the built-in defaults returned by generate_options()
#   2. a configuration (.perltidyrc) file, unless -npro was seen
#   3. the command line parameters in @ARGV
#
# Parameters:
#   $perltidyrc_stream  - configuration source passed by the caller as a
#                         perltidy call parameter; a -pro=filename on the
#                         command line takes precedence over it
#   $is_Windows         - passed through to find_config_file
#   $Windows_type       - passed through to find_config_file
#   $rpending_complaint - passed through to find_config_file
#   $dump_options_type  - if equal to 'perltidyrc', the built-in defaults
#                         are not loaded into the option hash
#
# Returns the list:
#   ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
#     $rexpansion, $roption_category, $roption_range )
#
# Side effects:
#   - A leading '--' on each element of @ARGV is rewritten to '-', and
#     @ARGV is then processed by expand_command_abbreviations and
#     GetOptions.
#   - Calls 'exit 1' after handling -help, -version and the immediate
#     dump options.
#   - Dies if the config file or the command line cannot be parsed.
sub process_command_line {

    my (
        $perltidyrc_stream,  $is_Windows, $Windows_type,
        $rpending_complaint, $dump_options_type
    ) = @_;

    use Getopt::Long;

    my (
        $roption_string,   $rdefaults, $rexpansion,
        $roption_category, $roption_range
    ) = generate_options();

    #---------------------------------------------------------------
    # set the defaults by passing the above list through GetOptions
    #---------------------------------------------------------------
    my %Opts = ();
    {
        local @ARGV;

        # do not load the defaults if we are just dumping perltidyrc
        # (the 'defined' guard avoids a warning if no type was passed)
        my $dumping_perltidyrc = defined($dump_options_type)
          && $dump_options_type eq 'perltidyrc';
        unless ($dumping_perltidyrc) {
            push @ARGV, map { "--" . $_ } @$rdefaults;
        }

        # Patch to save users Getopt::Long configuration
        # and set to Getopt::Long defaults.  Use eval to avoid
        # breaking old versions of Perl without these routines.
        my $glc;
        eval { $glc = Getopt::Long::Configure() };
        if ($@) {
            $glc = undef;
        }
        else {
            eval { Getopt::Long::ConfigDefaults() };
        }

        if ( !GetOptions( \%Opts, @$roption_string ) ) {
            die "Programming Bug: error in setting default options";
        }

        # Patch to put the previous Getopt::Long configuration back
        eval { Getopt::Long::Configure($glc) } if defined $glc;
    }

    my @raw_options        = ();
    my $config_file        = "";
    my $saw_ignore_profile = 0;
    my $saw_extrude        = 0;
    my $saw_dump_profile   = 0;

    #---------------------------------------------------------------
    # Take a first look at the command-line parameters.  Do as many
    # immediate dumps as possible, which can avoid confusion if the
    # perltidyrc file has an error.
    #---------------------------------------------------------------
    # Note: $arg is an alias for the @ARGV element, so the substitution
    # below intentionally rewrites @ARGV in place.
    foreach my $arg (@ARGV) {

        $arg =~ s/^--/-/;
        if ( $arg =~ /^-(npro|noprofile|no-profile)$/ ) {
            $saw_ignore_profile = 1;
        }

        # note: this must come before -pro and -profile, below:
        elsif ( $arg =~ /^-(dump-profile|dpro)$/ ) {
            $saw_dump_profile = 1;
        }
        elsif ( $arg =~ /^-(pro|profile)=(.+)/ ) {

            # save the capture now, before any other match can clobber it
            my $requested_file = $2;
            if ($config_file) {
                warn
"Only one -pro=filename allowed, using '$requested_file' instead of '$config_file'\n";
            }
            $config_file = $requested_file;

            # resolve <dir>/.../<file>, meaning look upwards from directory
            if ( my ( $start_dir, $search_file ) =
                ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
            {
                $start_dir = '.' if !$start_dir;
                $start_dir = Cwd::realpath($start_dir);
                if ( my $found_file =
                    find_file_upwards( $start_dir, $search_file ) )
                {
                    $config_file = $found_file;
                }
            }
            unless ( -e $config_file ) {
                warn "cannot find file given with -pro=$config_file: $!\n";
                $config_file = "";
            }
        }
        elsif ( $arg =~ /^-(pro|profile)=?$/ ) {
            die "usage: -pro=filename or --profile=filename, no spaces\n";
        }
        elsif ( $arg =~ /^-extrude$/ ) {
            $saw_extrude = 1;
        }
        elsif ( $arg =~ /^-(help|h|HELP|H)$/ ) {
            usage();
            exit 1;
        }
        elsif ( $arg =~ /^-(version|v)$/ ) {
            show_version();
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-defaults|ddf)$/ ) {
            dump_defaults(@$rdefaults);
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-long-names|dln)$/ ) {
            dump_long_names(@$roption_string);
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-short-names|dsn)$/ ) {
            dump_short_names($rexpansion);
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-token-types|dtt)$/ ) {
            Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
            exit 1;
        }
    }

    if ( $saw_dump_profile && $saw_ignore_profile ) {
        warn "No profile to dump because of -npro\n";
        exit 1;
    }

    #---------------------------------------------------------------
    # read any .perltidyrc configuration file
    #---------------------------------------------------------------
    unless ($saw_ignore_profile) {

        # resolve possible conflict between $perltidyrc_stream passed
        # as call parameter to perltidy and -pro=filename on command
        # line.
        if ($perltidyrc_stream) {
            if ($config_file) {
                warn <<EOM;
 Conflict: a perltidyrc configuration file was specified both as this
 perltidy call parameter: $perltidyrc_stream
 and with this -profile=$config_file.
 Using -profile=$config_file.
EOM
            }
            else {
                $config_file = $perltidyrc_stream;
            }
        }

        # look for a config file if we don't have one yet; notes about
        # the search are accumulated in the string behind this reference
        my $config_file_chatter  = "";
        my $rconfig_file_chatter = \$config_file_chatter;
        unless ($config_file) {
            $config_file =
              find_config_file( $is_Windows, $Windows_type,
                $rconfig_file_chatter, $rpending_complaint );
        }

        # open any config file
        my $fh_config;
        if ($config_file) {
            ( $fh_config, $config_file ) =
              Perl::Tidy::streamhandle( $config_file, 'r' );
            unless ($fh_config) {
                $$rconfig_file_chatter .=
                  "# $config_file exists but cannot be opened\n";
            }
        }

        if ($saw_dump_profile) {
            dump_config_file( $fh_config, $config_file,
                $rconfig_file_chatter );
            exit 1;
        }

        if ($fh_config) {

            my ( $rconfig_list, $death_message ) =
              read_config_file( $fh_config, $config_file, $rexpansion );
            die $death_message if ($death_message);

            # process any .perltidyrc parameters right now so we can
            # localize errors
            if (@$rconfig_list) {
                local @ARGV = @$rconfig_list;

                expand_command_abbreviations( $rexpansion, \@raw_options,
                    $config_file );

                if ( !GetOptions( \%Opts, @$roption_string ) ) {
                    die
"Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
                }

                # Anything left in this local @ARGV is an error and must be
                # invalid bare words from the configuration file.  We cannot
                # check this earlier because bare words may have been valid
                # values for parameters.  We had to wait for GetOptions to have
                # a look at @ARGV.
                if (@ARGV) {
                    my $count = @ARGV;
                    my $str   = "\'" . pop(@ARGV) . "\'";

                    # Loop on the array itself, not on the popped value,
                    # so that a false word such as '0' does not end the
                    # listing early.
                    while (@ARGV) {
                        my $param = pop(@ARGV);
                        if ( length($str) < 70 ) {
                            $str .= ", '$param'";
                        }
                        else {
                            $str .= ", ...";
                            last;
                        }
                    }
                    die <<EOM;
There are $count unrecognized values in the configuration file '$config_file':
$str
Use leading dashes for parameters.  Use -npro to ignore this file.
EOM
                }

                # Undo any options which cause premature exit.  They are not
                # appropriate for a config file, and it could be hard to
                # diagnose the cause of the premature exit.
                foreach my $early_exit_option (
                    qw{
                    dump-defaults
                    dump-long-names
                    dump-options
                    dump-profile
                    dump-short-names
                    dump-token-types
                    dump-want-left-space
                    dump-want-right-space
                    help
                    stylesheet
                    version
                    }
                  )
                {

                    if ( defined( $Opts{$early_exit_option} ) ) {
                        delete $Opts{$early_exit_option};
                        warn
"ignoring --$early_exit_option in config file: $config_file\n";
                    }
                }
            }
        }
    }

    #---------------------------------------------------------------
    # now process the command line parameters
    #---------------------------------------------------------------
    expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );

    if ( !GetOptions( \%Opts, @$roption_string ) ) {
        die "Error on command line; for help try 'perltidy -h'\n";
    }

    return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
        $rexpansion, $roption_category, $roption_range );
}    # end of process_command_line
2106
sub check_options {

    # Check and reconcile interacting options after they have been read.
    # The option hash is modified in place.
    #
    #   $rOpts              - reference to the hash of option values, keyed
    #                         by long option name
    #   $is_Windows         - true if running under Windows/DOS
    #   $Windows_type       - Windows version name; a false value, or a name
    #                         starting with '9' or 'Me', disables the syntax
    #                         check on Windows
    #   $rpending_complaint - reference to a string to which deferred
    #                         messages are appended
    #
    # Returns nothing.  Conflicting or invalid options are reported with
    # 'warn' and then corrected.

    my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;

    #---------------------------------------------------------------
    # check and handle any interactions among the basic options..
    #---------------------------------------------------------------

    # Since -vt, -vtc, and -cti are abbreviations, but under
    # msdos, an unquoted input parameter like vtc=1 will be
    # seen as 2 parameters, vtc and 1, so the abbreviations
    # won't be seen.  Therefore, we will catch them here if
    # they get through.

    if ( defined $rOpts->{'vertical-tightness'} ) {
        my $vt = $rOpts->{'vertical-tightness'};
        $rOpts->{'paren-vertical-tightness'}          = $vt;
        $rOpts->{'square-bracket-vertical-tightness'} = $vt;
        $rOpts->{'brace-vertical-tightness'}          = $vt;
    }

    if ( defined $rOpts->{'vertical-tightness-closing'} ) {
        my $vtc = $rOpts->{'vertical-tightness-closing'};
        $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
        $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
        $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
    }

    if ( defined $rOpts->{'closing-token-indentation'} ) {
        my $cti = $rOpts->{'closing-token-indentation'};
        $rOpts->{'closing-square-bracket-indentation'} = $cti;
        $rOpts->{'closing-brace-indentation'}          = $cti;
        $rOpts->{'closing-paren-indentation'}          = $cti;
    }

    # In quiet mode, there is no log file and hence no way to report
    # results of syntax check, so don't do it.
    if ( $rOpts->{'quiet'} ) {
        $rOpts->{'check-syntax'} = 0;
    }

    # can't check syntax if no output
    if ( $rOpts->{'format'} ne 'tidy' ) {
        $rOpts->{'check-syntax'} = 0;
    }

    # Never let Windows 9x/Me systems run syntax check -- this will prevent a
    # wide variety of nasty problems on these systems, because they cannot
    # reliably run backticks.  Don't even think about changing this!
    if (   $rOpts->{'check-syntax'}
        && $is_Windows
        && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
    {
        $rOpts->{'check-syntax'} = 0;
    }

    # It's really a bad idea to check syntax as root unless you wrote
    # the script yourself.  FIXME: not sure if this works with VMS
    unless ($is_Windows) {

        # $< is the real user id of this process; 0 is root
        if ( $< == 0 && $rOpts->{'check-syntax'} ) {
            $rOpts->{'check-syntax'} = 0;
            $$rpending_complaint .=
"Syntax check deactivated for safety; you shouldn't run this as root\n";
        }
    }

    # check iteration count and quietly fix if necessary:
    # - iterations option only applies to code beautification mode
    # - it shouldn't be necessary to use more than about 2 iterations
    if ( $rOpts->{'format'} ne 'tidy' ) {
        $rOpts->{'iterations'} = 1;
    }
    elsif ( defined( $rOpts->{'iterations'} ) ) {
        if    ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
        elsif ( $rOpts->{'iterations'} > 5 )  { $rOpts->{'iterations'} = 5 }
    }
    else {
        $rOpts->{'iterations'} = 1;
    }

    # see if user set a non-negative logfile-gap
    if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {

        # a zero gap will be taken as a 1
        if ( $rOpts->{'logfile-gap'} == 0 ) {
            $rOpts->{'logfile-gap'} = 1;
        }

        # setting a non-negative logfile gap causes logfile to be saved
        $rOpts->{'logfile'} = 1;
    }

    # not setting logfile gap, or setting it negative, causes default of 50
    else {
        $rOpts->{'logfile-gap'} = 50;
    }

    # set short-cut flag when only indentation is to be done.
    # Note that the user may or may not have already set the
    # indent-only flag.
    if (   !$rOpts->{'add-whitespace'}
        && !$rOpts->{'delete-old-whitespace'}
        && !$rOpts->{'add-newlines'}
        && !$rOpts->{'delete-old-newlines'} )
    {
        $rOpts->{'indent-only'} = 1;
    }

    # -isbc implies -ibc
    if ( $rOpts->{'indent-spaced-block-comments'} ) {
        $rOpts->{'indent-block-comments'} = 1;
    }

    # -bli flag implies -bl
    if ( $rOpts->{'brace-left-and-indent'} ) {
        $rOpts->{'opening-brace-on-new-line'} = 1;
    }

    if (   $rOpts->{'opening-brace-always-on-right'}
        && $rOpts->{'opening-brace-on-new-line'} )
    {
        warn <<EOM;
 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
  'opening-brace-on-new-line' (-bl).  Ignoring -bl.
EOM
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # it simplifies things if -bl is 0 rather than undefined
    if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # -sbl defaults to -bl if not defined
    if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
        $rOpts->{'opening-sub-brace-on-new-line'} =
          $rOpts->{'opening-brace-on-new-line'};
    }

    if ( $rOpts->{'entab-leading-whitespace'} ) {

        # An invalid -et is dropped entirely, so it must not be allowed to
        # switch off the 'tabs' option as a side effect.
        if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
            warn "-et=n must use a positive integer; ignoring -et\n";
            $rOpts->{'entab-leading-whitespace'} = undef;
        }

        # entab leading whitespace has priority over the older 'tabs' option
        elsif ( $rOpts->{'tabs'} ) {
            $rOpts->{'tabs'} = 0;
        }
    }
    return;
}
2257
sub find_file_upwards {

    # Look for a file named $search_file in directory $search_dir and then
    # in each of its parent directories in turn.
    #
    # Returns the path of the first plain file found, formed as
    # "$dir/$search_file".  Returns nothing (undef in scalar context) if
    # the file is not found anywhere on the way up.

    my ( $search_dir, $search_file ) = @_;

    $search_dir  =~ s{/+$}{};
    $search_file =~ s{^/+}{};

    while (1) {
        my $try_path = "$search_dir/$search_file";
        return $try_path if -f $try_path;

        # Stop at the root.  An empty string is what remains of '/' after
        # the trailing slashes have been stripped above.
        return if $search_dir eq '/' || $search_dir eq '';

        # Stop when dirname() can go no higher, which is what happens at
        # the top of a relative path ('.') or of a drive ('C:/').  Without
        # this test the loop would never end.
        my $parent_dir = dirname($search_dir);
        return if $parent_dir eq $search_dir;

        $search_dir = $parent_dir;
    }

    # not reached: every path through the loop returns
    return;
}
2277
sub expand_command_abbreviations {

    # Replace every abbreviation in @ARGV by the option or options which it
    # stands for, repeating until no abbreviations remain.
    #
    #   $rexpansion   - ref to hash: abbreviation => [ its expansions ]
    #   $rraw_options - ref to array which receives the dash options seen
    #                   on the first pass (used in the error report)
    #   $config_file  - name of the configuration file, if any; it only
    #                   selects the message given for circular definitions
    #
    # Dies if abbreviations are still being expanded on the final pass.

    my ( $rexpansion, $rraw_options, $config_file ) = @_;

    # Limit the number of passes so that a circular definition cannot
    # loop forever.  10 should be plenty, but it may be increased to allow
    # deeply nested expansions.
    my $max_passes = 10;

  PASS:
    foreach my $pass_count ( 0 .. $max_passes ) {
        my @next_argv;
        my $expansions_made = 0;

        foreach my $arg (@ARGV) {

            # convert any leading 'no-' to just 'no'
            if ( $arg =~ /^(-[-]?no)-(.*)/ ) { $arg = $1 . $2 }

            # split a dash flag into its name and anything following it
            my ( $abr, $flags ) = $arg =~ /^-[-]?([\w\-]+)(.*)/;

            # not a dash item (a file name): keep it for the next pass
            unless ( defined $abr ) {
                push @next_argv, $arg;
                next;
            }

            # save the input for debug output in case of circular refs
            push @{$rraw_options}, $arg if $pass_count == 0;

            # an abbreviation may itself include an argument, as in 'vt=1';
            # if so the whole thing is the abbreviation
            if ( $rexpansion->{ $abr . $flags } ) {
                $abr .= $flags;
                $flags = "";
            }

            # not in the expansion hash: must be an actual long name
            my $rexpanded = $rexpansion->{$abr};
            unless ($rexpanded) {
                push @next_argv, $arg;
                next;
            }

            # put everything it expands to into the list for the next
            # pass; empty entries are skipped for safety
            $expansions_made++;
            push @next_argv, '--' . $_ . $flags
              for grep { $_ } @{$rexpanded};
        }

        # the expanded list becomes the new @ARGV
        @ARGV = @next_argv;

        # finished as soon as a pass expands nothing
        last PASS unless ( $expansions_made > 0 );
        next PASS if $pass_count != $max_passes;

        # Still expanding on the last allowed pass: give up and explain
        print STDERR
"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
        print STDERR "Here are the raw options\n";
        local $" = ')(';
        print STDERR "(@$rraw_options)\n";

        my $num = @next_argv;
        if ( $num < 50 ) {
            print STDERR "After $max_passes passes here is ARGV\n";
            print STDERR "(@next_argv)\n";
        }
        else {
            print STDERR "After $max_passes passes ARGV has $num entries\n";
        }

        if ($config_file) {
            die <<"DIE";
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
        }
        die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
    }
    return;
}
2381
# Debug routine -- print each abbreviation in the expansion hash together
# with the list of names which it expands to
sub dump_short_names {
    my ($rexpansion) = @_;

    my $heading = <<EOM;
List of short names.  This list shows how all abbreviations are
translated into other abbreviations and, eventually, into long names.
New abbreviations may be defined in a .perltidyrc file.
For a list of all long names, use perltidy --dump-long-names (-dln).
--------------------------------------------------------------------------
EOM
    print STDOUT $heading;

    # one line per abbreviation, in sorted order
    foreach my $short_name ( sort keys %{$rexpansion} ) {
        my @expands_to = @{ $rexpansion->{$short_name} };
        print STDOUT "$short_name --> @expands_to\n";
    }
}
2397
sub check_vms_filename {

    # Given a valid filename (the perltidy input file), return
    #   ( $modified_filename, $separator )
    # where the filename has any explicit version number removed and the
    # separator is the character to put between it and an added extension,
    # suitable for VMS.
    #
    # Contributed by Michael Cartmell

    my ($filename) = @_;
    my ( $base, $path ) = fileparse($filename);

    # remove explicit ; version
    my $removed_version = ( $base =~ s/;-?\d*$// );

    # otherwise remove explicit . version, ie two dots in filename
    # NB ^ escapes a dot
    if ( !$removed_version ) {
        $base =~ s/(          # begin capture $1
                  (?:^|[^^])\. # match a dot not preceded by a caret
                  (?:          # followed by nothing
                    |          # or
                    .*[^^]     # anything ending in a non caret
                  )
                )              # end capture $1
                \.-?\d*$       # match . version number
              /$1/x;
    }

    # normalise filename, if there are no unescaped dots then append one
    if ( $base !~ /(?:^|[^^])\./ ) {
        $base .= '.';
    }

    # a name ending in a dot has no extension yet, so an extension is simply
    # appended; otherwise it is joined on with an underscore
    my $separator = "_";
    $separator = "" if $base =~ /\.$/;

    return ( $path . $base, $separator );
}
2429
sub Win_OS_Type {

    # Returns a string that determines what MS OS we are on.
    # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
    # Returns blank string if not an MS system, or if the version is not
    # in the table below (a complaint is then appended to the string
    # referenced by $rpending_complaint).
    # Original code contributed by: Yves Orton
    # We need to know this to decide where to look for config files
    #
    # TODO: are these more standard names?
    # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003

    my ($rpending_complaint) = @_;

    # is it a MS box?
    return "" if $^O !~ /win32|dos/i;

    # Systems built from Perl source may not have Win32.pm
    # But probably have Win32::GetOSVersion() anyway so the
    # following line is not 'required':
    # return $os unless eval('require Win32');

    # Use the standard API call to determine the version
    my ( $undef, $major, $minor, $build, $id );
    eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };

    # If id==0 then its a win32s box.
    return "win32s" unless $id;

    # Magic numbers from MSDN documentation of GetOSVersion:
    #
    #    NAME                   ID   MAJOR  MINOR
    #    Windows NT 4           2      4       0
    #    Windows 2000           2      5       0
    #    Windows XP             2      5       1
    #    Windows Server 2003    2      5       2
    #
    # The table is indexed by ID and then by MINOR.
    my %name_for = (
        1 => {
            0  => "95",
            10 => "98",
            90 => "Me",
        },
        2 => {
            0  => "2000",       # or NT 4, see below
            1  => "XP/.Net",
            2  => "Win2003",
            51 => "NT3.51",
        },
    );
    my $os = $name_for{$id}->{$minor};

    # If $os is undefined, the above code is out of date.  Suggested updates
    # are welcome.
    if ( !defined $os ) {
        $os = "";
        $$rpending_complaint .= <<EOS;
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
    }

    # NT 4 and 2000 share the same ID and MINOR values, so they can only be
    # told apart by the major version number.
    if ( $os eq "2000" && $major != 5 ) {
        return "NT4";
    }
    return $os;
}
2491
sub is_unix {

    # True unless the operating system name in $^O is one of the known
    # non-unix systems: Windows or DOS, VMS, OS2, or MacOS.
    return !(
             $^O =~ /win32|dos/i
          || $^O eq 'VMS'
          || $^O eq 'OS2'
          || $^O eq 'MacOS'
    );
}
2499
sub look_for_Windows {

    # Determine whether we are running under Windows and, if so, which
    # sub-type of Windows.
    #
    #   $rpending_complaint - reference to a string for deferred messages;
    #                         it is passed on to Win_OS_Type
    #
    # Returns ( $is_Windows, $Windows_type ).  $Windows_type is undefined
    # when not running under Windows.

    my $rpending_complaint = shift;
    my $is_Windows         = ( $^O =~ /win32|dos/i );

    # The declaration is kept separate from the conditional assignment
    # because the behavior of 'my $x = ... if CONDITION' is undefined.
    my $Windows_type;
    $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;

    return ( $is_Windows, $Windows_type );
}
2509
sub find_config_file {

    # Look for a .perltidyrc configuration file and return the name of the
    # first one found, or nothing if there is none.
    # For Windows also look for a file named perltidy.ini
    #
    #   $is_Windows           - true if running under Windows
    #   $Windows_type         - Windows version name, if known
    #   $rconfig_file_chatter - ref to a string which receives a record of
    #                           every location examined
    #   $rpending_complaint   - ref to a string for deferred messages; it
    #                           is passed on to Win_Config_Locs
    #
    # Search order: the current directory, then the locations named by
    # environment variables, then the system-wide locations.

    my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
        $rpending_complaint ) = @_;

    $$rconfig_file_chatter .= "# Config file search...system reported as:";
    $$rconfig_file_chatter .=
      $is_Windows ? "Windows $Windows_type\n" : " $^O\n";

    # sub to check file existence and record all tests
    my $is_config_file = sub {
        my ($candidate) = @_;
        return 0 unless $candidate;
        $$rconfig_file_chatter .= "# Testing: $candidate\n";
        return -f $candidate;
    };

    # file names to try in each user location, in order of preference
    my @basenames = (".perltidyrc");
    push @basenames, "perltidy.ini" if $is_Windows;

    # look in current directory first
    foreach my $basename (@basenames) {
        return $basename if $is_config_file->($basename);
    }

    # Default environment vars.
    my @env_names = qw(PERLTIDY HOME);

    # Check the NT/2k/XP locations, first a local machine def, then a
    # network def
    push @env_names, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;

    # Now go through the environment ...
    foreach my $env_name (@env_names) {
        $$rconfig_file_chatter .= "# Examining: \$ENV{$env_name}";

        my $env_value = $ENV{$env_name};
        if ( !defined $env_value ) {
            $$rconfig_file_chatter .= "\n";
            next;
        }
        $$rconfig_file_chatter .= " = $env_value\n";

        # test ENV{ PERLTIDY } as file:
        if ( $env_name eq 'PERLTIDY' ) {
            my $as_file = "$env_value";
            return $as_file if $is_config_file->($as_file);
        }

        # test ENV as directory:
        foreach my $basename (@basenames) {
            my $candidate = catfile( $env_value, $basename );
            return $candidate if $is_config_file->($candidate);
        }
    }

    # then look for a system-wide definition
    # where to look varies with OS
    if ($is_Windows) {

        if ($Windows_type) {
            my ( $os, $system, $allusers ) =
              Win_Config_Locs( $rpending_complaint, $Windows_type );

            # Check All Users directory, if there is one,
            # i.e. C:\Documents and Settings\User\perltidy.ini
            # and then the system directory.
            my @system_dirs = ($system);
            unshift @system_dirs, $allusers if $allusers;

            foreach my $dir (@system_dirs) {

                # the name with a leading period is retained in case someone
                # has been able to create such a file
                foreach my $basename ( ".perltidyrc", "perltidy.ini" ) {
                    my $candidate = catfile( $dir, $basename );
                    return $candidate if $is_config_file->($candidate);
                }
            }
        }
    }

    # Place to add customization code for other systems
    elsif ( $^O eq 'OS2' ) {
    }
    elsif ( $^O eq 'MacOS' ) {
    }
    elsif ( $^O eq 'VMS' ) {
    }

    # Assume some kind of Unix
    else {
        foreach
          my $candidate ( "/usr/local/etc/perltidyrc", "/etc/perltidyrc" )
        {
            return $candidate if $is_config_file->($candidate);
        }
    }

    # Couldn't find a config file
    return;
}
2627
sub Win_Config_Locs {

    # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
    # or undef if its not a win32 OS.  In list context returns OS, System
    # Directory, and All Users Directory.  All Users will be empty on a
    # 9x/Me box.  Contributed by: Yves Orton.
    #
    #   $rpending_complaint - reference to a string to which a message is
    #                         appended if no location is known for the OS
    #   $os (optional)      - OS name; if omitted it is obtained from
    #                         Win_OS_Type
    #
    # Returns nothing if the OS name is empty or not recognized.

    my $rpending_complaint = shift;

    # Pass the complaint reference along so that any message produced by
    # Win_OS_Type is recorded rather than lost.
    my $os = (@_) ? shift : Win_OS_Type($rpending_complaint);
    return unless $os;

    my $system   = "";
    my $allusers = "";

    if ( $os =~ /9[58]|Me/ ) {
        $system = "C:/Windows";
    }
    elsif ( $os =~ /NT|XP|200?/ ) {
        $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
        $allusers =
          ( $os =~ /NT/ )
          ? "C:/WinNT/profiles/All Users/"
          : "C:/Documents and Settings/All Users/";
    }
    else {

        # This currently would only happen on a win32s computer.  I dont have
        # one to test, so I am unsure how to proceed.  Suggestions welcome!
        $$rpending_complaint .=
"I dont know a sensible place to look for config files on an $os system.\n";
        return;
    }
    return wantarray ? ( $os, $system, $allusers ) : $os;
}
2662
sub dump_config_file {

    # Print to STDOUT the record of the configuration file search, followed
    # by the contents of the configuration file if there is one.
    #
    #   $fh                   - handle open on the configuration file, or a
    #                           false value if none was found
    #   $config_file          - its name, used only in the heading
    #   $rconfig_file_chatter - reference to the text recorded during the
    #                           search

    my ( $fh, $config_file, $rconfig_file_chatter ) = @_;

    print STDOUT "$$rconfig_file_chatter";

    if ( !$fh ) {
        print STDOUT "# ...no config file found\n";
    }
    else {
        print STDOUT "# Dump of file: '$config_file'\n";
        while ( my $text = $fh->getline() ) { print STDOUT $text }

        # a failure to close the handle is ignored
        eval { $fh->close() };
    }
}
2677
sub read_config_file {

    # Read a configuration file from an open handle.
    #
    #   $fh          - handle open on the configuration file; it is closed
    #                  before returning
    #   $config_file - name of the file, used in error messages
    #   $rexpansion  - ref to the hash of abbreviations; alias definitions
    #                  found in the file are added to it
    #
    # Returns ( \@config_list, $death_message ) where @config_list holds
    # the parameters found outside of alias definitions.
    # The file is bad if a non-empty $death_message is returned.

    my ( $fh, $config_file, $rexpansion ) = @_;

    my @config_list;
    my $death_message = "";

    # name of the alias being defined, while between its '{' and '}'
    my $alias_name;
    my $line_no;

  LINE:
    while ( my $line = $fh->getline() ) {
        $line_no++;
        chomp $line;

        # skip full-line comment
        next LINE if $line =~ /^\s*#/;

        ( $line, $death_message ) =
          strip_comment( $line, $config_file, $line_no );
        last LINE if ($death_message);

        # trim both ends
        $line =~ s/^\s*(.*?)\s*$/$1/;
        next LINE unless $line;

        # look for something of the general form
        #    newname { body }
        # or just
        #    body
        # a line of any other form is skipped
        next LINE unless $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/;
        my ( $newname, $body, $curly ) = ( $2, $3, $4 );

        # handle a new alias definition
        if ($newname) {
            if ($alias_name) {
                $death_message =
"No '}' seen after $alias_name and before $newname in config file $config_file line $.\n";
                last LINE;
            }
            $alias_name = $newname;

            # an existing alias may not be redefined
            if ( $rexpansion->{$alias_name} ) {
                my $installed = join ')(', sort keys %{$rexpansion};
                $death_message =
                    "Here is a list of all installed aliases\n($installed)\n"
                  . "Attempting to redefine alias ($alias_name) in config file $config_file line $.\n";
                last LINE;
            }
            $rexpansion->{$alias_name} = [];
        }

        # now do the body
        if ($body) {

            my ( $rbody_parts, $msg ) = parse_args($body);
            if ($msg) {
                $death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
                last LINE;
            }

            if ($alias_name) {

                # remove leading dashes if this is an alias
                s/^\-+// foreach @{$rbody_parts};
                push @{ $rexpansion->{$alias_name} }, @{$rbody_parts};
            }
            else {
                push @config_list, @{$rbody_parts};
            }
        }

        # a closing curly brace ends the current alias definition
        if ($curly) {
            if ( !$alias_name ) {
                $death_message =
"Unexpected '}' seen in config file $config_file line $.\n";
                last LINE;
            }
            $alias_name = undef;
        }
    }

    # a failure to close the handle is ignored
    eval { $fh->close() };
    return ( \@config_list, $death_message );
}
2763
sub strip_comment {

    # Remove a side comment from one line of a configuration file.  A '#'
    # within single or double quotes does not start a comment.
    #
    #   $instr       - the line of text
    #   $config_file - file name, used in the error message
    #   $line_no     - line number, used in the error message
    #
    # Returns ( $text_without_comment, $msg ); $msg is empty unless a
    # quoted string was not closed.

    my ( $instr, $config_file, $line_no ) = @_;

    # nothing to do if no comments
    return ( $instr, "" ) if $instr !~ /#/;

    # use simple method if no quotes
    if ( $instr !~ /['"]/ ) {
        $instr =~ s/\s*\#.*$//;    # simple trim
        return ( $instr, "" );
    }

    # handle comments and quotes: copy the text one character at a time,
    # keeping track of whether we are inside of a quoted string
    my $outstr     = "";
    my $open_quote = "";
    foreach my $char ( split //, $instr ) {

        # scanning does not continue past a newline
        last if $char eq "\n";

        if ($open_quote) {

            # looking for ending quote character
            $open_quote = "" if $char eq $open_quote;
        }
        elsif ( $char eq '"' || $char eq "'" ) {

            # start of a quoted string
            $open_quote = $char;
        }
        elsif ( $char eq '#' ) {

            # start of the comment; we are done
            last;
        }
        $outstr .= $char;
    }

    # error..we reached the end without seeing the ending quote char
    my $msg = "";
    if ($open_quote) {
        $msg = <<EOM;
Error reading file $config_file at line number $line_no.
Did not see ending quote character <$open_quote> in this text:
$instr
Please fix this line or use -npro to avoid reading this file
EOM
    }
    return ( $outstr, $msg );
}
2826
sub parse_args {

    # Parse a command string containing multiple string with possible
    # quotes, into individual commands.  It might look like this, for example:
    #
    #    -wba=" + - "  -some-thing -wbb='. && ||'
    #
    # Whitespace outside of quotes separates the commands, and the quote
    # characters themselves are removed.
    #
    # There is no need, at present, to handle escaped quote characters.
    # (They are not perltidy tokens, so needn't be in strings).
    #
    # Returns ( \@commands, $msg ); $msg is empty unless a quoted string
    # was not closed.

    my ($body) = @_;

    my @body_parts;
    my $part       = "";
    my $open_quote = "";

    foreach my $char ( split //, $body ) {

        if ($open_quote) {

            # looking for ending quote character
            if ( $char eq $open_quote ) {
                $open_quote = "";
            }

            # a quoted string may not continue past a newline
            elsif ( $char eq "\n" ) {
                last;
            }
            else {
                $part .= $char;
            }
        }

        # start of a quoted string
        elsif ( $char eq '"' || $char eq "'" ) {
            $open_quote = $char;
        }

        # whitespace ends the current command, if there is one
        elsif ( $char =~ /\s/ ) {
            push @body_parts, $part if length($part);
            $part = "";
        }

        # accumulating characters
        else {
            $part .= $char;
        }
    }

    # save whatever was being accumulated when the text ran out
    push @body_parts, $part if length($part);

    # error..we reached the end without seeing the ending quote char
    my $msg = "";
    if ($open_quote) {
        $msg = <<EOM;
Did not see ending quote character <$open_quote> in this text:
$body
EOM
    }
    return ( \@body_parts, $msg );
}
2884
sub dump_long_names {

    # Print the given list of long option names to STDOUT, one per line in
    # sorted order, below a heading which explains the option codes.

    my $heading = <<EOM;
# Command line long names (passed to GetOptions)
#---------------------------------------------------------------
# here is a summary of the Getopt codes:
# <none> does not take an argument
# =s takes a mandatory string
# :s takes an optional string
# =i takes a mandatory integer
# :i takes an optional integer
# ! does not take an argument and may be negated
#  i.e., -foo and -nofoo are allowed
# a double dash signals the end of the options list
#
#---------------------------------------------------------------
EOM
    print STDOUT $heading;

    print STDOUT "$_\n" for sort @_;
}
2906
sub dump_defaults {

    # Print the given default command line options to STDOUT, one per line
    # in sorted order, below a heading.
    my @defaults = sort @_;
    print STDOUT "Default command line options:\n";

    # print the sorted copy, not the argument list in its original order
    foreach my $default (@defaults) { print STDOUT "$default\n" }
    return;
}
2912
sub readable_options {

    # return options for this run as a string which could be
    # put in a perltidyrc file
    #
    #   $rOpts          - ref to hash of option values, keyed by long name
    #   $roption_string - ref to array of option specifications, each a
    #                     long name optionally followed by a flag: '!' for
    #                     a negatable switch, or '=...' for an option which
    #                     takes a value
    #
    # Neither argument is modified.

    my ( $rOpts, $roption_string ) = @_;

    my $readable_options = "# Final parameter set for this run.\n";
    $readable_options .=
      "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";

    # Find the flag of each option which has a value.  Work on a copy of
    # each specification: the loop variable is an alias for the caller's
    # array element, so changing it would strip the flags from the caller's
    # list and spoil any later use of it.
    my %flag_for;
    foreach my $spec ( @{$roption_string} ) {
        my $opt  = $spec;
        my $flag = "";
        if ( $opt =~ /(.*)(!|=.*)$/ ) {
            $opt  = $1;
            $flag = $2;
        }
        if ( defined( $rOpts->{$opt} ) ) {
            $flag_for{$opt} = $flag;
        }
    }

    foreach my $key ( sort keys %{$rOpts} ) {
        my $flag   = $flag_for{$key};
        my $value  = $rOpts->{$key};
        my $prefix = '--';
        my $suffix = "";
        if ($flag) {
            if ( $flag =~ /^=/ ) {

                # anything which is not all digits is quoted
                if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
                $suffix = "=" . $value;
            }
            elsif ( $flag =~ /^!/ ) {

                # a switch which is off is written in its negated form
                $prefix .= "no" unless ($value);
            }
            else {

                # shouldn't happen
                $readable_options .=
                  "# ERROR in dump_options: unrecognized flag $flag for $key\n";
            }
        }
        $readable_options .= $prefix . $key . $suffix . "\n";
    }
    return $readable_options;
}
2957
sub show_version {

    # Print the version number and copyright notice to the currently
    # selected output handle.
    my $notice = <<"EOM";
This is perltidy, v$VERSION

Copyright 2000-2010, Steve Hancock

Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.

Complete documentation for perltidy can be found using 'man perltidy'
or on the internet at http://perltidy.sourceforge.net.
EOM
    print $notice;
}
2971
2972 sub usage {
2973
2974     print STDOUT <<EOF;
2975 This is perltidy version $VERSION, a perl script indenter.  Usage:
2976
2977     perltidy [ options ] file1 file2 file3 ...
2978             (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2979     perltidy [ options ] file1 -o outfile
2980     perltidy [ options ] file1 -st >outfile
2981     perltidy [ options ] <infile >outfile
2982
2983 Options have short and long forms. Short forms are shown; see
2984 man pages for long forms.  Note: '=s' indicates a required string,
2985 and '=n' indicates a required integer.
2986
2987 I/O control
2988  -h      show this help
2989  -o=file name of the output file (only if single input file)
2990  -oext=s change output extension from 'tdy' to s
2991  -opath=path  change path to be 'path' for output files
2992  -b      backup original to .bak and modify file in-place
2993  -bext=s change default backup extension from 'bak' to s
2994  -q      deactivate error messages (for running under editor)
2995  -w      include non-critical warning messages in the .ERR error output
2996  -syn    run perl -c to check syntax (default under unix systems)
2997  -log    save .LOG file, which has useful diagnostics
2998  -f      force perltidy to read a binary file
2999  -g      like -log but writes more detailed .LOG file, for debugging scripts
3000  -opt    write the set of options actually used to a .LOG file
3001  -npro   ignore .perltidyrc configuration command file
3002  -pro=file   read configuration commands from file instead of .perltidyrc
3003  -st     send output to standard output, STDOUT
3004  -se     send error output to standard error output, STDERR
3005  -v      display version number to standard output and quit
3006
3007 Basic Options:
3008  -i=n    use n columns per indentation level (default n=4)
3009  -t      tabs: use one tab character per indentation level, not recommeded
3010  -nt     no tabs: use n spaces per indentation level (default)
3011  -et=n   entab leading whitespace n spaces per tab; not recommended
3012  -io     "indent only": just do indentation, no other formatting.
3013  -sil=n  set starting indentation level to n;  use if auto detection fails
3014  -ole=s  specify output line ending (s=dos or win, mac, unix)
3015  -ple    keep output line endings same as input (input must be filename)
3016
3017 Whitespace Control
3018  -fws    freeze whitespace; this disables all whitespace changes
3019            and disables the following switches:
3020  -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
3021  -bbt    same as -bt but for code block braces; same as -bt if not given
3022  -bbvt   block braces vertically tight; use with -bl or -bli
3023  -bbvtl=s  make -bbvt to apply to selected list of block types
3024  -pt=n   paren tightness (n=0, 1 or 2)
3025  -sbt=n  square bracket tightness (n=0, 1, or 2)
3026  -bvt=n  brace vertical tightness,
3027          n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3028  -pvt=n  paren vertical tightness (see -bvt for n)
3029  -sbvt=n square bracket vertical tightness (see -bvt for n)
3030  -bvtc=n closing brace vertical tightness:
3031          n=(0=open, 1=sometimes close, 2=always close)
3032  -pvtc=n closing paren vertical tightness, see -bvtc for n.
3033  -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3034  -ci=n   sets continuation indentation=n,  default is n=2 spaces
3035  -lp     line up parentheses, brackets, and non-BLOCK braces
3036  -sfs    add space before semicolon in for( ; ; )
3037  -aws    allow perltidy to add whitespace (default)
3038  -dws    delete all old non-essential whitespace
3039  -icb    indent closing brace of a code block
3040  -cti=n  closing indentation of paren, square bracket, or non-block brace:
3041          n=0 none, =1 align with opening, =2 one full indentation level
3042  -icp    equivalent to -cti=2
3043  -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
3044  -wrs=s  want space right of tokens in string;
3045  -sts    put space before terminal semicolon of a statement
3046  -sak=s  put space between keywords given in s and '(';
3047  -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3048
3049 Line Break Control
3050  -fnl    freeze newlines; this disables all line break changes
3051             and disables the following switches:
3052  -anl    add newlines;  ok to introduce new line breaks
3053  -bbs    add blank line before subs and packages
3054  -bbc    add blank line before block comments
3055  -bbb    add blank line between major blocks
3056  -kbl=n  keep old blank lines? 0=no, 1=some, 2=all
3057  -mbl=n  maximum consecutive blank lines to output (default=1)
3058  -ce     cuddled else; use this style: '} else {'
3059  -dnl    delete old newlines (default)
3060  -l=n    maximum line length;  default n=80
3061  -bl     opening brace on new line
3062  -sbl    opening sub brace on new line.  value of -bl is used if not given.
3063  -bli    opening brace on new line and indented
3064  -bar    opening brace always on right, even for long clauses
3065  -vt=n   vertical tightness (requires -lp); n controls break after opening
3066          token: 0=never  1=no break if next line balanced   2=no break
3067  -vtc=n  vertical tightness of closing container; n controls if closing
3068          token starts new line: 0=always  1=not unless list  1=never
3069  -wba=s  want break after tokens in string; i.e. wba=': .'
3070  -wbb=s  want break before tokens in string
3071
3072 Following Old Breakpoints
3073  -kis    keep interior semicolons.  Allows multiple statements per line.
3074  -boc    break at old comma breaks: turns off all automatic list formatting
3075  -bol    break at old logical breakpoints: or, and, ||, && (default)
3076  -bok    break at old list keyword breakpoints such as map, sort (default)
3077  -bot    break at old conditional (ternary ?:) operator breakpoints (default)
3078  -cab=n  break at commas after a comma-arrow (=>):
3079          n=0 break at all commas after =>
3080          n=1 stable: break unless this breaks an existing one-line container
3081          n=2 break only if a one-line container cannot be formed
3082          n=3 do not treat commas after => specially at all
3083
3084 Comment controls
3085  -ibc    indent block comments (default)
3086  -isbc   indent spaced block comments; may indent unless no leading space
3087  -msc=n  minimum desired spaces to side comment, default 4
3088  -fpsc=n fix position for side comments; default 0;
3089  -csc    add or update closing side comments after closing BLOCK brace
3090  -dcsc   delete closing side comments created by a -csc command
3091  -cscp=s change closing side comment prefix to be other than '## end'
3092  -cscl=s change closing side comment to apply to selected list of blocks
3093  -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3094  -csct=n maximum number of columns of appended text, default n=20
3095  -cscw   causes warning if old side comment is overwritten with -csc
3096
3097  -sbc    use 'static block comments' identified by leading '##' (default)
3098  -sbcp=s change static block comment identifier to be other than '##'
3099  -osbc   outdent static block comments
3100
3101  -ssc    use 'static side comments' identified by leading '##' (default)
3102  -sscp=s change static side comment identifier to be other than '##'
3103
3104 Delete selected text
3105  -dac    delete all comments AND pod
3106  -dbc    delete block comments
3107  -dsc    delete side comments
3108  -dp     delete pod
3109
3110 Send selected text to a '.TEE' file
3111  -tac    tee all comments AND pod
3112  -tbc    tee block comments
3113  -tsc    tee side comments
3114  -tp     tee pod
3115
3116 Outdenting
3117  -olq    outdent long quoted strings (default)
3118  -olc    outdent a long block comment line
3119  -ola    outdent statement labels
3120  -okw    outdent control keywords (redo, next, last, goto, return)
3121  -okwl=s specify alternative keywords for -okw command
3122
3123 Other controls
3124  -mft=n  maximum fields per table; default n=40
3125  -x      do not format lines before hash-bang line (i.e., for VMS)
3126  -asc    allows perltidy to add a ';' when missing (default)
3127  -dsm    allows perltidy to delete an unnecessary ';'  (default)
3128
3129 Combinations of other parameters
3130  -gnu     attempt to follow GNU Coding Standards as applied to perl
3131  -mangle  remove as many newlines as possible (but keep comments and pods)
3132  -extrude  insert as many newlines as possible
3133
3134 Dump and die, debugging
3135  -dop    dump options used in this run to standard output and quit
3136  -ddf    dump default options to standard output and quit
3137  -dsn    dump all option short names to standard output and quit
3138  -dln    dump option long names to standard output and quit
3139  -dpro   dump whatever configuration file is in effect to standard output
3140  -dtt    dump all token types to standard output and quit
3141
3142 HTML
3143  -html write an html file (see 'man perl2web' for many options)
3144        Note: when -html is used, no indentation or formatting are done.
3145        Hint: try perltidy -html -css=mystyle.css filename.pl
3146        and edit mystyle.css to change the appearance of filename.html.
3147        -nnn gives line numbers
3148        -pre only writes out <pre>..</pre> code section
3149        -toc places a table of contents to subs at the top (default)
3150        -pod passes pod text through pod2html (default)
3151        -frm write html as a frame (3 files)
3152        -text=s extra extension for table of contents if -frm, default='toc'
3153        -sext=s extra extension for file content if -frm, default='src'
3154
3155 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3156 negates the long forms.  For example, -nasc means don't add missing
3157 semicolons.
3158
3159 If you are unable to see this entire text, try "perltidy -h | more"
3160 For more detailed information, and additional options, try "man perltidy",
3161 or go to the perltidy home page at http://perltidy.sourceforge.net
3162 EOF
3163
3164 }
3165
sub process_this_file {

    # Push one file through the pipeline: every line obtained from the
    # line-supplying object ($truth) is handed to the line-consuming
    # object ($beauty); afterwards both objects are asked to wrap up.
    #
    # Returns whatever $truth->report_tokenization_errors() returns.
    my ( $truth, $beauty ) = @_;

    # get_line() signals the end of the input with a false value
    for ( ; ; ) {
        my $line_of_tokens = $truth->get_line();
        last unless $line_of_tokens;
        $beauty->write_line($line_of_tokens);
    }

    # finish up; an exception thrown by finish_formatting() is trapped
    # here and does not stop the error report below
    eval { $beauty->finish_formatting() };
    return $truth->report_tokenization_errors();
}
3179
sub check_syntax {

    # Independent check, using 'perl -c', that formatting did not
    # introduce a syntax error.
    #
    # Parameters:
    #   $ifname        - name of the input file
    #   $ofname        - name of the output file
    #   $logger_object - object receiving log entries, complaints, warnings
    #   $rOpts         - reference to the options hash; the keys
    #                    'perl-syntax-check-flags' and 'look-for-hash-bang'
    #                    are read
    #
    # Procedure:
    # - check the syntax of the input file
    # - if it is bad we are done (it could be an incomplete code snippet)
    # - if it is ok, check the syntax of the output file; a failure there
    #   is reported as a definite bug in perltidy
    #
    # Returns the status of the input file: -1=bad, 0=unknown, 1=good

    my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
    my $dashes = '-' x 42 . "\n";
    my $flags  = $rOpts->{'perl-syntax-check-flags'};

    # Make sure -c is among the flags.  Perl accepts repeated flags such
    # as '-c -c', so appending another one is safer than searching for a
    # bundled 'c' as in -Tc, which might be inside a quoted string.
    $flags .= " -c" unless ( $flags =~ /(^-c|\s+-c)/ );

    # Likewise make sure -x is present if it was requested
    if ( $rOpts->{'look-for-hash-bang'} && $flags !~ /(^-x|\s+-x)/ ) {
        $flags .= " -x";
    }

    # Real file names are needed.  This shouldn't happen unless a
    # temporary file couldn't be made.
    if ( $ifname eq '-' ) {
        $logger_object->write_logfile_entry(
            "Cannot run perl -c on STDIN and STDOUT\n");
        return 0;
    }

    $logger_object->write_logfile_entry(
        "checking input file syntax with perl $flags\n");
    $logger_object->write_logfile_entry($dashes);

    # Redirection of the standard error output is not supported by all
    # operating systems/shells
    my $error_redirection = '2>&1';
    if ( $^O eq 'VMS' ) { $error_redirection = "" }

    my $input_report = do_syntax_check( $ifname, $flags, $error_redirection );
    $logger_object->write_logfile_entry("$input_report\n");

    # Trouble with the input file: the output file is not examined.
    # Only perl -c syntax errors are reported as complaints.  Other
    # messages, such as missing modules, are too common.  They can be
    # seen by running with perltidy -w
    if ( $input_report !~ /syntax\s*OK/ ) {
        $logger_object->complain("A syntax check using perl $flags gives: \n");
        $logger_object->complain($dashes);
        $logger_object->complain("$input_report\n");
        $logger_object->complain($dashes);
        $logger_object->write_logfile_entry($dashes);
        $logger_object->write_logfile_entry(
"The output file will not be checked because of input file problems\n"
        );
        return -1;
    }

    # The input file is good, so the output file must be good too
    $logger_object->write_logfile_entry($dashes);
    $logger_object->write_logfile_entry(
        "checking output file syntax with perl $flags ...\n");
    $logger_object->write_logfile_entry($dashes);

    my $output_report = do_syntax_check( $ofname, $flags, $error_redirection );
    $logger_object->write_logfile_entry("$output_report\n");

    if ( $output_report !~ /syntax\s*OK/ ) {
        $logger_object->write_logfile_entry($dashes);
        $logger_object->warning(
"The output file has a syntax error when tested with perl $flags $ofname !\n"
        );
        $logger_object->warning(
            "This implies an error in perltidy; the file $ofname is bad\n");
        $logger_object->report_definite_bug();

        # the perl version number will be helpful for diagnosing the problem
        $logger_object->write_logfile_entry(
            qx/perl -v $error_redirection/ . "\n" );
    }
    return 1;
}
3271
sub do_syntax_check {

    # Run 'perl <flags> <file>' through the shell and return its output.
    #
    # Parameters:
    #   $fname             - name of the file to be checked
    #   $flags             - string of flags for perl, such as '-c -T'
    #   $error_redirection - shell text appended to the command, such as
    #                        '2>&1', or an empty string
    my ( $fname, $flags, $error_redirection ) = @_;

    # The filename has to be quoted in case it has unusual characters
    # or spaces.  Example: this filename #CM11.pm# gives trouble.
    my $quoted_fname = qq{"$fname"};

    # Under VMS something like -T will become -t (and an error) so the
    # flags are quoted as well.  Double quotes seem to work on
    # Unix/Windows/VMS, but this may not work on all systems.  (Single
    # quotes do not work under Windows).  It could become necessary to
    # put double quotes around each flag, such as:  -"c"  -"T"
    # We may eventually need some system-dependent coding here.
    my $quoted_flags = qq{"$flags"};

    # now wish for luck...
    return qx/perl $quoted_flags $quoted_fname $error_redirection/;
}
3290
3291 #####################################################################
3292 #
3293 # This is a stripped down version of IO::Scalar
3294 # Given a reference to a scalar, it supplies either:
3295 # a getline method which reads lines (mode='r'), or
3296 # a print method which writes lines (mode='w')
3297 #
3298 #####################################################################
package Perl::Tidy::IOScalar;
use Carp;

sub new {

    # Wrap a scalar so that it can be read or written one line at a time.
    #
    # Parameters:
    #   $package - class name to bless the new object into
    #   $rscalar - reference to the scalar holding (or receiving) the text
    #   $mode    - 'r' to read lines from the scalar, 'w' to write to it
    #
    # The object is an array reference: [ $rscalar, 'w' ] for a writer and
    # [ \@lines, 'r', $index_of_next_line ] for a reader.
    #
    # Dies with a stack trace (confess) if $rscalar is not a reference to
    # a SCALAR or if $mode is neither 'r' nor 'w'.
    my ( $package, $rscalar, $mode ) = @_;
    my $ref = ref $rscalar;
    if ( $ref ne 'SCALAR' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to SCALAR but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM

    }
    if ( $mode eq 'w' ) {

        # a writer always starts with an empty string
        $$rscalar = "";
        return bless [ $rscalar, $mode ], $package;
    }
    elsif ( $mode eq 'r' ) {

        # Convert a scalar to an array.
        # This avoids looking for "\n" on each call to getline
        #
        # The -1 limit stops split from discarding trailing empty fields;
        # without it any blank lines at the end of the text are silently
        # lost.  Each line gets a terminating newline, including a final
        # line which did not have one.
        my @array;
        if ( defined ${$rscalar} && length ${$rscalar} ) {
            @array = map { "$_\n" } split /\n/, ${$rscalar}, -1;

            # With the -1 limit, text which ends in a newline produces one
            # extra empty field after that newline.  It is not a line of
            # the text, so remove it.
            if ( $array[-1] eq "\n" ) { pop @array }
        }
        my $i_next = 0;
        return bless [ \@array, $mode, $i_next ], $package;
    }
    else {
        confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
}

sub getline {

    # Return the next line of a reader, or undef when the lines are
    # exhausted.  Dies with a stack trace if the object is not a reader.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline call requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }

    # fetch the current line and advance the index in one step
    my $i = $self->[2]++;
    return $self->[0]->[$i];
}

sub print {

    # Append the first argument to the scalar of a writer; any further
    # arguments are ignored.  Dies with a stack trace if the object is
    # not a writer.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print call requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    ${ $self->[0] } .= $_[0];
}

# nothing to release; supplied so that the object can be closed like a
# file handle
sub close { return }
3362
3363 #####################################################################
3364 #
3365 # This is a stripped down version of IO::ScalarArray
3366 # Given a reference to an array, it supplies either:
3367 # a getline method which reads lines (mode='r'), or
3368 # a print method which writes lines (mode='w')
3369 #
3370 # NOTE: this routine assumes that there aren't any embedded
3371 # newlines within any of the array elements.  There are no checks
3372 # for that.
3373 #
3374 #####################################################################
package Perl::Tidy::IOScalarArray;
use Carp;

sub new {

    # Wrap an array so that it can be used like a line-oriented stream.
    #
    # Parameters:
    #   $package - class name to bless the new object into
    #   $rarray  - reference to the array of lines
    #   $mode    - 'r' to read lines from the array, 'w' to write to it
    #
    # The object is an array reference: [ $rarray, 'w' ] for a writer and
    # [ $rarray, 'r', $index_of_next_line ] for a reader.
    #
    # Dies with a stack trace (confess) if $rarray is not a reference to
    # an ARRAY or if $mode is neither 'r' nor 'w'.
    my ( $package, $rarray, $mode ) = @_;
    my $kind = ref $rarray;
    confess <<EOM unless ( $kind eq 'ARRAY' );
------------------------------------------------------------------------
expecting ref to ARRAY but got ref to ($kind); trace follows:
------------------------------------------------------------------------
EOM

    # a reader starts at the first element of the caller's array
    if ( $mode eq 'r' ) {
        return bless [ $rarray, $mode, 0 ], $package;
    }

    # a writer first empties the caller's array
    if ( $mode eq 'w' ) {
        @{$rarray} = ();
        return bless [ $rarray, $mode ], $package;
    }

    confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
}

sub getline {

    # Return the next element of a reader, or undef past the end of the
    # array.  Dies with a stack trace if the object is not a reader.
    my $self = shift;
    my ( $rarray, $mode ) = @{$self}[ 0, 1 ];
    confess <<EOM if ( $mode ne 'r' );
------------------------------------------------------------------------
getline requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM

    # the stored index is advanced as the element is fetched
    return $rarray->[ $self->[2]++ ];
}

sub print {

    # Append the first argument to the array of a writer; any further
    # arguments are ignored.  Dies with a stack trace if the object is
    # not a writer.
    my ( $self, $text ) = @_;
    my $mode = $self->[1];
    confess <<EOM if ( $mode ne 'w' );
------------------------------------------------------------------------
print requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    push @{ $self->[0] }, $text;
}

# nothing to release; supplied so that the object can be closed like a
# file handle
sub close { return }
3433
3434 #####################################################################
3435 #
3436 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3437 # which returns the next line to be parsed
3438 #
3439 #####################################################################
3440
package Perl::Tidy::LineSource;

sub new {

    # Open the input stream and create the object which supplies its
    # lines one at a time.
    #
    # Parameters:
    #   $input_file - the input, as accepted by Perl::Tidy::streamhandle
    #   $rOpts      - reference to the options hash; 'check-syntax' is
    #                 switched off here if the input is '-' or a reference
    #   $rpending_logfile_message - reference to a string which receives
    #                 a note when the syntax check is switched off
    #
    # Returns the new object, or undef if no input handle was obtained.
    my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;

    # the line ending has to be found before the stream is opened below
    my $input_line_ending =
      $rOpts->{'preserve-line-endings'}
      ? Perl::Tidy::find_input_line_ending($input_file)
      : undef;

    ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
    return undef unless $fh;

    # A syntax check needs real files, so it would require a copy of the
    # input when standard input or an object is used.
    my $input_file_copy;
    my $is_stream = ( $input_file eq '-' || ref $input_file );
    if ( $is_stream && $rOpts->{'check-syntax'} ) {

        # Turning off syntax check when input output is used.
        # The reason is that temporary files cause problems on
        # on many systems.
        $rOpts->{'check-syntax'} = 0;
        $input_file_copy = '-';

        $$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard input is used
EOM

    }

    my $self = {
        _fh                => $fh,
        _fh_copy           => undef,    # no copy of the input is made
        _filename          => $input_file,
        _input_file_copy   => $input_file_copy,
        _input_line_ending => $input_line_ending,
        _rinput_buffer     => [],
        _started           => 0,
    };
    return bless $self, $class;
}

sub get_input_file_copy_name {

    # Return the name of the copy of the input file if there is one,
    # and otherwise the name of the input file itself
    my $self = shift;
    return $self->{_input_file_copy} || $self->{_filename};
}

sub close_input_file {

    # Close the input handle, and the copy handle if there is one;
    # errors while closing are trapped and ignored
    my $self = shift;
    eval { $self->{_fh}->close() };
    if ( $self->{_fh_copy} ) {
        eval { $self->{_fh_copy}->close() };
    }
}

sub get_line {

    # Return the next input line; the value returned by the input
    # handle at the end of the input is passed on unchanged
    my $self    = shift;
    my $rbuffer = $self->{_rinput_buffer};
    my $line;

    # lines left over from splitting an earlier line go out first
    if ( @{$rbuffer} ) {
        $line = shift @{$rbuffer};
    }
    else {
        $line = $self->{_fh}->getline();

        # patch to read raw mac files under unix, dos
        # see if the first line has embedded \r's
        if ( $line && !$self->{_started} ) {
            if ( $line =~ /[\015][^\015\012]/ ) {

                # found one -- break the line up and store in a buffer
                @{$rbuffer} = map { $_ . "\n" } split /\015/, $line;
                $line = shift @{$rbuffer};
            }
            $self->{_started}++;
        }
    }

    # echo the line to the copy handle if there is one
    my $fh_copy = $self->{_fh_copy};
    $fh_copy->print($line) if ( $line && $fh_copy );
    return $line;
}
3529
3530 #####################################################################
3531 #
3532 # the Perl::Tidy::LineSink class supplies a write_line method for
3533 # actual file writing
3534 #
3535 #####################################################################
3536
package Perl::Tidy::LineSink;

sub new {

    # Create the object which writes lines to the output stream and, on
    # request, to a tee file.
    #
    # Parameters:
    #   $output_file    - the output destination; it is opened with
    #                     Perl::Tidy::streamhandle when the 'format'
    #                     option is 'tidy'
    #   $tee_file       - name of the tee file; it is not opened until a
    #                     line is actually teed
    #   $line_separator - string appended to every line written
    #   $rOpts          - reference to the options hash; 'check-syntax' is
    #                     switched off here if the output is '-' or a
    #                     reference
    #   $rpending_logfile_message - reference to a string which receives
    #                     a note when the syntax check is switched off
    #   $binmode        - true to use binary mode for the output and tee
    #                     handles
    #
    # Dies if the output stream cannot be opened.
    my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
        $rpending_logfile_message, $binmode )
      = @_;
    my $fh               = undef;
    my $output_file_copy = "";
    my $output_file_open = 0;

    if ( $rOpts->{'format'} eq 'tidy' ) {
        ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
        unless ($fh) { die "Cannot write to output stream\n"; }
        $output_file_open = 1;
        if ($binmode) {
            if ( ref($fh) eq 'IO::File' ) {
                binmode $fh;
            }
            if ( $output_file eq '-' ) { binmode STDOUT }
        }
    }

    # in order to check output syntax when standard output is used,
    # or when it is an object, we would have to make a copy of the file
    if ( $output_file eq '-' || ref $output_file ) {
        if ( $rOpts->{'check-syntax'} ) {

            # Turning off syntax check when standard output is used.
            # The reason is that temporary files cause problems on
            # on many systems.
            $rOpts->{'check-syntax'} = 0;
            $output_file_copy = '-';
            $$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM

        }
    }

    bless {
        _fh               => $fh,
        _fh_copy          => undef,    # no copy of the output is made
        _fh_tee           => undef,    # set by really_open_tee_file
        _output_file      => $output_file,
        _output_file_open => $output_file_open,
        _output_file_copy => $output_file_copy,
        _tee_flag         => 0,
        _tee_file         => $tee_file,
        _tee_file_opened  => 0,
        _line_separator   => $line_separator,
        _binmode          => $binmode,
    }, $class;
}

sub write_line {

    # Write one line, given as the first argument after the object, to
    # the output stream and, while the tee flag is on, to the tee file.
    #
    # NOTE: the caller's variable is modified in place through $_[0]: a
    # trailing newline is removed and the line separator is appended.
    my $self    = shift;
    my $fh      = $self->{_fh};
    my $fh_copy = $self->{_fh_copy};

    chomp $_[0];
    $_[0] .= $self->{_line_separator};

    $fh->print( $_[0] ) if ( $self->{_output_file_open} );
    print {$fh_copy} $_[0] if ( $fh_copy && $self->{_output_file_copy} );

    if ( $self->{_tee_flag} ) {

        # the tee file is created when the first line is sent to it
        unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
        my $fh_tee = $self->{_fh_tee};
        print {$fh_tee} $_[0];
    }
}

sub get_output_file_copy {

    # Return the name of the copy of the output file if there is one,
    # and otherwise the output file itself
    my $self   = shift;
    my $ofname = $self->{_output_file_copy};
    unless ($ofname) {
        $ofname = $self->{_output_file};
    }
    return $ofname;
}

sub tee_on {

    # start sending the lines written to the tee file as well
    my $self = shift;
    $self->{_tee_flag} = 1;
}

sub tee_off {

    # stop sending lines to the tee file
    my $self = shift;
    $self->{_tee_flag} = 0;
}

sub really_open_tee_file {

    # Open the tee file for writing and remember its handle.
    # Dies if the file cannot be opened.
    my $self     = shift;
    my $tee_file = $self->{_tee_file};

    # The file name and the mode are passed as separate arguments.  If
    # they are joined into one string (">$tee_file") then leading and
    # trailing whitespace in the name is dropped and special characters
    # in the name can be taken as part of the mode.
    my $fh_tee = IO::File->new( $tee_file, '>' )
      or die("couldn't open TEE file $tee_file: $!\n");
    binmode $fh_tee if $self->{_binmode};
    $self->{_tee_file_opened} = 1;
    $self->{_fh_tee}          = $fh_tee;
}

sub close_output_file {

    # Close the output handle, the copy handle, and the tee file;
    # errors while closing the first two are trapped and ignored
    my $self = shift;
    eval { $self->{_fh}->close() }      if $self->{_output_file_open};
    eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
    $self->close_tee_file();
}

sub close_tee_file {

    # Close the tee file if it is open.  It will be opened again, and
    # truncated, if another line is teed later.
    my $self = shift;

    if ( $self->{_tee_file_opened} ) {
        eval { $self->{_fh_tee}->close() };
        $self->{_tee_file_opened} = 0;
    }
}
3659
3660 #####################################################################
3661 #
3662 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3663 # useful for program development.
3664 #
3665 # Only one such file is created regardless of the number of input
3666 # files processed.  This allows the results of processing many files
3667 # to be summarized in a single file.
3668 #
3669 #####################################################################
3670
package Perl::Tidy::Diagnostics;

sub new {

    # Create the diagnostics object; no file is opened until the first
    # message is written
    my ($class) = @_;
    my $self = {
        _write_diagnostics_count => 0,
        _last_diagnostic_file    => "",
        _input_file              => "",
        _fh                      => undef,
    };
    return bless $self, $class;
}

sub set_input_file {

    # Record the name of the file being processed; it is used to label
    # the messages which follow
    my ( $self, $input_file ) = @_;
    $self->{_input_file} = $input_file;
}

# This is a diagnostic routine which is useful for program development.
# Output from debug messages goes to a file named DIAGNOSTICS, where
# the messages are labeled by file and line.  This allows many files to
# be scanned at once for some particular condition of interest.
sub write_diagnostics {
    my $self = shift;

    # the file is opened, and truncated, by the first message only
    if ( !$self->{_write_diagnostics_count} ) {
        open DIAGNOSTICS, ">DIAGNOSTICS"
          or death("couldn't open DIAGNOSTICS: $!\n");
    }

    # write a heading whenever the input file has changed
    my $input_file = $self->{_input_file};
    print DIAGNOSTICS "\nFILE:$input_file\n"
      if ( $self->{_last_diagnostic_file} ne $input_file );
    $self->{_last_diagnostic_file} = $input_file;

    # the message itself is the remaining arguments, joined by spaces
    my $lno = Perl::Tidy::Tokenizer::get_input_line_number();
    print DIAGNOSTICS "$lno:\t@_";
    $self->{_write_diagnostics_count}++;
}
3711
3712 #####################################################################
3713 #
3714 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3715 #
3716 #####################################################################
3717
3718 package Perl::Tidy::Logger;
3719
sub new {

    # Create the logger object.
    #
    # Parameters:
    #   $rOpts        - reference to the options hash
    #   $log_file     - the log file, stored for later use
    #   $warning_file - the error output file, either a name or a
    #                   reference; stored for later use
    #   $saw_extrude  - flag stored for later use
    my ( $class, $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;

    # remove any old error output file; this can only be done if a
    # file name was given
    if ( !ref($warning_file) && -e $warning_file ) {
        unlink($warning_file);
    }

    my $self = {
        _log_file                      => $log_file,
        _fh_warnings                   => undef,
        _rOpts                         => $rOpts,
        _last_input_line_written       => 0,
        _at_end_of_file                => 0,
        _use_prefix                    => 1,
        _block_log_output              => 0,
        _line_of_tokens                => undef,
        _output_line_number            => undef,
        _wrote_line_information_string => 0,
        _wrote_column_headings         => 0,
        _warning_file                  => $warning_file,
        _warning_count                 => 0,
        _complaint_count               => 0,
        _saw_code_bug                  => -1,    # -1=no 0=maybe 1=for sure
        _saw_brace_error               => 0,
        _saw_extrude                   => $saw_extrude,
        _output_array                  => [],
    };
    return bless $self, $class;
}
3752
sub close_log_file {

    # Close the handle for warnings if it is open and forget it; an
    # error while closing is trapped and ignored
    my $self        = shift;
    my $fh_warnings = $self->{_fh_warnings};
    if ($fh_warnings) {
        eval { $fh_warnings->close() };
        $self->{_fh_warnings} = undef;
    }
}
3761
sub get_warning_count {

    # return the stored count of warnings
    my ($self) = @_;
    return $self->{_warning_count};
}
3766
sub get_use_prefix {

    # return the stored prefix flag
    my ($self) = @_;
    return $self->{_use_prefix};
}
3771
sub block_log_output {

    # set the flag which blocks log output
    my ($self) = @_;
    $self->{_block_log_output} = 1;
}
3776
sub unblock_log_output {

    # clear the flag which blocks log output
    my ($self) = @_;
    $self->{_block_log_output} = 0;
}
3781
sub interrupt_logfile {

    # Switch the prefix flag off, then send an empty warning line and a
    # WARNING banner to the log
    my ($self) = @_;
    my $banner = ( '#' x 24 ) . "  WARNING  " . ( '#' x 25 ) . "\n";

    $self->{_use_prefix} = 0;
    $self->warning("\n");
    $self->write_logfile_entry($banner);
}
3788
sub resume_logfile {

    # Send a closing line of '#' characters to the log, then switch the
    # prefix flag back on
    my ($self) = @_;
    my $rule = ( '#' x 60 ) . "\n";

    $self->write_logfile_entry($rule);
    $self->{_use_prefix} = 1;
}
3794
sub we_are_at_the_last_line {

    # Note the last line in the log, unless a line information string has
    # been written for the current line, and set the end of file flag
    my ($self) = @_;
    $self->write_logfile_entry("Last line\n\n")
      if ( !$self->{_wrote_line_information_string} );
    $self->{_at_end_of_file} = 1;
}
3802
# record some stuff in case we go down in flames
sub black_box {

    # Parameters:
    #   $line_of_tokens     - hash reference for the current line; the
    #                         keys _line_text, _line_number and _rlevels
    #                         are read here
    #   $output_line_number - stored for use in later logfile messages
    my ( $self, $line_of_tokens, $output_line_number ) = @_;
    my $input_line        = $line_of_tokens->{_line_text};
    my $input_line_number = $line_of_tokens->{_line_number};

    # save line information in case we have to write a logfile message
    $self->{_line_of_tokens}                = $line_of_tokens;
    $self->{_output_line_number}            = $output_line_number;
    $self->{_wrote_line_information_string} = 0;

    # An abbreviated copy of the line goes to the log if enough lines
    # have gone by since the last one, or if the line starts a sub or a
    # package
    my $gap = $input_line_number - $self->{_last_input_line_written};
    if (   $gap >= $self->{_rOpts}->{'logfile-gap'}
        || $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
    {
        my $structural_indentation_level = $line_of_tokens->{_rlevels}->[0];
        $self->{_last_input_line_written} = $input_line_number;

        # remove the leading whitespace and the newline
        my $out_str = $input_line;
        $out_str =~ s/^\s*//;
        chomp $out_str;

        # show the indentation level with one '.' per level
        $out_str = ( '.' x $structural_indentation_level ) . $out_str;

        # keep at most 35 characters of the result
        $out_str = substr( $out_str, 0, 35 ) . " ...."
          if ( length($out_str) > 35 );
        $self->logfile_output( "", "$out_str\n" );
    }
}
3839
sub write_logfile_entry {

    # Log a message behind a '>>>' prompt so that messages are not mistaken
    # for echoed code.  Several arguments are joined with the list
    # separator, exactly as "@_" would join them.
    my ( $self, @parts ) = @_;
    $self->logfile_output( ">>>", "@parts" );
}
3846
sub write_column_headings {

    # Append the explanatory header of the line-information table to the
    # log output array and record that it has been written.
    my ($self) = @_;

    my $headings = <<EOM;
The nesting depths in the table below are at the start of the lines.
The indicated output line numbers are not always exact.
ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.

in:out indent c b  nesting   code + messages; (messages begin with >>>)
lines  levels i k            (code begins with one '.' per indent level)
------  ----- - - --------   -------------------------------------------
EOM

    $self->{_wrote_column_headings} = 1;
    my $rlog_lines = $self->{_output_array};
    push @{$rlog_lines}, $headings;
}
3862
sub make_line_information_string {

    # Build the column prefix placed in front of a log message: input and
    # output line numbers, indentation levels, continuation indentation,
    # block flag, and a nesting summary.  The data come from the line most
    # recently stored by black_box.  Returns "" when that line has no
    # (non-zero) line number.
    my ($self) = @_;
    my $line_of_tokens    = $self->{_line_of_tokens};
    my $input_line_number = $line_of_tokens->{_line_number};
    return "" unless $input_line_number;

    my $output_line_number = $self->{_output_line_number};

    # the table header goes out just once, ahead of the first row
    $self->write_column_headings() unless $self->{_wrote_column_headings};

    # pad short line numbers so that columns stay aligned for scripts of
    # up to 999 lines; beyond that it does not really matter
    my $extra_space = "";
    foreach my $number ( $input_line_number, $output_line_number ) {
        $extra_space .=
            ( $number < 10 )  ? "  "
          : ( $number < 100 ) ? " "
          :                     "";
    }

    # levels at the start of the line
    my $structural_level = $line_of_tokens->{_rlevels}->[0];
    my $python_level     = $line_of_tokens->{_python_indentation_level};
    $python_level = 0 if $python_level < 0;

    # continuation indentation is a single column: show '*' above 9
    my $ci_level = $line_of_tokens->{_rci_levels}->[0];
    $ci_level = '*' if $ci_level > 9;

    # block flag is 1 when the nesting-block string ends in '1'
    my $bk = ( $line_of_tokens->{_rnesting_blocks}->[0] =~ /1$/ ) ? '1' : '0';

    # Two forms of nesting summary exist:
    #   the token form, e.g.  {{[      (readable, shows order)
    #   the count form, e.g.  (0 [1 {2 (bounded length)
    # The token form is used, padded to 8 columns, unless it is longer
    # than 8 characters.
    my $nesting_string;
    my $token_form = $line_of_tokens->{_rnesting_tokens}->[0];
    if ( length($token_form) <= 8 ) {
        $nesting_string = $token_form . " " x ( 8 - length($token_form) );
    }
    else {
        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
        my $paren_depth          = $line_of_tokens->{_paren_depth};
        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
        $nesting_string =
          "($paren_depth [$square_bracket_depth {$brace_depth";
    }

    return join( '',
        "L",  $input_line_number, ":", $output_line_number, $extra_space,
        " i", $python_level,      ":", $structural_level,
        " ",  $ci_level,          " ", $bk, " ", $nesting_string );
}
3922
sub logfile_output {

    # Append one entry to the in-memory log.  $prompt is a short tag (for
    # example '>>>') and $msg the text.  Nothing is stored while logging is
    # blocked.  Normally the entry is prefixed with the line-information
    # columns; at end of file, or when prefixes are switched off, the bare
    # message is stored instead.
    my ( $self, $prompt, $msg ) = @_;
    return if ( $self->{_block_log_output} );

    my $rlog_lines = $self->{_output_array};
    my $entry;
    if ( !$self->{_at_end_of_file} && $self->{_use_prefix} ) {
        my $line_info = $self->make_line_information_string();
        $self->{_wrote_line_information_string} = 1;

        # without line information the message goes out unadorned
        $entry = $line_info ? "$line_info   $prompt$msg" : "$msg";
    }
    else {
        $entry = "$msg";
    }
    push @{$rlog_lines}, $entry;
}
3944
sub get_saw_brace_error {

    # Accessor: the number of brace errors recorded so far.
    my ($self) = @_;
    return $self->{_saw_brace_error};
}
3949
sub increment_brace_error {

    # Count one more brace error.  As with any post-increment, the value
    # returned is the count before the increment.
    my ($self) = @_;
    return $self->{_saw_brace_error}++;
}
3954
sub brace_warning {

    # Issue a brace-related warning, but only for the first
    # BRACE_WARNING_LIMIT occurrences.  When the limit is reached a final
    # notice says that no more warnings of this type will follow.  Every
    # call is counted in _saw_brace_error, whether or not it was reported.
    my ( $self, @message ) = @_;
    use constant BRACE_WARNING_LIMIT => 10;

    my $seen = $self->{_saw_brace_error};
    $self->warning(@message) if $seen < BRACE_WARNING_LIMIT;

    $self->{_saw_brace_error} = ++$seen;

    $self->warning("No further warnings of this type will be given\n")
      if $seen == BRACE_WARNING_LIMIT;
}
3970
sub complain {

    # Handle a non-critical message.  With the 'warning-output' option it
    # is treated as a regular warning (and so reaches the .ERR output);
    # otherwise it is counted and written to the .LOG file only.
    my ( $self, @message ) = @_;

    return $self->warning(@message)
      if $self->{_rOpts}->{'warning-output'};

    $self->{_complaint_count}++;
    $self->write_logfile_entry(@message);
}
3988
sub warning {

    # Report a message on the warning stream and echo it to the log.
    # Nothing happens when the 'quiet' option is set.  The stream is set up
    # on the first warning: STDERR with 'standard-error-output', otherwise
    # the warning file.  Only the first WARNING_LIMIT messages are printed,
    # but every call is counted.
    my ( $self, @message ) = @_;
    use constant WARNING_LIMIT => 50;

    my $rOpts = $self->{_rOpts};
    unless ( $rOpts->{'quiet'} ) {

        my $count = $self->{_warning_count};

        # first warning: choose and remember the output stream
        if ( !$count ) {
            my $fh;
            if ( $rOpts->{'standard-error-output'} ) {
                $fh = *STDERR;
            }
            else {
                ( $fh, my $filename ) =
                  Perl::Tidy::streamhandle( $self->{_warning_file}, 'w' );
                $fh or die("couldn't open $filename $!\n");

                # tell the user where the messages are going
                warn "## Please see file $filename\n";
            }
            $self->{_fh_warnings} = $fh;
        }

        my $fh_warnings = $self->{_fh_warnings};
        if ( $count < WARNING_LIMIT ) {
            if ( $self->get_use_prefix() > 0 ) {

                # tag the message with the tokenizer's current line number
                my $input_line_number =
                  Perl::Tidy::Tokenizer::get_input_line_number();
                $fh_warnings->print("$input_line_number:\t@message");
                $self->write_logfile_entry("WARNING: @message");
            }
            else {
                $fh_warnings->print(@message);
                $self->write_logfile_entry(@message);
            }
        }
        $self->{_warning_count} = ++$count;

        if ( $count == WARNING_LIMIT ) {
            $fh_warnings->print("No further warnings will be given\n");
        }
    }
}
4035
# Values held in _saw_code_bug:
#   -1 = no bug
#    0 = maybe, not sure.
#    1 = definitely
sub report_possible_bug {

    # Promote "no bug" (negative) to "maybe" (0); a status that is already
    # 0 or 1 is left as it is.
    my ($self) = @_;
    my $status = $self->{_saw_code_bug};
    return $self->{_saw_code_bug} = ( $status >= 0 ) ? $status : 0;
}
4045
sub report_definite_bug {

    # Record that a programming bug has definitely been seen (code 1).
    my ($self) = @_;
    return $self->{_saw_code_bug} = 1;
}
4050
sub ask_user_for_bug_report {

    # Depending on the recorded bug status, ask the user to send a bug
    # report:
    #   status 1               - definite bug; the wording is softer when
    #                            the extrude flag was seen, and otherwise a
    #                            hint about added semicolons may follow
    #   status 0, syntax ok(1) - possible bug
    #   anything else          - say nothing
    # $formatter is only asked for its added-semicolon count; the call is
    # wrapped in eval, so a failure there is ignored.
    my ( $self, $infile_syntax_ok, $formatter ) = @_;
    my $bug_status = $self->{_saw_code_bug};

    if ( $bug_status == 1 ) {
        if ( $self->{_saw_extrude} ) {
            $self->warning(<<EOM);

You may have encountered a bug in perltidy.  However, since you are using the
-extrude option, the problem may be with perl or one of its modules, which have
occasional problems with this type of file.  If you believe that the
problem is with perltidy, and the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be corrected.
Include the smallest possible script which has the problem, along with the .LOG
file. See the manual pages for contact information.
Thank you!
EOM
        }
        else {
            $self->warning(<<EOM);

Oops, you seem to have encountered a bug in perltidy.  Please check the
BUGS file at http://perltidy.sourceforge.net.  If the problem is not
listed there, please report it so that it can be corrected.  Include the
smallest possible script which produces this message, along with the
.LOG file if appropriate.  See the manual pages for contact information.
Your efforts are appreciated.
Thank you!
EOM

            # semicolons added by perltidy are a possible cause
            my $added_semicolon_count = 0;
            eval {
                $added_semicolon_count =
                  $formatter->get_added_semicolon_count();
            };
            if ( $added_semicolon_count > 0 ) {
                $self->warning(<<EOM);

The log file shows that perltidy added $added_semicolon_count semicolons.
Please rerun with -nasc to see if that is the cause of the syntax error.  Even
if that is the problem, please report it so that it can be fixed.
EOM

            }
        }
    }
    elsif ( $bug_status == 0 && $infile_syntax_ok == 1 ) {
        $self->warning(<<EOM);

You may have encountered a code bug in perltidy.  If you think so, and
the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be
corrected.  Include the smallest possible script which has the problem,
along with the .LOG file. See the manual pages for contact information.
Thank you!
EOM

    }
}
4110
sub finish {

    # Called once all formatting is done: print closing hints about the
    # warnings that were issued, possibly ask for a bug report, and write
    # the accumulated log to the log file when it is to be saved.
    my ( $self, $infile_syntax_ok, $formatter ) = @_;

    my $rOpts      = $self->{_rOpts};
    my $bug_status = $self->{_saw_code_bug};
    my $log_file   = $self->{_log_file};

    # the log is kept for a possible bug in a file whose syntax is ok, for
    # a definite bug, or when the 'logfile' option asks for it
    my $save_logfile =
         ( $bug_status == 0 && $infile_syntax_ok == 1 )
      || $bug_status == 1
      || $rOpts->{'logfile'};

    if ( $self->{_warning_count} ) {
        if ($save_logfile) {

            # this hint must not be echoed into the logfile itself
            $self->block_log_output();
            $self->warning(
                "The logfile $log_file may contain useful information\n");
            $self->unblock_log_output();
        }

        $self->warning(
"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
        ) if ( $self->{_complaint_count} > 0 );

        my $log_is_partial = $rOpts->{'logfile-gap'} > 1 || !$save_logfile;
        if ( $self->{_saw_brace_error} && $log_is_partial ) {
            $self->warning("To save a full .LOG file rerun with -g\n");
        }
    }
    $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );

    if ($save_logfile) {
        my ($fh) = Perl::Tidy::streamhandle( $log_file, 'w' );
        if ($fh) {
            $fh->print($_) foreach @{ $self->{_output_array} };
            eval { $fh->close() };
        }
    }
}
4158
4159 #####################################################################
4160 #
4161 # The Perl::Tidy::DevNull class supplies a dummy print method
4162 #
4163 #####################################################################
4164
package Perl::Tidy::DevNull;

# Constructor: an empty object blessed into the invoking class.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}

# Accept and discard anything printed to the object.
sub print { return }

# Nothing is open, so closing does nothing.
sub close { return }
4169
4170 #####################################################################
4171 #
4172 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4173 #
4174 #####################################################################
4175
4176 package Perl::Tidy::HtmlWriter;
4177
4178 use File::Basename;
4179
4180 # class variables
4181 use vars qw{
4182   %html_color
4183   %html_bold
4184   %html_italic
4185   %token_short_names
4186   %short_to_long_names
4187   $rOpts
4188   $css_filename
4189   $css_linkname
4190   $missing_html_entities
4191 };
4192
4193 # replace unsafe characters with HTML entity representation if HTML::Entities
4194 # is available
4195 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4196
sub new {

    # Constructor for the html writer.
    #   $input_file         - name of the source; replaced by "NONAME" when
    #                         empty, '-', or a reference
    #   $html_file          - destination handed to Perl::Tidy::streamhandle
    #   $extension          - output extension, needed for the frames option
    #   $html_toc_extension - extra extension for the frames toc file name
    #   $html_src_extension - extra extension for the frames source file name
    # Returns the new object, or undef (after a warning) if the output
    # stream cannot be opened.  The package variable $rOpts supplies the
    # options, and conflicting options are switched off here.
    my $class = shift;
    my ( $input_file, $html_file, $extension, $html_toc_extension,
        $html_src_extension )
      = @_;

    # the main output stream; give up if it cannot be opened
    my ( $html_fh, $html_filename ) =
      Perl::Tidy::streamhandle( $html_file, 'w' );
    if ( !$html_fh ) {
        warn("can't open $html_file: $!\n");
        return undef;
    }

    $input_file = "NONAME"
      if ( !$input_file || $input_file eq '-' || ref($input_file) );

    # the table of contents is accumulated in a string
    my $toc_string;
    my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );

    # the <pre> section is written straight to the output stream with
    # 'html-pre-only', and otherwise into a temporary string
    my @pre_string_stack;
    my $html_pre_fh;
    if ( $rOpts->{'html-pre-only'} ) {
        $html_pre_fh = $html_fh;
        $html_pre_fh->print("<pre>\n");
    }
    else {
        my $pre_string;
        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
        push @pre_string_stack, \$pre_string;
    }

    # with 'pod2html', pod text is diverted into its own string; the
    # option is dropped if it conflicts with 'html-pre-only' or if
    # Pod::Html cannot be loaded
    my ( $html_pod_fh, $pod_string );
    if ( $rOpts->{'pod2html'} && $rOpts->{'html-pre-only'} ) {
        undef $rOpts->{'pod2html'};
    }
    elsif ( $rOpts->{'pod2html'} ) {
        eval "use Pod::Html";
        if ($@) {
            warn
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
            undef $rOpts->{'pod2html'};
        }
        else {
            $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
        }
    }

    # frames need an output extension to build the two extra file names
    my ( $toc_filename, $src_filename );
    if ( $rOpts->{'frames'} ) {
        if ($extension) {
            $toc_filename = $input_file . $html_toc_extension . $extension;
            $src_filename = $input_file . $html_src_extension . $extension;
        }
        else {
            warn
"cannot use frames without a specified output extension; ignoring -frm\n";
            undef $rOpts->{'frames'};
        }
    }

    # ----------------------------------------------------------
    # From here on output is routed like this:
    # html_toc_fh <-- table of contents items
    # html_pre_fh <-- the <pre> section of formatted code, except:
    # html_pod_fh <-- pod goes here with the pod2html option
    # ----------------------------------------------------------

    # the title defaults to the file-name part of the input file
    my $title = $rOpts->{'title'};
    ($title) = fileparse($input_file) unless ($title);

    # state shared through references stored in the object
    my $toc_item_count = 0;
    my $in_toc_package = "";
    my $last_level     = 0;

    my $self = {
        _input_file        => $input_file,          # name of input file
        _title             => $title,               # title, unescaped
        _html_file         => $html_file,           # name of .html output file
        _toc_filename      => $toc_filename,        # for frames option
        _src_filename      => $src_filename,        # for frames option
        _html_file_opened  => 1,                    # a flag
        _html_fh           => $html_fh,             # the output stream
        _html_pre_fh       => $html_pre_fh,         # pre section goes here
        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
        _rpod_string       => \$pod_string,         # string holding pod
        _pod_cut_count     => 0,                    # how many =cut's?
        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
        _rtoc_string       => \$toc_string,         # string holding toc
        _rtoc_item_count   => \$toc_item_count,     # how many toc items
        _rin_toc_package   => \$in_toc_package,     # package name
        _rtoc_name_count   => {},                   # hash to track unique names
        _rpackage_stack    => [],                   # stack to check for package
                                                    # name changes
        _rlast_level       => \$last_level,         # brace indentation level
    };
    return bless $self, $class;
}
4310
sub add_toc_item {

    # Record one entry in the html table of contents and drop the matching
    # anchor into the <pre> text.  This runs even when no table of contents
    # is written, because the anchors are still wanted.
    #   $name - the anchor name
    #   $type - one of 'package', 'sub', '__END__', '__DATA__', 'EOF'
    # A final call with type 'EOF' is required to close the list.
    my ( $self, $name, $type ) = @_;
    my $toc_fh          = $self->{_html_toc_fh};
    my $pre_fh          = $self->{_html_pre_fh};
    my $rname_count     = $self->{_rtoc_name_count};
    my $ritem_count     = $self->{_rtoc_item_count};
    my $rlevel          = $self->{_rlast_level};
    my $rin_toc_package = $self->{_rin_toc_package};
    my $rpackage_stack  = $self->{_rpackage_stack};

    # Subs are listed in a sublist under their package, so every package
    # item is opened and closed through these two helpers.
    my $close_package = sub {
        return unless $$rin_toc_package;
        $toc_fh->print("</ul>\n</li>\n");
        $$rin_toc_package = "";
    };

    my $open_package = sub {
        my ( $anchor, $package ) = @_;
        $close_package->() if $$rin_toc_package;
        $toc_fh->print(
            "<li><a href=\"#$anchor\">package $package</a>\n<ul>\n");
        $$rin_toc_package = $package;
    };

    # The first real item opens the table of contents.  An 'EOF' arriving
    # before any other item means there is no table at all.
    if ( !$$ritem_count ) {
        return if ( $type eq 'EOF' );
        $toc_fh->print(
            "<!-- BEGIN CODE INDEX --><a name=\"code-index\"></a>\n<ul>\n");
    }
    $$ritem_count++;

    # Build a unique anchor: packages get a 'package-' prefix, subs use
    # their own names.
    my $anchor = ( $type eq 'package' ) ? "package-$name" : $name;

    # A repeated name (compared without regard to case) gets '-1', '-2',
    # ... appended; sub and package names cannot contain '-', so the
    # result cannot collide with another name.
    my $earlier_uses = $rname_count->{ lc $anchor }++;
    $anchor .= "-$earlier_uses" if $earlier_uses;

    # With pod2html every anchor also ends in '-' to stay clear of the
    # anchors that pod2html creates.
    $anchor .= '-' if $rOpts->{'pod2html'};

    my $entry = "<li><a href=\"#$anchor\">$name</a></li>\n";
    if ( $type eq 'package' ) {
        $open_package->( $anchor, $name );
    }
    elsif ( $type eq 'sub' ) {

        # make sure the open sublist belongs to this sub's package,
        # closing a stale one and opening a new one as needed
        my $package = $rpackage_stack->[$$rlevel] || 'main';
        $close_package->()
          if ( $$rin_toc_package && $$rin_toc_package ne $package );
        $open_package->( $anchor, $package ) unless ($$rin_toc_package);

        $toc_fh->print($entry);
    }
    else {
        $close_package->();
        $toc_fh->print($entry);
    }

    # the anchor itself goes into the <pre> section
    $pre_fh->print("<a name=\"$anchor\"></a>");

    # end of file closes the table of contents
    if ( $type eq 'EOF' ) {
        $toc_fh->print("</ul>\n<!-- END CODE INDEX -->\n");
    }
}
4417
BEGIN {

    # Official list of the token classes a user may refer to.  The long
    # names serve as getopt keys; the short names are abbreviations for
    # command line input.  Short names loosely follow the token type
    # characters but must be alphanumeric, and the whole table is kept in
    # lower case because html is case insensitive.
    # When adding NEW_TOKENS: update this hash table
    # short names => long names
    %short_to_long_names = (
        'c'  => 'comment',
        'pd' => 'pod-text',
        'k'  => 'keyword',
        'q'  => 'quote',
        'h'  => 'here-doc-target',
        'hh' => 'here-doc-text',
        'i'  => 'identifier',
        'w'  => 'bareword',
        'n'  => 'numeric',
        'v'  => 'v-string',
        'j'  => 'label',
        'm'  => 'subroutine',
        'p'  => 'paren',
        's'  => 'structure',
        'cm' => 'comma',
        'co' => 'colon',
        'sc' => 'semicolon',
        'pu' => 'punctuation',
    );

    # Next, each actual token type is mapped onto one of the short names
    # above; a token type that is not mapped gets 'punctuation' properties.
    # The keys are token types (run with --dump-token-types (-dtt) to see
    # them) and the values are keys of the previous table.

    # When adding NEW_TOKENS: update this hash table
    # $type => $short_name
    %token_short_names = (

        # comments and pod
        '#' => 'c',
        'P' => 'pd',

        # numbers and v-strings
        'n' => 'n',
        'v' => 'v',

        # keywords
        'k' => 'k',
        'F' => 'k',

        # quotes and here-docs
        'Q' => 'q',
        'q' => 'q',
        'h' => 'h',
        'H' => 'hh',

        # labels, barewords, subroutines
        'J' => 'j',
        'j' => 'j',
        'w' => 'w',
        'M' => 'm',

        # separators
        ','  => 'cm',
        '=>' => 'cm',
        ';'  => 'sc',
        'f'  => 'sc',
        ':'  => 'co',
        'A'  => 'co',

        # parens
        '(' => 'p',
        ')' => 'p',
    );

    # These token types will all be called identifiers for now
    # FIXME: need to separate user defined modules as separate type
    $token_short_names{$_} = 'i' foreach qw" i t U C Y Z G :: ";

    # These token types will be called 'structure'
    $token_short_names{$_} = 's' foreach qw" { } ";

    # OLD NOTES: save for reference
    # Any of these could be added later if it would be useful.
    # For now, they will by default become punctuation
    #    my @list = qw" L R [ ] ";
    #    @token_long_names{@list} = ('non-structure') x scalar(@list);
    #
    #    my @list = qw"
    #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
    #      ";
    #    @token_long_names{@list} = ('math') x scalar(@list);
    #
    #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
    #    @token_long_names{@list} = ('bit') x scalar(@list);
    #
    #    my @list = qw" == != < > <= <=> ";
    #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
    #
    #    my @list = qw" && || ! &&= ||= //= ";
    #    @token_long_names{@list} = ('logical') x scalar(@list);
    #
    #    my @list = qw" . .= =~ !~ x x= ";
    #    @token_long_names{@list} = ('string-operators') x scalar(@list);
    #
    #    # Incomplete..
    #    my @list = qw" .. -> <> ... \ ? ";
    #    @token_long_names{@list} = ('misc-operators') x scalar(@list);

}
4524
sub make_getopt_long_names {

    # Class method: append the Getopt::Long specifications of all html
    # options to the array referenced by $rgetopt_names.
    my ( $class, $rgetopt_names ) = @_;

    # color, italic and bold options for every token class
    foreach my $long_name ( values %short_to_long_names ) {
        push @$rgetopt_names,
          "html-color-$long_name=s",
          "html-italic-$long_name!",
          "html-bold-$long_name!";
    }

    # the remaining html options
    push @$rgetopt_names, qw(
      html-color-background=s
      html-linked-style-sheet=s
      nohtml-style-sheets
      html-pre-only
      html-line-numbers
      html-entities!
      stylesheet
      html-table-of-contents!
      pod2html!
      frames!
      html-toc-extension=s
      html-src-extension=s
    );

    # Pod::Html parameters:
    push @$rgetopt_names, qw(
      backlink=s
      cachedir=s
      htmlroot=s
      libpods=s
      podpath=s
      podroot=s
      title=s
    );

    # Pod::Html parameters with leading 'pod' which will be removed
    # before the call to Pod::Html
    push @$rgetopt_names, qw(
      podquiet!
      podverbose!
      podrecurse!
      podflush
      podheader!
      podindex!
    );
}
4564
sub make_abbreviated_names {

    # Class method: add the short forms of the html options to the
    # expansion table referenced by $rexpansion, with entries such as
    #      'hcc'    => [qw(html-color-comment)],
    #      'hck'    => [qw(html-color-keyword)],
    #  etc
    my ( $class, $rexpansion ) = @_;

    # color/bold/italic abbreviations for every token class
    foreach my $short ( keys %short_to_long_names ) {
        my $long = $short_to_long_names{$short};
        $rexpansion->{"hc$short"}  = ["html-color-$long"];
        $rexpansion->{"hb$short"}  = ["html-bold-$long"];
        $rexpansion->{"hi$short"}  = ["html-italic-$long"];
        $rexpansion->{"nhb$short"} = ["nohtml-bold-$long"];
        $rexpansion->{"nhi$short"} = ["nohtml-italic-$long"];
    }

    # abbreviations for all other html options
    $rexpansion->{"hcbg"}  = ["html-color-background"];
    $rexpansion->{"pre"}   = ["html-pre-only"];
    $rexpansion->{"toc"}   = ["html-table-of-contents"];
    $rexpansion->{"ntoc"}  = ["nohtml-table-of-contents"];
    $rexpansion->{"nnn"}   = ["html-line-numbers"];
    $rexpansion->{"hent"}  = ["html-entities"];
    $rexpansion->{"nhent"} = ["nohtml-entities"];
    $rexpansion->{"css"}   = ["html-linked-style-sheet"];
    $rexpansion->{"nss"}   = ["nohtml-style-sheets"];
    $rexpansion->{"ss"}    = ["stylesheet"];
    $rexpansion->{"pod"}   = ["pod2html"];
    $rexpansion->{"npod"}  = ["nopod2html"];
    $rexpansion->{"frm"}   = ["frames"];
    $rexpansion->{"nfrm"}  = ["noframes"];
    $rexpansion->{"text"}  = ["html-toc-extension"];
    $rexpansion->{"sext"}  = ["html-src-extension"];
}
4601
sub check_options {

    # Called once after the options have been parsed.  Saves the options
    # hash reference in $rOpts, fills in the default html colors and font
    # properties, builds the markup lookup tables, and handles the style
    # sheet options.
    # On entry: the first argument is the class name (not used here) and
    # the second is the reference to the options hash.
    my ( $class, $rOptions ) = @_;
    $rOpts = $rOptions;

    # X11 color names for default settings that seemed to look ok
    # (these color names are only used for programming clarity; the hex
    # numbers are actually written)
    use constant ForestGreen   => "#228B22";
    use constant SaddleBrown   => "#8B4513";
    use constant magenta4      => "#8B008B";
    use constant IndianRed3    => "#CD5555";
    use constant DeepSkyBlue4  => "#00688B";
    use constant MediumOrchid3 => "#B452CD";
    use constant black         => "#000000";
    use constant white         => "#FFFFFF";
    use constant red           => "#FF0000";

    # Default color, bold and italic settings for each token type, as
    #   [ $short_name, default_color, bold?, italic? ]
    # Anything not listed here will be given the default (punctuation)
    # color; these types are currently not listed: ws pu s sc cm co p
    # When adding NEW_TOKENS: add an entry here if you don't want defaults
    my @default_properties = (
        [ 'c',  ForestGreen,   0, 0 ],
        [ 'pd', ForestGreen,   0, 1 ],
        [ 'k',  magenta4,      1, 0 ],    # was SaddleBrown
        [ 'q',  IndianRed3,    0, 0 ],
        [ 'hh', IndianRed3,    0, 1 ],
        [ 'h',  IndianRed3,    1, 0 ],
        [ 'i',  DeepSkyBlue4,  0, 0 ],
        [ 'w',  black,         0, 0 ],
        [ 'n',  MediumOrchid3, 0, 0 ],
        [ 'v',  MediumOrchid3, 0, 0 ],
        [ 'j',  IndianRed3,    1, 0 ],
        [ 'm',  red,           1, 0 ],
    );
    foreach my $rproperties (@default_properties) {
        set_default_properties( @{$rproperties} );
    }

    set_default_color( 'html-color-background',  white );
    set_default_color( 'html-color-punctuation', black );

    # Every token type has a short name.  Build the tables, keyed by short
    # name, which are used to do the html markup.
    foreach my $short_name ( keys %short_to_long_names ) {
        my $long_name = $short_to_long_names{$short_name};
        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
    }

    # if a style sheet was requested, write it to STDOUT and exit
    if ( defined $rOpts->{'stylesheet'} ) {
        write_style_sheet_file('-');
        exit 1;
    }

    # make sure user gives a file name after -css
    my $linked_sheet = $rOpts->{'html-linked-style-sheet'};
    if ( defined $linked_sheet ) {
        $css_linkname = $linked_sheet;
        die "You must specify a valid filename after -css\n"
          if ( $css_linkname =~ /^-/ );
    }

    if ($css_linkname) {

        # a linked style sheet wins over a request for no style sheets
        if ( $rOpts->{'nohtml-style-sheets'} ) {
            $rOpts->{'nohtml-style-sheets'} = 0;
            warning("You can't specify both -css and -nss; -nss ignored\n");
        }

        # Write the style sheet file, but only if no file of that name
        # exists yet, because the user may have done some work by hand to
        # create it.  This also avoids a potential disaster in which the
        # user forgets to specify the style sheet, like this:
        #    perltidy -html -css myfile1.pl myfile2.pl
        # This would cause myfile1.pl to be parsed as the style sheet name
        # by GetOpts.
        write_style_sheet_file($css_linkname) unless ( -e $css_linkname );
    }
    $missing_html_entities = 1 unless $rOpts->{'html-entities'};
}
4688
sub write_style_sheet_file {

    # Write the default style sheet to the file named $css_filename.
    # The special name '-' sends the style sheet to standard output.
    # Dies if the file cannot be opened; warns if it cannot be closed.
    # Returns the result of closing the file.
    my $css_filename = shift;

    # Only the special name '-' goes through the two-argument form of open,
    # which maps '>-' to STDOUT.  A real filename is opened with an explicit
    # mode so that every character of the name, including any leading or
    # trailing whitespace, is taken literally.
    my $fh =
      ( $css_filename eq '-' )
      ? IO::File->new('>-')
      : IO::File->new( $css_filename, 'w' );
    unless ($fh) {
        die "can't open $css_filename: $!\n";
    }
    write_style_sheet_data($fh);

    # buffered write errors only show up at close, so report a failure
    # rather than silently leaving an incomplete style sheet
    my $closed_ok = eval { $fh->close };
    unless ($closed_ok) {
        warn "can't close $css_filename: $!\n";
    }
    return $closed_ok;
}
4699
sub write_style_sheet_data {

    # Print the style sheet rules to the open file handle $fh: first the
    # rules for the page body and the <pre> sections, then one class rule
    # for each token type.
    my ($fh) = @_;

    my $page_bg    = $rOpts->{'html-color-background'};
    my $page_color = $rOpts->{'html-color-punctuation'};

    # the pre background option is new and may not be set; if not, the
    # page background is used
    my $pre_bg = $rOpts->{'html-pre-color-background'} || $page_bg;

    $fh->print(<<"END_PAGE_RULES");
/* default style sheet generated by perltidy */
body {background: $page_bg; color: $page_color}
pre { color: $page_color;
      background: $pre_bg;
      font-family: courier;
    }

END_PAGE_RULES

    foreach my $short_name ( sort keys %short_to_long_names ) {

        # a one-character class name gets a trailing space for alignment
        my $selector = ".$short_name";
        $selector .= ' ' if ( length($short_name) == 1 );

        # token types without a color of their own use the text color
        my $color =
          defined( $html_color{$short_name} )
          ? $html_color{$short_name}
          : $page_color;
        $fh->print("$selector \{ color: $color;");

        $fh->print(" font-weight:bold;")  if ( $html_bold{$short_name} );
        $fh->print(" font-style:italic;") if ( $html_italic{$short_name} );

        # close the rule, with the long token name as a css comment
        my $long_name = $short_to_long_names{$short_name};
        $fh->print("} /* $long_name */\n");
    }
}
4741
sub set_default_color {

    # Store a color in $rOpts->{$key}.  A true value which is already
    # there is kept; otherwise the given default $color is used.  Either
    # way the value is passed through check_RGB before it is stored.
    my ( $key, $color ) = @_;
    my $chosen_color = $rOpts->{$key} || $color;
    $rOpts->{$key} = check_RGB($chosen_color);
}
4749
sub check_RGB {

    # A color given as exactly six hex digits is an RGB value and gets a
    # leading '#'; anything else is assumed to be a valid ascii color name
    # and is returned unchanged.
    my ($color) = @_;
    return ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) ? "#$color" : $color;
}
4758
sub set_default_properties {

    # Install the default color, bold and italic settings for the token
    # type with the given short name.  The bold and italic defaults are
    # only used when the corresponding option is not already defined.
    my ( $short_name, $color, $bold, $italic ) = @_;
    my $long_name = $short_to_long_names{$short_name};

    set_default_color( "html-color-$long_name", $color );

    my $bold_key   = "html-bold-$long_name";
    my $italic_key = "html-italic-$long_name";
    $rOpts->{$bold_key}   = $bold   unless ( defined $rOpts->{$bold_key} );
    $rOpts->{$italic_key} = $italic unless ( defined $rOpts->{$italic_key} );
}
4769
sub pod_to_html {

    # Use Pod::Html to process the pod and make the page,
    # then merge the perltidy code sections into it.
    #
    # On entry:
    #  $pod_string        = the pod text to hand to pod2html
    #  $css_string        = style sheet markup which is copied in just
    #                       ahead of the <body> line (may be empty)
    #  $toc_string        = the perltidy table of contents (may be empty)
    #  $rpre_string_stack = reference to an array of references to the
    #                       marked-up code sections, in output order;
    #                       sections are shifted off as they are written
    #
    # Returns 1 if success, 0 otherwise
    my $self = shift;
    my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
    my $title        = $self->{_title};
    my $success_flag = 0;

    # don't try to use pod2html if no pod
    unless ($pod_string) {
        return $success_flag;
    }

    # Pod::Html requires a real temporary filename
    # If we are making a frame, we have a name available
    # Otherwise, we have to find one
    my $tmpfile;
    if ( $rOpts->{'frames'} ) {
        $tmpfile = $self->{_toc_filename};
    }
    else {
        $tmpfile = Perl::Tidy::make_temporary_filename();
    }
    my $fh_tmp = IO::File->new( $tmpfile, 'w' );
    unless ($fh_tmp) {
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
        return $success_flag;
    }

    #------------------------------------------------------------------
    # Warning: a temporary file is open; we have to clean up if
    # things go bad.  From here on all returns should be by going to
    # RETURN so that the temporary file gets unlinked.
    #------------------------------------------------------------------

    # write the pod text to the temporary file
    $fh_tmp->print($pod_string);
    $fh_tmp->close();

    # Hand off the pod to pod2html.
    # Note that we can use the same temporary filename for input and output
    # because of the way pod2html works.
    {

        my @args;
        push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
        my $kw;

        # Flags with string args:
        # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
        # "podpath=s", "podroot=s"
        # Note: -css=s is handled by perltidy itself
        foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
            if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
        }

        # Toggle switches; these have extra leading 'pod'
        # "header!", "index!", "recurse!", "quiet!", "verbose!"
        foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
            my $kwd = $kw;    # allows us to strip 'pod'
            if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
            elsif ( defined( $rOpts->{$kw} ) ) {
                $kwd =~ s/^pod//;
                push @args, "--no$kwd";
            }
        }

        # "flush",
        $kw = 'podflush';
        if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }

        # Must clean up if pod2html dies (it can);
        # Be careful not to overwrite callers __DIE__ routine
        local $SIG{__DIE__} = sub {
            print $_[0];
            unlink $tmpfile if -e $tmpfile;
            exit 1;
        };

        pod2html(@args);
    }
    $fh_tmp = IO::File->new( $tmpfile, 'r' );
    unless ($fh_tmp) {

        # this error shouldn't happen ... we just used this filename
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
        goto RETURN;
    }

    # Note: $html_fh is deliberately fetched only after the jump to RETURN
    # above.  On that path it is still undefined at RETURN, so the output
    # file is not closed there.
    my $html_fh = $self->{_html_fh};
    my @toc;
    my $in_toc;
    my $no_print;

    # This routine will write the html selectively and store the toc
    my $html_print = sub {
        foreach (@_) {
            $html_fh->print($_) unless ($no_print);
            if ($in_toc) { push @toc, $_ }
        }
    };

    # loop over lines of html output from pod2html and merge in
    # the necessary perltidy html sections
    my ( $saw_body, $saw_index, $saw_body_end );
    while ( my $line = $fh_tmp->getline() ) {

        if ( $line =~ /^\s*<html>\s*$/i ) {
            my $date = localtime;
            $html_print->("<!-- Generated by perltidy on $date -->\n");
            $html_print->($line);
        }

        # Copy the perltidy css, if any, just ahead of the <body> tag
        elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
            $saw_body = 1;
            $html_print->($css_string) if $css_string;
            $html_print->($line);

            # add a top anchor and heading
            $html_print->("<a name=\"-top-\"></a>\n");
            $title = escape_html($title);
            $html_print->("<h1>$title</h1>\n");
        }
        elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
            $in_toc = 1;

            # when frames are used, an extra table of contents in the
            # contents panel is confusing, so don't print it
            $no_print = $rOpts->{'frames'}
              || !$rOpts->{'html-table-of-contents'};
            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
            $html_print->($line);
        }

        # Copy the perltidy toc, if any, after the Pod::Html toc
        elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
            $saw_index = 1;
            $html_print->($line);
            if ($toc_string) {
                $html_print->("<hr />\n") if $rOpts->{'frames'};
                $html_print->("<h2>Code Index:</h2>\n");

                # $in_toc is still set here, so these lines are also
                # stored in @toc by $html_print
                my @code_toc = map { $_ .= "\n" } split /\n/, $toc_string;
                $html_print->(@code_toc);
            }
            $in_toc   = 0;
            $no_print = 0;
        }

        # Copy one perltidy section after each marker
        elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
            $line = $2;
            $html_print->($1) if $1;

            # Intermingle code and pod sections if we saw multiple =cut's.
            if ( $self->{_pod_cut_count} > 1 ) {
                my $rpre_string = shift(@$rpre_string_stack);

                # the stack may have run out, so check the reference
                # itself before looking at the string
                if ( $rpre_string && $$rpre_string ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
                else {

                    # shouldn't happen: we stored a string before writing
                    # each marker.
                    warn
"Problem merging html stream with pod2html; order may be wrong\n";
                }
                $html_print->($line);
            }

            # If didn't see multiple =cut lines, we'll put the pod out first
            # and then the code, because it's less confusing.
            else {

                # since we are not intermixing code and pod, we don't need
                # or want any <hr> lines which separated pod and code
                $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
            }
        }

        # Copy any remaining code section before the </body> tag
        elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
            $saw_body_end = 1;
            if (@$rpre_string_stack) {
                unless ( $self->{_pod_cut_count} > 1 ) {
                    $html_print->('<hr />');
                }
                while ( my $rpre_string = shift(@$rpre_string_stack) ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
            }
            $html_print->($line);
        }
        else {
            $html_print->($line);
        }
    }

    # Finished reading the pod2html output.  Close the temporary file now
    # so that it is no longer held open when it is unlinked below; some
    # systems cannot remove a file which is still open.
    $fh_tmp->close();

    $success_flag = 1;
    unless ($saw_body) {
        warn "Did not see <body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_body_end) {
        warn "Did not see </body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_index) {
        warn "Did not find INDEX END in pod2html output\n";
        $success_flag = 0;
    }

  RETURN:
    eval { $html_fh->close() };

    # note that we have to unlink tmpfile before making frames
    # because the tmpfile may be one of the names used for frames
    unlink $tmpfile if -e $tmpfile;
    if ( $success_flag && $rOpts->{'frames'} ) {
        $self->make_frame( \@toc );
    }
    return $success_flag;
}
5000
sub make_frame {

    # Make a frame with table of contents in the left panel
    # and the text in the right panel.
    # On entry:
    #  $self->{_html_file} names the file holding the no-frames html output
    #  $rtoc is a reference to an array with the table of contents
    #
    # Three html files are produced:
    #  1. the table of contents
    #  2. the contents (source code) itself
    #  3. the frame which contains them
    my ( $self, $rtoc ) = @_;
    my $html_filename = $self->{_html_file};
    my $toc_filename  = $self->{_toc_filename};
    my $src_filename  = $self->{_src_filename};
    my $title         = escape_html( $self->{_title} );

    # FUTURE input parameter:
    my $top_basename = "";

    # the links between the files are relative, so only basenames are used
    my ($toc_basename) = fileparse($toc_filename);
    my ($src_basename) = fileparse($src_filename);

    # name of the frame which displays the source
    my $src_frame_name = 'SRC';

    # 1. Write the table of contents panel; its anchors are changed to
    # point into the source panel.  (The returned value is not used below.)
    my $first_anchor =
      write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
        $src_frame_name );

    # 2. The current html output file becomes the contents panel
    rename( $html_filename, $src_filename )
      or die "Cannot rename $html_filename to $src_filename:$!\n";

    # 3. The frame itself takes over the original html filename
    write_frame_html(
        $title,        $html_filename, $top_basename,
        $toc_basename, $src_basename,  $src_frame_name
    );
}
5046
sub write_toc_html {

    # Write a separate html table of contents file for frames.
    # On entry:
    #  $title          = page title, written into the markup as given
    #  $toc_filename   = name of the table of contents file to write
    #  $src_basename   = name of the source panel file, used in the links
    #  $rtoc           = reference to the array of table of contents lines;
    #                    the anchors in these lines are changed in place
    #  $src_frame_name = name of the frame in which the links will open
    # Returns the first anchor found in the table of contents, as returned
    # by change_anchor_names (undefined if there was none).
    # Dies if the file cannot be opened.
    my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
    my $fh = IO::File->new( $toc_filename, 'w' )
      or die "Cannot open $toc_filename:$!\n";
    $fh->print(<<EOM);
<html>
<head>
<title>$title</title>
</head>
<body>
<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
EOM

    # point the anchors at the source panel, then write the lines out
    my $first_anchor =
      change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
    $fh->print( join "", @$rtoc );

    $fh->print(<<EOM);
</body>
</html>
EOM

    eval { $fh->close() };

    # the caller receives the first anchor; previously it was computed but
    # the result of the last print was returned instead
    return $first_anchor;
}
5072
sub write_frame_html {

    # Write the html file which defines the frameset, that is, the page
    # which holds the table of contents panel and the source panel.
    # On entry:
    #  $title          = page title, written into the markup as given
    #  $frame_filename = name of the frameset file to write
    #  $top_basename   = name of a master index file for an extra panel,
    #                    or an empty string if there is none
    #  $toc_basename   = name of the table of contents file
    #  $src_basename   = name of the source file
    #  $src_frame_name = name given to the frame which shows the source
    # Dies if the file cannot be opened.
    my (
        $title,        $frame_filename, $top_basename,
        $toc_basename, $src_basename,   $src_frame_name
    ) = @_;

    # the error message names the file which we actually tried to open
    my $fh = IO::File->new( $frame_filename, 'w' )
      or die "Cannot open $frame_filename:$!\n";

    $fh->print(<<EOM);
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
<?xml version="1.0" encoding="iso-8859-1" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
</head>
EOM

    # two left panels, one right, if master index file
    if ($top_basename) {
        $fh->print(<<EOM);
<frameset cols="20%,80%">
<frameset rows="30%,70%">
<frame src = "$top_basename" />
<frame src = "$toc_basename" />
</frameset>
EOM
    }

    # one left panel, one right, if no master index file
    else {
        $fh->print(<<EOM);
<frameset cols="20%,*">
<frame src = "$toc_basename" />
EOM
    }

    # the source panel, plus a fallback for clients without frames
    $fh->print(<<EOM);
<frame src = "$src_basename" name = "$src_frame_name" />
<noframes>
<body>
<p>If you see this message, you are using a non-frame-capable web client.</p>
<p>This document contains:</p>
<ul>
<li><a href="$toc_basename">A table of contents</a></li>
<li><a href="$src_basename">The source code</a></li>
</ul>
</body>
</noframes>
</frameset>
</html>
EOM
}
5128
sub change_anchor_names {

    # Change the anchors in the lines of @$rlines, in place, so that each
    # link names $filename and opens in the frame $target.
    # Returns the first new link, or undef if no line held an anchor.
    my ( $rlines, $filename, $target ) = @_;
    my $first_anchor;
    foreach my $toc_line (@$rlines) {

        #  We're looking for lines like this:
        #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
        #  ----  -       --------  -----------------
        #  $1              $4            $5
        next
          unless ( $toc_line =~
            /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i );
        my ( $pre, $name, $post ) = ( $1, $4, $5 );

        # rebuild the line around the new link; the text after the
        # anchor tag is kept and the line is ended with a newline
        my $href = "$filename#$name";
        $toc_line = qq($pre<a href="$href" target="$target">$post\n);
        $first_anchor ||= $href;
    }
    return $first_anchor;
}
5152
sub close_html_file {

    # Finish writing the html output and close the output file.
    # Does nothing unless an html file has been opened.
    # There are 3 basic paths to html output:
    #  path 1: -pre mode; just end the <pre> section
    #  path 2: let pod2html make the page and merge the code into it
    #  path 3: write the page here, with the code in one <pre> section
    my ($self) = @_;
    return unless ( $self->{_html_file_opened} );

    my $fh_out      = $self->{_html_fh};
    my $rtoc_string = $self->{_rtoc_string};

    # ---------------------------------
    # Path 1: finish up if in -pre mode
    # ---------------------------------
    if ( $rOpts->{'html-pre-only'} ) {
        $fh_out->print("</pre>\n");
        eval { $fh_out->close() };
        return;
    }

    # Finish the index
    $self->add_toc_item( 'EOF', 'EOF' );

    my $rpre_string_stack = $self->{_rpre_string_stack};

    # Patch to darken the <pre> background color in case of pod2html and
    # interleaved code/documentation.  Otherwise, the distinction
    # between code and documentation is blurred.
    my $mixing_pod_and_code =
      $rOpts->{pod2html} && $self->{_pod_cut_count} >= 1;
    if (   $mixing_pod_and_code
        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
    {
        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
    }

    # Collect the css, or the link to it, in a string.  The string stays
    # undefined if style sheets are not used.
    my $css_string;
    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );

    # use css linked to another file
    if ( $rOpts->{'html-linked-style-sheet'} ) {
        $fh_css->print(
            qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
        );
    }

    # use css embedded in this file
    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
        $fh_css->print(qq{<style type="text/css">\n<!--\n});
        write_style_sheet_data($fh_css);
        $fh_css->print("-->\n</style>\n");
    }

    # -----------------------------------------------------------
    # path 2: use pod2html if requested
    #         If we fail for some reason, continue on to path 3
    # -----------------------------------------------------------
    if ( $rOpts->{'pod2html'} ) {
        my $rpod_string = $self->{_rpod_string};
        my $merged_ok =
          $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
            $rpre_string_stack );
        return if ($merged_ok);
    }

    # --------------------------------------------------
    # path 3: write code in html, with pod only in italics
    # --------------------------------------------------
    my $input_file = $self->{_input_file};
    my $title      = escape_html($input_file);
    my $date       = localtime;
    $fh_out->print( <<"END_HEAD_START");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- Generated by perltidy on $date -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
END_HEAD_START

    # output the css, if used ...
    if ($css_string) {
        $fh_out->print($css_string);
        $fh_out->print("</head>\n<body>\n");
    }

    # ... otherwise the page colors go into the <body> tag
    else {
        my $bgcolor    = $rOpts->{'html-color-background'};
        my $text_color = $rOpts->{'html-color-punctuation'};
        $fh_out->print(
            qq{</head>\n<body bgcolor="$bgcolor" text="$text_color">\n});
    }

    # top anchor and heading
    $fh_out->print(qq{<a name="-top-"></a>\n});
    $fh_out->print("<h1>$title</h1>\n");

    # copy the table of contents
    if (   $$rtoc_string
        && !$rOpts->{'frames'}
        && $rOpts->{'html-table-of-contents'} )
    {
        $fh_out->print($$rtoc_string);
    }

    # copy the pre section(s)
    my $fname_comment = $input_file;
    $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
    $fh_out->print( <<"END_PRE_START");
<hr />
<!-- contents of filename: $fname_comment -->
<pre>
END_PRE_START

    foreach my $rpre_string ( @{$rpre_string_stack} ) {
        $fh_out->print( ${$rpre_string} );
    }

    # and finish the html page
    $fh_out->print("</pre>\n</body>\n</html>\n");
    eval { $fh_out->close() };    # could be object without close method

    if ( $rOpts->{'frames'} ) {
        my @toc = map { $_ . "\n" } split /\n/, $$rtoc_string;
        $self->make_frame( \@toc );
    }
}
5293
sub markup_tokens {

    # Add html markup to the tokens of one line.
    # On entry:
    #  $rtokens     = reference to the array of token text
    #  $rtoken_type = reference to the array of token types
    #  $rlevels     = reference to the array of token levels
    # Returns a reference to an array of the marked-up tokens.  Along the
    # way the package stack is kept up to date and sub and package names
    # are added to the table of contents.
    my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_;
    my $rlast_level    = $self->{_rlast_level};
    my $rpackage_stack = $self->{_rpackage_stack};
    my @colored_tokens;

    foreach my $j ( 0 .. $#{$rtoken_type} ) {
        my $type  = $rtoken_type->[$j];
        my $token = $rtokens->[$j];
        my $level = $rlevels->[$j];
        $level = 0 if ( $level < 0 );

        #-------------------------------------------------------
        # Update the package stack.  The package stack is needed to keep
        # the toc correct because some packages may be declared within
        # blocks and go out of scope when we leave the block.
        #-------------------------------------------------------
        if ( $level > $$rlast_level ) {

            # going deeper: the new level starts in the package of the
            # level above it
            $rpackage_stack->[ $level - 1 ] ||= 'main';
            $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
        }
        elsif ( $level < $$rlast_level ) {
            my $package = $rpackage_stack->[$level] || 'main';

            # if we change packages due to a nesting change, we
            # have to make an entry in the toc
            if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
                $self->add_toc_item( $package, 'package' );
            }
        }
        $$rlast_level = $level;

        if ( $type eq 'i' ) {

            #-------------------------------------------------------
            # Intercept a sub name here; split it
            # into keyword 'sub' and sub name; and add an
            # entry in the toc
            #-------------------------------------------------------
            if ( $token =~ /^(sub\s+)(\w.*)$/ ) {
                my ( $keyword, $name ) = ( $1, $2 );
                my $colored_keyword =
                  $self->markup_html_element( $keyword, 'k' );
                push @colored_tokens, $colored_keyword;
                $token = $name;
                $type  = 'M';

                # but don't include sub declarations in the toc;
                # these will have leading token types 'i;'
                my $signature = join "", @$rtoken_type;
                unless ( $signature =~ /^i;/ ) {
                    my $subname = $token;
                    $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
                    $self->add_toc_item( $subname, 'sub' );
                }
            }

            #-------------------------------------------------------
            # Intercept a package name here; split it
            # into keyword 'package' and name; add to the toc,
            # and update the package stack
            #-------------------------------------------------------
            elsif ( $token =~ /^(package\s+)(\w.*)$/ ) {
                my ( $keyword, $name ) = ( $1, $2 );
                my $colored_keyword =
                  $self->markup_html_element( $keyword, 'k' );
                push @colored_tokens, $colored_keyword;
                $token = $name;
                $self->add_toc_item( "$token", 'package' );
                $rpackage_stack->[$level] = $token;
            }
        }

        # mark up whatever is left of the token
        my $colored_token = $self->markup_html_element( $token, $type );
        push @colored_tokens, $colored_token;
    }
    return ( \@colored_tokens );
}
5370
sub markup_html_element {

    # Return $token with the html markup for a token of the given type.
    # Blank tokens are returned unchanged; all other tokens are escaped
    # for html and then marked up according to the short name of the type.
    my ( $self, $token, $type ) = @_;

    # skip a blank token or a blank line
    return $token if ( $type eq 'b' || $token =~ /^\s*$/ );
    $token = escape_html($token);

    # get the short abbreviation for this token type;
    # punctuation is the default
    my $short_name = $token_short_names{$type};
    $short_name = "pu" unless ( defined $short_name );

    # without style sheets the markup is written directly into the text
    if ( $rOpts->{'nohtml-style-sheets'} ) {
        my $color = $html_color{$short_name};

        # no font tag is needed for the default text color
        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
            $token = qq(<font color="$color">$token</font>);
        }
        $token = "<i>$token</i>" if ( $html_italic{$short_name} );
        $token = "<b>$token</b>" if ( $html_bold{$short_name} );
    }

    # with style sheets, everything but punctuation gets a class
    elsif ( $short_name ne 'pu' ) {
        $token = qq(<span class="$short_name">$token</span>);
    }
    return $token;
}
5404
sub escape_html {

    # Return a copy of $token which has been escaped for use in html.
    # HTML::Entities does the work unless $missing_html_entities is set,
    # in which case the characters & < > " are replaced here.
    my ($token) = @_;

    unless ($missing_html_entities) {
        HTML::Entities::encode_entities($token);
        return $token;
    }

    # the ampersand has to be done first so that the ampersands of the
    # other entities are not escaped again
    $token =~ s/\&/&amp;/g;
    $token =~ s/\</&lt;/g;
    $token =~ s/\>/&gt;/g;
    $token =~ s/\"/&quot;/g;
    return $token;
}
5419
sub finish_formatting {

    # Called after the last line: finishes and closes the html output.
    # Returns nothing.
    my ($self) = @_;
    $self->close_html_file();
    return;
}
5427
sub write_line {

    # Mark up one input line and write it to the current <pre> output
    # stream.  Does nothing unless an html file has been opened.
    # On entry, $line_of_tokens is a reference to a hash with the line
    # type, text, number and, for a line of code, its tokens.
    # With the pod2html option, pod lines go to the pod stream instead.
    my ( $self, $line_of_tokens ) = @_;
    return unless ( $self->{_html_file_opened} );

    my $html_pre_fh = $self->{_html_pre_fh};
    my $line_type   = $line_of_tokens->{_line_type};
    my $input_line  = $line_of_tokens->{_line_text};
    my $line_number = $line_of_tokens->{_line_number};
    chomp $input_line;

    my $html_line;

    # markup line of code..
    if ( $line_type eq 'CODE' ) {

        # the line starts with the leading whitespace of the input line
        my ($leading_space) = ( $input_line =~ /(^\s*)/ );
        $html_line = defined($leading_space) ? $leading_space : "";

        my ($rcolored_tokens) = $self->markup_tokens(
            $line_of_tokens->{_rtokens},
            $line_of_tokens->{_rtoken_type},
            $line_of_tokens->{_rlevels}
        );
        $html_line .= join '', @$rcolored_tokens;
    }

    # markup line of non-code..
    else {

        # token type used to mark up each of these kinds of line
        my %character_for = (
            'HERE'       => 'H',
            'HERE_END'   => 'h',
            'FORMAT'     => 'H',
            'FORMAT_END' => 'h',
            'SYSTEM'     => 'c',
            'END_START'  => 'k',
            'DATA_START' => 'k',
        );
        my $line_character = $character_for{$line_type};

        if ( defined($line_character) ) {

            # the __END__ and __DATA__ lines also go into the toc
            if ( $line_type eq 'END_START' ) {
                $self->add_toc_item( '__END__', '__END__' );
            }
            elsif ( $line_type eq 'DATA_START' ) {
                $self->add_toc_item( '__DATA__', '__DATA__' );
            }
        }
        elsif ( $line_type =~ /^POD/ ) {
            $line_character = 'P';
            if ( $rOpts->{'pod2html'} ) {
                my $html_pod_fh = $self->{_html_pod_fh};
                if ( $line_type eq 'POD_START' ) {

                    my $rpre_string_stack = $self->{_rpre_string_stack};
                    my $rpre_string       = $rpre_string_stack->[-1];

                    # if we have written any non-blank lines to the
                    # current pre section, start writing to a new output
                    # string
                    if ( $$rpre_string =~ /\S/ ) {
                        my $pre_string;
                        $html_pre_fh =
                          Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
                        $self->{_html_pre_fh} = $html_pre_fh;
                        push @$rpre_string_stack, \$pre_string;

                        # leave a marker in the pod stream so we know
                        # where to put the pre section we just
                        # finished.
                        my $for_html = '=for html';    # don't confuse pod utils
                        $html_pod_fh->print(<<END_MARKER);

$for_html
<!-- pERLTIDY sECTION -->

END_MARKER
                    }

                    # otherwise, just clear the current string and start
                    # over
                    else {
                        $$rpre_string = "";
                        $html_pod_fh->print("\n");
                    }
                }

                # the pod line itself goes to the pod stream; a blank
                # line follows the end of each pod section
                $html_pod_fh->print( $input_line . "\n" );
                if ( $line_type eq 'POD_END' ) {
                    $self->{_pod_cut_count}++;
                    $html_pod_fh->print("\n");
                }
                return;
            }
        }
        else { $line_character = 'Q' }
        $html_line = $self->markup_html_element( $input_line, $line_character );
    }

    # add the line number if requested; numbers of fewer than four
    # digits are padded on the left with spaces
    if ( $rOpts->{'html-line-numbers'} ) {
        my $padding =
            ( $line_number < 10 )   ? "   "
          : ( $line_number < 100 )  ? "  "
          : ( $line_number < 1000 ) ? " "
          :                           "";
        $html_line = "$padding$line_number $html_line";
    }

    # write the line
    $html_pre_fh->print("$html_line\n");
}
5536
5537 #####################################################################
5538 #
5539 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5540 # line breaks to the token stream
5541 #
5542 # WARNING: This is not a real class for speed reasons.  Only one
5543 # Formatter may be used.
5544 #
5545 #####################################################################
5546
5547 package Perl::Tidy::Formatter;
5548
BEGIN {

    # Compile-time debugging switches for the formatter.
    # Caution: each of these produces a lot of output, so they should all
    # be 0 except when debugging small scripts.
    use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
    use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
    use constant FORMATTER_DEBUG_FLAG_CI      => 0;
    use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
    use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
    use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
    use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
    use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
    use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
    use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
    use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
    use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;

    # Pair each reporting key with the value of its switch, in the order
    # in which the notices are to be printed.
    my @key_and_switch = (
        [ 'BOND',    FORMATTER_DEBUG_FLAG_BOND ],
        [ 'BREAK',   FORMATTER_DEBUG_FLAG_BREAK ],
        [ 'CI',      FORMATTER_DEBUG_FLAG_CI ],
        [ 'FLUSH',   FORMATTER_DEBUG_FLAG_FLUSH ],
        [ 'FORCE',   FORMATTER_DEBUG_FLAG_FORCE ],
        [ 'LIST',    FORMATTER_DEBUG_FLAG_LIST ],
        [ 'NOBREAK', FORMATTER_DEBUG_FLAG_NOBREAK ],
        [ 'OUTPUT',  FORMATTER_DEBUG_FLAG_OUTPUT ],
        [ 'SPARSE',  FORMATTER_DEBUG_FLAG_SPARSE ],
        [ 'STORE',   FORMATTER_DEBUG_FLAG_STORE ],
        [ 'UNDOBP',  FORMATTER_DEBUG_FLAG_UNDOBP ],
        [ 'WHITE',   FORMATTER_DEBUG_FLAG_WHITE ],
    );

    # Print a notice for every switch which has been turned on
    foreach my $pair (@key_and_switch) {
        my ( $key, $is_on ) = @{$pair};
        next unless $is_on;
        print "FORMATTER_DEBUGGING with key $key\n";
    }
}
5583
5584 use Carp;
# Package variables holding the state of the formatter.  They are
# declared with 'use vars' (rather than kept in an object) because this
# package is not a real class; see the WARNING in the package header.
# Each name is listed exactly once.
use vars qw{

  @gnu_stack
  $max_gnu_stack_index
  $gnu_position_predictor
  $line_start_index_to_go
  $last_indentation_written
  $last_unadjusted_indentation
  $last_leading_token

  $saw_VERSION_in_this_file
  $saw_END_or_DATA_

  @gnu_item_list
  $max_gnu_item_index
  $gnu_sequence_number
  $last_output_indentation
  %last_gnu_equals
  %gnu_comma_count
  %gnu_arrow_count

  @block_type_to_go
  @type_sequence_to_go
  @container_environment_to_go
  @bond_strength_to_go
  @forced_breakpoint_to_go
  @lengths_to_go
  @levels_to_go
  @leading_spaces_to_go
  @reduced_spaces_to_go
  @matching_token_to_go
  @mate_index_to_go
  @nesting_blocks_to_go
  @ci_levels_to_go
  @nesting_depth_to_go
  @nobreak_to_go
  @old_breakpoint_to_go
  @tokens_to_go
  @types_to_go

  %saved_opening_indentation

  $max_index_to_go
  $comma_count_in_batch
  $old_line_count_in_batch
  $last_nonblank_index_to_go
  $last_nonblank_type_to_go
  $last_nonblank_token_to_go
  $last_last_nonblank_index_to_go
  $last_last_nonblank_type_to_go
  $last_last_nonblank_token_to_go
  @nonblank_lines_at_depth
  $starting_in_quote
  $ending_in_quote

  $in_format_skipping_section
  $format_skipping_pattern_begin
  $format_skipping_pattern_end

  $forced_breakpoint_count
  $forced_breakpoint_undo_count
  @forced_breakpoint_undo_stack
  %postponed_breakpoint

  $tabbing
  $embedded_tab_count
  $first_embedded_tab_at
  $last_embedded_tab_at
  $deleted_semicolon_count
  $first_deleted_semicolon_at
  $last_deleted_semicolon_at
  $added_semicolon_count
  $first_added_semicolon_at
  $last_added_semicolon_at
  $first_tabbing_disagreement
  $last_tabbing_disagreement
  $in_tabbing_disagreement
  $tabbing_disagreement_count
  $input_line_tabbing

  $last_line_type
  $last_line_leading_type
  $last_line_leading_level
  $last_last_line_leading_level

  %block_leading_text
  %block_opening_line_number
  $csc_new_statement_ok
  $accumulating_text_for_block
  $leading_block_text
  $rleading_block_if_elsif_text
  $leading_block_text_level
  $leading_block_text_length_exceeded
  $leading_block_text_line_length
  $leading_block_text_line_number
  $closing_side_comment_prefix_pattern
  $closing_side_comment_list_pattern

  $last_nonblank_token
  $last_nonblank_type
  $last_last_nonblank_token
  $last_last_nonblank_type
  $last_nonblank_block_type
  $last_output_level
  %is_do_follower
  %is_if_brace_follower
  %space_after_keyword
  $rbrace_follower
  $looking_for_else
  %is_last_next_redo_return
  %is_other_brace_follower
  %is_else_brace_follower
  %is_anon_sub_brace_follower
  %is_anon_sub_1_brace_follower
  %is_sort_map_grep
  %is_sort_map_grep_eval
  %is_sort_map_grep_eval_do
  %is_block_without_semicolon
  %is_if_unless
  %is_and_or
  %is_assignment
  %is_chain_operator
  %is_if_unless_and_or_last_next_redo_return
  %is_until_while_for_if_elsif_else

  @has_broken_sublist
  @dont_align
  @want_comma_break

  $is_static_block_comment
  $index_start_one_line_block
  $semicolons_before_block_self_destruct
  $index_max_forced_break
  $input_line_number
  $diagnostics_object
  $vertical_aligner_object
  $logger_object
  $file_writer_object
  $formatter_self
  @ci_stack
  $last_line_had_side_comment
  %want_break_before
  %outdent_keyword
  $static_block_comment_pattern
  $static_side_comment_pattern
  %opening_vertical_tightness
  %closing_vertical_tightness
  %closing_token_indentation

  %opening_token_right
  %stack_opening_token
  %stack_closing_token

  $block_brace_vertical_tightness_pattern

  $rOpts_add_newlines
  $rOpts_add_whitespace
  $rOpts_block_brace_tightness
  $rOpts_block_brace_vertical_tightness
  $rOpts_brace_left_and_indent
  $rOpts_comma_arrow_breakpoints
  $rOpts_break_at_old_keyword_breakpoints
  $rOpts_break_at_old_comma_breakpoints
  $rOpts_break_at_old_logical_breakpoints
  $rOpts_break_at_old_ternary_breakpoints
  $rOpts_closing_side_comment_else_flag
  $rOpts_closing_side_comment_maximum_text
  $rOpts_continuation_indentation
  $rOpts_cuddled_else
  $rOpts_delete_old_whitespace
  $rOpts_fuzzy_line_length
  $rOpts_indent_columns
  $rOpts_line_up_parentheses
  $rOpts_maximum_fields_per_table
  $rOpts_maximum_line_length
  $rOpts_short_concatenation_item_length
  $rOpts_keep_old_blank_lines
  $rOpts_ignore_old_breakpoints
  $rOpts_format_skipping
  $rOpts_space_function_paren
  $rOpts_space_keyword_paren
  $rOpts_keep_interior_semicolons

  $half_maximum_line_length

  %is_opening_type
  %is_closing_type
  %is_keyword_returning_list
  %tightness
  %matching_token
  $rOpts
  %right_bond_strength
  %left_bond_strength
  %binary_ws_rules
  %want_left_space
  %want_right_space
  %is_digraph
  %is_trigraph
  $bli_pattern
  $bli_list_string
  %is_closing_token
  %is_opening_token
};
5790
BEGIN {

    # Fill the constant lookup tables of this package at compile time.
    # Each table is a hash whose keys are the listed words and whose
    # values are all 1.  A lexical array is used as scratch space so that
    # @_ is left alone.
    my @q;

    # default list of block types for which -bli would apply
    $bli_list_string = 'if else elsif unless while for foreach do : sub';

    @q = qw(
      .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x=
    );
    @is_digraph{@q} = (1) x scalar(@q);

    @q = qw( ... **= <<= >>= &&= ||= //= <=> );
    @is_trigraph{@q} = (1) x scalar(@q);

    @q = qw(
      = **= += *= &= <<= &&=
      -= /= |= >>= ||= //=
      .= %= ^=
      x=
    );
    @is_assignment{@q} = (1) x scalar(@q);

    @q = qw(
      grep
      keys
      map
      reverse
      sort
      split
    );
    @is_keyword_returning_list{@q} = (1) x scalar(@q);

    # NOTE(review): the leading 'is' is not named by the hash and looks as
    # if it could be a typo; it is kept so that behavior does not
    # change -- confirm whether it is intentional.
    @q = qw(is if unless and or err last next redo return);
    @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);

    # always break after a closing curly of these block types:
    @q = qw(until while for if elsif else);
    @is_until_while_for_if_elsif_else{@q} = (1) x scalar(@q);

    @q = qw(last next redo return);
    @is_last_next_redo_return{@q} = (1) x scalar(@q);

    @q = qw(sort map grep);
    @is_sort_map_grep{@q} = (1) x scalar(@q);

    @q = qw(sort map grep eval);
    @is_sort_map_grep_eval{@q} = (1) x scalar(@q);

    @q = qw(sort map grep eval do);
    @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);

    @q = qw(if unless);
    @is_if_unless{@q} = (1) x scalar(@q);

    @q = qw(and or err);
    @is_and_or{@q} = (1) x scalar(@q);

    # Identify certain operators which often occur in chains.
    # Note: the minus (-) causes a side effect of padding of the first line in
    # something like this (by sub set_logical_padding):
    #    Checkbutton => 'Transmission checked',
    #   -variable    => \$TRANS
    # This usually improves appearance so it seems ok.
    @q = qw(&& || and or : ? . + - * /);
    @is_chain_operator{@q} = (1) x scalar(@q);

    # We can remove semicolons after blocks preceded by these keywords
    @q =
      qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
      unless while until for foreach);
    @is_block_without_semicolon{@q} = (1) x scalar(@q);

    # 'L' is token for opening { at hash key
    @q = qw" L { ( [ ";
    @is_opening_type{@q} = (1) x scalar(@q);

    # 'R' is token for closing } at hash key
    @q = qw" R } ) ] ";
    @is_closing_type{@q} = (1) x scalar(@q);

    @q = qw" { ( [ ";
    @is_opening_token{@q} = (1) x scalar(@q);

    @q = qw" } ) ] ";
    @is_closing_token{@q} = (1) x scalar(@q);
}
5877
# Whitespace codes, listed from 'no' to 'yes'.
use constant WS_NO       => -1;
use constant WS_OPTIONAL => 0;
use constant WS_YES      => 1;

# Token bond strengths, listed from the weakest to the strongest.
use constant VERY_WEAK   => 0.55;
use constant WEAK        => 0.8;
use constant NOMINAL     => 1.1;
use constant STRONG      => 2.1;
use constant VERY_STRONG => 100;
use constant NO_BREAK    => 10_000;

# Value used when testing indexes in the output array.
use constant UNDEFINED_INDEX => -1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

# Increment between the sequence numbers of each type.
# For example, ?: pairs might have numbers 7,11,15,...
use constant TYPE_SEQUENCE_INCREMENT => 4;
5900
{

    # Number of instances; the variable is private to this bare block and
    # is shared by the three routines below.
    my $instance_count = 0;

    # return the current number of instances
    sub get_count {
        return $instance_count;
    }

    # add one instance and return the new total
    sub _increment_count {
        $instance_count += 1;
        return $instance_count;
    }

    # remove one instance and return the new total
    sub _decrement_count {
        $instance_count -= 1;
        return $instance_count;
    }
}
5909
sub trim {

    # Remove trailing and then leading whitespace from the first argument.
    # The argument is edited in place (it is reached through the @_ alias)
    # and the trimmed value is also returned.
    for ( $_[0] ) {
        s/\s+$//;
        s/^\s+//;
    }
    return $_[0];
}
5917
sub split_words {

    # Given a string containing words separated by whitespace,
    # return the list of words.
    #
    # An undefined or empty string gives a bare return.  The test is made
    # on definedness and length rather than on truth, so that the one-word
    # string '0' is returned as a word instead of being dropped.
    my ($str) = @_;
    return unless defined $str && length $str;

    # A pattern of a single space makes split skip leading whitespace and
    # break on runs of whitespace, so no separate trimming is needed.
    return split( ' ', $str );
}
5928
5929 # interface to Perl::Tidy::Logger routines
sub warning {

    # Pass all arguments to the warning method of the logger object.
    # When there is no logger object nothing is called and the (false)
    # value of $logger_object is what comes back.
    return $logger_object && $logger_object->warning(@_);
}
5935
sub complain {

    # Pass all arguments to the complain method of the logger object.
    # When there is no logger object nothing is called and the (false)
    # value of $logger_object is what comes back.
    return $logger_object && $logger_object->complain(@_);
}
5941
sub write_logfile_entry {

    # Pass all arguments to the write_logfile_entry method of the logger
    # object.  When there is no logger object nothing is called and the
    # (false) value of $logger_object is what comes back.
    return $logger_object && $logger_object->write_logfile_entry(@_);
}
5947
sub black_box {

    # Pass all arguments to the black_box method of the logger object.
    # When there is no logger object nothing is called and the (false)
    # value of $logger_object is what comes back.
    return $logger_object && $logger_object->black_box(@_);
}
5953
sub report_definite_bug {

    # Call the report_definite_bug method of the logger object; no
    # arguments are passed along.  When there is no logger object nothing
    # is called and the (false) value of $logger_object is what comes back.
    return $logger_object && $logger_object->report_definite_bug();
}
5959
sub get_saw_brace_error {

    # Return whatever the get_saw_brace_error method of the logger object
    # returns.  When there is no logger object nothing is called and the
    # (false) value of $logger_object is what comes back.
    return $logger_object && $logger_object->get_saw_brace_error();
}
5965
sub we_are_at_the_last_line {

    # Call the we_are_at_the_last_line method of the logger object; no
    # arguments are passed along.  When there is no logger object nothing
    # is called and the (false) value of $logger_object is what comes back.
    return $logger_object && $logger_object->we_are_at_the_last_line();
}
5971
5972 # interface to Perl::Tidy::Diagnostics routine
sub write_diagnostics {

    # Pass all arguments to the write_diagnostics method of the
    # diagnostics object.  When there is no diagnostics object nothing is
    # called and the (false) value of $diagnostics_object is what comes
    # back.
    return $diagnostics_object
      && $diagnostics_object->write_diagnostics(@_);
}
5979
sub get_added_semicolon_count {

    # Return the package-level count of added semicolons.  This is called
    # as a method, but the invocant is not needed because the count is
    # kept in a package variable.
    return $added_semicolon_count;
}
5984
sub DESTROY {

    # When an object goes away, take it out of the instance count by
    # calling its _decrement_count method.
    my ($self) = @_;
    return $self->_decrement_count();
}
5988
sub new {

    # Constructor: reset the package-level state of the formatter and
    # return a blessed, empty hash.  All of the working state is kept in
    # package variables, so an attempt to have more than one object at a
    # time is a fatal error.
    #
    # Named arguments (each defaults to undef):
    #   sink_object        - handed to Perl::Tidy::FileWriter->new
    #   diagnostics_object - stored in $diagnostics_object
    #   logger_object      - stored in $logger_object
    my $class = shift;
    my %args  = (
        sink_object        => undef,
        diagnostics_object => undef,
        logger_object      => undef,
        @_,
    );

    ( $logger_object, $diagnostics_object ) =
      @args{qw(logger_object diagnostics_object)};

    # wrap the sink object in a file writer
    $file_writer_object =
      Perl::Tidy::FileWriter->new( $args{sink_object}, $rOpts,
        $logger_object );

    # Initialize the leading whitespace stack to negative levels so that
    # we can never run off the end of the stack.  The stack index is given
    # to the new item, so it is set before the item is created.
    $gnu_position_predictor = 0;    # where the current token is predicted to be
    $max_gnu_stack_index    = 0;
    $max_gnu_item_index     = -1;
    $gnu_stack[0]  = new_lp_indentation_item( 0, -1, -1, 0, 0 );
    @gnu_item_list = ();

    $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};

    # arrays which start out empty; the first group holds the batch of
    # tokens 'to go' ( @lengths_to_go is the line length to the start of
    # the ith token )
    foreach my $rarray (
        \@block_type_to_go,            \@type_sequence_to_go,
        \@container_environment_to_go, \@bond_strength_to_go,
        \@forced_breakpoint_to_go,     \@lengths_to_go,
        \@levels_to_go,                \@matching_token_to_go,
        \@mate_index_to_go,            \@nesting_blocks_to_go,
        \@ci_levels_to_go,             \@nobreak_to_go,
        \@old_breakpoint_to_go,        \@tokens_to_go,
        \@types_to_go,                 \@leading_spaces_to_go,
        \@reduced_spaces_to_go,

        \@dont_align, \@has_broken_sublist, \@want_comma_break,
      )
    {
        @{$rarray} = ();
    }

    # arrays which start out with a single element
    @nesting_depth_to_go = (0);
    @ci_stack            = ("");

    # hashes which start out empty (the block text hashes are used for
    # adding side comments)
    foreach my $rhash (
        \%postponed_breakpoint,      \%block_leading_text,
        \%block_opening_line_number, \%saved_opening_indentation,
      )
    {
        %{$rhash} = ();
    }

    # counters, positions and flags which start out at zero
    foreach my $rscalar (
        \$last_output_indentation,      \$last_indentation_written,
        \$last_unadjusted_indentation,  \$saw_END_or_DATA_,
        \$first_tabbing_disagreement,   \$last_tabbing_disagreement,
        \$tabbing_disagreement_count,   \$in_tabbing_disagreement,
        \$last_last_line_leading_level, \$last_line_leading_level,
        \$last_output_level,            \$looking_for_else,
        \$embedded_tab_count,           \$first_embedded_tab_at,
        \$last_embedded_tab_at,         \$deleted_semicolon_count,
        \$first_deleted_semicolon_at,   \$last_deleted_semicolon_at,
        \$added_semicolon_count,        \$first_added_semicolon_at,
        \$last_added_semicolon_at,      \$last_line_had_side_comment,
        \$is_static_block_comment,      \$in_format_skipping_section,
      )
    {
        ${$rscalar} = 0;
    }

    # everything else which has its own starting value
    $input_line_tabbing     = undef;
    $csc_new_statement_ok   = 1;
    $last_line_leading_type = '#';
    $last_leading_token     = $last_line_type = $last_nonblank_block_type =
      "";
    $last_nonblank_token = $last_nonblank_type = $last_last_nonblank_token =
      $last_last_nonblank_type = ';';

    reset_block_text_accumulator();

    prepare_for_new_input_lines();

    $vertical_aligner_object =
      Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
        $logger_object, $diagnostics_object );

    # make a note in the log file of how indentation is to be done
    my $indentation_note =
      $rOpts->{'entab-leading-whitespace'}
      ? "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
      : $rOpts->{'tabs'} ? "Indentation will be with a tab character\n"
      :   "Indentation will be with $rOpts->{'indent-columns'} spaces\n";
    write_logfile_entry($indentation_note);

    # This was the start of a formatter referent, but object-oriented
    # coding has turned out to be too slow here.
    $formatter_self = bless {}, $class;

    # Safety check..this is not a class yet
    if ( _increment_count() > 1 ) {
        confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
    }
    return $formatter_self;
}
6121
sub prepare_for_new_input_lines {

    # Put the per-batch package variables back to their starting values
    # and cancel any one-line block.

    ++$gnu_sequence_number;    # increment output batch counter

    foreach my $rhash ( \%last_gnu_equals, \%gnu_comma_count,
        \%gnu_arrow_count )
    {
        %{$rhash} = ();
    }

    # indexes which do not point at anything yet
    $max_gnu_item_index = $index_max_forced_break = $max_index_to_go =
      $last_nonblank_index_to_go = $last_last_nonblank_index_to_go =
      UNDEFINED_INDEX;

    # types and tokens of the previous nonblank items are empty strings
    $last_nonblank_type_to_go = $last_nonblank_token_to_go =
      $last_last_nonblank_type_to_go = $last_last_nonblank_token_to_go = '';

    # positions, counters and flags which start out at zero
    $line_start_index_to_go = $forced_breakpoint_count =
      $forced_breakpoint_undo_count = $comma_count_in_batch =
      $starting_in_quote = 0;
    $lengths_to_go[0] = 0;

    $old_line_count_in_batch = 1;
    $rbrace_follower         = undef;

    destroy_one_line_block();
}
6148
sub write_line {

    # Process one input line, described by the hash ref $line_of_tokens.
    # Only the _line_type and _line_text entries are examined here.  A
    # line of code is handed to print_line_of_tokens; any other line is
    # written without indentation, unless it is pod which is to be deleted.
    #
    # _line_type codes are:
    #   SYSTEM         - system-specific code before hash-bang line
    #   CODE           - line of perl code (including comments)
    #   POD_START      - line starting pod, such as '=head'
    #   POD            - pod documentation text
    #   POD_END        - last line of pod section, '=cut'
    #   HERE           - text of here-document
    #   HERE_END       - last line of here-doc (target word)
    #   FORMAT         - format section
    #   FORMAT_END     - last line of format section, '.'
    #   DATA_START     - __DATA__ line
    #   DATA           - unidentified text following __DATA__
    #   END_START      - __END__ line
    #   END            - unidentified text following __END__
    #   ERROR          - we are in big trouble, probably not a perl script
    my ( $self, $line_of_tokens ) = @_;
    my ( $line_type, $input_line ) =
      @{$line_of_tokens}{qw(_line_type _line_text)};

    # with the notidy option every line is passed along unindented
    if ( $rOpts->{notidy} ) {
        write_unindented_line($input_line);
        $last_line_type = $line_type;
        return;
    }

    # put a blank line after an =cut which comes before __END__ and __DATA__
    # (required by podchecker)
    if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
        $file_writer_object->reset_consecutive_blank_lines();
        want_blank_line() unless ( $input_line =~ /^\s*$/ );
    }

    if ( $line_type eq 'CODE' ) {

        # let logger see all non-blank lines of code
        unless ( $input_line =~ /^\s*$/ ) {
            my $output_line_number =
              $vertical_aligner_object->get_output_line_number();
            black_box( $line_of_tokens, $output_line_number );
        }
        print_line_of_tokens($line_of_tokens);
    }
    else {

        # the two special flags can only be set for pod lines
        my $is_pod    = ( $line_type =~ /^POD/ );
        my $skip_line = $is_pod && $rOpts->{'delete-pod'};
        my $tee_line  = $is_pod && $rOpts->{'tee-pod'};

        if ($is_pod) {

            # Pod docs should have a preceding blank line.  But be
            # very careful in __END__ and __DATA__ sections, because:
            #   1. the user may be using this section for any purpose whatsoever
            #   2. the blank counters are not active there
            # It should be safe to request a blank line between an
            # __END__ or __DATA__ and an immediately following '=head'
            # type line, (types END_START and DATA_START), but not for
            # any other lines of type END or DATA.
            want_blank_line()
              if ( !$skip_line
                && $line_type eq 'POD_START'
                && $last_line_type !~ /^(END|DATA)$/ );
        }

        # leave the blank counters in a predictable state
        # after __END__ or __DATA__
        elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
            $file_writer_object->reset_consecutive_blank_lines();
            $saw_END_or_DATA_ = 1;
        }

        # write unindented non-code line, with the tee switched on around
        # it if requested
        unless ($skip_line) {
            $file_writer_object->tee_on() if ($tee_line);
            write_unindented_line($input_line);
            $file_writer_object->tee_off() if ($tee_line);
        }
    }
    $last_line_type = $line_type;
}
6240
sub create_one_line_block {

    # Record a one-line block in the two package variables which
    # describe it:
    #   first argument  -> $index_start_one_line_block
    #   second argument -> $semicolons_before_block_self_destruct
    my ( $i_start, $semicolon_count ) = @_;
    $index_start_one_line_block            = $i_start;
    $semicolons_before_block_self_destruct = $semicolon_count;
}
6245
sub destroy_one_line_block {

    # Cancel any one-line block: the start index goes back to
    # UNDEFINED_INDEX and the semicolon count goes back to 0.
    create_one_line_block( UNDEFINED_INDEX, 0 );
}
6250
sub leading_spaces_to_go {

    # Return the number of indentation spaces stored in
    # @leading_spaces_to_go for the token whose index is given; these
    # were previously stored by 'set_leading_whitespace'.  A stored value
    # is either a plain number of spaces or an object with a get_SPACES
    # method.
    my ($i) = @_;
    my $indentation = $leading_spaces_to_go[$i];
    return $indentation unless ref($indentation);
    return $indentation->get_SPACES();
}
6259
sub get_SPACES {

    # Return the number of leading spaces associated with an indentation
    # variable.  The argument is either a constant number of spaces, which
    # is returned unchanged, or an object with a get_SPACES method, which
    # is asked for the number.
    my ($indentation) = @_;
    return $indentation unless ref($indentation);
    return $indentation->get_SPACES();
}
6268
sub get_RECOVERABLE_SPACES {

    # Return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same indentation
    # to get them to line up with their opening parens.  Only an
    # indentation object can supply this; for a plain number of spaces
    # the result is 0.
    my ($indentation) = @_;
    return 0 unless ref($indentation);
    return $indentation->get_RECOVERABLE_SPACES();
}
6277
sub get_AVAILABLE_SPACES_to_go {

    # Return the number of available leading spaces for the token whose
    # index is given.  The value stored in @leading_spaces_to_go is either
    # a constant number of spaces, for which the result is 0, or an object
    # with a get_AVAILABLE_SPACES method, which is asked for the number.
    my ($i) = @_;
    my $indentation = $leading_spaces_to_go[$i];
    return 0 unless ref($indentation);
    return $indentation->get_AVAILABLE_SPACES();
}
6287
sub new_lp_indentation_item {

    # This is an interface to the IndentationItem class.  It creates an
    # item from the five arguments plus the current values of
    # $gnu_sequence_number, $max_gnu_stack_index and
    # $line_start_index_to_go, and returns it.
    #
    # An item with a non-negative level is also appended to
    # @gnu_item_list, and its index in that list is given to the
    # constructor.  A negative level implies not to store the item in the
    # list; the index given to the constructor is then 0.
    my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;

    my $store_item = ( $level >= 0 );
    my $index      = $store_item ? ++$max_gnu_item_index : 0;

    my $item = Perl::Tidy::IndentationItem->new(
        $spaces,           $level,       $ci_level,
        $available_spaces, $index,       $gnu_sequence_number,
        $align_paren,      $max_gnu_stack_index,
        $line_start_index_to_go,
    );

    $gnu_item_list[$max_gnu_item_index] = $item if ($store_item);

    return $item;
}
6311
6312 sub set_leading_whitespace {
6313
6314     # This routine defines leading whitespace
6315     # given: the level and continuation_level of a token,
6316     # define: space count of leading string which would apply if it
6317     # were the first token of a new line.
6318
6319     my ( $level, $ci_level, $in_continued_quote ) = @_;
6320
6321     # modify for -bli, which adds one continuation indentation for
6322     # opening braces
6323     if (   $rOpts_brace_left_and_indent
6324         && $max_index_to_go == 0
6325         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6326     {
6327         $ci_level++;
6328     }
6329
6330     # patch to avoid trouble when input file has negative indentation.
6331     # other logic should catch this error.
6332     if ( $level < 0 ) { $level = 0 }
6333
6334     #-------------------------------------------
6335     # handle the standard indentation scheme
6336     #-------------------------------------------
6337     unless ($rOpts_line_up_parentheses) {
6338         my $space_count =
6339           $ci_level * $rOpts_continuation_indentation +
6340           $level * $rOpts_indent_columns;
6341         my $ci_spaces =
6342           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6343
6344         if ($in_continued_quote) {
6345             $space_count = 0;
6346             $ci_spaces   = 0;
6347         }
6348         $leading_spaces_to_go[$max_index_to_go] = $space_count;
6349         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6350         return;
6351     }
6352
6353     #-------------------------------------------------------------
6354     # handle case of -lp indentation..
6355     #-------------------------------------------------------------
6356
6357     # The continued_quote flag means that this is the first token of a
6358     # line, and it is the continuation of some kind of multi-line quote
6359     # or pattern.  It requires special treatment because it must have no
6360     # added leading whitespace. So we create a special indentation item
6361     # which is not in the stack.
6362     if ($in_continued_quote) {
6363         my $space_count     = 0;
6364         my $available_space = 0;
6365         $level = -1;    # flag to prevent storing in item_list
6366         $leading_spaces_to_go[$max_index_to_go] =
6367           $reduced_spaces_to_go[$max_index_to_go] =
6368           new_lp_indentation_item( $space_count, $level, $ci_level,
6369             $available_space, 0 );
6370         return;
6371     }
6372
6373     # get the top state from the stack
6374     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6375     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6376     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6377
6378     my $type        = $types_to_go[$max_index_to_go];
6379     my $token       = $tokens_to_go[$max_index_to_go];
6380     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6381
6382     if ( $type eq '{' || $type eq '(' ) {
6383
6384         $gnu_comma_count{ $total_depth + 1 } = 0;
6385         $gnu_arrow_count{ $total_depth + 1 } = 0;
6386
6387         # If we come to an opening token after an '=' token of some type,
6388         # see if it would be helpful to 'break' after the '=' to save space
6389         my $last_equals = $last_gnu_equals{$total_depth};
6390         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6391
6392             # find the position if we break at the '='
6393             my $i_test = $last_equals;
6394             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6395
6396             # TESTING
6397             ##my $too_close = ($i_test==$max_index_to_go-1);
6398
6399             my $test_position = total_line_length( $i_test, $max_index_to_go );
6400
6401             if (
6402
6403                 # the equals is not just before an open paren (testing)
6404                 ##!$too_close &&
6405
6406                 # if we are beyond the midpoint
6407                 $gnu_position_predictor > $half_maximum_line_length
6408
6409                 # or we are beyond the 1/4 point and there was an old
6410                 # break at the equals
6411                 || (
6412                     $gnu_position_predictor > $half_maximum_line_length / 2
6413                     && (
6414                         $old_breakpoint_to_go[$last_equals]
6415                         || (   $last_equals > 0
6416                             && $old_breakpoint_to_go[ $last_equals - 1 ] )
6417                         || (   $last_equals > 1
6418                             && $types_to_go[ $last_equals - 1 ] eq 'b'
6419                             && $old_breakpoint_to_go[ $last_equals - 2 ] )
6420                     )
6421                 )
6422               )
6423             {
6424
6425                 # then make the switch -- note that we do not set a real
6426                 # breakpoint here because we may not really need one; sub
6427                 # scan_list will do that if necessary
6428                 $line_start_index_to_go = $i_test + 1;
6429                 $gnu_position_predictor = $test_position;
6430             }
6431         }
6432     }
6433
6434     # Check for decreasing depth ..
6435     # Note that one token may have both decreasing and then increasing
6436     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
6437     # in this example we would first go back to (1,0) then up to (2,0)
6438     # in a single call.
6439     if ( $level < $current_level || $ci_level < $current_ci_level ) {
6440
6441         # loop to find the first entry at or completely below this level
6442         my ( $lev, $ci_lev );
6443         while (1) {
6444             if ($max_gnu_stack_index) {
6445
6446                 # save index of token which closes this level
6447                 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6448
6449                 # Undo any extra indentation if we saw no commas
6450                 my $available_spaces =
6451                   $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6452
6453                 my $comma_count = 0;
6454                 my $arrow_count = 0;
6455                 if ( $type eq '}' || $type eq ')' ) {
6456                     $comma_count = $gnu_comma_count{$total_depth};
6457                     $arrow_count = $gnu_arrow_count{$total_depth};
6458                     $comma_count = 0 unless $comma_count;
6459                     $arrow_count = 0 unless $arrow_count;
6460                 }
6461                 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6462                 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6463
6464                 if ( $available_spaces > 0 ) {
6465
6466                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
6467
6468                         my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6469                         my $seqno =
6470                           $gnu_stack[$max_gnu_stack_index]
6471                           ->get_SEQUENCE_NUMBER();
6472
6473                         # Be sure this item was created in this batch.  This
6474                         # should be true because we delete any available
6475                         # space from open items at the end of each batch.
6476                         if (   $gnu_sequence_number != $seqno
6477                             || $i > $max_gnu_item_index )
6478                         {
6479                             warning(
6480 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6481                             );
6482                             report_definite_bug();
6483                         }
6484
6485                         else {
6486                             if ( $arrow_count == 0 ) {
6487                                 $gnu_item_list[$i]
6488                                   ->permanently_decrease_AVAILABLE_SPACES(
6489                                     $available_spaces);
6490                             }
6491                             else {
6492                                 $gnu_item_list[$i]
6493                                   ->tentatively_decrease_AVAILABLE_SPACES(
6494                                     $available_spaces);
6495                             }
6496
6497                             my $j;
6498                             for (
6499                                 $j = $i + 1 ;
6500                                 $j <= $max_gnu_item_index ;
6501                                 $j++
6502                               )
6503                             {
6504                                 $gnu_item_list[$j]
6505                                   ->decrease_SPACES($available_spaces);
6506                             }
6507                         }
6508                     }
6509                 }
6510
6511                 # go down one level
6512                 --$max_gnu_stack_index;
6513                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6514                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6515
6516                 # stop when we reach a level at or below the current level
6517                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6518                     $space_count =
6519                       $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6520                     $current_level    = $lev;
6521                     $current_ci_level = $ci_lev;
6522                     last;
6523                 }
6524             }
6525
6526             # reached bottom of stack .. should never happen because
6527             # only negative levels can get here, and $level was forced
6528             # to be positive above.
6529             else {
6530                 warning(
6531 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6532                 );
6533                 report_definite_bug();
6534                 last;
6535             }
6536         }
6537     }
6538
6539     # handle increasing depth
6540     if ( $level > $current_level || $ci_level > $current_ci_level ) {
6541
6542         # Compute the standard incremental whitespace.  This will be
6543         # the minimum incremental whitespace that will be used.  This
6544         # choice results in a smooth transition between the gnu-style
6545         # and the standard style.
6546         my $standard_increment =
6547           ( $level - $current_level ) * $rOpts_indent_columns +
6548           ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6549
6550         # Now we have to define how much extra incremental space
6551         # ("$available_space") we want.  This extra space will be
6552         # reduced as necessary when long lines are encountered or when
6553         # it becomes clear that we do not have a good list.
6554         my $available_space = 0;
6555         my $align_paren     = 0;
6556         my $excess          = 0;
6557
6558         # initialization on empty stack..
6559         if ( $max_gnu_stack_index == 0 ) {
6560             $space_count = $level * $rOpts_indent_columns;
6561         }
6562
6563         # if this is a BLOCK, add the standard increment
6564         elsif ($last_nonblank_block_type) {
6565             $space_count += $standard_increment;
6566         }
6567
6568         # if last nonblank token was not structural indentation,
6569         # just use standard increment
6570         elsif ( $last_nonblank_type ne '{' ) {
6571             $space_count += $standard_increment;
6572         }
6573
6574         # otherwise use the space to the first non-blank level change token
6575         else {
6576
6577             $space_count = $gnu_position_predictor;
6578
6579             my $min_gnu_indentation =
6580               $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6581
6582             $available_space = $space_count - $min_gnu_indentation;
6583             if ( $available_space >= $standard_increment ) {
6584                 $min_gnu_indentation += $standard_increment;
6585             }
6586             elsif ( $available_space > 1 ) {
6587                 $min_gnu_indentation += $available_space + 1;
6588             }
6589             elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6590                 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6591                     $min_gnu_indentation += 2;
6592                 }
6593                 else {
6594                     $min_gnu_indentation += 1;
6595                 }
6596             }
6597             else {
6598                 $min_gnu_indentation += $standard_increment;
6599             }
6600             $available_space = $space_count - $min_gnu_indentation;
6601
6602             if ( $available_space < 0 ) {
6603                 $space_count     = $min_gnu_indentation;
6604                 $available_space = 0;
6605             }
6606             $align_paren = 1;
6607         }
6608
6609         # update state, but not on a blank token
6610         if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6611
6612             $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6613
6614             ++$max_gnu_stack_index;
6615             $gnu_stack[$max_gnu_stack_index] =
6616               new_lp_indentation_item( $space_count, $level, $ci_level,
6617                 $available_space, $align_paren );
6618
6619             # If the opening paren is beyond the half-line length, then
6620             # we will use the minimum (standard) indentation.  This will
6621             # help avoid problems associated with running out of space
6622             # near the end of a line.  As a result, in deeply nested
6623             # lists, there will be some indentations which are limited
6624             # to this minimum standard indentation. But the most deeply
6625             # nested container will still probably be able to shift its
6626             # parameters to the right for proper alignment, so in most
6627             # cases this will not be noticeable.
6628             if (   $available_space > 0
6629                 && $space_count > $half_maximum_line_length )
6630             {
6631                 $gnu_stack[$max_gnu_stack_index]
6632                   ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6633             }
6634         }
6635     }
6636
6637     # Count commas and look for non-list characters.  Once we see a
6638     # non-list character, we give up and don't look for any more commas.
6639     if ( $type eq '=>' ) {
6640         $gnu_arrow_count{$total_depth}++;
6641
6642         # tentatively treating '=>' like '=' for estimating breaks
6643         # TODO: this could use some experimentation
6644         $last_gnu_equals{$total_depth} = $max_index_to_go;
6645     }
6646
6647     elsif ( $type eq ',' ) {
6648         $gnu_comma_count{$total_depth}++;
6649     }
6650
6651     elsif ( $is_assignment{$type} ) {
6652         $last_gnu_equals{$total_depth} = $max_index_to_go;
6653     }
6654
6655     # this token might start a new line
6656     # if this is a non-blank..
6657     if ( $type ne 'b' ) {
6658
6659         # and if ..
6660         if (
6661
6662             # this is the first nonblank token of the line
6663             $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6664
6665             # or previous character was one of these:
6666             || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6667
6668             # or previous character was opening and this does not close it
6669             || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6670             || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6671
6672             # or this token is one of these:
6673             || $type =~ /^([\.]|\|\||\&\&)$/
6674
6675             # or this is a closing structure
6676             || (   $last_nonblank_type_to_go eq '}'
6677                 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6678
6679             # or previous token was keyword 'return'
6680             || ( $last_nonblank_type_to_go eq 'k'
6681                 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6682
6683             # or starting a new line at certain keywords is fine
6684             || (   $type eq 'k'
6685                 && $is_if_unless_and_or_last_next_redo_return{$token} )
6686
6687             # or this is after an assignment after a closing structure
6688             || (
6689                 $is_assignment{$last_nonblank_type_to_go}
6690                 && (
6691                     $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6692
6693                     # and it is significantly to the right
6694                     || $gnu_position_predictor > $half_maximum_line_length
6695                 )
6696             )
6697           )
6698         {
6699             check_for_long_gnu_style_lines();
6700             $line_start_index_to_go = $max_index_to_go;
6701
6702             # back up 1 token if we want to break before that type
6703             # otherwise, we may strand tokens like '?' or ':' on a line
6704             if ( $line_start_index_to_go > 0 ) {
6705                 if ( $last_nonblank_type_to_go eq 'k' ) {
6706
6707                     if ( $want_break_before{$last_nonblank_token_to_go} ) {
6708                         $line_start_index_to_go--;
6709                     }
6710                 }
6711                 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6712                     $line_start_index_to_go--;
6713                 }
6714             }
6715         }
6716     }
6717
6718     # remember the predicted position of this token on the output line
6719     if ( $max_index_to_go > $line_start_index_to_go ) {
6720         $gnu_position_predictor =
6721           total_line_length( $line_start_index_to_go, $max_index_to_go );
6722     }
6723     else {
6724         $gnu_position_predictor = $space_count +
6725           token_sequence_length( $max_index_to_go, $max_index_to_go );
6726     }
6727
6728     # store the indentation object for this token
6729     # this allows us to manipulate the leading whitespace
6730     # (in case we have to reduce indentation to fit a line) without
6731     # having to change any token values
6732     $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6733     $reduced_spaces_to_go[$max_index_to_go] =
6734       ( $max_gnu_stack_index > 0 && $ci_level )
6735       ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6736       : $gnu_stack[$max_gnu_stack_index];
6737     return;
6738 }
6739
sub check_for_long_gnu_style_lines {

    # Look at the current estimated maximum line length (the value of
    # $gnu_position_predictor) and remove some leading whitespace if it
    # exceeds the desired maximum.
    #
    # Takes no arguments.  Works on these file-level variables:
    #   reads   : $rOpts_line_up_parentheses, $rOpts_maximum_line_length,
    #             $max_gnu_item_index, @gnu_item_list
    #   updates : $gnu_position_predictor, plus the AVAILABLE_SPACES and
    #             SPACES of items in @gnu_item_list
    # Returns nothing.

    # this is only for the '-lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # see if we have exceeded the maximum desired line length
    # keep 2 extra free because they are needed in some cases
    # (result of trial-and-error testing)
    my $spaces_needed =
      $gnu_position_predictor - $rOpts_maximum_line_length + 2;

    return if ( $spaces_needed <= 0 );

    # We are over the limit, so try to remove a requested number of
    # spaces from leading whitespace.  We are only allowed to remove
    # from whitespace items created on this batch, since others have
    # already been used and cannot be undone.
    my @candidates;

    # loop over all whitespace items created for the current batch
    foreach my $i ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$i];

        # item must still be open to be a candidate (otherwise it
        # cannot influence the current token)
        next if ( $item->get_CLOSED() >= 0 );

        my $available_spaces = $item->get_AVAILABLE_SPACES();

        if ( $available_spaces > 0 ) {
            push @candidates, [ $i, $available_spaces ];
        }
    }

    return unless (@candidates);

    # sort by available whitespace so that we can remove whitespace
    # from the maximum available first
    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;

    # keep removing whitespace until we are done or have no more
    foreach my $candidate (@candidates) {
        my ( $i_candidate, $available_spaces ) = @{$candidate};

        # never take more than this item has, nor more than we still need
        my $deleted_spaces =
          ( $available_spaces > $spaces_needed )
          ? $spaces_needed
          : $available_spaces;

        # remove the incremental space from this item
        $gnu_item_list[$i_candidate]
          ->decrease_AVAILABLE_SPACES($deleted_spaces);

        # update the leading whitespace of this item and all items
        # that came after it
        foreach my $i ( $i_candidate .. $max_gnu_item_index ) {

            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
            if ( $old_spaces >= $deleted_spaces ) {
                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
            }

            # shouldn't happen except for code bug:
            else {

                # $level/$ci_level describe the candidate whose space is
                # being removed; $old_level/$old_ci_level describe the
                # item (index $i) which does not have enough space left
                my $level        = $gnu_item_list[$i_candidate]->get_LEVEL();
                my $ci_level     = $gnu_item_list[$i_candidate]->get_CI_LEVEL();
                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
                warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$old_ci_level\n"
                );
                report_definite_bug();
            }
        }
        $gnu_position_predictor -= $deleted_spaces;
        $spaces_needed          -= $deleted_spaces;
        last unless ( $spaces_needed > 0 );
    }
    return;
}
6827
sub finish_lp_batch {

    # Called once after each output stream batch is finished, to undo
    # the extra indentation of every -lp indentation level which is
    # still incomplete.  Leaving a level open is too risky, because we
    # could not backtrack if a long line were to follow.  A consequence
    # is that comments and blank lines disrupt this indentation style,
    # although the vertical aligner may be able to get the space back
    # if there are side comments.
    #
    # Takes no arguments and returns nothing; it works on the file-level
    # list @gnu_item_list (indexes 0 .. $max_gnu_item_index).

    # this is only for the 'lp' style
    return if ( !$rOpts_line_up_parentheses );

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # examine every whitespace item created for the current batch
    foreach my $index ( 0 .. $max_gnu_item_index ) {
        my $lp_item = $gnu_item_list[$index];

        # closed items are finished; only open ones are of interest
        next if ( $lp_item->get_CLOSED() >= 0 );

        # skip items which have no extra space left to give up
        my $extra_spaces = $lp_item->get_AVAILABLE_SPACES();
        next unless ( $extra_spaces > 0 );

        # Tentatively remove all of the available space for this item
        # (the vertical aligner will try to get it back later)
        $lp_item->tentatively_decrease_AVAILABLE_SPACES($extra_spaces);

        # Any nodes that follow must necessarily be dependents of this
        # node, so their total indentation shrinks by the same amount.
        for (
            my $follower = $index + 1 ;
            $follower <= $max_gnu_item_index ;
            $follower++
          )
        {
            $gnu_item_list[$follower]->decrease_SPACES($extra_spaces);
        }
    }
    return;
}
6871
sub reduce_lp_indentation {

    # Try to reduce the leading whitespace at token $i by $spaces_wanted
    # (a large value of $spaces_wanted will remove all excess space).
    #
    #   $i             - index into @leading_spaces_to_go
    #   $spaces_wanted - number of spaces the caller would like removed
    #
    # Returns whatever tentatively_decrease_AVAILABLE_SPACES reports when
    # a reduction is attempted, and 0 when no reduction is attempted.
    #
    # NOTE: to be called from scan_list only for a sequence of tokens
    # contained between opening and closing parens/braces/brackets

    my ( $i, $spaces_wanted ) = @_;

    my $lp_item        = $leading_spaces_to_go[$i];
    my $spaces_on_hand = $lp_item->get_AVAILABLE_SPACES();

    # nothing to give back
    return 0 unless ( $spaces_on_hand > 0 );

    # a request for more than is available is only honored for an item
    # which has no child
    return 0
      if ( !( $spaces_wanted <= $spaces_on_hand )
        && $lp_item->get_HAVE_CHILD() );

    # we'll remove these spaces, but mark them as recoverable
    my $deleted_spaces =
      $lp_item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);

    return $deleted_spaces;
}
6899
sub token_sequence_length {

    # Return the length of tokens ($ifirst .. $ilast), including the
    # first and last, computed as a difference of entries in
    # @lengths_to_go.
    # Returns 0 if $ilast is negative or if $ifirst > $ilast.
    my ( $ifirst, $ilast ) = @_;

    if ( $ilast < 0 || $ifirst > $ilast ) {
        return 0;
    }

    # a negative first index means: measure from the start of the array
    if ( $ifirst < 0 ) {
        return $lengths_to_go[ $ilast + 1 ];
    }

    return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
}
6910
sub total_line_length {

    # Return the length of a line of tokens ($ifirst .. $ilast): the
    # leading spaces of the first token plus the length of the token
    # sequence.  A negative $ifirst is treated as 0.
    my ( $ifirst, $ilast ) = @_;
    $ifirst = 0 if ( $ifirst < 0 );

    my $indentation = leading_spaces_to_go($ifirst);
    my $text_length = token_sequence_length( $ifirst, $ilast );
    return $indentation + $text_length;
}
6921
sub excess_line_length {

    # Return the number of characters by which a line of tokens
    # ($ifirst..$ilast) exceeds the allowable line length
    # ($rOpts_maximum_line_length).  The result is zero or negative
    # when the line fits.  A negative $ifirst is treated as 0.
    my ( $ifirst, $ilast ) = @_;
    $ifirst = 0 if ( $ifirst < 0 );

    my $indentation = leading_spaces_to_go($ifirst);
    my $text_length = token_sequence_length( $ifirst, $ilast );
    return $indentation + $text_length - $rOpts_maximum_line_length;
}
6932
sub finish_formatting {

    # Flush the output buffer and then write summary notes to the
    # logfile: how many semicolons were added or deleted (and where),
    # how many quotes or patterns had embedded tabs, and where any
    # indentation disagreements were seen.  Finally the vertical aligner
    # and the file writer are asked to make their own reports.
    my ($self) = @_;

    flush();

    # fix up line number since it was incremented
    $file_writer_object->decrement_output_line_number();
    we_are_at_the_last_line();

    # note any semicolons which were added
    if ( $added_semicolon_count > 0 ) {
        my $several = $added_semicolon_count > 1;
        my ( $first, $what ) =
          $several
          ? ( "First", "semicolons were" )
          : ( "", "semicolon was" );

        write_logfile_entry("$added_semicolon_count $what added:\n");
        write_logfile_entry(
            "  $first at input line $first_added_semicolon_at\n");
        if ($several) {
            write_logfile_entry(
                "   Last at input line $last_added_semicolon_at\n");
        }
        write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
        write_logfile_entry("\n");
    }

    # note any semicolons which were deleted
    if ( $deleted_semicolon_count > 0 ) {
        my $several = $deleted_semicolon_count > 1;
        my ( $first, $what ) =
          $several
          ? ( "First", "semicolons were" )
          : ( "", "semicolon was" );

        write_logfile_entry(
            "$deleted_semicolon_count unnecessary $what deleted:\n");
        write_logfile_entry(
            "  $first at input line $first_deleted_semicolon_at\n");
        if ($several) {
            write_logfile_entry(
                "   Last at input line $last_deleted_semicolon_at\n");
        }
        write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
        write_logfile_entry("\n");
    }

    # note any quotes or patterns which contained tabs
    if ( $embedded_tab_count > 0 ) {
        my $several = $embedded_tab_count > 1;
        my ( $first, $what ) =
          $several
          ? ( "First", "quotes or patterns" )
          : ( "", "quote or pattern" );

        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
        write_logfile_entry(
"This means the display of this script could vary with device or software\n"
        );
        write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
        if ($several) {
            write_logfile_entry(
                "   Last at input line $last_embedded_tab_at\n");
        }
        write_logfile_entry("\n");
    }

    # summarize indentation disagreements: the first one seen ..
    if ($first_tabbing_disagreement) {
        write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
        );
    }

    # .. and then either the one still in progress, the last one seen,
    # or a note that there were none
    if ($in_tabbing_disagreement) {
        write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
        );
    }
    elsif ($last_tabbing_disagreement) {
        write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
        );
    }
    else {
        write_logfile_entry("No indentation disagreement seen\n");
    }
    write_logfile_entry("\n");

    $vertical_aligner_object->report_anything_unusual();

    # this call is deliberately the last statement, so its result is
    # what the sub returns
    $file_writer_object->report_line_length_errors();
}
7025
7026 sub check_options {
7027
7028     # This routine is called to check the Opts hash after it is defined
7029
7030     ($rOpts) = @_;
7031     my ( $tabbing_string, $tab_msg );
7032
7033     make_static_block_comment_pattern();
7034     make_static_side_comment_pattern();
7035     make_closing_side_comment_prefix();
7036     make_closing_side_comment_list_pattern();
7037     $format_skipping_pattern_begin =
7038       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7039     $format_skipping_pattern_end =
7040       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7041
7042     # If closing side comments ARE selected, then we can safely
7043     # delete old closing side comments unless closing side comment
7044     # warnings are requested.  This is a good idea because it will
7045     # eliminate any old csc's which fall below the line count threshold.
7046     # We cannot do this if warnings are turned on, though, because we
7047     # might delete some text which has been added.  So that must
7048     # be handled when comments are created.
7049     if ( $rOpts->{'closing-side-comments'} ) {
7050         if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7051             $rOpts->{'delete-closing-side-comments'} = 1;
7052         }
7053     }
7054
7055     # If closing side comments ARE NOT selected, but warnings ARE
7056     # selected and we ARE DELETING csc's, then we will pretend to be
7057     # adding with a huge interval.  This will force the comments to be
7058     # generated for comparison with the old comments, but not added.
7059     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7060         if ( $rOpts->{'delete-closing-side-comments'} ) {
7061             $rOpts->{'delete-closing-side-comments'}  = 0;
7062             $rOpts->{'closing-side-comments'}         = 1;
7063             $rOpts->{'closing-side-comment-interval'} = 100000000;
7064         }
7065     }
7066
7067     make_bli_pattern();
7068     make_block_brace_vertical_tightness_pattern();
7069
7070     if ( $rOpts->{'line-up-parentheses'} ) {
7071
7072         if (   $rOpts->{'indent-only'}
7073             || !$rOpts->{'add-newlines'}
7074             || !$rOpts->{'delete-old-newlines'} )
7075         {
7076             warn <<EOM;
7077 -----------------------------------------------------------------------
7078 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7079
7080 The -lp indentation logic requires that perltidy be able to coordinate
7081 arbitrarily large numbers of line breakpoints.  This isn't possible
7082 with these flags. Sometimes an acceptable workaround is to use -wocb=3
7083 -----------------------------------------------------------------------
7084 EOM
7085             $rOpts->{'line-up-parentheses'} = 0;
7086         }
7087     }
7088
7089     # At present, tabs are not compatible with the line-up-parentheses style
7090     # (it would be possible to entab the total leading whitespace
7091     # just prior to writing the line, if desired).
7092     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7093         warn <<EOM;
7094 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
7095 EOM
7096         $rOpts->{'tabs'} = 0;
7097     }
7098
7099     # Likewise, tabs are not compatible with outdenting..
7100     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7101         warn <<EOM;
7102 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7103 EOM
7104         $rOpts->{'tabs'} = 0;
7105     }
7106
7107     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7108         warn <<EOM;
7109 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
7110 EOM
7111         $rOpts->{'tabs'} = 0;
7112     }
7113
7114     if ( !$rOpts->{'space-for-semicolon'} ) {
7115         $want_left_space{'f'} = -1;
7116     }
7117
7118     if ( $rOpts->{'space-terminal-semicolon'} ) {
7119         $want_left_space{';'} = 1;
7120     }
7121
7122     # implement outdenting preferences for keywords
7123     %outdent_keyword = ();
7124     unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7125         @_ = qw(next last redo goto return);    # defaults
7126     }
7127
7128     # FUTURE: if not a keyword, assume that it is an identifier
7129     foreach (@_) {
7130         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7131             $outdent_keyword{$_} = 1;
7132         }
7133         else {
7134             warn "ignoring '$_' in -okwl list; not a perl keyword";
7135         }
7136     }
7137
7138     # implement user whitespace preferences
7139     if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7140         @want_left_space{@_} = (1) x scalar(@_);
7141     }
7142
7143     if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7144         @want_right_space{@_} = (1) x scalar(@_);
7145     }
7146
7147     if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7148         @want_left_space{@_} = (-1) x scalar(@_);
7149     }
7150
7151     if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7152         @want_right_space{@_} = (-1) x scalar(@_);
7153     }
7154     if ( $rOpts->{'dump-want-left-space'} ) {
7155         dump_want_left_space(*STDOUT);
7156         exit 1;
7157     }
7158
7159     if ( $rOpts->{'dump-want-right-space'} ) {
7160         dump_want_right_space(*STDOUT);
7161         exit 1;
7162     }
7163
7164     # default keywords for which space is introduced before an opening paren
7165     # (at present, including them messes up vertical alignment)
7166     @_ = qw(my local our and or err eq ne if else elsif until
7167       unless while for foreach return switch case given when);
7168     @space_after_keyword{@_} = (1) x scalar(@_);
7169
7170     # allow user to modify these defaults
7171     if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7172         @space_after_keyword{@_} = (1) x scalar(@_);
7173     }
7174
7175     if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7176         @space_after_keyword{@_} = (0) x scalar(@_);
7177     }
7178
7179     # implement user break preferences
7180     my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7181       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7182       . : ? && || and or err xor
7183     );
7184
7185     my $break_after = sub {
7186         foreach my $tok (@_) {
7187             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
7188             my $lbs = $left_bond_strength{$tok};
7189             my $rbs = $right_bond_strength{$tok};
7190             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7191                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7192                   ( $lbs, $rbs );
7193             }
7194         }
7195     };
7196
7197     my $break_before = sub {
7198         foreach my $tok (@_) {
7199             my $lbs = $left_bond_strength{$tok};
7200             my $rbs = $right_bond_strength{$tok};
7201             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7202                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7203                   ( $lbs, $rbs );
7204             }
7205         }
7206     };
7207
7208     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7209     $break_before->(@all_operators)
7210       if ( $rOpts->{'break-before-all-operators'} );
7211
7212     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7213     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7214
7215     # make note if breaks are before certain key types
7216     %want_break_before = ();
7217     foreach my $tok ( @all_operators, ',' ) {
7218         $want_break_before{$tok} =
7219           $left_bond_strength{$tok} < $right_bond_strength{$tok};
7220     }
7221
7222     # Coordinate ?/: breaks, which must be similar
7223     if ( !$want_break_before{':'} ) {
7224         $want_break_before{'?'}   = $want_break_before{':'};
7225         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7226         $left_bond_strength{'?'}  = NO_BREAK;
7227     }
7228
7229     # Define here tokens which may follow the closing brace of a do statement
7230     # on the same line, as in:
7231     #   } while ( $something);
7232     @_ = qw(until while unless if ; : );
7233     push @_, ',';
7234     @is_do_follower{@_} = (1) x scalar(@_);
7235
7236     # These tokens may follow the closing brace of an if or elsif block.
7237     # In other words, for cuddled else we want code to look like:
7238     #   } elsif ( $something) {
7239     #   } else {
7240     if ( $rOpts->{'cuddled-else'} ) {
7241         @_ = qw(else elsif);
7242         @is_if_brace_follower{@_} = (1) x scalar(@_);
7243     }
7244     else {
7245         %is_if_brace_follower = ();
7246     }
7247
7248     # nothing can follow the closing curly of an else { } block:
7249     %is_else_brace_follower = ();
7250
7251     # what can follow a multi-line anonymous sub definition closing curly:
7252     @_ = qw# ; : => or and  && || ~~ !~~ ) #;
7253     push @_, ',';
7254     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7255
7256     # what can follow a one-line anonynomous sub closing curly:
7257     # one-line anonumous subs also have ']' here...
7258     # see tk3.t and PP.pm
7259     @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
7260     push @_, ',';
7261     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7262
7263     # What can follow a closing curly of a block
7264     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7265     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7266     @_ = qw#  ; : => or and  && || ) #;
7267     push @_, ',';
7268
7269     # allow cuddled continue if cuddled else is specified
7270     if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7271
7272     @is_other_brace_follower{@_} = (1) x scalar(@_);
7273
7274     $right_bond_strength{'{'} = WEAK;
7275     $left_bond_strength{'{'}  = VERY_STRONG;
7276
7277     # make -l=0  equal to -l=infinite
7278     if ( !$rOpts->{'maximum-line-length'} ) {
7279         $rOpts->{'maximum-line-length'} = 1000000;
7280     }
7281
7282     # make -lbl=0  equal to -lbl=infinite
7283     if ( !$rOpts->{'long-block-line-count'} ) {
7284         $rOpts->{'long-block-line-count'} = 1000000;
7285     }
7286
7287     my $ole = $rOpts->{'output-line-ending'};
7288     if ($ole) {
7289         my %endings = (
7290             dos  => "\015\012",
7291             win  => "\015\012",
7292             mac  => "\015",
7293             unix => "\012",
7294         );
7295         $ole = lc $ole;
7296         unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7297             my $str = join " ", keys %endings;
7298             die <<EOM;
7299 Unrecognized line ending '$ole'; expecting one of: $str
7300 EOM
7301         }
7302         if ( $rOpts->{'preserve-line-endings'} ) {
7303             warn "Ignoring -ple; conflicts with -ole\n";
7304             $rOpts->{'preserve-line-endings'} = undef;
7305         }
7306     }
7307
7308     # hashes used to simplify setting whitespace
7309     %tightness = (
7310         '{' => $rOpts->{'brace-tightness'},
7311         '}' => $rOpts->{'brace-tightness'},
7312         '(' => $rOpts->{'paren-tightness'},
7313         ')' => $rOpts->{'paren-tightness'},
7314         '[' => $rOpts->{'square-bracket-tightness'},
7315         ']' => $rOpts->{'square-bracket-tightness'},
7316     );
7317     %matching_token = (
7318         '{' => '}',
7319         '(' => ')',
7320         '[' => ']',
7321         '?' => ':',
7322     );
7323
7324     # frequently used parameters
7325     $rOpts_add_newlines          = $rOpts->{'add-newlines'};
7326     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
7327     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7328     $rOpts_block_brace_vertical_tightness =
7329       $rOpts->{'block-brace-vertical-tightness'};
7330     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
7331     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7332     $rOpts_break_at_old_ternary_breakpoints =
7333       $rOpts->{'break-at-old-ternary-breakpoints'};
7334     $rOpts_break_at_old_comma_breakpoints =
7335       $rOpts->{'break-at-old-comma-breakpoints'};
7336     $rOpts_break_at_old_keyword_breakpoints =
7337       $rOpts->{'break-at-old-keyword-breakpoints'};
7338     $rOpts_break_at_old_logical_breakpoints =
7339       $rOpts->{'break-at-old-logical-breakpoints'};
7340     $rOpts_closing_side_comment_else_flag =
7341       $rOpts->{'closing-side-comment-else-flag'};
7342     $rOpts_closing_side_comment_maximum_text =
7343       $rOpts->{'closing-side-comment-maximum-text'};
7344     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7345     $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
7346     $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
7347     $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
7348     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
7349     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
7350     $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7351     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
7352     $rOpts_short_concatenation_item_length =
7353       $rOpts->{'short-concatenation-item-length'};
7354     $rOpts_keep_old_blank_lines     = $rOpts->{'keep-old-blank-lines'};
7355     $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
7356     $rOpts_format_skipping          = $rOpts->{'format-skipping'};
7357     $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
7358     $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
7359     $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7360     $half_maximum_line_length       = $rOpts_maximum_line_length / 2;
7361
7362     # Note that both opening and closing tokens can access the opening
7363     # and closing flags of their container types.
7364     %opening_vertical_tightness = (
7365         '(' => $rOpts->{'paren-vertical-tightness'},
7366         '{' => $rOpts->{'brace-vertical-tightness'},
7367         '[' => $rOpts->{'square-bracket-vertical-tightness'},
7368         ')' => $rOpts->{'paren-vertical-tightness'},
7369         '}' => $rOpts->{'brace-vertical-tightness'},
7370         ']' => $rOpts->{'square-bracket-vertical-tightness'},
7371     );
7372
7373     %closing_vertical_tightness = (
7374         '(' => $rOpts->{'paren-vertical-tightness-closing'},
7375         '{' => $rOpts->{'brace-vertical-tightness-closing'},
7376         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7377         ')' => $rOpts->{'paren-vertical-tightness-closing'},
7378         '}' => $rOpts->{'brace-vertical-tightness-closing'},
7379         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7380     );
7381
7382     # assume flag for '>' same as ')' for closing qw quotes
7383     %closing_token_indentation = (
7384         ')' => $rOpts->{'closing-paren-indentation'},
7385         '}' => $rOpts->{'closing-brace-indentation'},
7386         ']' => $rOpts->{'closing-square-bracket-indentation'},
7387         '>' => $rOpts->{'closing-paren-indentation'},
7388     );
7389
7390     %opening_token_right = (
7391         '(' => $rOpts->{'opening-paren-right'},
7392         '{' => $rOpts->{'opening-hash-brace-right'},
7393         '[' => $rOpts->{'opening-square-bracket-right'},
7394     );
7395
7396     %stack_opening_token = (
7397         '(' => $rOpts->{'stack-opening-paren'},
7398         '{' => $rOpts->{'stack-opening-hash-brace'},
7399         '[' => $rOpts->{'stack-opening-square-bracket'},
7400     );
7401
7402     %stack_closing_token = (
7403         ')' => $rOpts->{'stack-closing-paren'},
7404         '}' => $rOpts->{'stack-closing-hash-brace'},
7405         ']' => $rOpts->{'stack-closing-square-bracket'},
7406     );
7407 }
7408
sub make_static_block_comment_pattern {

    # Set the global $static_block_comment_pattern, the regex (kept as a
    # string) used to identify static block comments.
    #
    # The default pattern is '^\s*##'.  If the user supplied a
    # 'static-block-comment-prefix' (-sbcp), the pattern is built from it:
    #   - a prefix beginning with '^#' is used exactly as given;
    #   - a prefix beginning with '#' is allowed optional leading whitespace;
    #   - any other prefix is a fatal error.
    # Dies if the resulting pattern is not a valid regex.
    $static_block_comment_pattern = '^\s*##';

    # allow the user to change it
    if ( $rOpts->{'static-block-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-block-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = $prefix;

        # user may give leading caret to force matching left comments only
        if ( $prefix !~ /^\^#/ ) {
            if ( $prefix !~ /^#/ ) {
                die
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
            }
            $pattern = '^\s*' . $prefix;
        }

        # Validate by compiling the pattern at run time inside a block eval.
        # A string eval would splice the user's text into Perl source, so a
        # harmless '/' in the prefix would be reported as an invalid regex
        # and the text would be compiled as code rather than as a pattern.
        my $is_valid = eval { '##' =~ /$pattern/; 1 };
        if ( !$is_valid ) {
            die
"ERROR: the -sbcp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_block_comment_pattern = $pattern;
    }
}
7436
sub make_format_skipping_pattern {

    # Build and return the regex (as a string) which recognizes a
    # format-skipping marker comment.
    #
    #   $opt_name - key of the option in $rOpts holding the user's marker;
    #               also used to identify the option in error messages
    #   $default  - marker text to use when that option is not set
    #
    # The returned pattern is '^' . marker . '\s'.  Dies if the marker does
    # not begin with '#' or if the resulting pattern is not a valid regex.
    my ( $opt_name, $default ) = @_;
    my $param = $rOpts->{$opt_name};
    unless ($param) { $param = $default }
    $param =~ s/^\s*//;
    if ( $param !~ /^#/ ) {
        die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
    }
    my $pattern = '^' . $param . '\s';

    # Validate by compiling the pattern at run time inside a block eval.
    # A string eval would splice the user's text into Perl source, so a
    # harmless '/' in the marker would be reported as an invalid regex
    # and the text would be compiled as code rather than as a pattern.
    my $is_valid = eval { '#' =~ /$pattern/; 1 };
    if ( !$is_valid ) {
        die
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
    }
    return $pattern;
}
7453
sub make_closing_side_comment_list_pattern {

    # Set the global $closing_side_comment_list_pattern, a regex string for
    # recognizing selected block types.  It starts as '^\w+' and is
    # replaced by a pattern built from the 'closing-side-comment-list'
    # option whenever that option has a true value.
    my $user_list = $rOpts->{'closing-side-comment-list'};

    $closing_side_comment_list_pattern = '^\w+';
    if ($user_list) {
        $closing_side_comment_list_pattern =
          make_block_pattern( '-cscl', $user_list );
    }
}
7465
sub make_bli_pattern {

    # Set the global $bli_pattern from the current block-type list.  If the
    # 'brace-left-and-indent-list' option has a true value it first replaces
    # the global $bli_list_string; otherwise the existing value of
    # $bli_list_string is used unchanged.
    my $user_list = $rOpts->{'brace-left-and-indent-list'};
    $bli_list_string = $user_list if ($user_list);

    $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
}
7476
sub make_block_brace_vertical_tightness_pattern {

    # Set the global $block_brace_vertical_tightness_pattern, a regex string
    # for recognizing selected block types.  A built-in default is
    # installed first and is then replaced by a pattern built from the
    # 'block-brace-vertical-tightness-list' option whenever that option has
    # a true value.
    my $user_list = $rOpts->{'block-brace-vertical-tightness-list'};

    $block_brace_vertical_tightness_pattern =
      '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';

    if ($user_list) {
        $block_brace_vertical_tightness_pattern =
          make_block_pattern( '-bbvtl', $user_list );
    }
}
7491
sub make_block_pattern {

    #  Convert a string of block-type keywords into a regex string which
    #  matches them.
    #
    #    $abbrev - abbreviation of the option being processed; only used in
    #              the warning about unrecognized entries
    #    $string - the list of block types, split with split_words()
    #
    #  Two entries get special treatment: a single ':' stands for any label
    #  and becomes '\w+:', and 'sub' is matched as a leading word only
    #  because the 'sub' token text may be followed by the name of the sub.
    #  Repeated entries are used once.  Entries which are none of the above
    #  and do not begin with a word character are skipped with a warning.
    #
    #  Example:
    #
    #   input string: "if else elsif unless while for foreach do : sub";
    #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';

    my ( $abbrev, $string ) = @_;
    my %seen;
    my @alternatives;

    foreach my $word ( split_words($string) ) {
        next if $seen{$word};
        $seen{$word} = 1;

        # 'sub' is not part of the anchored group; it is added at the end
        next if ( $word eq 'sub' );

        if ( $word eq ':' ) {
            push @alternatives, '\w+:';
            next;
        }
        if ( $word =~ /^\w/ ) {
            push @alternatives, $word;
            next;
        }
        warn "unrecognized block type $word after $abbrev, ignoring\n";
    }

    # all ordinary block types must match through the end of the string
    my $anchored_group = '(' . join( '|', @alternatives ) . ')$';

    return '^(' . $anchored_group . '|sub)' if ( $seen{'sub'} );
    return '^' . $anchored_group;
}
7530
sub make_static_side_comment_pattern {

    # Set the global $static_side_comment_pattern, the regex (kept as a
    # string) used to identify static side comments.
    #
    # The default pattern is '^##'.  If the user supplied a
    # 'static-side-comment-prefix' (-sscp), the pattern becomes '^' followed
    # by that prefix with leading whitespace removed.  Dies if the resulting
    # pattern is not a valid regex.
    $static_side_comment_pattern = '^##';

    # allow the user to change it
    if ( $rOpts->{'static-side-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-side-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = '^' . $prefix;

        # Validate by compiling the pattern at run time inside a block eval.
        # A string eval would splice the user's text into Perl source, so a
        # harmless '/' in the prefix would be reported as an invalid regex
        # and the text would be compiled as code rather than as a pattern.
        my $is_valid = eval { '##' =~ /$pattern/; 1 };
        if ( !$is_valid ) {
            die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_side_comment_pattern = $pattern;
    }
}
7549
sub make_closing_side_comment_prefix {

    # Be sure we have a valid closing side comment prefix.
    #
    # Reads the 'closing-side-comment-prefix' option (-cscp) and stores:
    #   - the prefix actually to be used back into that option, and
    #   - a regex string which recognizes it in the global
    #     $closing_side_comment_prefix_pattern.
    #
    # If the option is undefined the defaults '## end' and '^##\s+end' are
    # used.  Otherwise a leading '#' is added to the prefix if it is
    # missing, and the pattern is the prefix anchored at the start with
    # special characters escaped and each run of whitespace matching any
    # amount of whitespace.
    my $default_csc_prefix         = '## end';
    my $default_csc_prefix_pattern = '^##\s+end';

    my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
    my $csc_prefix_pattern;
    if ( !defined($csc_prefix) ) {
        $csc_prefix         = $default_csc_prefix;
        $csc_prefix_pattern = $default_csc_prefix_pattern;
    }
    else {
        my $test_csc_prefix = $csc_prefix;
        if ( $test_csc_prefix !~ /^#/ ) {
            $test_csc_prefix = '#' . $test_csc_prefix;
        }

        # make a regex to recognize the prefix
        my $test_csc_prefix_pattern = $test_csc_prefix;

        # escape any special characters (everything except '#', whitespace
        # and word characters)
        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;

        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;

        # allow exact number of intermediate spaces to vary
        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;

        # make sure we have a good pattern
        # if we fail this we probably have an error in escaping
        # characters.  The pattern is compiled at run time inside a block
        # eval so that the text is never compiled as Perl source.
        my $is_valid = eval { '##' =~ /$test_csc_prefix_pattern/; 1 };
        if ( !$is_valid ) {

            # shouldn't happen..must have screwed up escaping, above
            report_definite_bug();
            warn
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$test_csc_prefix_pattern'\n";

            # just warn and keep going with defaults
            warn "Please consider using a simpler -cscp prefix\n";
            warn "Using default -cscp instead; please check output\n";

            # actually install the defaults which were just announced;
            # otherwise the global pattern would be left undefined
            $csc_prefix         = $default_csc_prefix;
            $csc_prefix_pattern = $default_csc_prefix_pattern;
        }
        else {
            $csc_prefix         = $test_csc_prefix;
            $csc_prefix_pattern = $test_csc_prefix_pattern;
        }
    }
    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
}
7599
sub dump_want_left_space {

    # Write to filehandle $fh a short explanatory header followed by one
    # "token type <TAB> flag" line for each entry of the global hash
    # %want_left_space, in sorted key order.
    my ($fh) = @_;
    local $" = "\n";
    print {$fh} <<EOM;
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
 1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
    for my $token_type ( sort keys %want_left_space ) {
        print {$fh} "$token_type\t$want_left_space{$token_type}\n";
    }
}
7615
sub dump_want_right_space {

    # Write to filehandle $fh a short explanatory header followed by one
    # "token type <TAB> flag" line for each entry of the global hash
    # %want_right_space, in sorted key order.
    my ($fh) = @_;
    local $" = "\n";
    print {$fh} <<EOM;
These values are the main control of whitespace to the right of a token type;
They may be altered with the -wrs parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
 1 means the token wants a space to its right
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM
    for my $token_type ( sort keys %want_right_space ) {
        print {$fh} "$token_type\t$want_right_space{$token_type}\n";
    }
}
7631
{    # begin is_essential_whitespace

    # Lookup tables private to is_essential_whitespace.  They are filled in
    # by the BEGIN block below.
    my %is_sort_grep_map;
    my %is_for_foreach;

    BEGIN {

        # keywords which must keep a space before a following '('
        @_ = qw(sort grep map);
        @is_sort_grep_map{@_} = (1) x scalar(@_);

        # loop keywords which may be followed by 'my $variable'
        @_ = qw(for foreach);
        @is_for_foreach{@_} = (1) x scalar(@_);

    }

    sub is_essential_whitespace {

        # Essential whitespace means whitespace which cannot be safely deleted
        # without risking the introduction of a syntax error.
        # We are given three tokens and their types:
        # ($tokenl, $typel) is the token to the left of the space in question
        # ($tokenr, $typer) is the token to the right of the space in question
        # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
        #
        # The arguments are passed in the order
        #   $tokenll, $typell, $tokenl, $typel, $tokenr, $typer
        #
        # Returns a true value if any one of the rules below says that the
        # space must be kept, and a false value otherwise.  The rules are
        # tried in order and the value of the first one which is true is
        # returned.
        #
        # This is a slow routine but is not needed too often except when -mangle
        # is used.
        #
        # Note: This routine should almost never need to be changed.  It is
        # for avoiding syntax problems rather than for formatting.
        my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;

        my $result =

          # never combine two bare words or numbers
          # examples:  and ::ok(1)
          #            return ::spw(...)
          #            for bla::bla:: abc
          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
          #            $input eq"quit" to make $inputeq"quit"
          #            my $size=-s::SINK if $file;  <==OK but we won't do it
          ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )

          # do not combine a number with a concatenation dot
          # example: pom.caputo:
          # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
          || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
          || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )

          # do not join a minus with a bare word, because you might form
          # a file test operator.  Example from Complex.pm:
          # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
          # (this rule only applies when the right token is a single letter
          # or underscore)
          || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )

          # and something like this could become ambiguous without space
          # after the '-':
          #   use constant III=>1;
          #   $a = $b - III;
          # and even this:
          #   $a = - III;
          || ( ( $tokenl eq '-' )
            && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )

          # '= -' should not become =- or you will get a warning
          # about reversed -=
          # (this rule is currently disabled)
          # || ($tokenr eq '-')

          # keep a space between a quote and a bareword to prevent the
          # bareword from becoming a quote modifier.
          || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )

          # keep a space between a token ending in '$' and any word;
          # this caused trouble:  "die @$ if $@"
          || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
            && ( $tokenr =~ /^[a-zA-Z_]/ ) )

          # perl is very fussy about spaces before <<
          || ( $tokenr =~ /^\<\</ )

          # avoid combining tokens to create new meanings. Example:
          #     $a+ +$b must not become $a++$b
          # NOTE(review): %is_digraph and %is_trigraph are not defined in
          # this block; presumably they are keyed by the multi-character
          # operators -- confirm where they are initialized.
          || ( $is_digraph{ $tokenl . $tokenr } )
          || ( $is_trigraph{ $tokenl . $tokenr } )

          # another example: do not combine these two &'s:
          #     allow_options & &OPT_EXECCGI
          # (checks the left token joined with just the first character of
          # the right token)
          || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )

          # don't combine $$ or $# with any alphanumeric
          # (testfile mangle.t with --mangle)
          || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )

          # retain any space after possible filehandle
          # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
          || ( $typel eq 'Z' )

          # Perl is sensitive to whitespace after the + here:
          #  $b = xvals $a + 0.1 * yvals $a;
          || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )

          # keep paren separate in 'use Foo::Bar ()'
          || ( $tokenr eq '('
            && $typel   eq 'w'
            && $typell  eq 'k'
            && $tokenll eq 'use' )

          # keep any space between filehandle and paren:
          # file mangle.t with --mangle:
          || ( $typel eq 'Y' && $tokenr eq '(' )

          # retain any space after here doc operator ( hereerr.t)
          || ( $typel eq 'h' )

          # be careful with a space around ++ and --, to avoid ambiguity as to
          # which token it applies
          || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
          || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )

          # need space after foreach my; for example, this will fail in
          # older versions of Perl:
          # foreach my$ft(@filetypes)...
          || (
            $tokenl eq 'my'

            # the hash lookup replaces the regex  /^(for|foreach)$/
            && $is_for_foreach{$tokenll}
            && $tokenr =~ /^\$/
          )

          # must have space between grep and left paren; "grep(" will fail
          # (the same rule is applied to sort and map)
          || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )

          # don't stick numbers next to left parens, as in:
          #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
          || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )

          # We must be sure that a space between a ? and a quoted string
          # remains if the space before the ? remains.  [Loca.pm, lockarea]
          # ie,
          #    $b=join $comma ? ',' : ':', @_;  # ok
          #    $b=join $comma?',' : ':', @_;    # ok!
          #    $b=join $comma ?',' : ':', @_;   # error!
          # Not really required:
          ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )

          # do not remove space between an '&' and a bare word because
          # it may turn into a function evaluation, like here
          # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
          #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
          || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )

          ;    # the value of this long logic sequence is the result we want
        return $result;
    }
}
7788
7789 sub set_white_space_flag {
7790
7791     #    This routine examines each pair of nonblank tokens and
7792     #    sets values for array @white_space_flag.
7793     #
7794     #    $white_space_flag[$j] is a flag indicating whether a white space
7795     #    BEFORE token $j is needed, with the following values:
7796     #
7797     #            -1 do not want a space before token $j
7798     #             0 optional space or $j is a whitespace
7799     #             1 want a space before token $j
7800     #
7801     #
7802     #   The values for the first token will be defined based
7803     #   upon the contents of the "to_go" output array.
7804     #
7805     #   Note: retain debug print statements because they are usually
7806     #   required after adding new token types.
7807
7808     BEGIN {
7809
7810         # initialize these global hashes, which control the use of
7811         # whitespace around tokens:
7812         #
7813         # %binary_ws_rules
7814         # %want_left_space
7815         # %want_right_space
7816         # %space_after_keyword
7817         #
7818         # Many token types are identical to the tokens themselves.
7819         # See the tokenizer for a complete list. Here are some special types:
7820         #   k = perl keyword
7821         #   f = semicolon in for statement
7822         #   m = unary minus
7823         #   p = unary plus
7824         # Note that :: is excluded since it should be contained in an identifier
7825         # Note that '->' is excluded because it never gets space
7826         # parentheses and brackets are excluded since they are handled specially
7827         # curly braces are included but may be overridden by logic, such as
7828         # newline logic.
7829
7830         # NEW_TOKENS: create a whitespace rule here.  This can be as
7831         # simple as adding your new letter to @spaces_both_sides, for
7832         # example.
7833
7834         @_ = qw" L { ( [ ";
7835         @is_opening_type{@_} = (1) x scalar(@_);
7836
7837         @_ = qw" R } ) ] ";
7838         @is_closing_type{@_} = (1) x scalar(@_);
7839
7840         my @spaces_both_sides = qw"
7841           + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7842           .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7843           &&= ||= //= <=> A k f w F n C Y U G v
7844           ";
7845
7846         my @spaces_left_side = qw"
7847           t ! ~ m p { \ h pp mm Z j
7848           ";
7849         push( @spaces_left_side, '#' );    # avoids warning message
7850
7851         my @spaces_right_side = qw"
7852           ; } ) ] R J ++ -- **=
7853           ";
7854         push( @spaces_right_side, ',' );    # avoids warning message
7855         @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7856         @want_right_space{@spaces_both_sides} =
7857           (1) x scalar(@spaces_both_sides);
7858         @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
7859         @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7860         @want_left_space{@spaces_right_side} =
7861           (-1) x scalar(@spaces_right_side);
7862         @want_right_space{@spaces_right_side} =
7863           (1) x scalar(@spaces_right_side);
7864         $want_left_space{'L'}   = WS_NO;
7865         $want_left_space{'->'}  = WS_NO;
7866         $want_right_space{'->'} = WS_NO;
7867         $want_left_space{'**'}  = WS_NO;
7868         $want_right_space{'**'} = WS_NO;
7869
7870         # hash type information must stay tightly bound
7871         # as in :  ${xxxx}
7872         $binary_ws_rules{'i'}{'L'} = WS_NO;
7873         $binary_ws_rules{'i'}{'{'} = WS_YES;
7874         $binary_ws_rules{'k'}{'{'} = WS_YES;
7875         $binary_ws_rules{'U'}{'{'} = WS_YES;
7876         $binary_ws_rules{'i'}{'['} = WS_NO;
7877         $binary_ws_rules{'R'}{'L'} = WS_NO;
7878         $binary_ws_rules{'R'}{'{'} = WS_NO;
7879         $binary_ws_rules{'t'}{'L'} = WS_NO;
7880         $binary_ws_rules{'t'}{'{'} = WS_NO;
7881         $binary_ws_rules{'}'}{'L'} = WS_NO;
7882         $binary_ws_rules{'}'}{'{'} = WS_NO;
7883         $binary_ws_rules{'$'}{'L'} = WS_NO;
7884         $binary_ws_rules{'$'}{'{'} = WS_NO;
7885         $binary_ws_rules{'@'}{'L'} = WS_NO;
7886         $binary_ws_rules{'@'}{'{'} = WS_NO;
7887         $binary_ws_rules{'='}{'L'} = WS_YES;
7888
7889         # the following includes ') {'
7890         # as in :    if ( xxx ) { yyy }
7891         $binary_ws_rules{']'}{'L'} = WS_NO;
7892         $binary_ws_rules{']'}{'{'} = WS_NO;
7893         $binary_ws_rules{')'}{'{'} = WS_YES;
7894         $binary_ws_rules{')'}{'['} = WS_NO;
7895         $binary_ws_rules{']'}{'['} = WS_NO;
7896         $binary_ws_rules{']'}{'{'} = WS_NO;
7897         $binary_ws_rules{'}'}{'['} = WS_NO;
7898         $binary_ws_rules{'R'}{'['} = WS_NO;
7899
7900         $binary_ws_rules{']'}{'++'} = WS_NO;
7901         $binary_ws_rules{']'}{'--'} = WS_NO;
7902         $binary_ws_rules{')'}{'++'} = WS_NO;
7903         $binary_ws_rules{')'}{'--'} = WS_NO;
7904
7905         $binary_ws_rules{'R'}{'++'} = WS_NO;
7906         $binary_ws_rules{'R'}{'--'} = WS_NO;
7907
7908         ########################################################
7909         # should no longer be necessary (see niek.pl)
7910         ##$binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
7911         ##$binary_ws_rules{'w'}{':'} = WS_NO;
7912         ########################################################
7913         $binary_ws_rules{'i'}{'Q'} = WS_YES;
7914         $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7915
7916         # FIXME: we need to split 'i' into variables and functions
7917         # and have no space for functions but space for variables.  For now,
7918         # I have a special patch in the special rules below
7919         $binary_ws_rules{'i'}{'('} = WS_NO;
7920
7921         $binary_ws_rules{'w'}{'('} = WS_NO;
7922         $binary_ws_rules{'w'}{'{'} = WS_YES;
7923     }
7924     my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7925     my ( $last_token, $last_type, $last_block_type, $token, $type,
7926         $block_type );
7927     my (@white_space_flag);
7928     my $j_tight_closing_paren = -1;
7929
7930     if ( $max_index_to_go >= 0 ) {
7931         $token      = $tokens_to_go[$max_index_to_go];
7932         $type       = $types_to_go[$max_index_to_go];
7933         $block_type = $block_type_to_go[$max_index_to_go];
7934     }
7935     else {
7936         $token      = ' ';
7937         $type       = 'b';
7938         $block_type = '';
7939     }
7940
7941     # loop over all tokens
7942     my ( $j, $ws );
7943
7944     for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7945
7946         if ( $$rtoken_type[$j] eq 'b' ) {
7947             $white_space_flag[$j] = WS_OPTIONAL;
7948             next;
7949         }
7950
7951         # set a default value, to be changed as needed
7952         $ws              = undef;
7953         $last_token      = $token;
7954         $last_type       = $type;
7955         $last_block_type = $block_type;
7956         $token           = $$rtokens[$j];
7957         $type            = $$rtoken_type[$j];
7958         $block_type      = $$rblock_type[$j];
7959
7960         #---------------------------------------------------------------
7961         # section 1:
7962         # handle space on the inside of opening braces
7963         #---------------------------------------------------------------
7964
7965         #    /^[L\{\(\[]$/
7966         if ( $is_opening_type{$last_type} ) {
7967
7968             $j_tight_closing_paren = -1;
7969
7970             # let's keep empty matched braces together: () {} []
7971             # except for BLOCKS
7972             if ( $token eq $matching_token{$last_token} ) {
7973                 if ($block_type) {
7974                     $ws = WS_YES;
7975                 }
7976                 else {
7977                     $ws = WS_NO;
7978                 }
7979             }
7980             else {
7981
7982                 # we're considering the right of an opening brace
7983                 # tightness = 0 means always pad inside with space
7984                 # tightness = 1 means pad inside if "complex"
7985                 # tightness = 2 means never pad inside with space
7986
7987                 my $tightness;
7988                 if (   $last_type eq '{'
7989                     && $last_token eq '{'
7990                     && $last_block_type )
7991                 {
7992                     $tightness = $rOpts_block_brace_tightness;
7993                 }
7994                 else { $tightness = $tightness{$last_token} }
7995
7996     #=================================================================
7997     # Patch for fabrice_bug.pl
7998     # We must always avoid spaces around a bare word beginning with ^ as in:
7999     #    my $before = ${^PREMATCH};
8000     # Because all of the following cause an error in perl:
8001     #    my $before = ${ ^PREMATCH };
8002     #    my $before = ${ ^PREMATCH};
8003     #    my $before = ${^PREMATCH };
8004     # So if brace tightness flag is -bt=0 we must temporarily reset to bt=1.
8005     # Note that here we must set tightness=1 and not 2 so that the closing space
8006     # is also avoided (via the $j_tight_closing_paren flag in coding)
8007                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8008
8009               #=================================================================
8010
8011                 if ( $tightness <= 0 ) {
8012                     $ws = WS_YES;
8013                 }
8014                 elsif ( $tightness > 1 ) {
8015                     $ws = WS_NO;
8016                 }
8017                 else {
8018
8019                     # Patch to count '-foo' as single token so that
8020                     # each of  $a{-foo} and $a{foo} and $a{'foo'} do
8021                     # not get spaces with default formatting.
8022                     my $j_here = $j;
8023                     ++$j_here
8024                       if ( $token eq '-'
8025                         && $last_token eq '{'
8026                         && $$rtoken_type[ $j + 1 ] eq 'w' );
8027
8028                     # $j_next is where a closing token should be if
8029                     # the container has a single token
8030                     my $j_next =
8031                       ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8032                       ? $j_here + 2
8033                       : $j_here + 1;
8034                     my $tok_next  = $$rtokens[$j_next];
8035                     my $type_next = $$rtoken_type[$j_next];
8036
8037                     # for tightness = 1, if there is just one token
8038                     # within the matching pair, we will keep it tight
8039                     if (
8040                         $tok_next eq $matching_token{$last_token}
8041
8042                         # but watch out for this: [ [ ]    (misc.t)
8043                         && $last_token ne $token
8044                       )
8045                     {
8046
8047                         # remember where to put the space for the closing paren
8048                         $j_tight_closing_paren = $j_next;
8049                         $ws                    = WS_NO;
8050                     }
8051                     else {
8052                         $ws = WS_YES;
8053                     }
8054                 }
8055             }
8056         }    # done with opening braces and brackets
8057         my $ws_1 = $ws
8058           if FORMATTER_DEBUG_FLAG_WHITE;
8059
8060         #---------------------------------------------------------------
8061         # section 2:
8062         # handle space on inside of closing brace pairs
8063         #---------------------------------------------------------------
8064
8065         #   /[\}\)\]R]/
8066         if ( $is_closing_type{$type} ) {
8067
8068             if ( $j == $j_tight_closing_paren ) {
8069
8070                 $j_tight_closing_paren = -1;
8071                 $ws                    = WS_NO;
8072             }
8073             else {
8074
8075                 if ( !defined($ws) ) {
8076
8077                     my $tightness;
8078                     if ( $type eq '}' && $token eq '}' && $block_type ) {
8079                         $tightness = $rOpts_block_brace_tightness;
8080                     }
8081                     else { $tightness = $tightness{$token} }
8082
8083                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8084                 }
8085             }
8086         }
8087
8088         my $ws_2 = $ws
8089           if FORMATTER_DEBUG_FLAG_WHITE;
8090
8091         #---------------------------------------------------------------
8092         # section 3:
8093         # use the binary table
8094         #---------------------------------------------------------------
8095         if ( !defined($ws) ) {
8096             $ws = $binary_ws_rules{$last_type}{$type};
8097         }
8098         my $ws_3 = $ws
8099           if FORMATTER_DEBUG_FLAG_WHITE;
8100
8101         #---------------------------------------------------------------
8102         # section 4:
8103         # some special cases
8104         #---------------------------------------------------------------
8105         if ( $token eq '(' ) {
8106
8107             # This will have to be tweaked as tokenization changes.
8108             # We usually want a space at '} (', for example:
8109             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8110             #
8111             # But not others:
8112             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8113             # At present, the above & block is marked as type L/R so this case
8114             # won't go through here.
8115             if ( $last_type eq '}' ) { $ws = WS_YES }
8116
8117             # NOTE: some older versions of Perl had occasional problems if
8118             # spaces are introduced between keywords or functions and opening
8119             # parens.  So the default is not to do this except in certain
8120             # cases.  The current Perl seems to tolerate spaces.
8121
8122             # Space between keyword and '('
8123             elsif ( $last_type eq 'k' ) {
8124                 $ws = WS_NO
8125                   unless ( $rOpts_space_keyword_paren
8126                     || $space_after_keyword{$last_token} );
8127             }
8128
8129             # Space between function and '('
8130             # -----------------------------------------------------
8131             # 'w' and 'i' checks for something like:
8132             #   myfun(    &myfun(   ->myfun(
8133             # -----------------------------------------------------
8134             elsif (( $last_type =~ /^[wUG]$/ )
8135                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8136             {
8137                 $ws = WS_NO unless ($rOpts_space_function_paren);
8138             }
8139
8140             # space between something like $i and ( in
8141             # for $i ( 0 .. 20 ) {
8142             # FIXME: eventually, type 'i' needs to be split into multiple
8143             # token types so this can be a hardwired rule.
8144             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8145                 $ws = WS_YES;
8146             }
8147
8148             # allow constant function followed by '()' to retain no space
8149             elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8150                 $ws = WS_NO;
8151             }
8152         }
8153
8154         # patch for SWITCH/CASE: make space at ']{' optional
8155         # since the '{' might begin a case or when block
8156         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8157             $ws = WS_OPTIONAL;
8158         }
8159
8160         # keep space between 'sub' and '{' for anonymous sub definition
8161         if ( $type eq '{' ) {
8162             if ( $last_token eq 'sub' ) {
8163                 $ws = WS_YES;
8164             }
8165
8166             # this is needed to avoid no space in '){'
8167             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8168
8169             # avoid any space before the brace or bracket in something like
8170             #  @opts{'a','b',...}
8171             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8172                 $ws = WS_NO;
8173             }
8174         }
8175
8176         elsif ( $type eq 'i' ) {
8177
8178             # never a space before ->
8179             if ( $token =~ /^\-\>/ ) {
8180                 $ws = WS_NO;
8181             }
8182         }
8183
8184         # retain any space between '-' and bare word
8185         elsif ( $type eq 'w' || $type eq 'C' ) {
8186             $ws = WS_OPTIONAL if $last_type eq '-';
8187
8188             # never a space before ->
8189             if ( $token =~ /^\-\>/ ) {
8190                 $ws = WS_NO;
8191             }
8192         }
8193
8194         # retain any space between '-' and bare word
8195         # example: avoid space between 'USER' and '-' here:
8196         #   $myhash{USER-NAME}='steve';
8197         elsif ( $type eq 'm' || $type eq '-' ) {
8198             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8199         }
8200
8201         # always space before side comment
8202         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8203
8204         # always preserve whatever space was used after a possible
8205         # filehandle (except _) or here doc operator
8206         if (
8207             $type ne '#'
8208             && ( ( $last_type eq 'Z' && $last_token ne '_' )
8209                 || $last_type eq 'h' )
8210           )
8211         {
8212             $ws = WS_OPTIONAL;
8213         }
8214
8215         my $ws_4 = $ws
8216           if FORMATTER_DEBUG_FLAG_WHITE;
8217
8218         #---------------------------------------------------------------
8219         # section 5:
8220         # default rules not covered above
8221         #---------------------------------------------------------------
8222         # if we fall through to here,
8223         # look at the pre-defined hash tables for the two tokens, and
8224         # if (they are equal) use the common value
8225         # if (either is zero or undef) use the other
8226         # if (either is -1) use it
8227         # That is,
8228         # left  vs right
8229         #  1    vs    1     -->  1
8230         #  0    vs    0     -->  0
8231         # -1    vs   -1     --> -1
8232         #
8233         #  0    vs   -1     --> -1
8234         #  0    vs    1     -->  1
8235         #  1    vs    0     -->  1
8236         # -1    vs    0     --> -1
8237         #
8238         # -1    vs    1     --> -1
8239         #  1    vs   -1     --> -1
8240         if ( !defined($ws) ) {
8241             my $wl = $want_left_space{$type};
8242             my $wr = $want_right_space{$last_type};
8243             if ( !defined($wl) ) { $wl = 0 }
8244             if ( !defined($wr) ) { $wr = 0 }
8245             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8246         }
8247
8248         if ( !defined($ws) ) {
8249             $ws = 0;
8250             write_diagnostics(
8251                 "WS flag is undefined for tokens $last_token $token\n");
8252         }
8253
8254         # Treat newline as a whitespace. Otherwise, we might combine
8255         # 'Send' and '-recipients' here according to the above rules:
8256         #    my $msg = new Fax::Send
8257         #      -recipients => $to,
8258         #      -data => $data;
8259         if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8260
8261         if (   ( $ws == 0 )
8262             && $j > 0
8263             && $j < $jmax
8264             && ( $last_type !~ /^[Zh]$/ ) )
8265         {
8266
8267             # If this happens, we have a non-fatal but undesirable
8268             # hole in the above rules which should be patched.
8269             write_diagnostics(
8270                 "WS flag is zero for tokens $last_token $token\n");
8271         }
8272         $white_space_flag[$j] = $ws;
8273
8274         FORMATTER_DEBUG_FLAG_WHITE && do {
8275             my $str = substr( $last_token, 0, 15 );
8276             $str .= ' ' x ( 16 - length($str) );
8277             if ( !defined($ws_1) ) { $ws_1 = "*" }
8278             if ( !defined($ws_2) ) { $ws_2 = "*" }
8279             if ( !defined($ws_3) ) { $ws_3 = "*" }
8280             if ( !defined($ws_4) ) { $ws_4 = "*" }
8281             print
8282 "WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8283         };
8284     }
8285     return \@white_space_flag;
8286 }
8287
8288 {    # begin print_line_of_tokens
8289
8290     my $rtoken_type;
8291     my $rtokens;
8292     my $rlevels;
8293     my $rslevels;
8294     my $rblock_type;
8295     my $rcontainer_type;
8296     my $rcontainer_environment;
8297     my $rtype_sequence;
8298     my $input_line;
8299     my $rnesting_tokens;
8300     my $rci_levels;
8301     my $rnesting_blocks;
8302
8303     my $in_quote;
8304     my $python_indentation_level;
8305
8306     # These local token variables are stored by store_token_to_go:
8307     my $block_type;
8308     my $ci_level;
8309     my $container_environment;
8310     my $container_type;
8311     my $in_continued_quote;
8312     my $level;
8313     my $nesting_blocks;
8314     my $no_internal_newlines;
8315     my $slevel;
8316     my $token;
8317     my $type;
8318     my $type_sequence;
8319
8320     # routine to pull the jth token from the line of tokens
8321     sub extract_token {
8322         my $j = shift;
8323         $token                 = $$rtokens[$j];
8324         $type                  = $$rtoken_type[$j];
8325         $block_type            = $$rblock_type[$j];
8326         $container_type        = $$rcontainer_type[$j];
8327         $container_environment = $$rcontainer_environment[$j];
8328         $type_sequence         = $$rtype_sequence[$j];
8329         $level                 = $$rlevels[$j];
8330         $slevel                = $$rslevels[$j];
8331         $nesting_blocks        = $$rnesting_blocks[$j];
8332         $ci_level              = $$rci_levels[$j];
8333     }
8334
8335     {
8336         my @saved_token;
8337
8338         sub save_current_token {
8339
8340             @saved_token = (
8341                 $block_type,            $ci_level,
8342                 $container_environment, $container_type,
8343                 $in_continued_quote,    $level,
8344                 $nesting_blocks,        $no_internal_newlines,
8345                 $slevel,                $token,
8346                 $type,                  $type_sequence,
8347             );
8348         }
8349
8350         sub restore_current_token {
8351             (
8352                 $block_type,            $ci_level,
8353                 $container_environment, $container_type,
8354                 $in_continued_quote,    $level,
8355                 $nesting_blocks,        $no_internal_newlines,
8356                 $slevel,                $token,
8357                 $type,                  $type_sequence,
8358             ) = @saved_token;
8359         }
8360     }
8361
8362     # Routine to place the current token into the output stream.
8363     # Called once per output token.
8364     sub store_token_to_go {
8365
8366         my $flag = $no_internal_newlines;
8367         if ( $_[0] ) { $flag = 1 }
8368
8369         $tokens_to_go[ ++$max_index_to_go ]            = $token;
8370         $types_to_go[$max_index_to_go]                 = $type;
8371         $nobreak_to_go[$max_index_to_go]               = $flag;
8372         $old_breakpoint_to_go[$max_index_to_go]        = 0;
8373         $forced_breakpoint_to_go[$max_index_to_go]     = 0;
8374         $block_type_to_go[$max_index_to_go]            = $block_type;
8375         $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
8376         $container_environment_to_go[$max_index_to_go] = $container_environment;
8377         $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
8378         $ci_levels_to_go[$max_index_to_go]             = $ci_level;
8379         $mate_index_to_go[$max_index_to_go]            = -1;
8380         $matching_token_to_go[$max_index_to_go]        = '';
8381         $bond_strength_to_go[$max_index_to_go]         = 0;
8382
8383         # Note: negative levels are currently retained as a diagnostic so that
8384         # the 'final indentation level' is correctly reported for bad scripts.
8385         # But this means that every use of $level as an index must be checked.
8386         # If this becomes too much of a problem, we might give up and just clip
8387         # them at zero.
8388         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8389         $levels_to_go[$max_index_to_go] = $level;
8390         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8391         $lengths_to_go[ $max_index_to_go + 1 ] =
8392           $lengths_to_go[$max_index_to_go] + length($token);
8393
8394         # Define the indentation that this token would have if it started
8395         # a new line.  We have to do this now because we need to know this
8396         # when considering one-line blocks.
8397         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8398
8399         if ( $type ne 'b' ) {
8400             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8401             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
8402             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8403             $last_nonblank_index_to_go      = $max_index_to_go;
8404             $last_nonblank_type_to_go       = $type;
8405             $last_nonblank_token_to_go      = $token;
8406             if ( $type eq ',' ) {
8407                 $comma_count_in_batch++;
8408             }
8409         }
8410
8411         FORMATTER_DEBUG_FLAG_STORE && do {
8412             my ( $a, $b, $c ) = caller();
8413             print
8414 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8415         };
8416     }
8417
8418     sub insert_new_token_to_go {
8419
8420         # insert a new token into the output stream.  use same level as
8421         # previous token; assumes a character at max_index_to_go.
8422         save_current_token();
8423         ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8424
8425         if ( $max_index_to_go == UNDEFINED_INDEX ) {
8426             warning("code bug: bad call to insert_new_token_to_go\n");
8427         }
8428         $level = $levels_to_go[$max_index_to_go];
8429
8430         # FIXME: it seems to be necessary to use the next, rather than
8431         # previous, value of this variable when creating a new blank (align.t)
8432         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
8433         $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
8434         $ci_level              = $ci_levels_to_go[$max_index_to_go];
8435         $container_environment = $container_environment_to_go[$max_index_to_go];
8436         $in_continued_quote    = 0;
8437         $block_type            = "";
8438         $type_sequence         = "";
8439         store_token_to_go();
8440         restore_current_token();
8441         return;
8442     }
8443
8444     sub print_line_of_tokens {
8445
8446         my $line_of_tokens = shift;
8447
8448         # This routine is called once per input line to process all of
8449         # the tokens on that line.  This is the first stage of
8450         # beautification.
8451         #
8452         # Full-line comments and blank lines may be processed immediately.
8453         #
8454         # For normal lines of code, the tokens are stored one-by-one,
8455         # via calls to 'sub store_token_to_go', until a known line break
8456         # point is reached.  Then, the batch of collected tokens is
8457         # passed along to 'sub output_line_to_go' for further
8458         # processing.  This routine decides if there should be
8459         # whitespace between each pair of non-white tokens, so later
8460         # routines only need to decide on any additional line breaks.
8461         # Any whitespace is initially a single space character.  Later,
8462         # the vertical aligner may expand that to be multiple space
8463         # characters if necessary for alignment.
8464
8465         # extract input line number for error messages
8466         $input_line_number = $line_of_tokens->{_line_number};
8467
8468         $rtoken_type            = $line_of_tokens->{_rtoken_type};
8469         $rtokens                = $line_of_tokens->{_rtokens};
8470         $rlevels                = $line_of_tokens->{_rlevels};
8471         $rslevels               = $line_of_tokens->{_rslevels};
8472         $rblock_type            = $line_of_tokens->{_rblock_type};
8473         $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
8474         $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8475         $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
8476         $input_line             = $line_of_tokens->{_line_text};
8477         $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
8478         $rci_levels             = $line_of_tokens->{_rci_levels};
8479         $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
8480
8481         $in_continued_quote = $starting_in_quote =
8482           $line_of_tokens->{_starting_in_quote};
8483         $in_quote        = $line_of_tokens->{_ending_in_quote};
8484         $ending_in_quote = $in_quote;
8485         $python_indentation_level =
8486           $line_of_tokens->{_python_indentation_level};
8487
8488         my $j;
8489         my $j_next;
8490         my $jmax;
8491         my $next_nonblank_token;
8492         my $next_nonblank_token_type;
8493         my $rwhite_space_flag;
8494
8495         $jmax                    = @$rtokens - 1;
8496         $block_type              = "";
8497         $container_type          = "";
8498         $container_environment   = "";
8499         $type_sequence           = "";
8500         $no_internal_newlines    = 1 - $rOpts_add_newlines;
8501         $is_static_block_comment = 0;
8502
8503         # Handle a continued quote..
8504         if ($in_continued_quote) {
8505
8506             # A line which is entirely a quote or pattern must go out
8507             # verbatim.  Note: the \n is contained in $input_line.
8508             if ( $jmax <= 0 ) {
8509                 if ( ( $input_line =~ "\t" ) ) {
8510                     note_embedded_tab();
8511                 }
8512                 write_unindented_line("$input_line");
8513                 $last_line_had_side_comment = 0;
8514                 return;
8515             }
8516
8517             # prior to version 20010406, perltidy had a bug which placed
8518             # continuation indentation before the last line of some multiline
8519             # quotes and patterns -- exactly the lines passing this way.
8520             # To help find affected lines in scripts run with these
8521             # versions, run with '-chk', and it will warn of any quotes or
8522             # patterns which might have been modified by these early
8523             # versions.
8524             if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8525                 warning(
8526 "-chk: please check this line for extra leading whitespace\n"
8527                 );
8528             }
8529         }
8530
8531         # Write line verbatim if we are in a formatting skip section
8532         if ($in_format_skipping_section) {
8533             write_unindented_line("$input_line");
8534             $last_line_had_side_comment = 0;
8535
8536             # Note: extra space appended to comment simplifies pattern matching
8537             if (   $jmax == 0
8538                 && $$rtoken_type[0] eq '#'
8539                 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8540             {
8541                 $in_format_skipping_section = 0;
8542                 write_logfile_entry("Exiting formatting skip section\n");
8543             }
8544             return;
8545         }
8546
8547         # See if we are entering a formatting skip section
8548         if (   $rOpts_format_skipping
8549             && $jmax == 0
8550             && $$rtoken_type[0] eq '#'
8551             && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8552         {
8553             flush();
8554             $in_format_skipping_section = 1;
8555             write_logfile_entry("Entering formatting skip section\n");
8556             write_unindented_line("$input_line");
8557             $last_line_had_side_comment = 0;
8558             return;
8559         }
8560
8561         # delete trailing blank tokens
8562         if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8563
8564         # Handle a blank line..
8565         if ( $jmax < 0 ) {
8566
8567             # If keep-old-blank-lines is zero, we delete all
8568             # old blank lines and let the blank line rules generate any
8569             # needed blanks.
8570             if ($rOpts_keep_old_blank_lines) {
8571                 flush();
8572                 $file_writer_object->write_blank_code_line(
8573                     $rOpts_keep_old_blank_lines == 2 );
8574                 $last_line_leading_type = 'b';
8575             }
8576             $last_line_had_side_comment = 0;
8577             return;
8578         }
8579
8580         # see if this is a static block comment (starts with ## by default)
8581         my $is_static_block_comment_without_leading_space = 0;
8582         if (   $jmax == 0
8583             && $$rtoken_type[0] eq '#'
8584             && $rOpts->{'static-block-comments'}
8585             && $input_line =~ /$static_block_comment_pattern/o )
8586         {
8587             $is_static_block_comment = 1;
8588             $is_static_block_comment_without_leading_space =
8589               substr( $input_line, 0, 1 ) eq '#';
8590         }
8591
8592         # Check for comments which are line directives
8593         # Treat exactly as static block comments without leading space
8594         # reference: perlsyn, near end, section Plain Old Comments (Not!)
8595         # example: '# line 42 "new_filename.plx"'
8596         if (
8597                $jmax == 0
8598             && $$rtoken_type[0] eq '#'
8599             && $input_line =~ /^\#   \s*
8600                                line \s+ (\d+)   \s*
8601                                (?:\s("?)([^"]+)\2)? \s*
8602                                $/x
8603           )
8604         {
8605             $is_static_block_comment                       = 1;
8606             $is_static_block_comment_without_leading_space = 1;
8607         }
8608
8609         # create a hanging side comment if appropriate
8610         if (
8611                $jmax == 0
8612             && $$rtoken_type[0] eq '#'    # only token is a comment
8613             && $last_line_had_side_comment    # last line had side comment
8614             && $input_line =~ /^\s/           # there is some leading space
8615             && !$is_static_block_comment    # do not make static comment hanging
8616             && $rOpts->{'hanging-side-comments'}    # user is allowing this
8617           )
8618         {
8619
8620             # We will insert an empty qw string at the start of the token list
8621             # to force this comment to be a side comment. The vertical aligner
8622             # should then line it up with the previous side comment.
8623             unshift @$rtoken_type,            'q';
8624             unshift @$rtokens,                '';
8625             unshift @$rlevels,                $$rlevels[0];
8626             unshift @$rslevels,               $$rslevels[0];
8627             unshift @$rblock_type,            '';
8628             unshift @$rcontainer_type,        '';
8629             unshift @$rcontainer_environment, '';
8630             unshift @$rtype_sequence,         '';
8631             unshift @$rnesting_tokens,        $$rnesting_tokens[0];
8632             unshift @$rci_levels,             $$rci_levels[0];
8633             unshift @$rnesting_blocks,        $$rnesting_blocks[0];
8634             $jmax = 1;
8635         }
8636
8637         # remember if this line has a side comment
8638         $last_line_had_side_comment =
8639           ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8640
8641         # Handle a block (full-line) comment..
8642         if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8643
8644             if ( $rOpts->{'delete-block-comments'} ) { return }
8645
8646             if ( $rOpts->{'tee-block-comments'} ) {
8647                 $file_writer_object->tee_on();
8648             }
8649
8650             destroy_one_line_block();
8651             output_line_to_go();
8652
8653             # output a blank line before block comments
8654             if (
8655                    $last_line_leading_type !~ /^[#b]$/
8656                 && $rOpts->{'blanks-before-comments'}    # only if allowed
8657                 && !
8658                 $is_static_block_comment    # never before static block comments
8659               )
8660             {
8661                 flush();                    # switching to new output stream
8662                 $file_writer_object->write_blank_code_line();
8663                 $last_line_leading_type = 'b';
8664             }
8665
8666             # TRIM COMMENTS -- This could be turned off as a option
8667             $$rtokens[0] =~ s/\s*$//;       # trim right end
8668
8669             if (
8670                 $rOpts->{'indent-block-comments'}
8671                 && (  !$rOpts->{'indent-spaced-block-comments'}
8672                     || $input_line =~ /^\s+/ )
8673                 && !$is_static_block_comment_without_leading_space
8674               )
8675             {
8676                 extract_token(0);
8677                 store_token_to_go();
8678                 output_line_to_go();
8679             }
8680             else {
8681                 flush();    # switching to new output stream
8682                 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8683                 $last_line_leading_type = '#';
8684             }
8685             if ( $rOpts->{'tee-block-comments'} ) {
8686                 $file_writer_object->tee_off();
8687             }
8688             return;
8689         }
8690
8691         # compare input/output indentation except for continuation lines
8692         # (because they have an unknown amount of initial blank space)
8693         # and lines which are quotes (because they may have been outdented)
8694         # Note: this test is placed here because we know the continuation flag
8695         # at this point, which allows us to avoid non-meaningful checks.
8696         my $structural_indentation_level = $$rlevels[0];
8697         compare_indentation_levels( $python_indentation_level,
8698             $structural_indentation_level )
8699           unless ( $python_indentation_level < 0
8700             || ( $$rci_levels[0] > 0 )
8701             || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8702           );
8703
8704         #   Patch needed for MakeMaker.  Do not break a statement
8705         #   in which $VERSION may be calculated.  See MakeMaker.pm;
8706         #   this is based on the coding in it.
8707         #   The first line of a file that matches this will be eval'd:
8708         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8709         #   Examples:
8710         #     *VERSION = \'1.01';
8711         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
8712         #   We will pass such a line straight through without breaking
8713         #   it unless -npvl is used
8714
8715         my $is_VERSION_statement = 0;
8716
8717         if (
8718               !$saw_VERSION_in_this_file
8719             && $input_line =~ /VERSION/    # quick check to reject most lines
8720             && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8721           )
8722         {
8723             $saw_VERSION_in_this_file = 1;
8724             $is_VERSION_statement     = 1;
8725             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8726             $no_internal_newlines = 1;
8727         }
8728
8729         # take care of indentation-only
8730         # NOTE: In previous versions we sent all qw lines out immediately here.
8731         # No longer doing this: also write a line which is entirely a 'qw' list
8732         # to allow stacking of opening and closing tokens.  Note that interior
8733         # qw lines will still go out at the end of this routine.
8734         if ( $rOpts->{'indent-only'} ) {
8735             flush();
8736             trim($input_line);
8737
8738             extract_token(0);
8739             $token                 = $input_line;
8740             $type                  = 'q';
8741             $block_type            = "";
8742             $container_type        = "";
8743             $container_environment = "";
8744             $type_sequence         = "";
8745             store_token_to_go();
8746             output_line_to_go();
8747             return;
8748         }
8749
8750         push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
8751         push( @$rtoken_type, 'b', 'b' );
8752         ($rwhite_space_flag) =
8753           set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8754
8755         # find input tabbing to allow checks for tabbing disagreement
8756         ## not used for now
8757         ##$input_line_tabbing = "";
8758         ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8759
8760         # if the buffer hasn't been flushed, add a leading space if
8761         # necessary to keep essential whitespace. This is really only
8762         # necessary if we are squeezing out all ws.
8763         if ( $max_index_to_go >= 0 ) {
8764
8765             $old_line_count_in_batch++;
8766
8767             if (
8768                 is_essential_whitespace(
8769                     $last_last_nonblank_token,
8770                     $last_last_nonblank_type,
8771                     $tokens_to_go[$max_index_to_go],
8772                     $types_to_go[$max_index_to_go],
8773                     $$rtokens[0],
8774                     $$rtoken_type[0]
8775                 )
8776               )
8777             {
8778                 my $slevel = $$rslevels[0];
8779                 insert_new_token_to_go( ' ', 'b', $slevel,
8780                     $no_internal_newlines );
8781             }
8782         }
8783
8784         # If we just saw the end of an elsif block, write nag message
8785         # if we do not see another elsif or an else.
8786         if ($looking_for_else) {
8787
8788             unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8789                 write_logfile_entry("(No else block)\n");
8790             }
8791             $looking_for_else = 0;
8792         }
8793
8794         # This is a good place to kill incomplete one-line blocks
8795         if (   ( $semicolons_before_block_self_destruct == 0 )
8796             && ( $max_index_to_go >= 0 )
8797             && ( $types_to_go[$max_index_to_go] eq ';' )
8798             && ( $$rtokens[0] ne '}' ) )
8799         {
8800             destroy_one_line_block();
8801             output_line_to_go();
8802         }
8803
8804         # loop to process the tokens one-by-one
8805         $type  = 'b';
8806         $token = "";
8807
8808         foreach $j ( 0 .. $jmax ) {
8809
8810             # pull out the local values for this token
8811             extract_token($j);
8812
8813             if ( $type eq '#' ) {
8814
8815                 # trim trailing whitespace
8816                 # (there is no option at present to prevent this)
8817                 $token =~ s/\s*$//;
8818
8819                 if (
8820                     $rOpts->{'delete-side-comments'}
8821
8822                     # delete closing side comments if necessary
8823                     || (   $rOpts->{'delete-closing-side-comments'}
8824                         && $token =~ /$closing_side_comment_prefix_pattern/o
8825                         && $last_nonblank_block_type =~
8826                         /$closing_side_comment_list_pattern/o )
8827                   )
8828                 {
8829                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8830                         unstore_token_to_go();
8831                     }
8832                     last;
8833                 }
8834             }
8835
8836             # If we are continuing after seeing a right curly brace, flush
8837             # buffer unless we see what we are looking for, as in
8838             #   } else ...
8839             if ( $rbrace_follower && $type ne 'b' ) {
8840
8841                 unless ( $rbrace_follower->{$token} ) {
8842                     output_line_to_go();
8843                 }
8844                 $rbrace_follower = undef;
8845             }
8846
8847             $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8848             $next_nonblank_token      = $$rtokens[$j_next];
8849             $next_nonblank_token_type = $$rtoken_type[$j_next];
8850
8851             #--------------------------------------------------------
8852             # Start of section to patch token text
8853             #--------------------------------------------------------
8854
8855             # Modify certain tokens here for whitespace
8856             # The following is not yet done, but could be:
8857             #   sub (x x x)
8858             if ( $type =~ /^[wit]$/ ) {
8859
8860                 # Examples:
8861                 # change '$  var'  to '$var' etc
8862                 #        '-> new'  to '->new'
8863                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8864                     $token =~ s/\s*//g;
8865                 }
8866
8867                 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8868             }
8869
8870             # change 'LABEL   :'   to 'LABEL:'
8871             elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8872
8873             # patch to add space to something like "x10"
8874             # This avoids having to split this token in the pre-tokenizer
8875             elsif ( $type eq 'n' ) {
8876                 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8877             }
8878
8879             elsif ( $type eq 'Q' ) {
8880                 note_embedded_tab() if ( $token =~ "\t" );
8881
8882                 # make note of something like '$var = s/xxx/yyy/;'
8883                 # in case it should have been '$var =~ s/xxx/yyy/;'
8884                 if (
8885                        $token =~ /^(s|tr|y|m|\/)/
8886                     && $last_nonblank_token =~ /^(=|==|!=)$/
8887
8888                     # preceded by simple scalar
8889                     && $last_last_nonblank_type eq 'i'
8890                     && $last_last_nonblank_token =~ /^\$/
8891
8892                     # followed by some kind of termination
8893                     # (but give complaint if we can't see far enough ahead)
8894                     && $next_nonblank_token =~ /^[; \)\}]$/
8895
8896                     # scalar is not declared
8897                     && !(
8898                            $types_to_go[0] eq 'k'
8899                         && $tokens_to_go[0] =~ /^(my|our|local)$/
8900                     )
8901                   )
8902                 {
8903                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8904                     complain(
8905 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8906                     );
8907                 }
8908             }
8909
8910            # trim blanks from right of qw quotes
8911            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8912             elsif ( $type eq 'q' ) {
8913                 $token =~ s/\s*$//;
8914                 note_embedded_tab() if ( $token =~ "\t" );
8915             }
8916
8917             #--------------------------------------------------------
8918             # End of section to patch token text
8919             #--------------------------------------------------------
8920
8921             # insert any needed whitespace
8922             if (   ( $type ne 'b' )
8923                 && ( $max_index_to_go >= 0 )
8924                 && ( $types_to_go[$max_index_to_go] ne 'b' )
8925                 && $rOpts_add_whitespace )
8926             {
8927                 my $ws = $$rwhite_space_flag[$j];
8928
8929                 if ( $ws == 1 ) {
8930                     insert_new_token_to_go( ' ', 'b', $slevel,
8931                         $no_internal_newlines );
8932                 }
8933             }
8934
8935             # Do not allow breaks which would promote a side comment to a
8936             # block comment.  In order to allow a break before an opening
8937             # or closing BLOCK, followed by a side comment, those sections
8938             # of code will handle this flag separately.
8939             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8940             my $is_opening_BLOCK =
8941               (      $type eq '{'
8942                   && $token eq '{'
8943                   && $block_type
8944                   && $block_type ne 't' );
8945             my $is_closing_BLOCK =
8946               (      $type eq '}'
8947                   && $token eq '}'
8948                   && $block_type
8949                   && $block_type ne 't' );
8950
8951             if (   $side_comment_follows
8952                 && !$is_opening_BLOCK
8953                 && !$is_closing_BLOCK )
8954             {
8955                 $no_internal_newlines = 1;
8956             }
8957
8958             # We're only going to handle breaking for code BLOCKS at this
8959             # (top) level.  Other indentation breaks will be handled by
8960             # sub scan_list, which is better suited to dealing with them.
8961             if ($is_opening_BLOCK) {
8962
8963                 # Tentatively output this token.  This is required before
8964                 # calling starting_one_line_block.  We may have to unstore
8965                 # it, though, if we have to break before it.
8966                 store_token_to_go($side_comment_follows);
8967
8968                 # Look ahead to see if we might form a one-line block
8969                 my $too_long =
8970                   starting_one_line_block( $j, $jmax, $level, $slevel,
8971                     $ci_level, $rtokens, $rtoken_type, $rblock_type );
8972                 clear_breakpoint_undo_stack();
8973
8974                 # to simplify the logic below, set a flag to indicate if
8975                 # this opening brace is far from the keyword which introduces it
8976                 my $keyword_on_same_line = 1;
8977                 if (   ( $max_index_to_go >= 0 )
8978                     && ( $last_nonblank_type eq ')' ) )
8979                 {
8980                     if (   $block_type =~ /^(if|else|elsif)$/
8981                         && ( $tokens_to_go[0] eq '}' )
8982                         && $rOpts_cuddled_else )
8983                     {
8984                         $keyword_on_same_line = 1;
8985                     }
8986                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8987                     {
8988                         $keyword_on_same_line = 0;
8989                     }
8990                 }
8991
8992                 # decide if user requested break before '{'
8993                 my $want_break =
8994
8995                   # use -bl flag if not a sub block of any type
8996                   $block_type !~ /^sub/
8997                   ? $rOpts->{'opening-brace-on-new-line'}
8998
8999                   # use -sbl flag for a named sub block
9000                   : $block_type !~ /^sub\W*$/
9001                   ? $rOpts->{'opening-sub-brace-on-new-line'}
9002
9003                   # use -asbl flag for an anonymous sub block
9004                   : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9005
9006                 # Break before an opening '{' ...
9007                 if (
9008
9009                     # if requested
9010                     $want_break
9011
9012                     # and we were unable to start looking for a block,
9013                     && $index_start_one_line_block == UNDEFINED_INDEX
9014
9015                     # or if it will not be on same line as its keyword, so that
9016                     # it will be outdented (eval.t, overload.t), and the user
9017                     # has not insisted on keeping it on the right
9018                     || (   !$keyword_on_same_line
9019                         && !$rOpts->{'opening-brace-always-on-right'} )
9020
9021                   )
9022                 {
9023
9024                     # but only if allowed
9025                     unless ($no_internal_newlines) {
9026
9027                         # since we already stored this token, we must unstore it
9028                         unstore_token_to_go();
9029
9030                         # then output the line
9031                         output_line_to_go();
9032
9033                         # and now store this token at the start of a new line
9034                         store_token_to_go($side_comment_follows);
9035                     }
9036                 }
9037
9038                 # Now update for side comment
9039                 if ($side_comment_follows) { $no_internal_newlines = 1 }
9040
9041                 # now output this line
9042                 unless ($no_internal_newlines) {
9043                     output_line_to_go();
9044                 }
9045             }
9046
9047             elsif ($is_closing_BLOCK) {
9048
9049                 # If there is a pending one-line block ..
9050                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9051
9052                     # we have to terminate it if..
9053                     if (
9054
9055                     # it is too long (final length may be different from
9056                     # initial estimate). note: must allow 1 space for this token
9057                         excess_line_length( $index_start_one_line_block,
9058                             $max_index_to_go ) >= 0
9059
9060                         # or if it has too many semicolons
9061                         || (   $semicolons_before_block_self_destruct == 0
9062                             && $last_nonblank_type ne ';' )
9063                       )
9064                     {
9065                         destroy_one_line_block();
9066                     }
9067                 }
9068
9069                 # put a break before this closing curly brace if appropriate
9070                 unless ( $no_internal_newlines
9071                     || $index_start_one_line_block != UNDEFINED_INDEX )
9072                 {
9073
9074                     # add missing semicolon if ...
9075                     # there are some tokens
9076                     if (
9077                         ( $max_index_to_go > 0 )
9078
9079                         # and we don't have one
9080                         && ( $last_nonblank_type ne ';' )
9081
9082                         # patch until some block type issues are fixed:
9083                         # Do not add semi-colon for block types '{',
9084                         # '}', and ';' because we cannot be sure yet
9085                         # that this is a block and not an anonymous
9086                         # hash (blktype.t, blktype1.t)
9087                         && ( $block_type !~ /^[\{\};]$/ )
9088
9089                         # patch: and do not add semi-colons for recently
9090                         # added block types (see tmp/semicolon.t)
9091                         && ( $block_type !~
9092                             /^(switch|case|given|when|default)$/ )
9093
9094                         # it seems best not to add semicolons in these
9095                         # special block types: sort|map|grep
9096                         && ( !$is_sort_map_grep{$block_type} )
9097
9098                         # and we are allowed to do so.
9099                         && $rOpts->{'add-semicolons'}
9100                       )
9101                     {
9102
9103                         save_current_token();
9104                         $token  = ';';
9105                         $type   = ';';
9106                         $level  = $levels_to_go[$max_index_to_go];
9107                         $slevel = $nesting_depth_to_go[$max_index_to_go];
9108                         $nesting_blocks =
9109                           $nesting_blocks_to_go[$max_index_to_go];
9110                         $ci_level       = $ci_levels_to_go[$max_index_to_go];
9111                         $block_type     = "";
9112                         $container_type = "";
9113                         $container_environment = "";
9114                         $type_sequence         = "";
9115
9116                         # Note - we remove any blank AFTER extracting its
9117                         # parameters such as level, etc, above
9118                         if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9119                             unstore_token_to_go();
9120                         }
9121                         store_token_to_go();
9122
9123                         note_added_semicolon();
9124                         restore_current_token();
9125                     }
9126
9127                     # then write out everything before this closing curly brace
9128                     output_line_to_go();
9129
9130                 }
9131
9132                 # Now update for side comment
9133                 if ($side_comment_follows) { $no_internal_newlines = 1 }
9134
9135                 # store the closing curly brace
9136                 store_token_to_go();
9137
9138                 # ok, we just stored a closing curly brace.  Often, but
9139                 # not always, we want to end the line immediately.
9140                 # So now we have to check for special cases.
9141
9142                 # if this '}' successfully ends a one-line block..
9143                 my $is_one_line_block = 0;
9144                 my $keep_going        = 0;
9145                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9146
9147                     # Remember the type of token just before the
9148                     # opening brace.  It would be more general to use
9149                     # a stack, but this will work for one-line blocks.
9150                     $is_one_line_block =
9151                       $types_to_go[$index_start_one_line_block];
9152
9153                     # we have to actually make it by removing tentative
9154                     # breaks that were set within it
9155                     undo_forced_breakpoint_stack(0);
9156                     set_nobreaks( $index_start_one_line_block,
9157                         $max_index_to_go - 1 );
9158
9159                     # then re-initialize for the next one-line block
9160                     destroy_one_line_block();
9161
9162                     # then decide if we want to break after the '}' ..
9163                     # We will keep going to allow certain brace followers as in:
9164                     #   do { $ifclosed = 1; last } unless $losing;
9165                     #
9166                     # But make a line break if the curly ends a
9167                     # significant block:
9168                     if (
9169                         $is_block_without_semicolon{$block_type}
9170
9171                         # if needless semicolon follows we handle it later
9172                         && $next_nonblank_token ne ';'
9173                       )
9174                     {
9175                         output_line_to_go() unless ($no_internal_newlines);
9176                     }
9177                 }
9178
9179                 # set string indicating what we need to look for brace follower
9180                 # tokens
9181                 if ( $block_type eq 'do' ) {
9182                     $rbrace_follower = \%is_do_follower;
9183                 }
9184                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9185                     $rbrace_follower = \%is_if_brace_follower;
9186                 }
9187                 elsif ( $block_type eq 'else' ) {
9188                     $rbrace_follower = \%is_else_brace_follower;
9189                 }
9190
9191                 # added eval for borris.t
9192                 elsif ($is_sort_map_grep_eval{$block_type}
9193                     || $is_one_line_block eq 'G' )
9194                 {
9195                     $rbrace_follower = undef;
9196                     $keep_going      = 1;
9197                 }
9198
9199                 # anonymous sub
9200                 elsif ( $block_type =~ /^sub\W*$/ ) {
9201
9202                     if ($is_one_line_block) {
9203                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
9204                     }
9205                     else {
9206                         $rbrace_follower = \%is_anon_sub_brace_follower;
9207                     }
9208                 }
9209
9210                 # None of the above: specify what can follow a closing
9211                 # brace of a block which is not an
9212                 # if/elsif/else/do/sort/map/grep/eval
9213                 # Testfiles:
9214                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9215                 else {
9216                     $rbrace_follower = \%is_other_brace_follower;
9217                 }
9218
9219                 # See if an elsif block is followed by another elsif or else;
9220                 # complain if not.
9221                 if ( $block_type eq 'elsif' ) {
9222
9223                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
9224                         $looking_for_else = 1;    # ok, check on next line
9225                     }
9226                     else {
9227
9228                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9229                             write_logfile_entry("No else block :(\n");
9230                         }
9231                     }
9232                 }
9233
9234                 # keep going after certain block types (map,sort,grep,eval)
9235                 # added eval for borris.t
9236                 if ($keep_going) {
9237
9238                     # keep going
9239                 }
9240
9241                 # if no more tokens, postpone decision until re-entering
9242                 elsif ( ( $next_nonblank_token_type eq 'b' )
9243                     && $rOpts_add_newlines )
9244                 {
9245                     unless ($rbrace_follower) {
9246                         output_line_to_go() unless ($no_internal_newlines);
9247                     }
9248                 }
9249
9250                 elsif ($rbrace_follower) {
9251
9252                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
9253                         output_line_to_go() unless ($no_internal_newlines);
9254                     }
9255                     $rbrace_follower = undef;
9256                 }
9257
9258                 else {
9259                     output_line_to_go() unless ($no_internal_newlines);
9260                 }
9261
9262             }    # end treatment of closing block token
9263
9264             # handle semicolon
9265             elsif ( $type eq ';' ) {
9266
9267                 # kill one-line blocks with too many semicolons
9268                 $semicolons_before_block_self_destruct--;
9269                 if (
9270                     ( $semicolons_before_block_self_destruct < 0 )
9271                     || (   $semicolons_before_block_self_destruct == 0
9272                         && $next_nonblank_token_type !~ /^[b\}]$/ )
9273                   )
9274                 {
9275                     destroy_one_line_block();
9276                 }
9277
9278                 # Remove unnecessary semicolons, but not after bare
9279                 # blocks, where it could be unsafe if the brace is
9280                 # mistokenized.
9281                 if (
9282                     (
9283                         $last_nonblank_token eq '}'
9284                         && (
9285                             $is_block_without_semicolon{
9286                                 $last_nonblank_block_type}
9287                             || $last_nonblank_block_type =~ /^sub\s+\w/
9288                             || $last_nonblank_block_type =~ /^\w+:$/ )
9289                     )
9290                     || $last_nonblank_type eq ';'
9291                   )
9292                 {
9293
9294                     if (
9295                         $rOpts->{'delete-semicolons'}
9296
9297                         # don't delete ; before a # because it would promote it
9298                         # to a block comment
9299                         && ( $next_nonblank_token_type ne '#' )
9300                       )
9301                     {
9302                         note_deleted_semicolon();
9303                         output_line_to_go()
9304                           unless ( $no_internal_newlines
9305                             || $index_start_one_line_block != UNDEFINED_INDEX );
9306                         next;
9307                     }
9308                     else {
9309                         write_logfile_entry("Extra ';'\n");
9310                     }
9311                 }
9312                 store_token_to_go();
9313
9314                 output_line_to_go()
9315                   unless ( $no_internal_newlines
9316                     || ( $rOpts_keep_interior_semicolons && $j < $jmax )
9317                     || ( $next_nonblank_token eq '}' ) );
9318
9319             }
9320
9321             # handle here_doc target string
9322             elsif ( $type eq 'h' ) {
9323                 $no_internal_newlines =
9324                   1;    # no newlines after seeing here-target
9325                 destroy_one_line_block();
9326                 store_token_to_go();
9327             }
9328
9329             # handle all other token types
9330             else {
9331
9332                 # if this is a blank...
9333                 if ( $type eq 'b' ) {
9334
9335                     # make it just one character
9336                     $token = ' ' if $rOpts_add_whitespace;
9337
9338                     # delete it if unwanted by whitespace rules
9339                     # or we are deleting all whitespace
9340                     my $ws = $$rwhite_space_flag[ $j + 1 ];
9341                     if ( ( defined($ws) && $ws == -1 )
9342                         || $rOpts_delete_old_whitespace )
9343                     {
9344
9345                         # unless it might make a syntax error
9346                         next
9347                           unless is_essential_whitespace(
9348                             $last_last_nonblank_token,
9349                             $last_last_nonblank_type,
9350                             $tokens_to_go[$max_index_to_go],
9351                             $types_to_go[$max_index_to_go],
9352                             $$rtokens[ $j + 1 ],
9353                             $$rtoken_type[ $j + 1 ]
9354                           );
9355                     }
9356                 }
9357                 store_token_to_go();
9358             }
9359
9360             # remember two previous nonblank OUTPUT tokens
9361             if ( $type ne '#' && $type ne 'b' ) {
9362                 $last_last_nonblank_token = $last_nonblank_token;
9363                 $last_last_nonblank_type  = $last_nonblank_type;
9364                 $last_nonblank_token      = $token;
9365                 $last_nonblank_type       = $type;
9366                 $last_nonblank_block_type = $block_type;
9367             }
9368
9369             # unset the continued-quote flag since it only applies to the
9370             # first token, and we want to resume normal formatting if
9371             # there are additional tokens on the line
9372             $in_continued_quote = 0;
9373
9374         }    # end of loop over all tokens in this 'line_of_tokens'
9375
9376         # we have to flush ..
9377         if (
9378
9379             # if there is a side comment
9380             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9381
9382             # if this line ends in a quote
9383             # NOTE: This is critically important for ensuring that quoted lines
9384             # do not get processed by things like -sot and -sct
9385             || $in_quote
9386
9387             # if this is a VERSION statement
9388             || $is_VERSION_statement
9389
9390             # to keep a label on one line if that is how it is now
9391             || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9392
9393             # if we are instructed to keep all old line breaks
9394             || !$rOpts->{'delete-old-newlines'}
9395           )
9396         {
9397             destroy_one_line_block();
9398             output_line_to_go();
9399         }
9400
9401         # mark old line breakpoints in current output stream
9402         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9403             $old_breakpoint_to_go[$max_index_to_go] = 1;
9404         }
9405     }    # end sub print_line_of_tokens
9406 }    # end print_line_of_tokens
9407
9408 # sub output_line_to_go sends one logical line of tokens on down the
9409 # pipeline to the VerticalAligner package, breaking the line into continuation
9410 # lines as necessary.  The line of tokens is ready to go in the "to_go"
9411 # arrays.  It reads no arguments; all of its inputs are variables set elsewhere.
9412 sub output_line_to_go {
9413
9414     # debug stuff; this routine can be called from many points
9415     FORMATTER_DEBUG_FLAG_OUTPUT && do {
9416         my ( $a, $b, $c ) = caller;
9417         write_diagnostics(
9418 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9419         );
9420         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9421         write_diagnostics("$output_str\n");
9422     };
9423
9424     # just set a tentative breakpoint if we might be in a one-line block
9425     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9426         set_forced_breakpoint($max_index_to_go);
9427         return;
9428     }
9429     # any text returned here is written as a separate line at the end of this sub
9430     my $cscw_block_comment;
9431     $cscw_block_comment = add_closing_side_comment()
9432       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9433
9434     match_opening_and_closing_tokens();
9435
9436     # tell the -lp option we are outputting a batch so it can close
9437     # any unfinished items in its stack
9438     finish_lp_batch();
9439
9440     # If this line ends in a code block brace, set breaks at any
9441     # previous closing code block braces to break up a chain of code
9442     # blocks on one line.  This is very rare but can happen for
9443     # user-defined subs.  For example we might be looking at this:
9444     #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9445     my $saw_good_break = 0;    # flag to force breaks even if short line
9446     if (
9447
9448         # looking for opening or closing block brace
9449         $block_type_to_go[$max_index_to_go]
9450
9451         # but not one of these which are never duplicated on a line:
9452         # until|while|for|if|elsif|else
9453         && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9454       )
9455     {
9456         my $lev = $nesting_depth_to_go[$max_index_to_go];
9457         # Walk backwards from the end and set a break at any closing block
9458         # brace at the same level, but quit if we are not in a chain of
9459         # blocks.  NOTE(review): $lev comes from @nesting_depth_to_go but is
9460         # compared with @levels_to_go entries below -- confirm this is intended.
9461         for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9462             last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
9463             next if ( $levels_to_go[$i] > $lev );    # skip past higher level
9464
9465             if ( $block_type_to_go[$i] ) {
9466                 if ( $tokens_to_go[$i] eq '}' ) {
9467                     set_forced_breakpoint($i);
9468                     $saw_good_break = 1;
9469                 }
9470             }
9471
9472             # quit if we see anything besides words, function, blanks
9473             # at this level
9474             elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
9475         }
9476     }
9477
9478     my $imin = 0;
9479     my $imax = $max_index_to_go;
9480
9481     # trim one blank token from each end of the batch, if present
9482     if ( $max_index_to_go >= 0 ) {
9483         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9484         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9485     }
9486
9487     # anything left to write?
9488     if ( $imin <= $imax ) {
9489         # add a blank line before certain key types, unless the leading
9490         # token type of the previous line begins with '#' or 'b'
9491         if ( $last_line_leading_type !~ /^[#b]/ ) {
9492             my $want_blank    = 0;
9493             my $leading_token = $tokens_to_go[$imin];
9494             my $leading_type  = $types_to_go[$imin];
9495
9496             # blank lines before subs except declarations and one-liners
9497             # MCONVERSION LOCATION - for sub tokenization change
9498             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9499                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9500                   && (
9501                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9502                         $imax ) !~ /^[\;\}]$/
9503                   );
9504             }
9505
9506             # break before all package declarations (uses 'blanks-before-subs' too)
9507             # MCONVERSION LOCATION - for tokenization change
9508             elsif ($leading_token =~ /^(package\s)/
9509                 && $leading_type eq 'i' )
9510             {
9511                 $want_blank = ( $rOpts->{'blanks-before-subs'} );
9512             }
9513             # (a separate if-chain: the tests below need type 'k', those above type 'i')
9514             # break before certain key blocks except one-liners
9515             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9516                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9517                   && (
9518                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9519                         $imax ) ne '}'
9520                   );
9521             }
9522
9523             # Break before certain block types if we haven't had a
9524             # break at this level for a while.  This is the
9525             # difficult decision..
9526             elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9527                 && $leading_type eq 'k' )
9528             {
9529                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9530                 if ( !defined($lc) ) { $lc = 0 }
9531
9532                 $want_blank =
9533                      $rOpts->{'blanks-before-blocks'}
9534                   && $lc >= $rOpts->{'long-block-line-count'}
9535                   && $file_writer_object->get_consecutive_nonblank_lines() >=
9536                   $rOpts->{'long-block-line-count'}
9537                   && (
9538                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9539                         $imax ) ne '}'
9540                   );
9541             }
9542
9543             if ($want_blank) {
9544
9545                 # future: send blank line down normal path to VerticalAligner
9546                 Perl::Tidy::VerticalAligner::flush();
9547                 $file_writer_object->write_blank_code_line();
9548             }
9549         }
9550
9551         # update blank line variables and count number of consecutive
9552         # non-blank, non-comment lines at this level
9553         $last_last_line_leading_level = $last_line_leading_level;
9554         $last_line_leading_level      = $levels_to_go[$imin];
9555         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }    # clamp negative levels to 0
9556         $last_line_leading_type = $types_to_go[$imin];
9557         if (   $last_line_leading_level == $last_last_line_leading_level
9558             && $last_line_leading_type ne 'b'
9559             && $last_line_leading_type ne '#'
9560             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9561         {
9562             $nonblank_lines_at_depth[$last_line_leading_level]++;
9563         }
9564         else {
9565             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9566         }
9567
9568         FORMATTER_DEBUG_FLAG_FLUSH && do {
9569             my ( $package, $file, $line ) = caller;
9570             print
9571 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9572         };
9573
9574         # add a couple of extra terminal blank tokens
9575         pad_array_to_go();
9576
9577         # set all forced breakpoints for good list formatting
9578         my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9579
9580         if (
9581             $max_index_to_go > 0
9582             && (
9583                    $is_long_line
9584                 || $old_line_count_in_batch > 1
9585                 || is_unbalanced_batch()
9586                 || (
9587                     $comma_count_in_batch
9588                     && (   $rOpts_maximum_fields_per_table > 0
9589                         || $rOpts_comma_arrow_breakpoints == 0 )
9590                 )
9591             )
9592           )
9593         {
9594             $saw_good_break ||= scan_list();
9595         }
9596
9597         # let $ri_first and $ri_last be references to lists of
9598         # first and last tokens of line fragments to output..
9599         my ( $ri_first, $ri_last );
9600
9601         # write a single line if..
9602         if (
9603
9604             # we aren't allowed to add any newlines
9605             !$rOpts_add_newlines
9606
9607             # or, we don't already have an interior breakpoint
9608             # and we didn't see a good breakpoint
9609             || (
9610                    !$forced_breakpoint_count
9611                 && !$saw_good_break
9612
9613                 # and this line is 'short'
9614                 && !$is_long_line
9615             )
9616           )
9617         {
9618             @$ri_first = ($imin);
9619             @$ri_last  = ($imax);
9620         }
9621
9622         # otherwise use multiple lines
9623         else {
9624
9625             ( $ri_first, $ri_last, my $colon_count ) =
9626               set_continuation_breaks($saw_good_break);
9627
9628             break_all_chain_tokens( $ri_first, $ri_last );
9629
9630             break_equals( $ri_first, $ri_last );
9631
9632             # now we do a correction step to clean this up a bit
9633             # (The only time we would not do this is for debugging)
9634             if ( $rOpts->{'recombine'} ) {
9635                 ( $ri_first, $ri_last ) =
9636                   recombine_breakpoints( $ri_first, $ri_last );
9637             }
9638
9639             insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9640         }
9641
9642         # do corrector step if -lp option is used
9643         my $do_not_pad = 0;
9644         if ($rOpts_line_up_parentheses) {
9645             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9646         }
9647         send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9648     }
9649     prepare_for_new_input_lines();
9650
9651     # output any new -cscw block comment
9652     if ($cscw_block_comment) {
9653         flush();
9654         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9655     }
9656 }
9657
9658 sub note_added_semicolon {    # count and log a ';' added at $input_line_number
9659     $last_added_semicolon_at = $input_line_number;
9660     if ( $added_semicolon_count == 0 ) {    # first addition: remember where
9661         $first_added_semicolon_at = $last_added_semicolon_at;
9662     }
9663     $added_semicolon_count++;
9664     write_logfile_entry("Added ';' here\n");    # every addition is logged
9665 }
9666
9667 sub note_deleted_semicolon {    # count and log a ';' deleted at $input_line_number
9668     $last_deleted_semicolon_at = $input_line_number;
9669     if ( $deleted_semicolon_count == 0 ) {    # first deletion: remember where
9670         $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9671     }
9672     $deleted_semicolon_count++;
9673     write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
9674 }
9675
9676 sub note_embedded_tab {    # count an embedded tab seen at $input_line_number
9677     $embedded_tab_count++;
9678     $last_embedded_tab_at = $input_line_number;
9679     if ( !$first_embedded_tab_at ) {    # no earlier location recorded yet
9680         $first_embedded_tab_at = $last_embedded_tab_at;
9681     }
9682
9683     if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {    # log only the first few
9684         write_logfile_entry("Embedded tabs in quote or pattern\n");
9685     }
9686 }
9687
9688 sub starting_one_line_block {
9689
9690     # after seeing an opening curly brace, look for the closing brace
9691     # and see if the entire block will fit on a line.  This routine is
9692     # not always right because it uses the old whitespace, so a check
9693     # is made later (at the closing brace) to make sure we really
9694     # have a one-line block.  We have to do this preliminary check,
9695     # though, because otherwise we would always break at a semicolon
9696     # within a one-line block if the block contains multiple statements.
9697
9698     my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9699         $rblock_type )
9700       = @_;
9701     # ($level, $slevel and $ci_level are accepted but not used in this sub)
9702     # kill any current block - we can only go 1 deep
9703     destroy_one_line_block();
9704
9705     # return value:
9706     #  1=distance from start of block to opening brace exceeds line length,
9707     #    or the token that starts the block could not be identified
9708     #  0=otherwise
9709     my $i_start = 0;
9710     # shouldn't happen: there must have been a prior call to
9711     # store_token_to_go to put the opening brace in the output stream.
9712     # The problem is reported, but processing continues below.
9713     if ( $max_index_to_go < 0 ) {
9714         warning("program bug: store_token_to_go called incorrectly\n");
9715         report_definite_bug();
9716     }
9717     else {
9718
9719         # cannot use one-line blocks with cuddled else/elsif lines
9720         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9721             return 0;
9722         }
9723     }
9724
9725     my $block_type = $$rblock_type[$j];
9726
9727     # find the starting keyword for this block (such as 'if', 'else', ...)
9728
9729     if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9730         $i_start = $max_index_to_go;
9731     }
9732
9733     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9734
9735         # For something like "if (xxx) {", the keyword "if" will be
9736         # just after the most recent break. This will be 0 unless
9737         # we have just killed a one-line block and are starting another.
9738         # (doif.t)
9739         $i_start = $index_max_forced_break + 1;
9740         if ( $types_to_go[$i_start] eq 'b' ) {
9741             $i_start++;
9742         }
9743
9744         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9745             return 0;
9746         }
9747     }
9748
9749     # the previous nonblank token should start these block types
9750     elsif (
9751         ( $last_last_nonblank_token_to_go eq $block_type )
9752         || (   $block_type =~ /^sub/
9753             && $last_last_nonblank_token_to_go =~ /^sub/ )
9754       )
9755     {
9756         $i_start = $last_last_nonblank_index_to_go;
9757     }
9758
9759     # patch for SWITCH/CASE to retain one-line case/when blocks
9760     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9761         $i_start = $index_max_forced_break + 1;
9762         if ( $types_to_go[$i_start] eq 'b' ) {
9763             $i_start++;
9764         }
9765         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9766             return 0;
9767         }
9768     }
9769
9770     else {
9771         return 1;
9772     }
9773
9774     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9775
9776     my $i;
9777
9778     # see if length is too long to even start
9779     if ( $pos > $rOpts_maximum_line_length ) {
9780         return 1;
9781     }
9782
9783     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9784
9785         # old whitespace could be arbitrarily large, so count each blank as 1
9786         if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9787         else                              { $pos += length( $$rtokens[$i] ) }
9788
9789         # Return false result if we exceed the maximum line length,
9790         if ( $pos > $rOpts_maximum_line_length ) {
9791             return 0;
9792         }
9793
9794         # or encounter another opening brace before finding the closing brace.
9795         elsif ($$rtokens[$i] eq '{'
9796             && $$rtoken_type[$i] eq '{'
9797             && $$rblock_type[$i] )
9798         {
9799             return 0;
9800         }
9801
9802         # if we find our closing brace..
9803         elsif ($$rtokens[$i] eq '}'
9804             && $$rtoken_type[$i] eq '}'
9805             && $$rblock_type[$i] )
9806         {
9807
9808             # be sure any trailing comment also fits on the line
9809             my $i_nonblank =
9810               ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9811
9812             if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9813                 $pos += length( $$rtokens[$i_nonblank] );
9814
9815                 if ( $i_nonblank > $i + 1 ) {
9816                     $pos += length( $$rtokens[ $i + 1 ] );
9817                 }
9818
9819                 if ( $pos > $rOpts_maximum_line_length ) {
9820                     return 0;
9821                 }
9822             }
9823             # ok, it's a one-line block.  NOTE(review): the meaning of the second
9824             # argument (20 here, 1 further below) is not visible here -- confirm
9825             create_one_line_block( $i_start, 20 );
9826             return 0;
9827         }
9828
9829         # just keep going for other characters
9830         else {
9831         }
9832     }
9833
9834     # Allow certain types of new one-line blocks to form by joining
9835     # input lines.  These can be safely done, but for other block types,
9836     # we keep old one-line blocks but do not form new ones. It is not
9837     # always a good idea to make as many one-line blocks as possible,
9838     # so other types are not done.  The user can always use -mangle.
9839     if ( $is_sort_map_grep_eval{$block_type} ) {
9840         create_one_line_block( $i_start, 1 );
9841     }
9842
9843     return 0;
9844 }
9845
9846 sub unstore_token_to_go {
9847     # remove most recent token from output stream; only $max_index_to_go
9848     # is changed here, the stored "to_go" entries themselves are left alone
9849     if ( $max_index_to_go > 0 ) {
9850         $max_index_to_go--;
9851     }
9852     else {    # index was 0 or less: presumably this marks "no tokens" -- verify
9853         $max_index_to_go = UNDEFINED_INDEX;
9854     }
9855
9856 }
9857
9858 sub want_blank_line {    # flush first, then pass the request to the file writer
9859     flush();
9860     $file_writer_object->want_blank_line();
9861 }
9862
9863 sub write_unindented_line {    # flush, then hand the first argument to write_line
9864     flush();
9865     $file_writer_object->write_line( $_[0] );    # $_[0]: the line passed by the caller
9866 }
9867
9868 sub undo_ci {
9869     # Undo continuation indentation in certain sequences.  $ri_first and
9870     # $ri_last refer to lists of the first and last token index of each line.
9871     # For example, we can undo continuation indentation in sort/map/grep chains
9872     #    my $dat1 = pack( "n*",
9873     #        map { $_, $lookup->{$_} }
9874     #          sort { $a <=> $b }
9875     #          grep { $lookup->{$_} ne $default } keys %$lookup );
9876     # To align the map/sort/grep keywords like this:
9877     #    my $dat1 = pack( "n*",
9878     #        map { $_, $lookup->{$_} }
9879     #        sort { $a <=> $b }
9880     #        grep { $lookup->{$_} ne $default } keys %$lookup );
9881     my ( $ri_first, $ri_last ) = @_;
9882     my ( $line_1, $line_2, $lev_last );    # first/last line of a chain; previous line's level
9883     my $this_line_is_semicolon_terminated;
9884     my $max_line = @$ri_first - 1;
9885
9886     # looking at each line of this batch..
9887     # We are looking at leading tokens and looking for a sequence
9888     # all at the same level and higher level than enclosing lines.
9889     foreach my $line ( 0 .. $max_line ) {
9890
9891         my $ibeg = $$ri_first[$line];
9892         my $lev  = $levels_to_go[$ibeg];
9893         if ( $line > 0 ) {
9894
9895             # if we have started a chain..
9896             if ($line_1) {
9897
9898                 # see if it continues..
9899                 if ( $lev == $lev_last ) {
9900                     if (   $types_to_go[$ibeg] eq 'k'
9901                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9902                     {
9903
9904                         # chain continues...
9905                         # check for chain ending at end of a statement
9906                         if ( $line == $max_line ) {
9907
9908                             # see if this line ends a statement
9909                             my $iend = $$ri_last[$line];
9910                             $this_line_is_semicolon_terminated =
9911                               $types_to_go[$iend] eq ';'
9912
9913                               # with possible side comment
9914                               || ( $types_to_go[$iend] eq '#'
9915                                 && $iend - $ibeg >= 2
9916                                 && $types_to_go[ $iend - 2 ] eq ';'
9917                                 && $types_to_go[ $iend - 1 ] eq 'b' );
9918                         }
9919                         $line_2 = $line if ($this_line_is_semicolon_terminated);
9920                     }
9921                     else {
9922
9923                         # kill chain
9924                         $line_1 = undef;
9925                     }
9926                 }
9927                 elsif ( $lev < $lev_last ) {
9928
9929                     # chain ends with previous line
9930                     $line_2 = $line - 1;
9931                 }
9932                 elsif ( $lev > $lev_last ) {
9933
9934                     # kill chain
9935                     $line_1 = undef;
9936                 }
9937                 # undo the continuation indentation if a chain ends: zero the ci
9938                 # level and use the reduced spaces for each line's first token
9939                 if ( defined($line_2) && defined($line_1) ) {
9940                     my $continuation_line_count = $line_2 - $line_1 + 1;
9941                     @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9942                       (0) x ($continuation_line_count);
9943                     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9944                       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
9945                     $line_1 = undef;    # NOTE(review): $line_2 is not reset here
9946                 }
9947             }
9948
9949             # not in a chain yet..
9950             else {
9951
9952                 # look for start of a new sort/map/grep chain
9953                 if ( $lev > $lev_last ) {
9954                     if (   $types_to_go[$ibeg] eq 'k'
9955                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9956                     {
9957                         $line_1 = $line;
9958                     }
9959                 }
9960             }
9961         }
9962         $lev_last = $lev;
9963     }
9964 }
9965
9966 sub undo_lp_ci {
9967
9968     # If there is a single, long parameter within parens, like this:
9969     #
9970     #  $self->command( "/msg "
9971     #        . $infoline->chan
9972     #        . " You said $1, but did you know that it's square was "
9973     #        . $1 * $1 . " ?" );
9974     #
9975     # we can remove the continuation indentation of the 2nd and higher lines
9976     # to achieve this effect, which is more pleasing:
9977     #
9978     #  $self->command("/msg "
9979     #                 . $infoline->chan
9980     #                 . " You said $1, but did you know that it's square was "
9981     #                 . $1 * $1 . " ?");
9982     # $line_open indexes @$ri_first; $i_start and $closing_index are token indexes
9983     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9984     my $max_line = @$ri_first - 1;
9985
9986     # there must be at least one line after line $line_open
9987     return unless $max_line > $line_open;
9988
9989     my $lev_start     = $levels_to_go[$i_start];
9990     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9991
9992     # see if all additional lines in this container have the same level and one
9993     # more level of continuation indentation than token $i_start; return if not
9994     my $n;
9995     my $line_1 = 1 + $line_open;
9996     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9997         my $ibeg = $$ri_first[$n];
9998         my $iend = $$ri_last[$n];
9999         if ( $ibeg eq $closing_index ) { $n--; last }    # line begins with closing token: exclude it; NOTE(review): string 'eq' on an index
10000         return if ( $lev_start != $levels_to_go[$ibeg] );
10001         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10002         last   if ( $closing_index <= $iend );
10003     }
10004
10005     # we can reduce the indentation of all continuation lines
10006     my $continuation_line_count = $n - $line_open;
10007     @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10008       (0) x ($continuation_line_count);
10009     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10010       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10011 }
10012
10013 sub set_logical_padding {
10014
10015     # Look at a batch of lines and see if extra padding can improve the
10016     # alignment when there are certain leading operators. Here is an
10017     # example, in which some extra space is introduced before
10018     # '( $year' to make it line up with the subsequent lines:
10019     #
10020     #       if (   ( $Year < 1601 )
10021     #           || ( $Year > 2899 )
10022     #           || ( $EndYear < 1601 )
10023     #           || ( $EndYear > 2899 ) )
10024     #       {
10025     #           &Error_OutOfRange;
10026     #       }
10027     #
10028     my ( $ri_first, $ri_last ) = @_;
10029     my $max_line = @$ri_first - 1;
10030
10031     my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
10032         $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10033
10034     # looking at each line of this batch..
10035     foreach $line ( 0 .. $max_line - 1 ) {
10036
10037         # see if the next line begins with a logical operator
10038         $ibeg      = $$ri_first[$line];
10039         $iend      = $$ri_last[$line];
10040         $ibeg_next = $$ri_first[ $line + 1 ];
10041         $tok_next  = $tokens_to_go[$ibeg_next];
10042         $type_next = $types_to_go[$ibeg_next];
10043
10044         $has_leading_op_next = ( $tok_next =~ /^\w/ )
10045           ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
10046           : $is_chain_operator{$type_next};    # and, or
10047
10048         next unless ($has_leading_op_next);
10049
10050         # next line must not be at lesser depth
10051         next
10052           if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
10053
10054         # identify the token in this line to be padded on the left
10055         $ipad = undef;
10056
10057         # handle lines at same depth...
10058         if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
10059
10060             # if this is not first line of the batch ...
10061             if ( $line > 0 ) {
10062
10063                 # and we have leading operator..
10064                 next if $has_leading_op;
10065
10066                 # Introduce padding if..
10067                 # 1. the previous line is at lesser depth, or
10068                 # 2. the previous line ends in an assignment
10069                 # 3. the previous line ends in a 'return'
10070                 # 4. the previous line ends in a comma
10071                 # Example 1: previous line at lesser depth
10072                 #       if (   ( $Year < 1601 )      # <- we are here but
10073                 #           || ( $Year > 2899 )      #  list has not yet
10074                 #           || ( $EndYear < 1601 )   # collapsed vertically
10075                 #           || ( $EndYear > 2899 ) )
10076                 #       {
10077                 #
10078                 # Example 2: previous line ending in assignment:
10079                 #    $leapyear =
10080                 #        $year % 4   ? 0     # <- We are here
10081                 #      : $year % 100 ? 1
10082                 #      : $year % 400 ? 0
10083                 #      : 1;
10084                 #
10085                 # Example 3: previous line ending in comma:
10086                 #    push @expr,
10087                 #        /test/   ? undef
10088                 #      : eval($_) ? 1
10089                 #      : eval($_) ? 1
10090                 #      :            0;
10091
10092                 # be sure levels agree (do not indent after an indented 'if')
10093                 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10094
10095                 # allow padding on first line after a comma but only if:
10096                 # (1) this is line 2 and
10097                 # (2) there are at more than three lines and
10098                 # (3) lines 3 and 4 have the same leading operator
10099                 # These rules try to prevent padding within a long
10100                 # comma-separated list.
10101                 my $ok_comma;
10102                 if (   $types_to_go[$iendm] eq ','
10103                     && $line == 1
10104                     && $max_line > 2 )
10105                 {
10106                     my $ibeg_next_next = $$ri_first[ $line + 2 ];
10107                     my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
10108                     $ok_comma = $tok_next_next eq $tok_next;
10109                 }
10110
10111                 next
10112                   unless (
10113                        $is_assignment{ $types_to_go[$iendm] }
10114                     || $ok_comma
10115                     || ( $nesting_depth_to_go[$ibegm] <
10116                         $nesting_depth_to_go[$ibeg] )
10117                     || (   $types_to_go[$iendm] eq 'k'
10118                         && $tokens_to_go[$iendm] eq 'return' )
10119                   );
10120
10121                 # we will add padding before the first token
10122                 $ipad = $ibeg;
10123             }
10124
10125             # for first line of the batch..
10126             else {
10127
10128                 # WARNING: Never indent if first line is starting in a
10129                 # continued quote, which would change the quote.
10130                 next if $starting_in_quote;
10131
10132                 # if this is text after closing '}'
10133                 # then look for an interior token to pad
10134                 if ( $types_to_go[$ibeg] eq '}' ) {
10135
10136                 }
10137
10138                 # otherwise, we might pad if it looks really good
10139                 else {
10140
10141                     # we might pad token $ibeg, so be sure that it
10142                     # is at the same depth as the next line.
10143                     next
10144                       if ( $nesting_depth_to_go[$ibeg] !=
10145                         $nesting_depth_to_go[$ibeg_next] );
10146
10147                     # We can pad on line 1 of a statement if at least 3
10148                     # lines will be aligned. Otherwise, it
10149                     # can look very confusing.
10150
10151                  # We have to be careful not to pad if there are too few
10152                  # lines.  The current rule is:
10153                  # (1) in general we require at least 3 consecutive lines
10154                  # with the same leading chain operator token,
10155                  # (2) but an exception is that we only require two lines
10156                  # with leading colons if there are no more lines.  For example,
10157                  # the first $i in the following snippet would get padding
10158                  # by the second rule:
10159                  #
10160                  #   $i == 1 ? ( "First", "Color" )
10161                  # : $i == 2 ? ( "Then",  "Rarity" )
10162                  # :           ( "Then",  "Name" );
10163
10164                     if ( $max_line > 1 ) {
10165                         my $leading_token = $tokens_to_go[$ibeg_next];
10166                         my $tokens_differ;
10167
10168                         # never indent line 1 of a '.' series because
10169                         # previous line is most likely at same level.
10170                         # TODO: we should also look at the leasing_spaces
10171                         # of the last output line and skip if it is same
10172                         # as this line.
10173                         next if ( $leading_token eq '.' );
10174
10175                         my $count = 1;
10176                         foreach my $l ( 2 .. 3 ) {
10177                             last if ( $line + $l > $max_line );
10178                             my $ibeg_next_next = $$ri_first[ $line + $l ];
10179                             if ( $tokens_to_go[$ibeg_next_next] ne
10180                                 $leading_token )
10181                             {
10182                                 $tokens_differ = 1;
10183                                 last;
10184                             }
10185                             $count++;
10186                         }
10187                         next if ($tokens_differ);
10188                         next if ( $count < 3 && $leading_token ne ':' );
10189                         $ipad = $ibeg;
10190                     }
10191                     else {
10192                         next;
10193                     }
10194                 }
10195             }
10196         }
10197
10198         # find interior token to pad if necessary
10199         if ( !defined($ipad) ) {
10200
10201             for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
10202
10203                 # find any unclosed container
10204                 next
10205                   unless ( $type_sequence_to_go[$i]
10206                     && $mate_index_to_go[$i] > $iend );
10207
10208                 # find next nonblank token to pad
10209                 $ipad = $i + 1;
10210                 if ( $types_to_go[$ipad] eq 'b' ) {
10211                     $ipad++;
10212                     last if ( $ipad > $iend );
10213                 }
10214             }
10215             last unless $ipad;
10216         }
10217
10218         # next line must not be at greater depth
10219         my $iend_next = $$ri_last[ $line + 1 ];
10220         next
10221           if ( $nesting_depth_to_go[ $iend_next + 1 ] >
10222             $nesting_depth_to_go[$ipad] );
10223
10224         # lines must be somewhat similar to be padded..
10225         my $inext_next = $ibeg_next + 1;
10226         if ( $types_to_go[$inext_next] eq 'b' ) {
10227             $inext_next++;
10228         }
10229         my $type      = $types_to_go[$ipad];
10230         my $type_next = $types_to_go[ $ipad + 1 ];
10231
10232         # see if there are multiple continuation lines
10233         my $logical_continuation_lines = 1;
10234         if ( $line + 2 <= $max_line ) {
10235             my $leading_token  = $tokens_to_go[$ibeg_next];
10236             my $ibeg_next_next = $$ri_first[ $line + 2 ];
10237             if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
10238                 && $nesting_depth_to_go[$ibeg_next] eq
10239                 $nesting_depth_to_go[$ibeg_next_next] )
10240             {
10241                 $logical_continuation_lines++;
10242             }
10243         }
10244
10245         # see if leading types match
10246         my $types_match = $types_to_go[$inext_next] eq $type;
10247         my $matches_without_bang;
10248
10249         # if first line has leading ! then compare the following token
10250         if ( !$types_match && $type eq '!' ) {
10251             $types_match = $matches_without_bang =
10252               $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
10253         }
10254
10255         if (
10256
10257             # either we have multiple continuation lines to follow
10258             # and we are not padding the first token
10259             ( $logical_continuation_lines > 1 && $ipad > 0 )
10260
10261             # or..
10262             || (
10263
10264                 # types must match
10265                 $types_match
10266
10267                 # and keywords must match if keyword
10268                 && !(
10269                        $type eq 'k'
10270                     && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
10271                 )
10272             )
10273           )
10274         {
10275
10276             #----------------------begin special checks--------------
10277             #
10278             # SPECIAL CHECK 1:
10279             # A check is needed before we can make the pad.
10280             # If we are in a list with some long items, we want each
10281             # item to stand out.  So in the following example, the
10282             # first line beginning with '$casefold->' would look good
10283             # padded to align with the next line, but then it
10284             # would be indented more than the last line, so we
10285             # won't do it.
10286             #
10287             #  ok(
10288             #      $casefold->{code}         eq '0041'
10289             #        && $casefold->{status}  eq 'C'
10290             #        && $casefold->{mapping} eq '0061',
10291             #      'casefold 0x41'
10292             #  );
10293             #
10294             # Note:
10295             # It would be faster, and almost as good, to use a comma
10296             # count, and not pad if comma_count > 1 and the previous
10297             # line did not end with a comma.
10298             #
10299             my $ok_to_pad = 1;
10300
10301             my $ibg   = $$ri_first[ $line + 1 ];
10302             my $depth = $nesting_depth_to_go[ $ibg + 1 ];
10303
10304             # just use simplified formula for leading spaces to avoid
10305             # needless sub calls
10306             my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
10307
10308             # look at each line beyond the next ..
10309             my $l = $line + 1;
10310             foreach $l ( $line + 2 .. $max_line ) {
10311                 my $ibg = $$ri_first[$l];
10312
10313                 # quit looking at the end of this container
10314                 last
10315                   if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
10316                   || ( $nesting_depth_to_go[$ibg] < $depth );
10317
10318                 # cannot do the pad if a later line would be
10319                 # outdented more
10320                 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
10321                     $ok_to_pad = 0;
10322                     last;
10323                 }
10324             }
10325
10326             # don't pad if we end in a broken list
10327             if ( $l == $max_line ) {
10328                 my $i2 = $$ri_last[$l];
10329                 if ( $types_to_go[$i2] eq '#' ) {
10330                     my $i1 = $$ri_first[$l];
10331                     next
10332                       if (
10333                         terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10334                             $i2 ) eq ','
10335                       );
10336                 }
10337             }
10338
10339             # SPECIAL CHECK 2:
10340             # a minus may introduce a quoted variable, and we will
10341             # add the pad only if this line begins with a bare word,
10342             # such as for the word 'Button' here:
10343             #    [
10344             #         Button      => "Print letter \"~$_\"",
10345             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10346             #        -accelerator => "Meta+$_"
10347             #    ];
10348             #
10349             #  On the other hand, if 'Button' is quoted, it looks best
10350             #  not to pad:
10351             #    [
10352             #        'Button'     => "Print letter \"~$_\"",
10353             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10354             #        -accelerator => "Meta+$_"
10355             #    ];
10356             if ( $types_to_go[$ibeg_next] eq 'm' ) {
10357                 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10358             }
10359
10360             next unless $ok_to_pad;
10361
10362             #----------------------end special check---------------
10363
10364             my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
10365             my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10366             $pad_spaces = $length_2 - $length_1;
10367
10368             # If the first line has a leading ! and the second does
10369             # not, then remove one space to try to align the next
10370             # leading characters, which are often the same.  For example:
10371             #  if (  !$ts
10372             #      || $ts == $self->Holder
10373             #      || $self->Holder->Type eq "Arena" )
10374             #
10375             # This usually helps readability, but if there are subsequent
10376             # ! operators things will still get messed up.  For example:
10377             #
10378             #  if (  !exists $Net::DNS::typesbyname{$qtype}
10379             #      && exists $Net::DNS::classesbyname{$qtype}
10380             #      && !exists $Net::DNS::classesbyname{$qclass}
10381             #      && exists $Net::DNS::typesbyname{$qclass} )
10382             # We can't fix that.
10383             if ($matches_without_bang) { $pad_spaces-- }
10384
10385             # make sure this won't change if -lp is used
10386             my $indentation_1 = $leading_spaces_to_go[$ibeg];
10387             if ( ref($indentation_1) ) {
10388                 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10389                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10390                     unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10391                         $pad_spaces = 0;
10392                     }
10393                 }
10394             }
10395
10396             # we might be able to handle a pad of -1 by removing a blank
10397             # token
10398             if ( $pad_spaces < 0 ) {
10399
10400                 if ( $pad_spaces == -1 ) {
10401                     if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10402                         $tokens_to_go[ $ipad - 1 ] = '';
10403                     }
10404                 }
10405                 $pad_spaces = 0;
10406             }
10407
10408             # now apply any padding for alignment
10409             if ( $ipad >= 0 && $pad_spaces ) {
10410
10411                 my $length_t = total_line_length( $ibeg, $iend );
10412                 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10413                     $tokens_to_go[$ipad] =
10414                       ' ' x $pad_spaces . $tokens_to_go[$ipad];
10415                 }
10416             }
10417         }
10418     }
10419     continue {
10420         $iendm          = $iend;
10421         $ibegm          = $ibeg;
10422         $has_leading_op = $has_leading_op_next;
10423     }    # end of loop over lines
10424     return;
10425 }
10426
sub correct_lp_indentation {

    # Corrector pass for the -lp option.
    #
    # With -lp, indentation is first *predicted* so that continuation lines
    # line up with an opening paren, brace or bracket.  The prediction is
    # usually good but sometimes stumbles, so once the real positions of
    # the opening tokens in this batch are known we make one more pass and
    # patch up each indentation object.
    #
    # Given:
    #   $ri_first - ref to array of the index of the first token of each
    #               output line of this batch
    #   $ri_last  - ref to array of the index of the last token of each
    #               output line of this batch
    #
    # Returns the flag $do_not_pad, which is set to 1 when an aligned-paren
    # indentation item handled here sits at the very start of the second
    # line (line index 1) of the batch; otherwise it is 0.
    #
    # About $do_not_pad:  if the vertical aligner pads the first line, for
    # example to align its '=' with a previous '=', the opening '(' gets
    # pushed to the right of where the following lines were indented:
    #
    #  $mkFloor::currentRoom = '';
    #  $mkFloor::c_entry     = $c->Entry(
    #                                 -width        => '10',
    #                                 -relief       => 'sunken',
    #                                 ...
    #                                 );
    #
    # The flag is only passed back; the aligner decides what to do with it.
    my ( $ri_first, $ri_last ) = @_;

    my $do_not_pad = 0;
    my $max_line   = $#{$ri_first};

    # visit every output line of this batch ..
    foreach my $line ( 0 .. $max_line ) {
        my $ibeg = $ri_first->[$line];
        my $iend = $ri_last->[$line];

        # .. and every token of that line
        foreach my $i ( $ibeg .. $iend ) {

            # each indentation object is processed only once, at the
            # first token which refers to it
            my $indentation = $leading_spaces_to_go[$i];
            next if ( $indentation->get_MARKED() );
            $indentation->set_MARKED(1);

            # only objects which align with parens, braces and brackets
            # are of interest
            next unless ( $indentation->get_ALIGN_PAREN() );

            # nothing to fix if the container just before this token
            # (ignoring one blank) is already closed on this line
            if ( $i > $ibeg ) {
                my $i_opening = $i - 1;
                if ( $i_opening > $ibeg && $types_to_go[$i_opening] eq 'b' ) {
                    $i_opening--;
                }
                next
                  if ( $type_sequence_to_go[$i_opening]
                    && $mate_index_to_go[$i_opening] <= $iend );
            }

            $do_not_pad = 1 if ( $line == 1 && $i == $ibeg );

            # Compare where the token really is with where it was
            # predicted to be
            my $predicted_pos = $indentation->get_SPACES();
            my $actual_pos;
            if ( $i > $ibeg ) {

                # mid-line token: measure up to the previous token
                $actual_pos = total_line_length( $ibeg, $i - 1 );

                # If the container stays open past this line and the next
                # line carries continuation indentation, have undo_lp_ci
                # deal with it; otherwise we do not get good alignment.
                my $i_closing = $indentation->get_CLOSED();
                if ( $i_closing > $iend ) {
                    my $ibeg_next = $ri_first->[ $line + 1 ];
                    undo_lp_ci( $line, $i, $i_closing, $ri_first, $ri_last )
                      if ( $ci_levels_to_go[$ibeg_next] > 0 );
                }
            }
            elsif ( $line > 0 ) {

                # token begins a line other than the first: measure the
                # whole previous line ..
                my $i_first_prev = $ri_first->[ $line - 1 ];
                my $i_last_prev  = $ri_last->[ $line - 1 ];
                $actual_pos = total_line_length( $i_first_prev, $i_last_prev );

                # .. plus one if a blank token follows it (follow -pt style)
                $actual_pos++ if ( $types_to_go[ $i_last_prev + 1 ] eq 'b' );
            }
            else {

                # first token of the first line of the batch: there is
                # nothing to measure against, so accept the prediction
                $actual_pos = $predicted_pos;
            }

            my $move_right = $actual_pos - $predicted_pos;

            # the prediction was exact (gnu2.t)
            if ( $move_right == 0 ) {
                $indentation->set_RECOVERABLE_SPACES($move_right);
                next;
            }

            # A negative closing index is treated as 'closure not seen in
            # this batch'; in that case all we can do is pass the request
            # on to the vertical aligner.
            my $closing_index = $indentation->get_CLOSED();
            if ( $closing_index < 0 ) {
                $indentation->set_RECOVERABLE_SPACES($move_right);
                next;
            }

            # Moving left is always safe when nothing depends on this
            # whitespace, so the look-ahead below is needed only if there
            # may be dependent nodes or if we have to move right.  It
            # collects every distinct indentation object used by the lines
            # inside the container and finds the longest of those lines.
            my $have_child      = $indentation->get_HAVE_CHILD();
            my %saw_indentation = ( $indentation => $indentation );
            my $line_count      = 1;
            my $right_margin    = 0;

            if ( $have_child || $move_right > 0 ) {
                my $max_length =
                  ( $i == $ibeg ) ? total_line_length( $ibeg, $iend ) : 0;

                foreach my $line_t ( $line + 1 .. $max_line ) {
                    my $ibeg_t = $ri_first->[$line_t];

                    # stop at the line which begins at or beyond the
                    # closing token
                    last if ( $closing_index <= $ibeg_t );

                    my $indentation_t = $leading_spaces_to_go[$ibeg_t];
                    $saw_indentation{$indentation_t} = $indentation_t;
                    $line_count++;

                    my $length_t =
                      total_line_length( $ibeg_t, $ri_last->[$line_t] );
                    $max_length = $length_t if ( $length_t > $max_length );
                }

                # room left before the longest line hits the line limit
                $right_margin = $rOpts_maximum_line_length - $max_length;
                $right_margin = 0 if ( $right_margin < 0 );
            }

            # Simple approximate test for vertical alignment: if this
            # token starts its line, the line holds two or more commas,
            # only one indentation object is involved, and there are
            # either no '=>'s or one per line, then the list is probably
            # vertically aligned.  An exact flag could be set in sub
            # scan_list, but this is good enough.
            my $first_line_comma_count =
              scalar( grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ] );
            my $comma_count       = $indentation->get_COMMA_COUNT();
            my $arrow_count       = $indentation->get_ARROW_COUNT();
            my $indentation_count = scalar( keys %saw_indentation );

            my $is_vertically_aligned =
              (      $i == $ibeg
                  && $first_line_comma_count > 1
                  && $indentation_count == 1
                  && ( $arrow_count == 0 || $arrow_count == $line_count ) );

            # We can always move left, but we should only move right if
            # we are sure it will not spoil vertical alignment.
            my $ok_to_move =
              (      $move_right < 0
                  || $comma_count == 0
                  || ( $comma_count > 0 && !$is_vertically_aligned ) );

            # If we cannot move, record what we want and let the vertical
            # aligner try to recover it.
            if ( !$ok_to_move ) {
                $indentation->set_RECOVERABLE_SPACES($move_right);
                next;
            }

            # Make the move, limited by the available right margin, on
            # every indentation object seen in this container.
            my $move = $move_right;
            if ( $move > $right_margin ) { $move = $right_margin }

            foreach my $item ( values %saw_indentation ) {
                $item->permanently_decrease_AVAILABLE_SPACES( -$move );
            }
        }
    }
    return $do_not_pad;
}
10649
10650 # flush is called to output any tokens in the pipeline, so that
10651 # an alternate source of lines can be written in the correct order
10652
sub flush {

    # Empty the formatter pipeline in order: first give up on any
    # one-line block being built, then write out the tokens waiting in
    # the current batch, and finally flush the vertical aligner.  The
    # result of the vertical aligner's flush is passed back unchanged.
    destroy_one_line_block();
    output_line_to_go();
    return Perl::Tidy::VerticalAligner::flush();
}
10658
sub reset_block_text_accumulator {

    # Stop accumulating leading block text and return all of the
    # accumulator variables to their idle values.
    #
    # If the text was being collected for an 'if' or 'elsif', it is first
    # pushed onto the list of if/elsif texts so that it can be appended
    # after a later 'else'.
    if (   $accumulating_text_for_block
        && $accumulating_text_for_block =~ /^(if|elsif)$/ )
    {
        push @{$rleading_block_if_elsif_text}, $leading_block_text;
    }

    # text state
    $leading_block_text          = "";
    $accumulating_text_for_block = "";

    # numeric state
    $leading_block_text_line_length     = 0;
    $leading_block_text_line_number     = 0;
    $leading_block_text_length_exceeded = 0;
    $leading_block_text_level           = 0;
}
10675
sub set_block_text_accumulator {

    # Begin accumulating leading block text at token index $i, which is
    # the index of the keyword that introduces the block.
    my ($i) = @_;

    my $keyword = $tokens_to_go[$i];
    my $level   = $levels_to_go[$i];

    $accumulating_text_for_block = $keyword;

    # a keyword which does not start with 'els' begins a new chain, so
    # any saved if/elsif texts are discarded
    $rleading_block_if_elsif_text = [] unless ( $keyword =~ /^els/ );

    $leading_block_text                 = "";
    $leading_block_text_length_exceeded = 0;
    $leading_block_text_level           = $level;
    $leading_block_text_line_number =
      $vertical_aligner_object->get_output_line_number();

    # this will contain the column number of the last character
    # of the closing side comment
    my $prefix_length = length( $rOpts->{'closing-side-comment-prefix'} );
    my $indent_length = $level * $rOpts_indent_columns;
    $leading_block_text_line_length =
      length($keyword) + $prefix_length + $indent_length + 3;
}
10695
sub accumulate_block_text {

    # Append the text of token $i to the leading block text being
    # collected for -csc.  Side comments are ignored, and nothing is done
    # when no text is being accumulated or when the text has already been
    # truncated.
    my ($i) = @_;

    if (   $accumulating_text_for_block
        && !$leading_block_text_length_exceeded
        && $types_to_go[$i] ne '#' )
    {
        my $token          = $tokens_to_go[$i];
        my $starts_newline = ( $i == 0 );

        # a token at index 0 costs one extra character for the space
        # which is added at each newline
        my $added_length = length($token);
        $added_length++ if ($starts_newline);

        my $text_length     = length($leading_block_text);
        my $new_line_length = $leading_block_text_line_length + $added_length;

        # The token fits if the text length limit has not already been
        # reached, and either the new total line length stays below the
        # line length limit or the new text length stays below the text
        # length limit (ie, one token may run past the text length limit).
        my $fits =
          (
                 $text_length < $rOpts_closing_side_comment_maximum_text
              && (
                     $new_line_length < $rOpts_maximum_line_length
                  || $text_length + $added_length <
                  $rOpts_closing_side_comment_maximum_text
              )
          );

        # Exception: always take a closing paren when one of the next two
        # tokens opens the block we seek.  This is an attempt to avoid
        # situations where the ... to be added are longer than the omitted
        # right paren, as in:
        #
        #   foreach my $item (@a_rather_long_variable_name_here) {
        #      &whatever;
        #   } ## end foreach my $item (@a_rather_long_variable_name_here...
        my $is_paren_before_brace = 0;
        if ( !$fits && $token eq ')' ) {
            foreach my $i_next ( $i + 1, $i + 2 ) {
                last if ( $i_next > $max_index_to_go );
                if ( $block_type_to_go[$i_next] eq
                    $accumulating_text_for_block )
                {
                    $is_paren_before_brace = 1;
                    last;
                }
            }
        }

        if ( $fits || $is_paren_before_brace ) {

            # the token text, preceded by a space at each newline
            $leading_block_text .= ' ' if ($starts_newline);
            $leading_block_text .= $token;
            $leading_block_text_line_length = $new_line_length;
        }

        # otherwise show that text was truncated, unless the token is
        # just a blank
        elsif ( $types_to_go[$i] ne 'b' ) {
            $leading_block_text_length_exceeded = 1;
            $leading_block_text .= '...';
        }
    }
}
10763
{

    # Lookup table of the block keywords for which leading text may be
    # accumulated; it is private to sub accumulate_csc_text.
    my %is_if_elsif_else_unless_while_until_for_foreach;

    BEGIN {

        # These block types may have text between the keyword and opening
        # curly.  Note: 'else' does not, but must be included to allow trailing
        # if/elsif text to be appended.
        # patch for SWITCH/CASE: added 'case' and 'when'
        # Note: a lexical list is used here instead of the global @_ so
        # that building this table does not overwrite @_.
        my @block_keywords =
          qw(if elsif else unless while until for foreach case when);
        @is_if_elsif_else_unless_while_until_for_foreach{@block_keywords} =
          (1) x scalar(@block_keywords);
    }

    sub accumulate_csc_text {

        # called once per output buffer when -csc is used. Accumulates
        # the text placed after certain closing block braces.
        #
        # Takes no arguments; it scans tokens 0 .. $max_index_to_go of the
        # current batch.
        #
        # Returns the list
        #   ( $terminal_type, $i_terminal, $i_block_leading_text,
        #     $block_leading_text, $block_line_count )
        # where
        #   $terminal_type        = type of the last token which is neither
        #                           a blank nor a comment ('b' if none)
        #   $i_terminal           = index of that token (0 if none)
        #   $i_block_leading_text = index of the last '}' for which saved
        #                           leading text was restored, or -1
        #   $block_leading_text   = that leading text; if the terminal token
        #                           has an 'els..' block type and if/elsif
        #                           text is available, this is the result of
        #                           make_else_csc_text instead
        #   $block_line_count     = number of output lines spanned by the
        #                           last closing block brace whose opening
        #                           line number is known (100 if none)
        #
        # Defines and returns the following for this buffer:

        my $block_leading_text = "";    # the leading text of the last '}'
        my $rblock_leading_if_elsif_text;
        my $i_block_leading_text =
          -1;    # index of token owning block_leading_text
        my $block_line_count    = 100;    # how many lines the block spans
        my $terminal_type       = 'b';    # type of last nonblank token
        my $i_terminal          = 0;      # index of last nonblank token
        my $terminal_block_type = "";

        for my $i ( 0 .. $max_index_to_go ) {
            my $type       = $types_to_go[$i];
            my $block_type = $block_type_to_go[$i];
            my $token      = $tokens_to_go[$i];

            # remember last nonblank token type
            if ( $type ne '#' && $type ne 'b' ) {
                $terminal_type       = $type;
                $terminal_block_type = $block_type;
                $i_terminal          = $i;
            }

            my $type_sequence = $type_sequence_to_go[$i];
            if ( $block_type && $type_sequence ) {

                if ( $token eq '}' ) {

                    # restore any leading text saved when we entered this block
                    if ( defined( $block_leading_text{$type_sequence} ) ) {
                        ( $block_leading_text, $rblock_leading_if_elsif_text ) =
                          @{ $block_leading_text{$type_sequence} };
                        $i_block_leading_text = $i;
                        delete $block_leading_text{$type_sequence};
                        $rleading_block_if_elsif_text =
                          $rblock_leading_if_elsif_text;
                    }

                    # if we run into a '}' then we probably started accumulating
                    # at something like a trailing 'if' clause..no harm done.
                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] <= $leading_block_text_level )
                    {
                        reset_block_text_accumulator();
                    }

                    # the line count runs from the line recorded at the
                    # opening brace through the current output line
                    if ( defined( $block_opening_line_number{$type_sequence} ) )
                    {
                        my $output_line_number =
                          $vertical_aligner_object->get_output_line_number();
                        $block_line_count =
                          $output_line_number -
                          $block_opening_line_number{$type_sequence} + 1;
                        delete $block_opening_line_number{$type_sequence};
                    }
                    else {

                        # Error: block opening line undefined for this line..
                        # This shouldn't be possible, but it is not a
                        # significant problem.
                    }
                }

                elsif ( $token eq '{' ) {

                    # remember the output line on which this block opens
                    my $line_number =
                      $vertical_aligner_object->get_output_line_number();
                    $block_opening_line_number{$type_sequence} = $line_number;

                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] == $leading_block_text_level )
                    {

                        if ( $accumulating_text_for_block eq $block_type ) {

                            # save any leading text before we enter this block;
                            # the opening line is moved back to the line on
                            # which accumulation of the text began
                            $block_leading_text{$type_sequence} = [
                                $leading_block_text,
                                $rleading_block_if_elsif_text
                            ];
                            $block_opening_line_number{$type_sequence} =
                              $leading_block_text_line_number;
                            reset_block_text_accumulator();
                        }
                        else {

                            # shouldn't happen, but not a serious error.
                            # We were accumulating -csc text for block type
                            # $accumulating_text_for_block and unexpectedly
                            # encountered a '{' for block type $block_type.
                        }
                    }
                }
            }

            # Start accumulating at a selected block keyword which begins a
            # new statement.
            # Note: the pattern is interpolated WITHOUT the /o modifier.
            # With /o the regex is compiled only once per process, so a
            # later change to $closing_side_comment_list_pattern would be
            # silently ignored.  The variable is set outside of this block;
            # presumably it can differ between runs made in one process.
            if (   $type eq 'k'
                && $csc_new_statement_ok
                && $is_if_elsif_else_unless_while_until_for_foreach{$token}
                && $token =~ /$closing_side_comment_list_pattern/ )
            {
                set_block_text_accumulator($i);
            }
            else {

                # note: ignoring type 'q' because of tricks being played
                # with 'q' for hanging side comments
                if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
                    $csc_new_statement_ok =
                      ( $block_type || $type eq 'J' || $type eq ';' );
                }

                # a ';' at the level of the keyword ends the accumulation;
                # any other token is offered to the accumulator
                if (   $type eq ';'
                    && $accumulating_text_for_block
                    && $levels_to_go[$i] == $leading_block_text_level )
                {
                    reset_block_text_accumulator();
                }
                else {
                    accumulate_block_text($i);
                }
            }
        }

        # Treat an 'else' block specially by adding preceding 'if' and
        # 'elsif' text.  Otherwise, the 'end else' is not helpful,
        # especially for cuddled-else formatting.
        if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
            $block_leading_text =
              make_else_csc_text( $i_terminal, $terminal_block_type,
                $block_leading_text, $rblock_leading_if_elsif_text );
        }

        return ( $terminal_type, $i_terminal, $i_block_leading_text,
            $block_leading_text, $block_line_count );
    }
}
10917
sub make_else_csc_text {

    # Build the extra closing side comment (-csc) text for the closing brace
    # of an 'else' block, and optionally of an 'elsif' block.  How much is
    # added is controlled by $rOpts_closing_side_comment_else_flag:
    #
    #  = 0 append the leading 'if' text to a trailing 'else' only
    #  = 1 same as 0 plus, provided the result fits in the line length:
    #        - append the leading 'if' text to an 'elsif'
    #        - append the last 'elsif' text to a trailing 'else'
    #  = 2 same as 1 but without the line length check
    #
    # Parameters:
    #   $i_terminal         - index of the terminal token; only used to look
    #                         up the level for the line length estimate
    #   $block_type         - type of the block being closed
    #   $block_leading_text - csc text already collected for this block
    #   $rif_elsif_text     - reference to a list of the closing side comment
    #                         texts already created for this if-chain; item 0
    #                         is used as the 'if' text and the last item as
    #                         the most recent 'elsif' text
    #
    # Returns the (possibly extended) closing side comment text.
    my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;

    my $else_flag = $rOpts_closing_side_comment_else_flag;
    my $csc_text  = $block_leading_text;

    # an 'elsif' gets nothing extra at the lowest setting
    return $csc_text if ( $block_type eq 'elsif' && $else_flag == 0 );

    # nothing can be added if no earlier text was saved for this chain
    my @earlier_text = @{$rif_elsif_text};
    return $csc_text unless (@earlier_text);

    my $is_else = ( $block_type eq 'else' );
    my $if_text = '[ if' . $earlier_text[0];

    # an 'else' always shows the leading 'if' text
    $csc_text .= $if_text if ($is_else);

    # the lowest setting stops here
    return $csc_text if ( $else_flag == 0 );

    # remember the text so far in case the next item does not fit
    my $text_without_extra = $csc_text;

    # tentatively append one more item:
    if ($is_else) {

        # ..the last 'elsif' text, marked with '[...' if any 'elsif's
        # between it and the 'if' are being left out
        my $last_elsif_text = "";
        if ( @earlier_text > 1 ) {
            $last_elsif_text = ' [elsif' . $earlier_text[-1];
            if ( @earlier_text > 2 ) {
                $last_elsif_text = ' [...' . $last_elsif_text;
            }
        }
        $csc_text .= $last_elsif_text;
    }
    else {

        # ..or the leading 'if' text for anything else
        $csc_text .= ' ' . $if_text;
    }

    # the highest setting skips the length check
    return $csc_text if ( $else_flag == 2 );

    # estimate the resulting line length and drop the extra item if the
    # line would be too long
    my $length =
      length($csc_text) +
      length($block_type) +
      length( $rOpts->{'closing-side-comment-prefix'} ) +
      $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;

    return ( $length > $rOpts_maximum_line_length )
      ? $text_without_extra
      : $csc_text;
}
10987
{    # sub balance_csc_text

    # each structural character mapped to its mate
    my %matching_char;

    BEGIN {
        %matching_char = (
            '{' => '}',
            '(' => ')',
            '[' => ']',
            '}' => '{',
            ')' => '(',
            ']' => '[',
        );
    }

    sub balance_csc_text {

        # Append characters to balance a closing side comment so that editors
        # such as vim can correctly jump through code.
        # Simple Example:
        #  input  = ## end foreach my $foo ( sort { $b  ...
        #  output = ## end foreach my $foo ( sort { $b  ...})
        #
        # Parameter: $csc = the closing side comment text
        # Returns:   the text with any balancing characters appended; the
        #            input text itself is never shortened or altered.

        # NOTE: This routine does not currently filter out structures within
        # quoted text because the bounce algorithms in text editors do not
        # necessarily do this either (a version of vim was checked and
        # did not do this).

        # Some complex examples which will cause trouble for some editors:
        #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
        #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
        #  if ( $1 eq '{' ) {
        # test file test1/braces.pl has many such examples.

        my ($csc) = @_;

        # nothing to balance in missing or empty text
        return $csc unless ( defined($csc) && length($csc) );

        # Stack of balancing characters to be appended; the last item is
        # the most recently added.  This is kept apart from the text so
        # that a structural character at the very end of the text cannot
        # be mistaken for an appended character and removed.
        my @appended;

        # loop to examine characters one-by-one, RIGHT to LEFT and
        # build a balancing ending, LEFT to RIGHT.
        foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {

            my $char = substr( $csc, $pos, 1 );

            # ignore everything except structural characters
            my $mate = $matching_char{$char};
            next unless ($mate);

            # if the most recently appended character is this character
            # then the two balance each other and it can be dropped..
            if ( @appended && $appended[-1] eq $char ) {
                pop @appended;
            }

            # ..otherwise append the mate to this character
            else {
                push @appended, $mate;
            }
        }

        # return the balanced string
        return $csc . join( '', @appended );
    }
}
11045
sub add_closing_side_comment {

    # Add closing side comments after closing block braces if -csc used.
    #
    # This routine takes no arguments; it works on the current batch of
    # tokens in the '_to_go' arrays.  If the batch ends in the closing brace
    # of a block of interest, a closing side comment is appended to the
    # batch, or an existing closing side comment is updated.
    #
    # Returns the text of a block comment holding the old side comment if
    # an existing side comment was replaced under -cscw, otherwise undef.
    my $cscw_block_comment;

    #---------------------------------------------------------------
    # Step 1: loop through all tokens of this line to accumulate
    # the text needed to create the closing side comments. Also see
    # how the line ends.
    #---------------------------------------------------------------

    my ( $terminal_type, $i_terminal, $i_block_leading_text,
        $block_leading_text, $block_line_count )
      = accumulate_csc_text();

    #---------------------------------------------------------------
    # Step 2: make the closing side comment if this ends a block
    #---------------------------------------------------------------

    # if the terminal token is not the last token of the batch then
    # the batch ends in a side comment
    my $have_side_comment = $i_terminal != $max_index_to_go;

    # if this line might end in a block closure..
    if (
        $terminal_type eq '}'

        # ..and either
        && (

            # the block is long enough
            ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )

            # or there is an existing comment to check
            || (   $have_side_comment
                && $rOpts->{'closing-side-comment-warnings'} )
        )

        # .. and if this is one of the types of interest
        && $block_type_to_go[$i_terminal] =~
        /$closing_side_comment_list_pattern/o

        # .. but not an anonymous sub
        # These are not normally of interest, and their closing braces are
        # often followed by commas or semicolons anyway.  This also avoids
        # possible erratic output due to line numbering inconsistencies
        # in the cases where their closing braces terminate a line.
        && $block_type_to_go[$i_terminal] ne 'sub'

        # ..and the corresponding opening brace must not be in this batch
        # (because we do not need to tag one-line blocks, although this
        # should also be caught with a positive -csci value)
        && $mate_index_to_go[$i_terminal] < 0

        # ..and either
        && (

            # this is the last token (line doesn't have a side comment)
            !$have_side_comment

            # or the old side comment is a closing side comment
            || $tokens_to_go[$max_index_to_go] =~
            /$closing_side_comment_prefix_pattern/o
        )
      )
    {

        # then make the closing side comment text
        my $token =
"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";

        # append any extra descriptive text collected above
        if ( $i_block_leading_text == $i_terminal ) {
            $token .= $block_leading_text;
        }

        $token = balance_csc_text($token)
          if $rOpts->{'closing-side-comments-balanced'};

        $token =~ s/\s*$//;    # trim any trailing whitespace

        # handle case of existing closing side comment
        if ($have_side_comment) {

            # warn if requested and tokens differ significantly
            if ( $rOpts->{'closing-side-comment-warnings'} ) {
                my $old_csc = $tokens_to_go[$max_index_to_go];
                my $new_csc = $token;
                $new_csc =~ s/\s+//g;            # trim all whitespace
                $old_csc =~ s/\s+//g;            # trim all whitespace
                $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
                $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures

                # trim trailing '...' from the new comment and remember
                # if it was there.  The capture is only used when the
                # substitution succeeds because $1 keeps the value of the
                # last successful match if it fails.
                my $new_trailing_dots =
                  ( $new_csc =~ s/(\.\.\.)$// ) ? $1 : undef;
                $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'

                # Patch to handle multiple closing side comments at
                # else and elsif's.  These have become too complicated
                # to check, so if we see an indication of
                # '[ elsif' (at an else) or '[ if' (at an elsif),
                # then assume they were made by perltidy.
                if ( $block_type_to_go[$i_terminal] eq 'else' ) {
                    if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
                }
                elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
                    if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
                }

                # if old comment is contained in new comment,
                # only compare the common part.
                if ( length($new_csc) > length($old_csc) ) {
                    $new_csc = substr( $new_csc, 0, length($old_csc) );
                }

                # if the new comment is shorter and has been limited,
                # only compare the common part.
                if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
                {
                    $old_csc = substr( $old_csc, 0, length($new_csc) );
                }

                # any remaining difference?
                if ( $new_csc ne $old_csc ) {

                    # just leave the old comment if we are below the threshold
                    # for creating side comments
                    if ( $block_line_count <
                        $rOpts->{'closing-side-comment-interval'} )
                    {
                        $token = undef;
                    }

                    # otherwise we'll make a note of it
                    else {

                        warning(
"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
                        );

                        # save the old side comment in a new trailing block
                        # comment, tagged with today's date
                        my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
                        $year  += 1900;
                        $month += 1;
                        $cscw_block_comment =
"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
                    }
                }
                else {

                    # No differences.. we can safely delete old comment if we
                    # are below the threshold
                    if ( $block_line_count <
                        $rOpts->{'closing-side-comment-interval'} )
                    {
                        $token = undef;

                        # remove the side comment and then any blank
                        # token which preceded it
                        unstore_token_to_go()
                          if ( $types_to_go[$max_index_to_go] eq '#' );
                        unstore_token_to_go()
                          if ( $types_to_go[$max_index_to_go] eq 'b' );
                    }
                }
            }

            # switch to the new csc (unless we deleted it!)
            $tokens_to_go[$max_index_to_go] = $token if $token;
        }

        # handle case of NO existing closing side comment
        else {

            # insert the new side comment into the output token stream,
            # using the nesting depth of the current last token for both
            # of the new tokens
            my $slevel               = $nesting_depth_to_go[$max_index_to_go];
            my $no_internal_newlines = 0;

            # first insert a blank token
            insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );

            # then the side comment
            insert_new_token_to_go( $token, '#', $slevel,
                $no_internal_newlines );
        }
    }
    return $cscw_block_comment;
}
11237
sub previous_nonblank_token {

    # Return the text of the token before index $i in the current batch,
    # skipping over one blank token (type 'b') if necessary.  An empty
    # string is returned if there is no such token in the batch.
    my ($i) = @_;

    # step back one token, and one more if that one is a blank
    my $i_prev = $i - 1;
    return "" if ( $i_prev < 0 );
    $i_prev-- if ( $types_to_go[$i_prev] eq 'b' );
    return "" if ( $i_prev < 0 );

    my $name = $tokens_to_go[$i_prev];
    return $name unless ( $name eq '->' );

    # prepend any sub name to an isolated -> to avoid unwanted alignments
    # [test case is test8/penco.pl]
    my $i_before_arrow = $i_prev - 1;
    if ( $i_before_arrow >= 0 && $types_to_go[$i_before_arrow] ne 'b' ) {
        $name = $tokens_to_go[$i_before_arrow] . $name;
    }
    return $name;
}
11257
sub send_lines_to_vertical_aligner {

    # Send the output lines of the current batch, one at a time, to the
    # vertical aligner.
    #
    # Parameters:
    #   $ri_first   - reference to a list of the index of the first token
    #                 of each output line
    #   $ri_last    - reference to a list of the index of the last token
    #                 of each output line
    #   $do_not_pad - flag passed along to the vertical aligner with the
    #                 first line only; it is zero for all other lines
    my ( $ri_first, $ri_last, $do_not_pad ) = @_;

    my $rindentation_list = [0];    # ref to indentations for each line

    # define the array @matching_token_to_go for the output tokens
    # which will be non-blank for each special token (such as =>)
    # for which alignment is required.
    set_vertical_alignment_markers( $ri_first, $ri_last );

    # flush if necessary to avoid unwanted alignment
    my $must_flush = 0;
    if ( @$ri_first > 1 ) {

        # flush before a long if statement
        if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
            $must_flush = 1;
        }
    }
    if ($must_flush) {
        Perl::Tidy::VerticalAligner::flush();
    }

    undo_ci( $ri_first, $ri_last );

    set_logical_padding( $ri_first, $ri_last );

    # loop to prepare each line for shipment
    my $n_last_line = @$ri_first - 1;

    # true if the previous line ended in a comma at a forced breakpoint
    my $in_comma_list;
    for my $n ( 0 .. $n_last_line ) {
        my $ibeg = $$ri_first[$n];
        my $iend = $$ri_last[$n];

        my ( $rtokens, $rfields, $rpatterns ) =
          make_alignment_patterns( $ibeg, $iend );

        my ( $indentation, $lev, $level_end, $terminal_type,
            $is_semicolon_terminated, $is_outdented_line )
          = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
            $ri_first, $ri_last, $rindentation_list );

        # we will allow outdenting of long lines..
        my $outdent_long_lines = (

            # which are long quotes, if allowed
            ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )

            # which are long block comments, if allowed
              || (
                   $types_to_go[$ibeg] eq '#'
                && $rOpts->{'outdent-long-comments'}

                # but not if this is a static block comment
                && !$is_static_block_comment
              )
        );

        # change in nesting depth from the start of this line to the
        # token following this line
        my $level_jump =
          $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];

        my $rvertical_tightness_flags =
          set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
            $ri_first, $ri_last );

        # flush an outdented line to avoid any unwanted vertical alignment
        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);

        # A line which begins with a ':', or which follows a line ending
        # in a ':', is marked as a terminal ternary if it ends in a
        # semicolon without an increase in level, or ends at a lower level.
        my $is_terminal_ternary = 0;
        if (   $tokens_to_go[$ibeg] eq ':'
            || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
        {
            if (   ( $terminal_type eq ';' && $level_end <= $lev )
                || ( $level_end < $lev ) )
            {
                $is_terminal_ternary = 1;
            }
        }

        # send this new line down the pipe
        Perl::Tidy::VerticalAligner::append_line(
            $lev,
            $level_end,
            $indentation,
            $rfields,
            $rtokens,
            $rpatterns,
            $forced_breakpoint_to_go[$iend] || $in_comma_list,
            $outdent_long_lines,
            $is_terminal_ternary,
            $is_semicolon_terminated,
            $do_not_pad,
            $rvertical_tightness_flags,
            $level_jump,
        );
        $in_comma_list =
          $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];

        # flush an outdented line to avoid any unwanted vertical alignment
        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);

        # the caller's flag only applies to the first line
        $do_not_pad = 0;

    }    # end of loop to output each line

    # remember indentation of lines containing opening containers for
    # later use by sub set_adjusted_indentation
    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
}
11369
{        # begin make_alignment_patterns

    my %block_type_map;
    my %keyword_map;

    BEGIN {

        # map related block names into a common name to
        # allow alignment
        %block_type_map = (
            'unless'  => 'if',
            'else'    => 'if',
            'elsif'   => 'if',
            'when'    => 'if',
            'default' => 'if',
            'case'    => 'if',
            'sort'    => 'map',
            'grep'    => 'map',
        );

        # map certain keywords to the same 'if' class to align
        # long if/elsif sequences. [elsif.pl]
        %keyword_map = (
            'unless'  => 'if',
            'else'    => 'if',
            'elsif'   => 'if',
            'when'    => 'given',
            'default' => 'given',
            'case'    => 'switch',

            # treat an 'undef' similar to numbers and quotes
            'undef' => 'Q',
        );
    }

    sub make_alignment_patterns {

        # Here we do some important preliminary work for the
        # vertical aligner.  We create three arrays for one
        # output line. These arrays contain strings that can
        # be tested by the vertical aligner to see if
        # consecutive lines can be aligned vertically.
        #
        # The three arrays are indexed on the vertical
        # alignment fields and are:
        # @tokens - a list of any vertical alignment tokens for this line.
        #   These are tokens, such as '=' '&&' '#' etc which
        #   we might want to align vertically.  These are
        #   decorated with various information such as
        #   nesting depth to prevent unwanted vertical
        #   alignment matches.
        # @fields - the actual text of the line between the vertical alignment
        #   tokens.
        # @patterns - a modified list of token types, one for each alignment
        #   field.  These should normally each match before alignment is
        #   allowed, even when the alignment tokens match.
        #
        # Parameters: $ibeg, $iend = the indexes of the first and last
        #   tokens of the output line.
        # Returns: references to the three arrays, in the order
        #   ( \@tokens, \@fields, \@patterns ).
        my ( $ibeg, $iend ) = @_;
        my @tokens   = ();
        my @fields   = ();
        my @patterns = ();
        my $i_start  = $ibeg;    # index of first token of the current field

        # depth and names of the containers balanced on this line
        my $depth          = 0;
        my @container_name = ("");

        my $j = 0;    # field index

        $patterns[0] = "";
        for my $i ( $ibeg .. $iend ) {

            # Keep track of containers balanced on this line only.
            # These are used below to prevent unwanted cross-line alignments.
            # Unbalanced containers already avoid aligning across
            # container boundaries.
            if ( $tokens_to_go[$i] eq '(' ) {

                # if container is balanced on this line...
                my $i_mate = $mate_index_to_go[$i];
                if ( $i_mate > $i && $i_mate <= $iend ) {
                    $depth++;

                    # Append the previous token name to make the container name
                    # more unique.  This name will also be given to any commas
                    # within this container, and it helps avoid undesirable
                    # alignments of different types of containers.
                    my $name = previous_nonblank_token($i);
                    $name =~ s/^->//;
                    $container_name[$depth] = "+" . $name;

                    # Make the container name even more unique if necessary.
                    # If we are not vertically aligning this opening paren,
                    # append a character count to avoid bad alignment because
                    # it usually looks bad to align commas within containers
                    # for which the opening parens do not align.  Here
                    # is an example very BAD alignment of commas (because
                    # the atan2 functions are not all aligned):
                    #    $XY =
                    #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
                    #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
                    #      $X * atan2( $X,            1 ) -
                    #      $Y * atan2( $Y,            1 );
                    #
                    # On the other hand, it is usually okay to align commas if
                    # opening parens align, such as:
                    #    glVertex3d( $cx + $s * $xs, $cy,            $z );
                    #    glVertex3d( $cx,            $cy + $s * $ys, $z );
                    #    glVertex3d( $cx - $s * $xs, $cy,            $z );
                    #    glVertex3d( $cx,            $cy - $s * $ys, $z );
                    #
                    # To distinguish between these situations, we will
                    # append the length of the line from the previous matching
                    # token, or beginning of line, to the function name.  This
                    # will allow the vertical aligner to reject undesirable
                    # matches.

                    # if we are not aligning on this paren...
                    if ( $matching_token_to_go[$i] eq '' ) {

                        # Sum length from previous alignment, or start of line.
                        # Note that we have to sum token lengths here because
                        # padding has been done and so array $lengths_to_go
                        # is now wrong.
                        my $len =
                          length(
                            join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
                        $len += leading_spaces_to_go($i_start)
                          if ( $i_start == $ibeg );

                        # tack length onto the container name to make unique
                        $container_name[$depth] .= "-" . $len;
                    }
                }
            }
            elsif ( $tokens_to_go[$i] eq ')' ) {
                $depth-- if $depth > 0;
            }

            # if we find a new synchronization token, we are done with
            # a field
            if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {

                my $tok = my $raw_tok = $matching_token_to_go[$i];

                # make separators in different nesting depths unique
                # by appending the nesting depth digit.
                if ( $raw_tok ne '#' ) {
                    $tok .= "$nesting_depth_to_go[$i]";
                }

                # also decorate commas with any container name to avoid
                # unwanted cross-line alignments.
                if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
                    if ( $container_name[$depth] ) {
                        $tok .= $container_name[$depth];
                    }
                }

                # Patch to avoid aligning leading and trailing if, unless.
                # Mark trailing if, unless statements with container names.
                # This makes them different from leading if, unless which
                # are not so marked at present.  If we ever need to name
                # them too, we could use ci to distinguish them.
                # Example problem to avoid:
                #    return ( 2, "DBERROR" )
                #      if ( $retval == 2 );
                #    if   ( scalar @_ ) {
                #        my ( $a, $b, $c, $d, $e, $f ) = @_;
                #    }
                if ( $raw_tok eq '(' ) {
                    my $ci = $ci_levels_to_go[$ibeg];
                    if (   $container_name[$depth] =~ /^\+(if|unless)/
                        && $ci )
                    {
                        $tok .= $container_name[$depth];
                    }
                }

                # Decorate block braces with block types to avoid
                # unwanted alignments such as the following:
                # foreach ( @{$routput_array} ) { $fh->print($_) }
                # eval                          { $fh->close() };
                if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
                    my $block_type = $block_type_to_go[$i];

                    # map certain related block types to allow
                    # else blocks to align
                    $block_type = $block_type_map{$block_type}
                      if ( defined( $block_type_map{$block_type} ) );

                    # remove sub names to allow one-line sub braces to align
                    # regardless of name
                    if ( $block_type =~ /^sub / ) { $block_type = 'sub' }

                    # allow all control-type blocks to align
                    if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }

                    $tok .= $block_type;
                }

                # concatenate the text of the consecutive tokens to form
                # the field
                push( @fields,
                    join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );

                # store the alignment token for this field
                push( @tokens, $tok );

                # get ready for the next batch
                $i_start = $i;
                $j++;
                $patterns[$j] = "";
            }

            # continue accumulating tokens
            # handle non-keywords..
            if ( $types_to_go[$i] ne 'k' ) {
                my $type = $types_to_go[$i];

                # Mark most things before arrows as a quote to
                # get them to line up. Testfile: mixed.pl.
                if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
                    my $next_type = $types_to_go[ $i + 1 ];
                    my $i_next_nonblank =
                      ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );

                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
                        $type = 'Q';

                        # Patch to ignore leading minus before words,
                        # by changing pattern 'mQ' into just 'Q',
                        # so that we can align things like this:
                        #  Button   => "Print letter \"~$_\"",
                        #  -command => [ sub { print "$_[0]\n" }, $_ ],
                        if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
                    }
                }

                # patch to make numbers and quotes align
                if ( $type eq 'n' ) { $type = 'Q' }

                # patch to ignore any ! in patterns
                if ( $type eq '!' ) { $type = '' }

                $patterns[$j] .= $type;
            }

            # for keywords we have to use the actual text
            else {

                my $tok = $tokens_to_go[$i];

                # but map certain keywords to a common string to allow
                # alignment.
                $tok = $keyword_map{$tok}
                  if ( defined( $keyword_map{$tok} ) );
                $patterns[$j] .= $tok;
            }
        }

        # done with this line .. join text of tokens to make the last field
        push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
        return ( \@tokens, \@fields, \@patterns );
    }

}    # end make_alignment_patterns
11640
11641 {    # begin unmatched_indexes
11642
11643     # closure to keep track of unbalanced containers.
11644     # arrays shared by the routines in this block:
11645     my @unmatched_opening_indexes_in_this_batch;
11646     my @unmatched_closing_indexes_in_this_batch;
11647     my %comma_arrow_count;
11648
11649     sub is_unbalanced_batch {
11650         @unmatched_opening_indexes_in_this_batch +
11651           @unmatched_closing_indexes_in_this_batch;
11652     }
11653
11654     sub comma_arrow_count {
11655         my $seqno = $_[0];
11656         return $comma_arrow_count{$seqno};
11657     }
11658
11659     sub match_opening_and_closing_tokens {
11660
11661         # Match up indexes of opening and closing braces, etc, in this batch.
11662         # This has to be done after all tokens are stored because unstoring
11663         # of tokens would otherwise cause trouble.
11664
11665         @unmatched_opening_indexes_in_this_batch = ();
11666         @unmatched_closing_indexes_in_this_batch = ();
11667         %comma_arrow_count                       = ();
11668
11669         my ( $i, $i_mate, $token );
11670         foreach $i ( 0 .. $max_index_to_go ) {
11671             if ( $type_sequence_to_go[$i] ) {
11672                 $token = $tokens_to_go[$i];
11673                 if ( $token =~ /^[\(\[\{\?]$/ ) {
11674                     push @unmatched_opening_indexes_in_this_batch, $i;
11675                 }
11676                 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
11677
11678                     $i_mate = pop @unmatched_opening_indexes_in_this_batch;
11679                     if ( defined($i_mate) && $i_mate >= 0 ) {
11680                         if ( $type_sequence_to_go[$i_mate] ==
11681                             $type_sequence_to_go[$i] )
11682                         {
11683                             $mate_index_to_go[$i]      = $i_mate;
11684                             $mate_index_to_go[$i_mate] = $i;
11685                         }
11686                         else {
11687                             push @unmatched_opening_indexes_in_this_batch,
11688                               $i_mate;
11689                             push @unmatched_closing_indexes_in_this_batch, $i;
11690                         }
11691                     }
11692                     else {
11693                         push @unmatched_closing_indexes_in_this_batch, $i;
11694                     }
11695                 }
11696             }
11697             elsif ( $tokens_to_go[$i] eq '=>' ) {
11698                 if (@unmatched_opening_indexes_in_this_batch) {
11699                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
11700                     my $seqno = $type_sequence_to_go[$j];
11701                     $comma_arrow_count{$seqno}++;
11702                 }
11703             }
11704         }
11705     }
11706
11707     sub save_opening_indentation {
11708
11709         # This should be called after each batch of tokens is output. It
11710         # saves indentations of lines of all unmatched opening tokens.
11711         # These will be used by sub get_opening_indentation.
11712
11713         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11714
11715         # we no longer need indentations of any saved indentations which
11716         # are unmatched closing tokens in this batch, because we will
11717         # never encounter them again.  So we can delete them to keep
11718         # the hash size down.
11719         foreach (@unmatched_closing_indexes_in_this_batch) {
11720             my $seqno = $type_sequence_to_go[$_];
11721             delete $saved_opening_indentation{$seqno};
11722         }
11723
11724         # we need to save indentations of any unmatched opening tokens
11725         # in this batch because we may need them in a subsequent batch.
11726         foreach (@unmatched_opening_indexes_in_this_batch) {
11727             my $seqno = $type_sequence_to_go[$_];
11728             $saved_opening_indentation{$seqno} = [
11729                 lookup_opening_indentation(
11730                     $_, $ri_first, $ri_last, $rindentation_list
11731                 )
11732             ];
11733         }
11734     }
11735 }    # end unmatched_indexes
11736
sub get_opening_indentation {

    # Find the indentation of the line which output the opening token
    # corresponding to a given closing token in the current output batch.
    #
    # given:
    #   $i_closing - index in this batch of a closing token
    #   $ri_first  - reference to list of the first index $i for each
    #                output line in this batch
    #   $ri_last   - reference to list of the last index $i for each
    #                output line in this batch
    #   $rindentation_list - reference to a list containing the
    #                indentation used for each line
    #
    # return a list of four values:
    #   - the indentation of the line which contained the opening token
    #     matching the token at index $i_closing
    #   - its offset (number of columns) from the start of that line
    #   - a flag which is true if the opening token began its line
    #   - a flag which is true if the opening token was found; when it
    #     is false the first three values are all zero
    #
    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;

    # Case 1: the opening token is in the current batch, so its
    # indentation can be looked up directly.
    my $i_opening = $mate_index_to_go[$i_closing];
    if ( $i_opening >= 0 ) {
        my ( $indent, $offset, $is_leading ) =
          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
            $rindentation_list );
        return ( $indent, $offset, $is_leading, 1 );
    }

    # Case 2: the opening token was output in a previous batch, which
    # should have stored its data in the hash under the sequence number.
    my $seqno = $type_sequence_to_go[$i_closing];
    if ( $seqno && $saved_opening_indentation{$seqno} ) {
        my ( $indent, $offset, $is_leading ) =
          @{ $saved_opening_indentation{$seqno} };
        return ( $indent, $offset, $is_leading, 1 );
    }

    # Case 3: nothing is known about the opening token.  Either there is
    # no sequence number, so it must be an unbalanced container, or
    # nothing was saved for it, which is some kind of serious error
    # (example is badfile.t).
    return ( 0, 0, 0, 0 );
}
11800
sub lookup_opening_indentation {

    # Find the indentation of the line in the current output batch
    # which output a selected opening token.
    #
    # given:
    #   $i_opening - index of an opening token in the current output batch
    #                whose line indentation we need
    #   $ri_start  - reference to list of the first index $i for each
    #                output line in this batch
    #   $ri_last   - reference to list of the last index $i for each
    #                output line in this batch
    #   $rindentation_list - reference to a list containing the
    #                indentation used for each line.  (NOTE: the first
    #                slot in this list is the last returned line number,
    #                and this is followed by the list of indentations).
    #
    # return a list of three values:
    #   - the indentation of the line which contained token $i_opening
    #   - its offset (number of columns) from the start of the line
    #   - a flag which is true if the token is the first one on its line

    my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;

    # resume the search at the line found by the previous lookup, but
    # start over at the first line if the token lies before that line
    my $line_index = $rindentation_list->[0];
    if ( $i_opening < $ri_start->[$line_index] ) { $line_index = 0 }

    # error - token index is out of bounds - shouldn't happen
    if ( $i_opening > $ri_last->[-1] ) {
        warning(
"non-fatal program bug in lookup_opening_indentation - index out of range\n"
        );
        report_definite_bug();
        $line_index = $#{$ri_last};
    }

    # normal case: step forward to the line holding the token
    else {
        $line_index++ while ( $i_opening > $ri_last->[$line_index] );
    }

    # save line number to start looking next call
    $rindentation_list->[0] = $line_index;

    my $i_line_start = $ri_start->[$line_index];
    my $column_offset =
      token_sequence_length( $i_line_start, $i_opening ) - 1;
    my $starts_line = ( $i_line_start == $i_opening );

    # indentations begin in slot 1 of the list, hence the '+ 1'
    return ( $rindentation_list->[ $line_index + 1 ],
        $column_offset, $starts_line );
}
11850
11851 {
11852     my %is_if_elsif_else_unless_while_until_for_foreach;
11853
11854     BEGIN {
11855
11856         # These block types may have text between the keyword and opening
11857         # curly.  Note: 'else' does not, but must be included to allow trailing
11858         # if/elsif text to be appended.
11859         # patch for SWITCH/CASE: added 'case' and 'when'
11860         @_ = qw(if elsif else unless while until for foreach case when);
11861         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11862     }
11863
11864     sub set_adjusted_indentation {
11865
11866         # This routine has the final say regarding the actual indentation of
11867         # a line.  It starts with the basic indentation which has been
11868         # defined for the leading token, and then takes into account any
11869         # options that the user has set regarding special indenting and
11870         # outdenting.
11871
11872         my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11873             $rindentation_list )
11874           = @_;
11875
11876         # we need to know the last token of this line
11877         my ( $terminal_type, $i_terminal ) =
11878           terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11879
11880         my $is_outdented_line = 0;
11881
11882         my $is_semicolon_terminated = $terminal_type eq ';'
11883           && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11884
11885         ##########################################################
11886         # Section 1: set a flag and a default indentation
11887         #
11888         # Most lines are indented according to the initial token.
11889         # But it is common to outdent to the level just after the
11890         # terminal token in certain cases...
11891         # adjust_indentation flag:
11892         #       0 - do not adjust
11893         #       1 - outdent
11894         #       2 - vertically align with opening token
11895         #       3 - indent
11896         ##########################################################
11897         my $adjust_indentation         = 0;
11898         my $default_adjust_indentation = $adjust_indentation;
11899
11900         my (
11901             $opening_indentation, $opening_offset,
11902             $is_leading,          $opening_exists
11903         );
11904
11905         # if we are at a closing token of some type..
11906         if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11907
11908             # get the indentation of the line containing the corresponding
11909             # opening token
11910             (
11911                 $opening_indentation, $opening_offset,
11912                 $is_leading,          $opening_exists
11913               )
11914               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11915                 $rindentation_list );
11916
11917             # First set the default behavior:
11918             # default behavior is to outdent closing lines
11919             # of the form:   ");  };  ];  )->xxx;"
11920             if (
11921                 $is_semicolon_terminated
11922
11923                 # and 'cuddled parens' of the form:   ")->pack("
11924                 || (
11925                        $terminal_type eq '('
11926                     && $types_to_go[$ibeg] eq ')'
11927                     && ( $nesting_depth_to_go[$iend] + 1 ==
11928                         $nesting_depth_to_go[$ibeg] )
11929                 )
11930               )
11931             {
11932                 $adjust_indentation = 1;
11933             }
11934
11935             # TESTING: outdent something like '),'
11936             if (
11937                 $terminal_type eq ','
11938
11939                 # allow just one character before the comma
11940                 && $i_terminal == $ibeg + 1
11941
11942                 # requre LIST environment; otherwise, we may outdent too much --
11943                 # this can happen in calls without parentheses (overload.t);
11944                 && $container_environment_to_go[$i_terminal] eq 'LIST'
11945               )
11946             {
11947                 $adjust_indentation = 1;
11948             }
11949
11950             # undo continuation indentation of a terminal closing token if
11951             # it is the last token before a level decrease.  This will allow
11952             # a closing token to line up with its opening counterpart, and
11953             # avoids a indentation jump larger than 1 level.
11954             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11955                 && $i_terminal == $ibeg )
11956             {
11957                 my $ci        = $ci_levels_to_go[$ibeg];
11958                 my $lev       = $levels_to_go[$ibeg];
11959                 my $next_type = $types_to_go[ $ibeg + 1 ];
11960                 my $i_next_nonblank =
11961                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11962                 if (   $i_next_nonblank <= $max_index_to_go
11963                     && $levels_to_go[$i_next_nonblank] < $lev )
11964                 {
11965                     $adjust_indentation = 1;
11966                 }
11967             }
11968
11969             # YVES patch 1 of 2:
11970             # Undo ci of line with leading closing eval brace,
11971             # but not beyond the indention of the line with
11972             # the opening brace.
11973             if (   $block_type_to_go[$ibeg] eq 'eval'
11974                 && !$rOpts->{'line-up-parentheses'}
11975                 && !$rOpts->{'indent-closing-brace'} )
11976             {
11977                 (
11978                     $opening_indentation, $opening_offset,
11979                     $is_leading,          $opening_exists
11980                   )
11981                   = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11982                     $rindentation_list );
11983                 my $indentation = $leading_spaces_to_go[$ibeg];
11984                 if ( defined($opening_indentation)
11985                     && $indentation > $opening_indentation )
11986                 {
11987                     $adjust_indentation = 1;
11988                 }
11989             }
11990
11991             $default_adjust_indentation = $adjust_indentation;
11992
11993             # Now modify default behavior according to user request:
11994             # handle option to indent non-blocks of the form );  };  ];
11995             # But don't do special indentation to something like ')->pack('
11996             if ( !$block_type_to_go[$ibeg] ) {
11997                 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11998                 if ( $cti == 1 ) {
11999                     if (   $i_terminal <= $ibeg + 1
12000                         || $is_semicolon_terminated )
12001                     {
12002                         $adjust_indentation = 2;
12003                     }
12004                     else {
12005                         $adjust_indentation = 0;
12006                     }
12007                 }
12008                 elsif ( $cti == 2 ) {
12009                     if ($is_semicolon_terminated) {
12010                         $adjust_indentation = 3;
12011                     }
12012                     else {
12013                         $adjust_indentation = 0;
12014                     }
12015                 }
12016                 elsif ( $cti == 3 ) {
12017                     $adjust_indentation = 3;
12018                 }
12019             }
12020
12021             # handle option to indent blocks
12022             else {
12023                 if (
12024                     $rOpts->{'indent-closing-brace'}
12025                     && (
12026                         $i_terminal == $ibeg    #  isolated terminal '}'
12027                         || $is_semicolon_terminated
12028                     )
12029                   )                             #  } xxxx ;
12030                 {
12031                     $adjust_indentation = 3;
12032                 }
12033             }
12034         }
12035
12036         # if at ');', '};', '>;', and '];' of a terminal qw quote
12037         elsif ($$rpatterns[0] =~ /^qb*;$/
12038             && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
12039         {
12040             if ( $closing_token_indentation{$1} == 0 ) {
12041                 $adjust_indentation = 1;
12042             }
12043             else {
12044                 $adjust_indentation = 3;
12045             }
12046         }
12047
12048         # if line begins with a ':', align it with any
12049         # previous line leading with corresponding ?
12050         elsif ( $types_to_go[$ibeg] eq ':' ) {
12051             (
12052                 $opening_indentation, $opening_offset,
12053                 $is_leading,          $opening_exists
12054               )
12055               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12056                 $rindentation_list );
12057             if ($is_leading) { $adjust_indentation = 2; }
12058         }
12059
12060         ##########################################################
12061         # Section 2: set indentation according to flag set above
12062         #
12063         # Select the indentation object to define leading
12064         # whitespace.  If we are outdenting something like '} } );'
12065         # then we want to use one level below the last token
12066         # ($i_terminal) in order to get it to fully outdent through
12067         # all levels.
12068         ##########################################################
12069         my $indentation;
12070         my $lev;
12071         my $level_end = $levels_to_go[$iend];
12072
12073         if ( $adjust_indentation == 0 ) {
12074             $indentation = $leading_spaces_to_go[$ibeg];
12075             $lev         = $levels_to_go[$ibeg];
12076         }
12077         elsif ( $adjust_indentation == 1 ) {
12078             $indentation = $reduced_spaces_to_go[$i_terminal];
12079             $lev         = $levels_to_go[$i_terminal];
12080         }
12081
12082         # handle indented closing token which aligns with opening token
12083         elsif ( $adjust_indentation == 2 ) {
12084
12085             # handle option to align closing token with opening token
12086             $lev = $levels_to_go[$ibeg];
12087
12088             # calculate spaces needed to align with opening token
12089             my $space_count =
12090               get_SPACES($opening_indentation) + $opening_offset;
12091
12092             # Indent less than the previous line.
12093             #
12094             # Problem: For -lp we don't exactly know what it was if there
12095             # were recoverable spaces sent to the aligner.  A good solution
12096             # would be to force a flush of the vertical alignment buffer, so
12097             # that we would know.  For now, this rule is used for -lp:
12098             #
12099             # When the last line did not start with a closing token we will
12100             # be optimistic that the aligner will recover everything wanted.
12101             #
12102             # This rule will prevent us from breaking a hierarchy of closing
12103             # tokens, and in a worst case will leave a closing paren too far
12104             # indented, but this is better than frequently leaving it not
12105             # indented enough.
12106             my $last_spaces = get_SPACES($last_indentation_written);
12107             if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
12108                 $last_spaces +=
12109                   get_RECOVERABLE_SPACES($last_indentation_written);
12110             }
12111
12112             # reset the indentation to the new space count if it works
12113             # only options are all or none: nothing in-between looks good
12114             $lev = $levels_to_go[$ibeg];
12115             if ( $space_count < $last_spaces ) {
12116                 if ($rOpts_line_up_parentheses) {
12117                     my $lev = $levels_to_go[$ibeg];
12118                     $indentation =
12119                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12120                 }
12121                 else {
12122                     $indentation = $space_count;
12123                 }
12124             }
12125
12126             # revert to default if it doesnt work
12127             else {
12128                 $space_count = leading_spaces_to_go($ibeg);
12129                 if ( $default_adjust_indentation == 0 ) {
12130                     $indentation = $leading_spaces_to_go[$ibeg];
12131                 }
12132                 elsif ( $default_adjust_indentation == 1 ) {
12133                     $indentation = $reduced_spaces_to_go[$i_terminal];
12134                     $lev         = $levels_to_go[$i_terminal];
12135                 }
12136             }
12137         }
12138
12139         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
12140         else {
12141
12142             # handle -icb (indented closing code block braces)
12143             # Updated method for indented block braces: indent one full level if
12144             # there is no continuation indentation.  This will occur for major
12145             # structures such as sub, if, else, but not for things like map
12146             # blocks.
12147             #
12148             # Note: only code blocks without continuation indentation are
12149             # handled here (if, else, unless, ..). In the following snippet,
12150             # the terminal brace of the sort block will have continuation
12151             # indentation as shown so it will not be handled by the coding
12152             # here.  We would have to undo the continuation indentation to do
12153             # this, but it probably looks ok as is.  This is a possible future
12154             # update for semicolon terminated lines.
12155             #
12156             #     if ($sortby eq 'date' or $sortby eq 'size') {
12157             #         @files = sort {
12158             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
12159             #                 or $a cmp $b
12160             #                 } @files;
12161             #         }
12162             #
12163             if (   $block_type_to_go[$ibeg]
12164                 && $ci_levels_to_go[$i_terminal] == 0 )
12165             {
12166                 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
12167                 $indentation = $spaces + $rOpts_indent_columns;
12168
12169                 # NOTE: for -lp we could create a new indentation object, but
12170                 # there is probably no need to do it
12171             }
12172
12173             # handle -icp and any -icb block braces which fall through above
12174             # test such as the 'sort' block mentioned above.
12175             else {
12176
12177                 # There are currently two ways to handle -icp...
12178                 # One way is to use the indentation of the previous line:
12179                 # $indentation = $last_indentation_written;
12180
12181                 # The other way is to use the indentation that the previous line
12182                 # would have had if it hadn't been adjusted:
12183                 $indentation = $last_unadjusted_indentation;
12184
12185                 # Current method: use the minimum of the two. This avoids
12186                 # inconsistent indentation.
12187                 if ( get_SPACES($last_indentation_written) <
12188                     get_SPACES($indentation) )
12189                 {
12190                     $indentation = $last_indentation_written;
12191                 }
12192             }
12193
12194             # use previous indentation but use own level
12195             # to cause list to be flushed properly
12196             $lev = $levels_to_go[$ibeg];
12197         }
12198
12199         # remember indentation except for multi-line quotes, which get
12200         # no indentation
12201         unless ( $ibeg == 0 && $starting_in_quote ) {
12202             $last_indentation_written    = $indentation;
12203             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
12204             $last_leading_token          = $tokens_to_go[$ibeg];
12205         }
12206
12207         # be sure lines with leading closing tokens are not outdented more
12208         # than the line which contained the corresponding opening token.
12209
12210         #############################################################
12211         # updated per bug report in alex_bug.pl: we must not
12212         # mess with the indentation of closing logical braces so
12213         # we must treat something like '} else {' as if it were
12214         # an isolated brace my $is_isolated_block_brace = (
12215         # $iend == $ibeg ) && $block_type_to_go[$ibeg];
12216         #############################################################
12217         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
12218           && ( $iend == $ibeg
12219             || $is_if_elsif_else_unless_while_until_for_foreach{
12220                 $block_type_to_go[$ibeg] } );
12221
12222         # only do this for a ':; which is aligned with its leading '?'
12223         my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
12224         if (   defined($opening_indentation)
12225             && !$is_isolated_block_brace
12226             && !$is_unaligned_colon )
12227         {
12228             if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
12229                 $indentation = $opening_indentation;
12230             }
12231         }
12232
12233         # remember the indentation of each line of this batch
12234         push @{$rindentation_list}, $indentation;
12235
12236         # outdent lines with certain leading tokens...
12237         if (
12238
12239             # must be first word of this batch
12240             $ibeg == 0
12241
12242             # and ...
12243             && (
12244
12245                 # certain leading keywords if requested
12246                 (
12247                        $rOpts->{'outdent-keywords'}
12248                     && $types_to_go[$ibeg] eq 'k'
12249                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
12250                 )
12251
12252                 # or labels if requested
12253                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
12254
12255                 # or static block comments if requested
12256                 || (   $types_to_go[$ibeg] eq '#'
12257                     && $rOpts->{'outdent-static-block-comments'}
12258                     && $is_static_block_comment )
12259             )
12260           )
12261
12262         {
12263             my $space_count = leading_spaces_to_go($ibeg);
12264             if ( $space_count > 0 ) {
12265                 $space_count -= $rOpts_continuation_indentation;
12266                 $is_outdented_line = 1;
12267                 if ( $space_count < 0 ) { $space_count = 0 }
12268
12269                 # do not promote a spaced static block comment to non-spaced;
12270                 # this is not normally necessary but could be for some
12271                 # unusual user inputs (such as -ci = -i)
12272                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
12273                     $space_count = 1;
12274                 }
12275
12276                 if ($rOpts_line_up_parentheses) {
12277                     $indentation =
12278                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12279                 }
12280                 else {
12281                     $indentation = $space_count;
12282                 }
12283             }
12284         }
12285
12286         return ( $indentation, $lev, $level_end, $terminal_type,
12287             $is_semicolon_terminated, $is_outdented_line );
12288     }
12289 }
12290
12291 sub set_vertical_tightness_flags {
12292
12293     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
12294
12295     # Define vertical tightness controls for the nth line of a batch.
12296     # We create an array of parameters which tell the vertical aligner
12297     # if we should combine this line with the next line to achieve the
12298     # desired vertical tightness.  The array of parameters contains:
12299     #
12300     #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
12301     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
12302     #             if closing: spaces of padding to use
12303     #   [2] sequence number of container
12304     #   [3] valid flag: do not append if this flag is false. Will be
12305     #       true if appropriate -vt flag is set.  Otherwise, Will be
12306     #       made true only for 2 line container in parens with -lp
12307     #
12308     # These flags are used by sub set_leading_whitespace in
12309     # the vertical aligner
12310
12311     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
12312
12313     # For non-BLOCK tokens, we will need to examine the next line
12314     # too, so we won't consider the last line.
12315     if ( $n < $n_last_line ) {
12316
12317         # see if last token is an opening token...not a BLOCK...
12318         my $ibeg_next = $$ri_first[ $n + 1 ];
12319         my $token_end = $tokens_to_go[$iend];
12320         my $iend_next = $$ri_last[ $n + 1 ];
12321         if (
12322                $type_sequence_to_go[$iend]
12323             && !$block_type_to_go[$iend]
12324             && $is_opening_token{$token_end}
12325             && (
12326                 $opening_vertical_tightness{$token_end} > 0
12327
12328                 # allow 2-line method call to be closed up
12329                 || (   $rOpts_line_up_parentheses
12330                     && $token_end eq '('
12331                     && $iend > $ibeg
12332                     && $types_to_go[ $iend - 1 ] ne 'b' )
12333             )
12334           )
12335         {
12336
12337             # avoid multiple jumps in nesting depth in one line if
12338             # requested
12339             my $ovt       = $opening_vertical_tightness{$token_end};
12340             my $iend_next = $$ri_last[ $n + 1 ];
12341             unless (
12342                 $ovt < 2
12343                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
12344                     $nesting_depth_to_go[$ibeg_next] )
12345               )
12346             {
12347
12348                 # If -vt flag has not been set, mark this as invalid
12349                 # and aligner will validate it if it sees the closing paren
12350                 # within 2 lines.
12351                 my $valid_flag = $ovt;
12352                 @{$rvertical_tightness_flags} =
12353                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
12354             }
12355         }
12356
12357         # see if first token of next line is a closing token...
12358         # ..and be sure this line does not have a side comment
12359         my $token_next = $tokens_to_go[$ibeg_next];
12360         if (   $type_sequence_to_go[$ibeg_next]
12361             && !$block_type_to_go[$ibeg_next]
12362             && $is_closing_token{$token_next}
12363             && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
12364         {
12365             my $ovt = $opening_vertical_tightness{$token_next};
12366             my $cvt = $closing_vertical_tightness{$token_next};
12367             if (
12368
12369                 # never append a trailing line like   )->pack(
12370                 # because it will throw off later alignment
12371                 (
12372                     $nesting_depth_to_go[$ibeg_next] ==
12373                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
12374                 )
12375                 && (
12376                     $cvt == 2
12377                     || (
12378                         $container_environment_to_go[$ibeg_next] ne 'LIST'
12379                         && (
12380                             $cvt == 1
12381
12382                             # allow closing up 2-line method calls
12383                             || (   $rOpts_line_up_parentheses
12384                                 && $token_next eq ')' )
12385                         )
12386                     )
12387                 )
12388               )
12389             {
12390
12391                 # decide which trailing closing tokens to append..
12392                 my $ok = 0;
12393                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
12394                 else {
12395                     my $str = join( '',
12396                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
12397
12398                     # append closing token if followed by comment or ';'
12399                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
12400                 }
12401
12402                 if ($ok) {
12403                     my $valid_flag = $cvt;
12404                     @{$rvertical_tightness_flags} = (
12405                         2,
12406                         $tightness{$token_next} == 2 ? 0 : 1,
12407                         $type_sequence_to_go[$ibeg_next], $valid_flag,
12408                     );
12409                 }
12410             }
12411         }
12412
12413         # Opening Token Right
12414         # If requested, move an isolated trailing opening token to the end of
12415         # the previous line which ended in a comma.  We could do this
12416         # in sub recombine_breakpoints but that would cause problems
12417         # with -lp formatting.  The problem is that indentation will
12418         # quickly move far to the right in nested expressions.  By
12419         # doing it after indentation has been set, we avoid changes
12420         # to the indentation.  Actual movement of the token takes place
12421         # in sub write_leader_and_string.
12422         if (
12423             $opening_token_right{ $tokens_to_go[$ibeg_next] }
12424
12425             # previous line is not opening
12426             # (use -sot to combine with it)
12427             && !$is_opening_token{$token_end}
12428
12429             # previous line ended in one of these
12430             # (add other cases if necessary; '=>' and '.' are not necessary
12431             ##&& ($is_opening_token{$token_end} || $token_end eq ',')
12432             && !$block_type_to_go[$ibeg_next]
12433
12434             # this is a line with just an opening token
12435             && (   $iend_next == $ibeg_next
12436                 || $iend_next == $ibeg_next + 2
12437                 && $types_to_go[$iend_next] eq '#' )
12438
12439             # looks bad if we align vertically with the wrong container
12440             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
12441           )
12442         {
12443             my $valid_flag = 1;
12444             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12445             @{$rvertical_tightness_flags} =
12446               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
12447         }
12448
12449         # Stacking of opening and closing tokens
12450         my $stackable;
12451         my $token_beg_next = $tokens_to_go[$ibeg_next];
12452
12453         # patch to make something like 'qw(' behave like an opening paren
12454         # (aran.t)
12455         if ( $types_to_go[$ibeg_next] eq 'q' ) {
12456             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
12457                 $token_beg_next = $1;
12458             }
12459         }
12460
12461         if (   $is_closing_token{$token_end}
12462             && $is_closing_token{$token_beg_next} )
12463         {
12464             $stackable = $stack_closing_token{$token_beg_next}
12465               unless ( $block_type_to_go[$ibeg_next] )
12466               ;    # shouldn't happen; just checking
12467         }
12468         elsif ($is_opening_token{$token_end}
12469             && $is_opening_token{$token_beg_next} )
12470         {
12471             $stackable = $stack_opening_token{$token_beg_next}
12472               unless ( $block_type_to_go[$ibeg_next] )
12473               ;    # shouldn't happen; just checking
12474         }
12475
12476         if ($stackable) {
12477
12478             my $is_semicolon_terminated;
12479             if ( $n + 1 == $n_last_line ) {
12480                 my ( $terminal_type, $i_terminal ) = terminal_type(
12481                     \@types_to_go, \@block_type_to_go,
12482                     $ibeg_next,    $iend_next
12483                 );
12484                 $is_semicolon_terminated = $terminal_type eq ';'
12485                   && $nesting_depth_to_go[$iend_next] <
12486                   $nesting_depth_to_go[$ibeg_next];
12487             }
12488
12489             # this must be a line with just an opening token
12490             # or end in a semicolon
12491             if (
12492                 $is_semicolon_terminated
12493                 || (   $iend_next == $ibeg_next
12494                     || $iend_next == $ibeg_next + 2
12495                     && $types_to_go[$iend_next] eq '#' )
12496               )
12497             {
12498                 my $valid_flag = 1;
12499                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12500                 @{$rvertical_tightness_flags} =
12501                   ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
12502                   );
12503             }
12504         }
12505     }
12506
12507     # Check for a last line with isolated opening BLOCK curly
12508     elsif ($rOpts_block_brace_vertical_tightness
12509         && $ibeg eq $iend
12510         && $types_to_go[$iend] eq '{'
12511         && $block_type_to_go[$iend] =~
12512         /$block_brace_vertical_tightness_pattern/o )
12513     {
12514         @{$rvertical_tightness_flags} =
12515           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
12516     }
12517
12518     # pack in the sequence numbers of the ends of this line
12519     $rvertical_tightness_flags->[4] = get_seqno($ibeg);
12520     $rvertical_tightness_flags->[5] = get_seqno($iend);
12521     return $rvertical_tightness_flags;
12522 }
12523
sub get_seqno {

    # Return the sequence number which the vertical aligner should use for
    # the token at batch index $ii.
    #
    # Normally this is just the entry in @type_sequence_to_go.  Tokens of
    # type 'q' are special: so that the delimiters of a qw quote can be
    # stacked somewhat like real opening and closing tokens, the value -1 is
    # returned instead when
    #   - $ii > 0 and the token text starts with 'qw' followed by one of
    #     ( { [, or
    #   - $ii == 0, $ending_in_quote is false, and the token text ends with
    #     one of ) } ]
    my ($ii) = @_;
    my $seqno = $type_sequence_to_go[$ii];

    # only quote-like tokens need the special treatment
    return ($seqno) unless ( $types_to_go[$ii] eq 'q' );

    my $SEQ_QW = -1;
    my $token  = $tokens_to_go[$ii];

    my $looks_like_qw_delimiter;
    if ( $ii > 0 ) {

        # a qw quote opening somewhere after the first token of the batch
        $looks_like_qw_delimiter = ( $token =~ /^qw\s*[\(\{\[]/ );
    }
    elsif ( !$ending_in_quote ) {

        # first token of the batch: look for a closing delimiter at its end
        $looks_like_qw_delimiter = ( $token =~ /[\)\}\]]$/ );
    }

    $seqno = $SEQ_QW if ($looks_like_qw_delimiter);
    return ($seqno);
}
12545
{

    # Lookup tables private to sub set_vertical_alignment_markers.  They are
    # filled in at compile time by the BEGIN block below.
    my %is_vertical_alignment_type;
    my %is_vertical_alignment_keyword;

    BEGIN {

        # token types before which vertical alignment is allowed
        my @alignment_types = qw#
          = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
          { ? : => =~ && || // ~~ !~~
          #;
        $is_vertical_alignment_type{$_} = 1 foreach @alignment_types;

        # keywords before which vertical alignment is allowed
        my @alignment_keywords =
          qw(if unless and or err eq ne for foreach while until);
        $is_vertical_alignment_keyword{$_} = 1 foreach @alignment_keywords;
    }

    sub set_vertical_alignment_markers {

        # First step toward vertical alignment of the lines of output text:
        # find the tokens which can serve as vertical alignment markers
        # (such as an '=').
        #
        # Input:
        #   $ri_first, $ri_last - references to arrays holding, for each
        #     output line of this batch, the index of its first and last
        #     token
        #
        # Result: for every token index $i of the batch,
        # $matching_token_to_go[$i] is set to the alignment marker accepted
        # at that token, or to '' if there is none.  An entry which is
        # already true on input means "do not align this token"; it is
        # simply reset to ''.
        my ( $ri_first, $ri_last ) = @_;

        # nothing to do if we aren't allowed to change whitespace:
        # just clear all of the markers
        unless ($rOpts_add_whitespace) {
            $matching_token_to_go[$_] = '' foreach ( 0 .. $max_index_to_go );
            return;
        }

        # remember the index of the last nonblank token before any side
        # comment: step back over the comment and then over one blank
        my $i_terminal = $max_index_to_go;
        if ( $types_to_go[$i_terminal] eq '#' && $i_terminal > 0 ) {
            $i_terminal--;
            if ( $types_to_go[$i_terminal] eq 'b' && $i_terminal > 0 ) {
                $i_terminal--;
            }
        }

        # look at each line of this batch..
        foreach my $line ( 0 .. $#{$ri_first} ) {
            my $ibeg = $ri_first->[$line];
            my $iend = $ri_last->[$line];

            # per-line state: index of the most recent 'align before' token
            # and a description of the previous nonblank token
            my $i_last_aligned_before = -1;
            my ( $prev_type, $prev_token, $prev_block_type ) = ( '', '', '' );

            # look at each token in this output line..
            foreach my $i ( $ibeg .. $iend ) {

                # A marker which is already set is a flag telling us not to
                # align this token.  Clear it and move on; the token is not
                # recorded as the previous nonblank token.
                if ( $matching_token_to_go[$i] ) {
                    $matching_token_to_go[$i] = '';
                    next;
                }

                my $type           = $types_to_go[$i];
                my $token          = $tokens_to_go[$i];
                my $alignment_type = '';
                my $follows_blank =
                  ( $i > $ibeg && $types_to_go[ $i - 1 ] eq 'b' );

                #--------------------------------------------------------
                # First see if we want to align BEFORE this token
                #--------------------------------------------------------

                # The earliest token that we can align before is the third
                # one on the line: it doesn't normally make sense to align
                # before the first token, and the token just before an
                # alignment must be a blank.
                if ( $follows_blank && $i >= $ibeg + 2 ) {

                    # align a side comment, unless it is a static side
                    # comment or a closing side comment
                    if ( $type eq '#' ) {
                        my $is_special_side_comment = (
                                 $rOpts->{'static-side-comments'}
                              && $token =~ /$static_side_comment_pattern/o
                          )
                          || ( $prev_block_type
                            && $token =~
                            /$closing_side_comment_prefix_pattern/o );

                        $alignment_type = $type
                          unless ($is_special_side_comment);
                    }

                    # otherwise, do not align two in a row to create a
                    # blank field
                    elsif ( $i_last_aligned_before != $i - 2 ) {

                        # align before certain keywords
                        if ( $type eq 'k' ) {
                            if ( $is_vertical_alignment_keyword{$token} ) {
                                $alignment_type = $token;
                            }
                        }

                        # align before certain token types
                        elsif ( $is_vertical_alignment_type{$type} ) {

                            # Do not align a terminal token.  It might get
                            # moved far to the right where it is hard to
                            # see because nothing follows it, and doing so
                            # may prevent other good alignments.
                            my $is_terminal =
                              ( $i == $iend || $i >= $i_terminal );

                            # Do not align just after a leading ':' or '.'.
                            # This would prevent alignment in something
                            # like the following:
                            #   $extra_space .=
                            #       ( $input_line_number < 10 )  ? "  "
                            #     : ( $input_line_number < 100 ) ? " "
                            #     :                                "";
                            my $follows_leading_dot_or_colon = (
                                     $i == $ibeg + 2
                                  && $types_to_go[$ibeg] =~ /^[\.\:]$/
                            );

                            # For a paren after a keyword, only align
                            # something like this:
                            #    if    ( $a ) { &a }
                            #    elsif ( $b ) { &b }
                            my $is_other_keyword_paren = (
                                     $token eq '('
                                  && $prev_type eq 'k'
                                  && $prev_token !~ /^(if|unless|elsif)$/
                            );

                            $alignment_type = $token
                              unless ( $is_terminal
                                || $follows_leading_dot_or_colon
                                || $is_other_keyword_paren );
                        }
                    }
                }

                $i_last_aligned_before = $i if ($alignment_type);

                #--------------------------------------------------------
                # Next see if we want to align AFTER the previous nonblank
                #--------------------------------------------------------

                # We want to line up ',' and interior ';' tokens, with the
                # added space AFTER these tokens (an interior ';' may occur
                # in short blocks).  This applies if nothing has been set
                # yet, this token follows a blank which follows a ',' or
                # ';', and this token is not itself a blank, a side comment
                # or a closing token.
                if (   !$alignment_type
                    && $follows_blank
                    && $prev_type =~ /^[\,\;]$/
                    && $type !~ /^[b\#\)\]\}]$/ )
                {
                    $alignment_type = $prev_type;
                }

                #--------------------------------------------------------
                # then store the value
                #--------------------------------------------------------
                $matching_token_to_go[$i] = $alignment_type;

                unless ( $type eq 'b' ) {
                    $prev_type       = $type;
                    $prev_token      = $token;
                    $prev_block_type = $block_type_to_go[$i];
                }
            }
        }
    }
}
12770
sub terminal_type {

    # Find the terminal token type of one line of tokens.
    #
    # Input:
    #   $rtype       - reference to the array of token types
    #   $rblock_type - reference to the array of block types
    #   $ibeg, $iend - indexes of the first and last token of the line
    #
    # Returns the terminal type in scalar context, or the pair
    # ( terminal type, index of that token ) in list context:
    #   '#'  (with index $ibeg) if the line starts with a comment
    #   ' '  (with index $ibeg) if the line has nothing but blanks and
    #        comments
    #   otherwise the type of the last token which is neither a blank nor a
    #   side comment, except that a '}' is reported as 'b' if it has no
    #   block type or if its block type is in %is_sort_map_grep_eval_do

    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;

    # start with the answer for an empty line
    my ( $terminal_type, $i_terminal ) = ( ' ', $ibeg );

    # check for full-line comment..
    if ( $rtype->[$ibeg] eq '#' ) {
        $terminal_type = $rtype->[$ibeg];
    }
    else {

        # scan from the end of the line toward the beginning..
        foreach my $i ( reverse( $ibeg .. $iend ) ) {
            my $type = $rtype->[$i];

            # skip past any side comment and blanks
            next if ( $type eq 'b' || $type eq '#' );

            # Found it.  Only a '}' which terminates a BLOCK is reported as
            # such; a '}' after sort/grep/map is hidden because it is not
            # necessarily the end of the line.  (terminal.t)
            my $block_type = $rblock_type->[$i];
            if ( $type eq '}'
                && ( !$block_type || $is_sort_map_grep_eval_do{$block_type} ) )
            {
                $type = 'b';
            }

            ( $terminal_type, $i_terminal ) = ( $type, $i );
            last;
        }
    }

    return wantarray ? ( $terminal_type, $i_terminal ) : $terminal_type;
}
12812
12813 {
12814     my %is_good_keyword_breakpoint;
12815     my %is_lt_gt_le_ge;
12816
12817     sub set_bond_strengths {
12818
12819         BEGIN {
12820
12821             @_ = qw(if unless while until for foreach);
12822             @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12823
12824             @_ = qw(lt gt le ge);
12825             @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12826
12827             ###############################################################
12828             # NOTE: NO_BREAK's set here are HINTS which may not be honored;
12829             # essential NO_BREAKS's must be enforced in section 2, below.
12830             ###############################################################
12831
12832             # adding NEW_TOKENS: add a left and right bond strength by
12833             # mimicking what is done for an existing token type.  You
12834             # can skip this step at first and take the default, then
12835             # tweak later to get desired results.
12836
12837             # The bond strengths should roughly follow precedence order where
12838             # possible.  If you make changes, please check the results very
12839             # carefully on a variety of scripts.
12840
12841             # no break around possible filehandle
12842             $left_bond_strength{'Z'}  = NO_BREAK;
12843             $right_bond_strength{'Z'} = NO_BREAK;
12844
12845             # never put a bare word on a new line:
12846             # example print (STDERR, "bla"); will fail with break after (
12847             $left_bond_strength{'w'} = NO_BREAK;
12848
12849         # blanks always have infinite strength to force breaks after real tokens
12850             $right_bond_strength{'b'} = NO_BREAK;
12851
12852             # try not to break on exponentiation
12853             @_                       = qw" ** .. ... <=> ";
12854             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12855             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12856
12857             # The comma-arrow has very low precedence but not a good break point
12858             $left_bond_strength{'=>'}  = NO_BREAK;
12859             $right_bond_strength{'=>'} = NOMINAL;
12860
12861             # ok to break after label
12862             $left_bond_strength{'J'}  = NO_BREAK;
12863             $right_bond_strength{'J'} = NOMINAL;
12864             $left_bond_strength{'j'}  = STRONG;
12865             $right_bond_strength{'j'} = STRONG;
12866             $left_bond_strength{'A'}  = STRONG;
12867             $right_bond_strength{'A'} = STRONG;
12868
12869             $left_bond_strength{'->'}  = STRONG;
12870             $right_bond_strength{'->'} = VERY_STRONG;
12871
12872             # breaking AFTER modulus operator is ok:
12873             @_ = qw" % ";
12874             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12875             @right_bond_strength{@_} =
12876               ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
12877
12878             # Break AFTER math operators * and /
12879             @_                       = qw" * / x  ";
12880             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12881             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12882
12883             # Break AFTER weakest math operators + and -
12884             # Make them weaker than * but a bit stronger than '.'
12885             @_ = qw" + - ";
12886             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12887             @right_bond_strength{@_} =
12888               ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
12889
12890             # breaking BEFORE these is just ok:
12891             @_                       = qw" >> << ";
12892             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12893             @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
12894
12895             # breaking before the string concatenation operator seems best
12896             # because it can be hard to see at the end of a line
12897             $right_bond_strength{'.'} = STRONG;
12898             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
12899
12900             @_                       = qw"} ] ) ";
12901             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12902             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12903
12904             # make these a little weaker than nominal so that they get
12905             # favored for end-of-line characters
12906             @_ = qw"!= == =~ !~ ~~ !~~";
12907             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12908             @right_bond_strength{@_} =
12909               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12910
12911             # break AFTER these
12912             @_ = qw" < >  | & >= <=";
12913             @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12914             @right_bond_strength{@_} =
12915               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12916
12917             # breaking either before or after a quote is ok
12918             # but bias for breaking before a quote
12919             $left_bond_strength{'Q'}  = NOMINAL;
12920             $right_bond_strength{'Q'} = NOMINAL + 0.02;
12921             $left_bond_strength{'q'}  = NOMINAL;
12922             $right_bond_strength{'q'} = NOMINAL;
12923
12924             # starting a line with a keyword is usually ok
12925             $left_bond_strength{'k'} = NOMINAL;
12926
12927             # we usually want to bond a keyword strongly to what immediately
12928             # follows, rather than leaving it stranded at the end of a line
12929             $right_bond_strength{'k'} = STRONG;
12930
12931             $left_bond_strength{'G'}  = NOMINAL;
12932             $right_bond_strength{'G'} = STRONG;
12933
12934             # it is good to break AFTER various assignment operators
12935             @_ = qw(
12936               = **= += *= &= <<= &&=
12937               -= /= |= >>= ||= //=
12938               .= %= ^=
12939               x=
12940             );
12941             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12942             @right_bond_strength{@_} =
12943               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12944
12945             # break BEFORE '&&' and '||' and '//'
12946             # set strength of '||' to same as '=' so that chains like
12947             # $a = $b || $c || $d   will break before the first '||'
12948             $right_bond_strength{'||'} = NOMINAL;
12949             $left_bond_strength{'||'}  = $right_bond_strength{'='};
12950
12951             # same thing for '//'
12952             $right_bond_strength{'//'} = NOMINAL;
12953             $left_bond_strength{'//'}  = $right_bond_strength{'='};
12954
12955             # set strength of && a little higher than ||
12956             $right_bond_strength{'&&'} = NOMINAL;
12957             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
12958
12959             $left_bond_strength{';'}  = VERY_STRONG;
12960             $right_bond_strength{';'} = VERY_WEAK;
12961             $left_bond_strength{'f'}  = VERY_STRONG;
12962
12963             # make right strength of for ';' a little less than '='
12964             # to make for contents break after the ';' to avoid this:
12965             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12966             #     $number_of_fields )
12967             # and make it weaker than ',' and 'and' too
12968             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12969
12970             # The strengths of ?/: should be somewhere between
12971             # an '=' and a quote (NOMINAL),
12972             # make strength of ':' slightly less than '?' to help
12973             # break long chains of ? : after the colons
12974             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
12975             $right_bond_strength{':'} = NO_BREAK;
12976             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
12977             $right_bond_strength{'?'} = NO_BREAK;
12978
12979             $left_bond_strength{','}  = VERY_STRONG;
12980             $right_bond_strength{','} = VERY_WEAK;
12981
12982             # Set bond strengths of certain keywords
12983             # make 'or', 'err', 'and' slightly weaker than a ','
12984             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
12985             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
12986             $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
12987             $left_bond_strength{'xor'}  = NOMINAL;
12988             $right_bond_strength{'and'} = NOMINAL;
12989             $right_bond_strength{'or'}  = NOMINAL;
12990             $right_bond_strength{'err'} = NOMINAL;
12991             $right_bond_strength{'xor'} = STRONG;
12992         }
12993
12994         # patch-its always ok to break at end of line
12995         $nobreak_to_go[$max_index_to_go] = 0;
12996
12997         # adding a small 'bias' to strengths is a simple way to make a line
12998         # break at the first of a sequence of identical terms.  For example,
12999         # to force long string of conditional operators to break with
13000         # each line ending in a ':', we can add a small number to the bond
13001         # strength of each ':'
13002         my $colon_bias = 0;
13003         my $amp_bias   = 0;
13004         my $bar_bias   = 0;
13005         my $and_bias   = 0;
13006         my $or_bias    = 0;
13007         my $dot_bias   = 0;
13008         my $f_bias     = 0;
13009         my $code_bias  = -.01;
13010         my $type       = 'b';
13011         my $token      = ' ';
13012         my $last_type;
13013         my $last_nonblank_type  = $type;
13014         my $last_nonblank_token = $token;
13015         my $delta_bias          = 0.0001;
13016         my $list_str            = $left_bond_strength{'?'};
13017
13018         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
13019             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
13020         );
13021
13022         # preliminary loop to compute bond strengths
13023         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
13024             $last_type = $type;
13025             if ( $type ne 'b' ) {
13026                 $last_nonblank_type  = $type;
13027                 $last_nonblank_token = $token;
13028             }
13029             $type = $types_to_go[$i];
13030
13031             # strength on both sides of a blank is the same
13032             if ( $type eq 'b' && $last_type ne 'b' ) {
13033                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
13034                 next;
13035             }
13036
13037             $token               = $tokens_to_go[$i];
13038             $block_type          = $block_type_to_go[$i];
13039             $i_next              = $i + 1;
13040             $next_type           = $types_to_go[$i_next];
13041             $next_token          = $tokens_to_go[$i_next];
13042             $total_nesting_depth = $nesting_depth_to_go[$i_next];
13043             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13044             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
13045             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13046
13047             # Some token chemistry...  The decision about where to break a
13048             # line depends upon a "bond strength" between tokens.  The LOWER
13049             # the bond strength, the MORE likely a break.  The strength
13050             # values are based on trial-and-error, and need to be tweaked
13051             # occasionally to get desired results.  Things to keep in mind
13052             # are:
13053             #   1. relative strengths are important.  small differences
13054             #      in strengths can make big formatting differences.
13055             #   2. each indentation level adds one unit of bond strength
13056             #   3. a value of NO_BREAK makes an unbreakable bond
13057             #   4. a value of VERY_WEAK is the strength of a ','
13058             #   5. values below NOMINAL are considered ok break points
13059             #   6. values above NOMINAL are considered poor break points
13060             # We are computing the strength of the bond between the current
13061             # token and the NEXT token.
13062             my $bond_str = VERY_STRONG;    # a default, high strength
13063
13064             #---------------------------------------------------------------
13065             # section 1:
13066             # use minimum of left and right bond strengths if defined;
13067             # digraphs and trigraphs like to break on their left
13068             #---------------------------------------------------------------
13069             my $bsr = $right_bond_strength{$type};
13070
13071             if ( !defined($bsr) ) {
13072
13073                 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
13074                     $bsr = STRONG;
13075                 }
13076                 else {
13077                     $bsr = VERY_STRONG;
13078                 }
13079             }
13080
13081             # define right bond strengths of certain keywords
13082             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
13083                 $bsr = $right_bond_strength{$token};
13084             }
13085             elsif ( $token eq 'ne' or $token eq 'eq' ) {
13086                 $bsr = NOMINAL;
13087             }
13088             my $bsl = $left_bond_strength{$next_nonblank_type};
13089
13090             # set terminal bond strength to the nominal value
13091             # this will cause good preceding breaks to be retained
13092             if ( $i_next_nonblank > $max_index_to_go ) {
13093                 $bsl = NOMINAL;
13094             }
13095
13096             if ( !defined($bsl) ) {
13097
13098                 if (   $is_digraph{$next_nonblank_type}
13099                     || $is_trigraph{$next_nonblank_type} )
13100                 {
13101                     $bsl = WEAK;
13102                 }
13103                 else {
13104                     $bsl = VERY_STRONG;
13105                 }
13106             }
13107
            # define left bond strengths of certain keywords
13109             if ( $next_nonblank_type eq 'k'
13110                 && defined( $left_bond_strength{$next_nonblank_token} ) )
13111             {
13112                 $bsl = $left_bond_strength{$next_nonblank_token};
13113             }
13114             elsif ($next_nonblank_token eq 'ne'
13115                 or $next_nonblank_token eq 'eq' )
13116             {
13117                 $bsl = NOMINAL;
13118             }
13119             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
13120                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
13121             }
13122
13123             # Note: it might seem that we would want to keep a NO_BREAK if
13124             # either token has this value.  This didn't work, because in an
13125             # arrow list, it prevents the comma from separating from the
13126             # following bare word (which is probably quoted by its arrow).
13127             # So necessary NO_BREAK's have to be handled as special cases
13128             # in the final section.
13129             $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
13130             my $bond_str_1 = $bond_str;
13131
13132             #---------------------------------------------------------------
13133             # section 2:
13134             # special cases
13135             #---------------------------------------------------------------
13136
13137             # allow long lines before final { in an if statement, as in:
13138             #    if (..........
13139             #      ..........)
13140             #    {
13141             #
13142             # Otherwise, the line before the { tends to be too short.
13143             if ( $type eq ')' ) {
13144                 if ( $next_nonblank_type eq '{' ) {
13145                     $bond_str = VERY_WEAK + 0.03;
13146                 }
13147             }
13148
13149             elsif ( $type eq '(' ) {
13150                 if ( $next_nonblank_type eq '{' ) {
13151                     $bond_str = NOMINAL;
13152                 }
13153             }
13154
13155             # break on something like '} (', but keep this stronger than a ','
13156             # example is in 'howe.pl'
13157             elsif ( $type eq 'R' or $type eq '}' ) {
13158                 if ( $next_nonblank_type eq '(' ) {
13159                     $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
13160                 }
13161             }
13162
13163             #-----------------------------------------------------------------
13164             # adjust bond strength bias
13165             #-----------------------------------------------------------------
13166
13167             # TESTING: add any bias set by sub scan_list at old comma
13168             # break points.
13169             elsif ( $type eq ',' ) {
13170                 $bond_str += $bond_strength_to_go[$i];
13171             }
13172
13173             elsif ( $type eq 'f' ) {
13174                 $bond_str += $f_bias;
13175                 $f_bias   += $delta_bias;
13176             }
13177
13178           # in long ?: conditionals, bias toward just one set per line (colon.t)
13179             elsif ( $type eq ':' ) {
13180                 if ( !$want_break_before{$type} ) {
13181                     $bond_str   += $colon_bias;
13182                     $colon_bias += $delta_bias;
13183                 }
13184             }
13185
13186             if (   $next_nonblank_type eq ':'
13187                 && $want_break_before{$next_nonblank_type} )
13188             {
13189                 $bond_str   += $colon_bias;
13190                 $colon_bias += $delta_bias;
13191             }
13192
13193             # if leading '.' is used, align all but 'short' quotes;
13194             # the idea is to not place something like "\n" on a single line.
13195             elsif ( $next_nonblank_type eq '.' ) {
13196                 if ( $want_break_before{'.'} ) {
13197                     unless (
13198                         $last_nonblank_type eq '.'
13199                         && (
13200                             length($token) <=
13201                             $rOpts_short_concatenation_item_length )
13202                         && ( $token !~ /^[\)\]\}]$/ )
13203                       )
13204                     {
13205                         $dot_bias += $delta_bias;
13206                     }
13207                     $bond_str += $dot_bias;
13208                 }
13209             }
13210             elsif ($next_nonblank_type eq '&&'
13211                 && $want_break_before{$next_nonblank_type} )
13212             {
13213                 $bond_str += $amp_bias;
13214                 $amp_bias += $delta_bias;
13215             }
13216             elsif ($next_nonblank_type eq '||'
13217                 && $want_break_before{$next_nonblank_type} )
13218             {
13219                 $bond_str += $bar_bias;
13220                 $bar_bias += $delta_bias;
13221             }
13222             elsif ( $next_nonblank_type eq 'k' ) {
13223
13224                 if (   $next_nonblank_token eq 'and'
13225                     && $want_break_before{$next_nonblank_token} )
13226                 {
13227                     $bond_str += $and_bias;
13228                     $and_bias += $delta_bias;
13229                 }
13230                 elsif ($next_nonblank_token =~ /^(or|err)$/
13231                     && $want_break_before{$next_nonblank_token} )
13232                 {
13233                     $bond_str += $or_bias;
13234                     $or_bias  += $delta_bias;
13235                 }
13236
13237                 # FIXME: needs more testing
13238                 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
13239                     $bond_str = $list_str if ( $bond_str > $list_str );
13240                 }
13241                 elsif ( $token eq 'err'
13242                     && !$want_break_before{$token} )
13243                 {
13244                     $bond_str += $or_bias;
13245                     $or_bias  += $delta_bias;
13246                 }
13247             }
13248
13249             if ( $type eq ':'
13250                 && !$want_break_before{$type} )
13251             {
13252                 $bond_str   += $colon_bias;
13253                 $colon_bias += $delta_bias;
13254             }
13255             elsif ( $type eq '&&'
13256                 && !$want_break_before{$type} )
13257             {
13258                 $bond_str += $amp_bias;
13259                 $amp_bias += $delta_bias;
13260             }
13261             elsif ( $type eq '||'
13262                 && !$want_break_before{$type} )
13263             {
13264                 $bond_str += $bar_bias;
13265                 $bar_bias += $delta_bias;
13266             }
13267             elsif ( $type eq 'k' ) {
13268
13269                 if ( $token eq 'and'
13270                     && !$want_break_before{$token} )
13271                 {
13272                     $bond_str += $and_bias;
13273                     $and_bias += $delta_bias;
13274                 }
13275                 elsif ( $token eq 'or'
13276                     && !$want_break_before{$token} )
13277                 {
13278                     $bond_str += $or_bias;
13279                     $or_bias  += $delta_bias;
13280                 }
13281             }
13282
13283             # keep matrix and hash indices together
13284             # but make them a little below STRONG to allow breaking open
13285             # something like {'some-word'}{'some-very-long-word'} at the }{
13286             # (bracebrk.t)
13287             if (   ( $type eq ']' or $type eq 'R' )
13288                 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
13289               )
13290             {
13291                 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
13292             }
13293
13294             if ( $next_nonblank_token =~ /^->/ ) {
13295
13296                 # increase strength to the point where a break in the following
13297                 # will be after the opening paren rather than at the arrow:
13298                 #    $a->$b($c);
13299                 if ( $type eq 'i' ) {
13300                     $bond_str = 1.45 * STRONG;
13301                 }
13302
13303                 elsif ( $type =~ /^[\)\]\}R]$/ ) {
13304                     $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
13305                 }
13306
13307                 # otherwise make strength before an '->' a little over a '+'
13308                 else {
13309                     if ( $bond_str <= NOMINAL ) {
13310                         $bond_str = NOMINAL + 0.01;
13311                     }
13312                 }
13313             }
13314
13315             if ( $token eq ')' && $next_nonblank_token eq '[' ) {
13316                 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
13317             }
13318
13319             # map1.t -- correct for a quirk in perl
13320             if (   $token eq '('
13321                 && $next_nonblank_type eq 'i'
13322                 && $last_nonblank_type eq 'k'
13323                 && $is_sort_map_grep{$last_nonblank_token} )
13324
13325               #     /^(sort|map|grep)$/ )
13326             {
13327                 $bond_str = NO_BREAK;
13328             }
13329
13330             # extrude.t: do not break before paren at:
13331             #    -l pid_filename(
13332             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
13333                 $bond_str = NO_BREAK;
13334             }
13335
13336             # good to break after end of code blocks
13337             if ( $type eq '}' && $block_type ) {
13338
13339                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
13340                 $code_bias += $delta_bias;
13341             }
13342
13343             if ( $type eq 'k' ) {
13344
13345                 # allow certain control keywords to stand out
13346                 if (   $next_nonblank_type eq 'k'
13347                     && $is_last_next_redo_return{$token} )
13348                 {
13349                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
13350                 }
13351
13352 # Don't break after keyword my.  This is a quick fix for a
13353 # rare problem with perl. An example is this line from file
13354 # Container.pm:
13355 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
13356
13357                 if ( $token eq 'my' ) {
13358                     $bond_str = NO_BREAK;
13359                 }
13360
13361             }
13362
13363             # good to break before 'if', 'unless', etc
13364             if ( $is_if_brace_follower{$next_nonblank_token} ) {
13365                 $bond_str = VERY_WEAK;
13366             }
13367
13368             if ( $next_nonblank_type eq 'k' ) {
13369
13370                 # keywords like 'unless', 'if', etc, within statements
13371                 # make good breaks
13372                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
13373                     $bond_str = VERY_WEAK / 1.05;
13374                 }
13375             }
13376
13377             # try not to break before a comma-arrow
13378             elsif ( $next_nonblank_type eq '=>' ) {
13379                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
13380             }
13381
13382          #----------------------------------------------------------------------
13383          # only set NO_BREAK's from here on
13384          #----------------------------------------------------------------------
13385             if ( $type eq 'C' or $type eq 'U' ) {
13386
13387                 # use strict requires that bare word and => not be separated
13388                 if ( $next_nonblank_type eq '=>' ) {
13389                     $bond_str = NO_BREAK;
13390                 }
13391
13392                 # Never break between a bareword and a following paren because
13393                 # perl may give an error.  For example, if a break is placed
13394                 # between 'to_filehandle' and its '(' the following line will
13395                 # give a syntax error [Carp.pm]: my( $no) =fileno(
13396                 # to_filehandle( $in)) ;
13397                 if ( $next_nonblank_token eq '(' ) {
13398                     $bond_str = NO_BREAK;
13399                 }
13400             }
13401
13402            # use strict requires that bare word within braces not start new line
13403             elsif ( $type eq 'L' ) {
13404
13405                 if ( $next_nonblank_type eq 'w' ) {
13406                     $bond_str = NO_BREAK;
13407                 }
13408             }
13409
13410             # in older version of perl, use strict can cause problems with
13411             # breaks before bare words following opening parens.  For example,
13412             # this will fail under older versions if a break is made between
13413             # '(' and 'MAIL':
13414             #  use strict;
13415             #  open( MAIL, "a long filename or command");
13416             #  close MAIL;
13417             elsif ( $type eq '{' ) {
13418
13419                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
13420
13421                     # but it's fine to break if the word is followed by a '=>'
13422                     # or if it is obviously a sub call
13423                     my $i_next_next_nonblank = $i_next_nonblank + 1;
13424                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
13425                     if (   $next_next_type eq 'b'
13426                         && $i_next_nonblank < $max_index_to_go )
13427                     {
13428                         $i_next_next_nonblank++;
13429                         $next_next_type = $types_to_go[$i_next_next_nonblank];
13430                     }
13431
13432                     ##if ( $next_next_type ne '=>' ) {
13433                     # these are ok: '->xxx', '=>', '('
13434
13435                     # We'll check for an old breakpoint and keep a leading
13436                     # bareword if it was that way in the input file.
13437                     # Presumably it was ok that way.  For example, the
13438                     # following would remain unchanged:
13439                     #
13440                     # @months = (
13441                     #   January,   February, March,    April,
13442                     #   May,       June,     July,     August,
13443                     #   September, October,  November, December,
13444                     # );
13445                     #
13446                     # This should be sufficient:
13447                     if ( !$old_breakpoint_to_go[$i]
13448                         && ( $next_next_type eq ',' || $next_next_type eq '}' )
13449                       )
13450                     {
13451                         $bond_str = NO_BREAK;
13452                     }
13453                 }
13454             }
13455
13456             elsif ( $type eq 'w' ) {
13457
13458                 if ( $next_nonblank_type eq 'R' ) {
13459                     $bond_str = NO_BREAK;
13460                 }
13461
13462                 # use strict requires that bare word and => not be separated
13463                 if ( $next_nonblank_type eq '=>' ) {
13464                     $bond_str = NO_BREAK;
13465                 }
13466             }
13467
13468             # in fact, use strict hates bare words on any new line.  For
13469             # example, a break before the underscore here provokes the
13470             # wrath of use strict:
13471             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
13472             elsif ( $type eq 'F' ) {
13473                 $bond_str = NO_BREAK;
13474             }
13475
13476             # use strict does not allow separating type info from trailing { }
13477             # testfile is readmail.pl
13478             elsif ( $type eq 't' or $type eq 'i' ) {
13479
13480                 if ( $next_nonblank_type eq 'L' ) {
13481                     $bond_str = NO_BREAK;
13482                 }
13483             }
13484
13485             # Do not break between a possible filehandle and a ? or / and do
13486             # not introduce a break after it if there is no blank
13487             # (extrude.t)
13488             elsif ( $type eq 'Z' ) {
13489
13490                 # dont break..
13491                 if (
13492
13493                     # if there is no blank and we do not want one. Examples:
13494                     #    print $x++    # do not break after $x
13495                     #    print HTML"HELLO"   # break ok after HTML
13496                     (
13497                            $next_type ne 'b'
13498                         && defined( $want_left_space{$next_type} )
13499                         && $want_left_space{$next_type} == WS_NO
13500                     )
13501
13502                     # or we might be followed by the start of a quote
13503                     || $next_nonblank_type =~ /^[\/\?]$/
13504                   )
13505                 {
13506                     $bond_str = NO_BREAK;
13507                 }
13508             }
13509
13510             # Do not break before a possible file handle
13511             if ( $next_nonblank_type eq 'Z' ) {
13512                 $bond_str = NO_BREAK;
13513             }
13514
13515             # As a defensive measure, do not break between a '(' and a
13516             # filehandle.  In some cases, this can cause an error.  For
13517             # example, the following program works:
13518             #    my $msg="hi!\n";
13519             #    print
13520             #    ( STDOUT
13521             #    $msg
13522             #    );
13523             #
13524             # But this program fails:
13525             #    my $msg="hi!\n";
13526             #    print
13527             #    (
13528             #    STDOUT
13529             #    $msg
13530             #    );
13531             #
13532             # This is normally only a problem with the 'extrude' option
13533             if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
13534                 $bond_str = NO_BREAK;
13535             }
13536
13537             # Breaking before a ++ can cause perl to guess wrong. For
13538             # example the following line will cause a syntax error
13539             # with -extrude if we break between '$i' and '++' [fixstyle2]
13540             #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
13541             elsif ( $next_nonblank_type eq '++' ) {
13542                 $bond_str = NO_BREAK;
13543             }
13544
13545             # Breaking before a ? before a quote can cause trouble if
13546             # they are not separated by a blank.
13547             # Example: a syntax error occurs if you break before the ? here
13548             #  my$logic=join$all?' && ':' || ',@regexps;
13549             # From: Professional_Perl_Programming_Code/multifind.pl
13550             elsif ( $next_nonblank_type eq '?' ) {
13551                 $bond_str = NO_BREAK
13552                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
13553             }
13554
13555             # Breaking before a . followed by a number
13556             # can cause trouble if there is no intervening space
13557             # Example: a syntax error occurs if you break before the .2 here
13558             #  $str .= pack($endian.2, ensurrogate($ord));
13559             # From: perl58/Unicode.pm
13560             elsif ( $next_nonblank_type eq '.' ) {
13561                 $bond_str = NO_BREAK
13562                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
13563             }
13564
13565             # patch to put cuddled elses back together when on multiple
13566             # lines, as in: } \n else \n { \n
13567             if ($rOpts_cuddled_else) {
13568
13569                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
13570                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
13571                 {
13572                     $bond_str = NO_BREAK;
13573                 }
13574             }
13575
13576             # keep '}' together with ';'
13577             if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
13578                 $bond_str = NO_BREAK;
13579             }
13580
13581             # never break between sub name and opening paren
13582             if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
13583                 $bond_str = NO_BREAK;
13584             }
13585
13586             #---------------------------------------------------------------
13587             # section 3:
13588             # now take nesting depth into account
13589             #---------------------------------------------------------------
13590             # final strength incorporates the bond strength and nesting depth
13591             my $strength;
13592
13593             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
13594                 if ( $total_nesting_depth > 0 ) {
13595                     $strength = $bond_str + $total_nesting_depth;
13596                 }
13597                 else {
13598                     $strength = $bond_str;
13599                 }
13600             }
13601             else {
13602                 $strength = NO_BREAK;
13603             }
13604
13605             # always break after side comment
13606             if ( $type eq '#' ) { $strength = 0 }
13607
13608             $bond_strength_to_go[$i] = $strength;
13609
13610             FORMATTER_DEBUG_FLAG_BOND && do {
13611                 my $str = substr( $token, 0, 15 );
13612                 $str .= ' ' x ( 16 - length($str) );
13613                 print
13614 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
13615             };
13616         }
13617     }
13618
13619 }
13620
sub pad_array_to_go {

    # Append padding entries just past the last token of the current
    # batch.  This lets scan_list and set_bond_strengths look one or two
    # tokens ahead without special coding for the end of the arrays.
    my $i_last  = $max_index_to_go;
    my $i_pad_1 = $i_last + 1;
    my $i_pad_2 = $i_last + 2;

    # the two padding tokens are empty strings of blank type 'b'
    @tokens_to_go[ $i_pad_1, $i_pad_2 ] = ( '', '' );
    @types_to_go[ $i_pad_1, $i_pad_2 ]  = ( 'b', 'b' );

    # the first padding token starts out at the depth of the last token
    my $depth_of_last = $nesting_depth_to_go[$i_last];
    $nesting_depth_to_go[$i_pad_1] = $depth_of_last;

    # then correct that depth if the last token closes or opens a container
    my $type_of_last = $types_to_go[$i_last];

    #    /^[R\}\)\]]$/
    if ( $is_closing_type{$type_of_last} ) {
        if ( $depth_of_last > 0 ) {
            $nesting_depth_to_go[$i_pad_1] -= 1;
        }
        else {

            # shouldn't happen: a closing token at depth zero or less
            # is reported unless a brace error has already been seen
            unless ( get_saw_brace_error() ) {
                warning(
"Program bug in scan_list: hit nesting error which should have been caught\n"
                );
                report_definite_bug();
            }
        }
    }

    #       /^[L\{\(\[]$/
    elsif ( $is_opening_type{$type_of_last} ) {
        $nesting_depth_to_go[$i_pad_1] += 1;
    }
}
13654
13655 {    # begin scan_list
13656
13657     my (
13658         $block_type,                $current_depth,
13659         $depth,                     $i,
13660         $i_last_nonblank_token,     $last_colon_sequence_number,
13661         $last_nonblank_token,       $last_nonblank_type,
13662         $last_old_breakpoint_count, $minimum_depth,
13663         $next_nonblank_block_type,  $next_nonblank_token,
13664         $next_nonblank_type,        $old_breakpoint_count,
13665         $starting_breakpoint_count, $starting_depth,
13666         $token,                     $type,
13667         $type_sequence,
13668     );
13669
13670     my (
13671         @breakpoint_stack,              @breakpoint_undo_stack,
13672         @comma_index,                   @container_type,
13673         @identifier_count_stack,        @index_before_arrow,
13674         @interrupted_list,              @item_count_stack,
13675         @last_comma_index,              @last_dot_index,
13676         @last_nonblank_type,            @old_breakpoint_count_stack,
13677         @opening_structure_index_stack, @rfor_semicolon_list,
13678         @has_old_logical_breakpoints,   @rand_or_list,
13679         @i_equals,
13680     );
13681
13682     # routine to define essential variables when we go 'up' to
13683     # a new depth
13684     sub check_for_new_minimum_depth {
13685         my $depth = shift;
13686         if ( $depth < $minimum_depth ) {
13687
13688             $minimum_depth = $depth;
13689
13690             # these arrays need not retain values between calls
13691             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
13692             $container_type[$depth]                = "";
13693             $identifier_count_stack[$depth]        = 0;
13694             $index_before_arrow[$depth]            = -1;
13695             $interrupted_list[$depth]              = 1;
13696             $item_count_stack[$depth]              = 0;
13697             $last_nonblank_type[$depth]            = "";
13698             $opening_structure_index_stack[$depth] = -1;
13699
13700             $breakpoint_undo_stack[$depth]       = undef;
13701             $comma_index[$depth]                 = undef;
13702             $last_comma_index[$depth]            = undef;
13703             $last_dot_index[$depth]              = undef;
13704             $old_breakpoint_count_stack[$depth]  = undef;
13705             $has_old_logical_breakpoints[$depth] = 0;
13706             $rand_or_list[$depth]                = [];
13707             $rfor_semicolon_list[$depth]         = [];
13708             $i_equals[$depth]                    = -1;
13709
13710             # these arrays must retain values between calls
13711             if ( !defined( $has_broken_sublist[$depth] ) ) {
13712                 $dont_align[$depth]         = 0;
13713                 $has_broken_sublist[$depth] = 0;
13714                 $want_comma_break[$depth]   = 0;
13715             }
13716         }
13717     }
13718
13719     # routine to decide which commas to break at within a container;
13720     # returns:
13721     #   $bp_count = number of comma breakpoints set
13722     #   $do_not_break_apart = a flag indicating if container need not
13723     #     be broken open
13724     sub set_comma_breakpoints {
13725
13726         my $dd                 = shift;
13727         my $bp_count           = 0;
13728         my $do_not_break_apart = 0;
13729
13730         # anything to do?
13731         if ( $item_count_stack[$dd] ) {
13732
13733             # handle commas not in containers...
13734             if ( $dont_align[$dd] ) {
13735                 do_uncontained_comma_breaks($dd);
13736             }
13737
13738             # handle commas within containers...
13739             else {
13740                 my $fbc = $forced_breakpoint_count;
13741
13742                 # always open comma lists not preceded by keywords,
13743                 # barewords, identifiers (that is, anything that doesn't
13744                 # look like a function call)
13745                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
13746
13747                 set_comma_breakpoints_do(
13748                     $dd,
13749                     $opening_structure_index_stack[$dd],
13750                     $i,
13751                     $item_count_stack[$dd],
13752                     $identifier_count_stack[$dd],
13753                     $comma_index[$dd],
13754                     $next_nonblank_type,
13755                     $container_type[$dd],
13756                     $interrupted_list[$dd],
13757                     \$do_not_break_apart,
13758                     $must_break_open,
13759                 );
13760                 $bp_count = $forced_breakpoint_count - $fbc;
13761                 $do_not_break_apart = 0 if $must_break_open;
13762             }
13763         }
13764         return ( $bp_count, $do_not_break_apart );
13765     }
13766
13767     sub do_uncontained_comma_breaks {
13768
13769         # Handle commas not in containers...
13770         # This is the catch-all for commas which do not fall
13771         # within any container, so we have no list structure
13772         # to guide us.  We bias the bond strength so that
13773         # breaks are favored at commas which ended lines in
13774         # the input file.  This usually works better than
13775         # packing as many items as possible onto each line.
13776         # A downside is that it won't work very well if the
13777         # input file is garbage.  However, the user can always
13778         # prevent following the old breakpoints with the
13779         # -iob flag.
13780         my ($dd) = @_;
13781         my $bias = -.01;
13782         # apply the bias at each comma which had an old breakpoint
13783         foreach my $i_comma ( @{ $comma_index[$dd] } ) {
13784             next unless ( $old_breakpoint_to_go[$i_comma] );
13785             $bond_strength_to_go[$i_comma] = $bias;
13786
13787             # reduce bias magnitude to force breaks in order
13788             $bias *= 0.99;
13789         }
13790
13791         # Also put a break before the first comma if
13792         # (1) there was a break there in the input, and
13793         # (2) there was exactly one previous break in the input
13794         #
13795         # For example, we will follow the user and break after
13796         # 'print' in this snippet:
13797         #    print
13798         #      "conformability (Not the same dimension)\n",
13799         #      "\t", $have, " is ", text_unit($hu), "\n",
13800         #      "\t", $want, " is ", text_unit($wu), "\n",
13801         #      ;
13802         my $i_first_comma = $comma_index[$dd][0];
13803         if ( $old_breakpoint_to_go[$i_first_comma] ) {
13804             my $comma_level     = $levels_to_go[$i_first_comma];
13805             my $i_candidate     = -1;
13806             my $old_break_count = 0;
13807
13808             # scan backward over the tokens before the first comma
13809             foreach my $ii ( reverse 0 .. $i_first_comma - 1 ) {
13810                 next unless ( $old_breakpoint_to_go[$ii] );
13811                 last if ( ++$old_break_count > 1 );
13812                 if ( $levels_to_go[$ii] == $comma_level ) {
13813                     $i_candidate = $ii;
13814                 }
13815             }
13816             set_forced_breakpoint($i_candidate)
13817               if ( $i_candidate >= 0 && $old_break_count == 1 );
13818         }
13819     }
13820
13821     # tokens which mark the container they introduce as a logical container
13822     my %is_logical_container;
13823     BEGIN {
13824         my @q = qw# if elsif unless while and or err not && | || ? : ! #;
13825         $is_logical_container{$_} = 1 foreach @q;
13826     }
13827
13828     sub set_for_semicolon_breakpoints {
13829
13830         # break at each index saved in @rfor_semicolon_list for depth $dd
13831         my ($dd) = @_;
13832         set_forced_breakpoint($_) foreach @{ $rfor_semicolon_list[$dd] };
13833     }
13834
13835     sub set_logical_breakpoints {
13836
13837         # Set forced breakpoints at the logical operators which were
13838         # saved in @rand_or_list for the container at depth $dd.
13839         my ($dd) = @_;
13840         my $want_breaks =
13841           $item_count_stack[$dd] == 0
13842           && $is_logical_container{ $container_type[$dd] }
13843           # TESTING:
13844           || $has_old_logical_breakpoints[$dd];
13845         if ($want_breaks) {
13846
13847             # Look for breaks in this order:
13848             # 0   1    2   3
13849             # or  and  ||  &&
13850             foreach my $rank ( 0 .. 3 ) {
13851                 next unless ( $rand_or_list[$dd][$rank] );
13852                 set_forced_breakpoint($_)
13853                   foreach @{ $rand_or_list[$dd][$rank] };
13854
13855                 # break at any 'if' and 'unless' too
13856                 set_forced_breakpoint($_)
13857                   foreach @{ $rand_or_list[$dd][4] };
13858
13859                 # use only the first list found, then clear them all
13860                 $rand_or_list[$dd] = [];
13861                 last;
13862             }
13863         }
13864     }
13865
13866     sub is_unbreakable_container {
13867
13868         # Returns true if the container at depth $dd is of a type which
13869         # must never be broken because bad things can happen (map1.t)
13870         my ($dd) = @_;
13871         return $is_sort_map_grep{ $container_type[$dd] };
13872     }
13873
13874     sub scan_list {
13875
13876         # This routine is responsible for setting line breaks for all lists,
13877         # so that hierarchical structure can be displayed and so that list
13878         # items can be vertically aligned.  The output of this routine is
13879         # stored in the array @forced_breakpoint_to_go, which is used to set
13880         # final breakpoints.
13881
13882         $starting_depth = $nesting_depth_to_go[0];
13883
13884         $block_type                 = ' ';
13885         $current_depth              = $starting_depth;
13886         $i                          = -1;
13887         $last_colon_sequence_number = -1;
13888         $last_nonblank_token        = ';';
13889         $last_nonblank_type         = ';';
13890         $last_nonblank_block_type   = ' ';
13891         $last_old_breakpoint_count  = 0;
13892         $minimum_depth = $current_depth + 1;    # forces update in check below
13893         $old_breakpoint_count      = 0;
13894         $starting_breakpoint_count = $forced_breakpoint_count;
13895         $token                     = ';';
13896         $type                      = ';';
13897         $type_sequence             = '';
13898
13899         check_for_new_minimum_depth($current_depth);
13900
13901         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13902         my $want_previous_breakpoint = -1;
13903
13904         my $saw_good_breakpoint;
13905         my $i_line_end   = -1;
13906         my $i_line_start = -1;
13907
13908         # loop over all tokens in this batch
13909         while ( ++$i <= $max_index_to_go ) {
13910             if ( $type ne 'b' ) {
13911                 $i_last_nonblank_token    = $i - 1;
13912                 $last_nonblank_type       = $type;
13913                 $last_nonblank_token      = $token;
13914                 $last_nonblank_block_type = $block_type;
13915             }
13916             $type          = $types_to_go[$i];
13917             $block_type    = $block_type_to_go[$i];
13918             $token         = $tokens_to_go[$i];
13919             $type_sequence = $type_sequence_to_go[$i];
13920             my $next_type       = $types_to_go[ $i + 1 ];
13921             my $next_token      = $tokens_to_go[ $i + 1 ];
13922             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13923             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
13924             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
13925             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13926
13927             # set break if flag was set
13928             if ( $want_previous_breakpoint >= 0 ) {
13929                 set_forced_breakpoint($want_previous_breakpoint);
13930                 $want_previous_breakpoint = -1;
13931             }
13932
13933             $last_old_breakpoint_count = $old_breakpoint_count;
13934             if ( $old_breakpoint_to_go[$i] ) {
13935                 $i_line_end   = $i;
13936                 $i_line_start = $i_next_nonblank;
13937
13938                 $old_breakpoint_count++;
13939
13940                 # Break before certain keywords if user broke there and
13941                 # this is a 'safe' break point. The idea is to retain
13942                 # any preferred breaks for sequential list operations,
13943                 # like a schwartzian transform.
13944                 if ($rOpts_break_at_old_keyword_breakpoints) {
13945                     if (
13946                            $next_nonblank_type eq 'k'
13947                         && $is_keyword_returning_list{$next_nonblank_token}
13948                         && (   $type =~ /^[=\)\]\}Riw]$/
13949                             || $type eq 'k'
13950                             && $is_keyword_returning_list{$token} )
13951                       )
13952                     {
13953
13954                         # we actually have to set this break next time through
13955                         # the loop because if we are at a closing token (such
13956                         # as '}') which forms a one-line block, this break might
13957                         # get undone.
13958                         $want_previous_breakpoint = $i;
13959                     }
13960                 }
13961             }
13962             next if ( $type eq 'b' );
13963             $depth = $nesting_depth_to_go[ $i + 1 ];
13964
13965             # safety check - be sure we always break after a comment
13966             # Shouldn't happen .. an error here probably means that the
13967             # nobreak flag did not get turned off correctly during
13968             # formatting.
13969             if ( $type eq '#' ) {
13970                 if ( $i != $max_index_to_go ) {
13971                     warning(
13972 "Non-fatal program bug: backup logic needed to break after a comment\n"
13973                     );
13974                     report_definite_bug();
13975                     $nobreak_to_go[$i] = 0;
13976                     set_forced_breakpoint($i);
13977                 }
13978             }
13979
13980             # Force breakpoints at certain tokens in long lines.
13981             # Note that such breakpoints will be undone later if these tokens
13982             # are fully contained within parens on a line.
13983             if (
13984
13985                 # break before a keyword within a line
13986                 $type eq 'k'
13987                 && $i > 0
13988
13989                 # if one of these keywords:
13990                 && $token =~ /^(if|unless|while|until|for)$/
13991
13992                 # but do not break at something like '1 while'
13993                 && ( $last_nonblank_type ne 'n' || $i > 2 )
13994
13995                 # and let keywords follow a closing 'do' brace
13996                 && $last_nonblank_block_type ne 'do'
13997
13998                 && (
13999                     $is_long_line
14000
14001                     # or container is broken (by side-comment, etc)
14002                     || (   $next_nonblank_token eq '('
14003                         && $mate_index_to_go[$i_next_nonblank] < $i )
14004                 )
14005               )
14006             {
14007                 set_forced_breakpoint( $i - 1 );
14008             }
14009
14010             # remember locations of '||'  and '&&' for possible breaks if we
14011             # decide this is a long logical expression.
14012             if ( $type eq '||' ) {
14013                 push @{ $rand_or_list[$depth][2] }, $i;
14014                 ++$has_old_logical_breakpoints[$depth]
14015                   if ( ( $i == $i_line_start || $i == $i_line_end )
14016                     && $rOpts_break_at_old_logical_breakpoints );
14017             }
14018             elsif ( $type eq '&&' ) {
14019                 push @{ $rand_or_list[$depth][3] }, $i;
14020                 ++$has_old_logical_breakpoints[$depth]
14021                   if ( ( $i == $i_line_start || $i == $i_line_end )
14022                     && $rOpts_break_at_old_logical_breakpoints );
14023             }
14024             elsif ( $type eq 'f' ) {
14025                 push @{ $rfor_semicolon_list[$depth] }, $i;
14026             }
14027             elsif ( $type eq 'k' ) {
14028                 if ( $token eq 'and' ) {
14029                     push @{ $rand_or_list[$depth][1] }, $i;
14030                     ++$has_old_logical_breakpoints[$depth]
14031                       if ( ( $i == $i_line_start || $i == $i_line_end )
14032                         && $rOpts_break_at_old_logical_breakpoints );
14033                 }
14034
14035                 # break immediately at 'or's which are probably not in a logical
14036                 # block -- but we will break in logical breaks below so that
14037                 # they do not add to the forced_breakpoint_count
14038                 elsif ( $token eq 'or' ) {
14039                     push @{ $rand_or_list[$depth][0] }, $i;
14040                     ++$has_old_logical_breakpoints[$depth]
14041                       if ( ( $i == $i_line_start || $i == $i_line_end )
14042                         && $rOpts_break_at_old_logical_breakpoints );
14043                     if ( $is_logical_container{ $container_type[$depth] } ) {
14044                     }
14045                     else {
14046                         if ($is_long_line) { set_forced_breakpoint($i) }
14047                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
14048                             && $rOpts_break_at_old_logical_breakpoints )
14049                         {
14050                             $saw_good_breakpoint = 1;
14051                         }
14052                     }
14053                 }
14054                 elsif ( $token eq 'if' || $token eq 'unless' ) {
14055                     push @{ $rand_or_list[$depth][4] }, $i;
14056                     if ( ( $i == $i_line_start || $i == $i_line_end )
14057                         && $rOpts_break_at_old_logical_breakpoints )
14058                     {
14059                         set_forced_breakpoint($i);
14060                     }
14061                 }
14062             }
14063             elsif ( $is_assignment{$type} ) {
14064                 $i_equals[$depth] = $i;
14065             }
14066
14067             if ($type_sequence) {
14068
14069                 # handle any postponed closing breakpoints
14070                 if ( $token =~ /^[\)\]\}\:]$/ ) {
14071                     if ( $type eq ':' ) {
14072                         $last_colon_sequence_number = $type_sequence;
14073
14074                         # TESTING: retain break at a ':' line break
14075                         if ( ( $i == $i_line_start || $i == $i_line_end )
14076                             && $rOpts_break_at_old_ternary_breakpoints )
14077                         {
14078
14079                             # TESTING:
14080                             set_forced_breakpoint($i);
14081
14082                             # break at previous '='
14083                             if ( $i_equals[$depth] > 0 ) {
14084                                 set_forced_breakpoint( $i_equals[$depth] );
14085                                 $i_equals[$depth] = -1;
14086                             }
14087                         }
14088                     }
14089                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
14090                         my $inc = ( $type eq ':' ) ? 0 : 1;
14091                         set_forced_breakpoint( $i - $inc );
14092                         delete $postponed_breakpoint{$type_sequence};
14093                     }
14094                 }
14095
14096                 # set breaks at ?/: if they will get separated (and are
14097                 # not a ?/: chain), or if the '?' is at the end of the
14098                 # line
14099                 elsif ( $token eq '?' ) {
14100                     my $i_colon = $mate_index_to_go[$i];
14101                     if (
14102                         $i_colon <= 0  # the ':' is not in this batch
14103                         || $i == 0     # this '?' is the first token of the line
14104                         || $i ==
14105                         $max_index_to_go    # or this '?' is the last token
14106                       )
14107                     {
14108
14109                         # don't break at a '?' if it is preceded by the
14110                         # ':' of the previous ?/: pair on this line.
14111                         # This is an attempt to preserve a chain of ?/:
14112                         # expressions (elsif2.t).  And don't break if
14113                         # this has a side comment.
14114                         set_forced_breakpoint($i)
14115                           unless (
14116                             $type_sequence == (
14117                                 $last_colon_sequence_number +
14118                                   TYPE_SEQUENCE_INCREMENT
14119                             )
14120                             || $tokens_to_go[$max_index_to_go] eq '#'
14121                           );
14122                         set_closing_breakpoint($i);
14123                     }
14124                 }
14125             }
14126
14127 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
14128
14129             #------------------------------------------------------------
14130             # Handle Increasing Depth..
14131             #
14132             # prepare for a new list when depth increases
14133             # token $i is a '(','{', or '['
14134             #------------------------------------------------------------
14135             if ( $depth > $current_depth ) {
14136
14137                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
14138                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
14139                 $has_broken_sublist[$depth]     = 0;
14140                 $identifier_count_stack[$depth] = 0;
14141                 $index_before_arrow[$depth]     = -1;
14142                 $interrupted_list[$depth]       = 0;
14143                 $item_count_stack[$depth]       = 0;
14144                 $last_comma_index[$depth]       = undef;
14145                 $last_dot_index[$depth]         = undef;
14146                 $last_nonblank_type[$depth]     = $last_nonblank_type;
14147                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
14148                 $opening_structure_index_stack[$depth] = $i;
14149                 $rand_or_list[$depth]                  = [];
14150                 $rfor_semicolon_list[$depth]           = [];
14151                 $i_equals[$depth]                      = -1;
14152                 $want_comma_break[$depth]              = 0;
14153                 $container_type[$depth] =
14154                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
14155                   ? $last_nonblank_token
14156                   : "";
14157                 $has_old_logical_breakpoints[$depth] = 0;
14158
14159                 # if line ends here then signal closing token to break
14160                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
14161                 {
14162                     set_closing_breakpoint($i);
14163                 }
14164
14165                 # Not all lists of values should be vertically aligned..
14166                 $dont_align[$depth] =
14167
14168                   # code BLOCKS are handled at a higher level
14169                   ( $block_type ne "" )
14170
14171                   # certain paren lists
14172                   || ( $type eq '(' ) && (
14173
14174                     # it does not usually look good to align a list of
14175                     # identifiers in a parameter list, as in:
14176                     #    my($var1, $var2, ...)
14177                     # (This test should probably be refined, for now I'm just
14178                     # testing for any keyword)
14179                     ( $last_nonblank_type eq 'k' )
14180
14181                     # a trailing '(' usually indicates a non-list
14182                     || ( $next_nonblank_type eq '(' )
14183                   );
14184
14185                 # patch to outdent opening brace of long if/for/..
14186                 # statements (like this one).  See similar coding in
14187                 # set_continuation_breaks.  We also have to catch it here for
14188                 # short line fragments which otherwise will not go through
14189                 # set_continuation_breaks.
14190                 if (
14191                     $block_type
14192
14193                     # if we have the ')' but not its '(' in this batch..
14194                     && ( $last_nonblank_token eq ')' )
14195                     && $mate_index_to_go[$i_last_nonblank_token] < 0
14196
14197                     # and user wants brace to left
14198                     && !$rOpts->{'opening-brace-always-on-right'}
14199
14200                     && ( $type  eq '{' )    # should be true
14201                     && ( $token eq '{' )    # should be true
14202                   )
14203                 {
14204                     set_forced_breakpoint( $i - 1 );
14205                 }
14206             }
14207
14208             #------------------------------------------------------------
14209             # Handle Decreasing Depth..
14210             #
14211             # finish off any old list when depth decreases
14212             # token $i is a ')','}', or ']'
14213             #------------------------------------------------------------
14214             elsif ( $depth < $current_depth ) {
14215
14216                 check_for_new_minimum_depth($depth);
14217
14218                 # force all outer logical containers to break after we see an
14219                 # old breakpoint
14220                 $has_old_logical_breakpoints[$depth] ||=
14221                   $has_old_logical_breakpoints[$current_depth];
14222
14223                 # Patch to break between ') {' if the paren list is broken.
14224                 # There is similar logic in set_continuation_breaks for
14225                 # non-broken lists.
14226                 if (   $token eq ')'
14227                     && $next_nonblank_block_type
14228                     && $interrupted_list[$current_depth]
14229                     && $next_nonblank_type eq '{'
14230                     && !$rOpts->{'opening-brace-always-on-right'} )
14231                 {
14232                     set_forced_breakpoint($i);
14233                 }
14234
14235 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
14236
14237                 # set breaks at commas if necessary
14238                 my ( $bp_count, $do_not_break_apart ) =
14239                   set_comma_breakpoints($current_depth);
14240
14241                 my $i_opening = $opening_structure_index_stack[$current_depth];
14242                 my $saw_opening_structure = ( $i_opening >= 0 );
14243
14244                 # this term is long if we had to break at interior commas..
14245                 my $is_long_term = $bp_count > 0;
14246
14247                 # ..or if the length between opening and closing parens exceeds
14248                 # allowed line length
14249                 if ( !$is_long_term && $saw_opening_structure ) {
14250                     my $i_opening_minus = find_token_starting_list($i_opening);
14251
14252                     # Note: we have to allow for one extra space after a
14253                     # closing token so that we do not strand a comma or
14254                     # semicolon, hence the '>=' here (oneline.t)
14255                     $is_long_term =
14256                       excess_line_length( $i_opening_minus, $i ) >= 0;
14257                 }
14258
14259                 # We've set breaks after all comma-arrows.  Now we have to
14260                 # undo them if this can be a one-line block
14261                 # (the only breakpoints set will be due to comma-arrows)
14262                 if (
14263
14264                     # user doesn't require breaking after all comma-arrows
14265                     ( $rOpts_comma_arrow_breakpoints != 0 )
14266
14267                     # and if the opening structure is in this batch
14268                     && $saw_opening_structure
14269
14270                     # and either on the same old line
14271                     && (
14272                         $old_breakpoint_count_stack[$current_depth] ==
14273                         $last_old_breakpoint_count
14274
14275                         # or user wants to form long blocks with arrows
14276                         || $rOpts_comma_arrow_breakpoints == 2
14277                     )
14278
14279                   # and we made some breakpoints between the opening and closing
14280                     && ( $breakpoint_undo_stack[$current_depth] <
14281                         $forced_breakpoint_undo_count )
14282
14283                     # and this block is short enough to fit on one line
14284                     # Note: use < because need 1 more space for possible comma
14285                     && !$is_long_term
14286
14287                   )
14288                 {
14289                     undo_forced_breakpoint_stack(
14290                         $breakpoint_undo_stack[$current_depth] );
14291                 }
14292
14293                 # now see if we have any comma breakpoints left
14294                 my $has_comma_breakpoints =
14295                   ( $breakpoint_stack[$current_depth] !=
14296                       $forced_breakpoint_count );
14297
14298                 # update broken-sublist flag of the outer container
14299                 $has_broken_sublist[$depth] =
14300                      $has_broken_sublist[$depth]
14301                   || $has_broken_sublist[$current_depth]
14302                   || $is_long_term
14303                   || $has_comma_breakpoints;
14304
14305 # Having come to the closing ')', '}', or ']', now we have to decide if we
14306 # should 'open up' the structure by placing breaks at the opening and
14307 # closing containers.  This is a tricky decision.  Here are some of the
14308 # basic considerations:
14309 #
14310 # -If this is a BLOCK container, then any breakpoints will have already
14311 # been set (and according to user preferences), so we need do nothing here.
14312 #
14313 # -If we have a comma-separated list for which we can align the list items,
14314 # then we need to do so because otherwise the vertical aligner cannot
14315 # currently do the alignment.
14316 #
14317 # -If this container does itself contain a container which has been broken
14318 # open, then it should be broken open to properly show the structure.
14319 #
14320 # -If there is nothing to align, and no other reason to break apart,
14321 # then do not do it.
14322 #
14323 # We will not break open the parens of a long but 'simple' logical expression.
14324 # For example:
14325 #
14326 # This is an example of a simple logical expression and its formatting:
14327 #
14328 #     if ( $bigwasteofspace1 && $bigwasteofspace2
14329 #         || $bigwasteofspace3 && $bigwasteofspace4 )
14330 #
14331 # Most people would prefer this to the 'spacey' version:
14332 #
14333 #     if (
14334 #         $bigwasteofspace1 && $bigwasteofspace2
14335 #         || $bigwasteofspace3 && $bigwasteofspace4
14336 #     )
14337 #
14338 # To illustrate the rules for breaking logical expressions, consider:
14339 #
14340 #             FULLY DENSE:
14341 #             if ( $opt_excl
14342 #                 and ( exists $ids_excl_uc{$id_uc}
14343 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
14344 #
14345 # This is on the verge of being difficult to read.  The current default is to
14346 # open it up like this:
14347 #
14348 #             DEFAULT:
14349 #             if (
14350 #                 $opt_excl
14351 #                 and ( exists $ids_excl_uc{$id_uc}
14352 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
14353 #               )
14354 #
14355 # This is a compromise which tries to avoid being too dense and too spacey.
14356 # A more spaced version would be:
14357 #
14358 #             SPACEY:
14359 #             if (
14360 #                 $opt_excl
14361 #                 and (
14362 #                     exists $ids_excl_uc{$id_uc}
14363 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
14364 #                 )
14365 #               )
14366 #
14367 # Some people might prefer the spacey version -- an option could be added.  The
14368 # innermost expression contains a long block '( exists $ids_...  ')'.
14369 #
14370 # Here is how the logic goes: We will force a break at the 'or' that the
14371 # innermost expression contains, but we will not break apart its opening and
14372 # closing containers because (1) it contains no multi-line sub-containers itself,
14373 # and (2) there is no alignment to be gained by breaking it open like this
14374 #
14375 #             and (
14376 #                 exists $ids_excl_uc{$id_uc}
14377 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
14378 #             )
14379 #
14380 # (although this looks perfectly ok and might be good for long expressions).  The
14381 # outer 'if' container, though, contains a broken sub-container, so it will be
14382 # broken open to avoid too much density.  Also, since it contains no 'or's, there
14383 # will be a forced break at its 'and'.
14384
14385                 # set some flags telling something about this container..
14386                 my $is_simple_logical_expression = 0;
14387                 if (   $item_count_stack[$current_depth] == 0
14388                     && $saw_opening_structure
14389                     && $tokens_to_go[$i_opening] eq '('
14390                     && $is_logical_container{ $container_type[$current_depth] }
14391                   )
14392                 {
14393
14394                     # This seems to be a simple logical expression with
14395                     # no existing breakpoints.  Set a flag to prevent
14396                     # opening it up.
14397                     if ( !$has_comma_breakpoints ) {
14398                         $is_simple_logical_expression = 1;
14399                     }
14400
14401                     # This seems to be a simple logical expression with
14402                     # breakpoints (broken sublists, for example).  Break
14403                     # at all 'or's and '||'s.
14404                     else {
14405                         set_logical_breakpoints($current_depth);
14406                     }
14407                 }
14408
14409                 if ( $is_long_term
14410                     && @{ $rfor_semicolon_list[$current_depth] } )
14411                 {
14412                     set_for_semicolon_breakpoints($current_depth);
14413
14414                     # open up a long 'for' or 'foreach' container to allow
14415                     # leading term alignment unless -lp is used.
14416                     $has_comma_breakpoints = 1
14417                       unless $rOpts_line_up_parentheses;
14418                 }
14419
14420                 if (
14421
14422                     # breaks for code BLOCKS are handled at a higher level
14423                     !$block_type
14424
14425                     # we do not need to break at the top level of an 'if'
14426                     # type expression
14427                     && !$is_simple_logical_expression
14428
14429                     ## modification to keep ': (' containers vertically tight;
14430                     ## but probably better to let user set -vt=1 to avoid
14431                     ## inconsistency with other paren types
14432                     ## && ($container_type[$current_depth] ne ':')
14433
14434                     # otherwise, we require one of these reasons for breaking:
14435                     && (
14436
14437                         # - this term has forced line breaks
14438                         $has_comma_breakpoints
14439
14440                        # - the opening container is separated from this batch
14441                        #   for some reason (comment, blank line, code block)
14442                        # - this is a non-paren container spanning multiple lines
14443                         || !$saw_opening_structure
14444
14445                         # - this is a long block contained in another breakable
14446                         #   container
14447                         || (   $is_long_term
14448                             && $container_environment_to_go[$i_opening] ne
14449                             'BLOCK' )
14450                     )
14451                   )
14452                 {
14453
14454                     # For -lp option, we must put a breakpoint before
14455                     # the token which has been identified as starting
14456                     # this indentation level.  This is necessary for
14457                     # proper alignment.
14458                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
14459                     {
14460                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
14461                         if (   $i_opening + 1 < $max_index_to_go
14462                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
14463                         {
14464                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
14465                         }
14466                         if ( defined($item) ) {
14467                             my $i_start_2 = $item->get_STARTING_INDEX();
14468                             if (
14469                                 defined($i_start_2)
14470
14471                                 # we are breaking after an opening brace, paren,
14472                                 # so don't break before it too
14473                                 && $i_start_2 ne $i_opening
14474                               )
14475                             {
14476
14477                                 # Only break for breakpoints at the same
14478                                 # indentation level as the opening paren
14479                                 my $test1 = $nesting_depth_to_go[$i_opening];
14480                                 my $test2 = $nesting_depth_to_go[$i_start_2];
14481                                 if ( $test2 == $test1 ) {
14482                                     set_forced_breakpoint( $i_start_2 - 1 );
14483                                 }
14484                             }
14485                         }
14486                     }
14487
14488                     # break after opening structure.
14489                     # note: break before closing structure will be automatic
14490                     if ( $minimum_depth <= $current_depth ) {
14491
14492                         set_forced_breakpoint($i_opening)
14493                           unless ( $do_not_break_apart
14494                             || is_unbreakable_container($current_depth) );
14495
14496                         # break at '.' of lower depth level before opening token
14497                         if ( $last_dot_index[$depth] ) {
14498                             set_forced_breakpoint( $last_dot_index[$depth] );
14499                         }
14500
14501                         # break before opening structure if preceded by another
14502                         # closing structure and a comma.  This is normally
14503                         # done by the previous closing brace, but not
14504                         # if it was a one-line block.
14505                         if ( $i_opening > 2 ) {
14506                             my $i_prev =
14507                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
14508                               ? $i_opening - 2
14509                               : $i_opening - 1;
14510
14511                             if (   $types_to_go[$i_prev] eq ','
14512                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14513                             {
14514                                 set_forced_breakpoint($i_prev);
14515                             }
14516
14517                             # also break before something like ':('  or '?('
14518                             # if appropriate.
14519                             elsif (
14520                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14521                             {
14522                                 my $token_prev = $tokens_to_go[$i_prev];
14523                                 if ( $want_break_before{$token_prev} ) {
14524                                     set_forced_breakpoint($i_prev);
14525                                 }
14526                             }
14527                         }
14528                     }
14529
14530                     # break after comma following closing structure
14531                     if ( $next_type eq ',' ) {
14532                         set_forced_breakpoint( $i + 1 );
14533                     }
14534
14535                     # break before an '=' following closing structure
14536                     if (
14537                         $is_assignment{$next_nonblank_type}
14538                         && ( $breakpoint_stack[$current_depth] !=
14539                             $forced_breakpoint_count )
14540                       )
14541                     {
14542                         set_forced_breakpoint($i);
14543                     }
14544
14545                     # break at any comma before the opening structure Added
14546                     # for -lp, but seems to be good in general.  It isn't
14547                     # obvious how far back to look; the '5' below seems to
14548                     # work well and will catch the comma in something like
14549                     #  push @list, myfunc( $param, $param, ..
14550
14551                     my $icomma = $last_comma_index[$depth];
14552                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
14553                         unless ( $forced_breakpoint_to_go[$icomma] ) {
14554                             set_forced_breakpoint($icomma);
14555                         }
14556                     }
14557                 }    # end logic to open up a container
14558
14559                 # Break open a logical container if it was already open
14560                 elsif ($is_simple_logical_expression
14561                     && $has_old_logical_breakpoints[$current_depth] )
14562                 {
14563                     set_logical_breakpoints($current_depth);
14564                 }
14565
14566                 # Handle long container which does not get opened up
14567                 elsif ($is_long_term) {
14568
14569                     # must set fake breakpoint to alert outer containers that
14570                     # they are complex
14571                     set_fake_breakpoint();
14572                 }
14573             }
14574
14575             #------------------------------------------------------------
14576             # Handle this token
14577             #------------------------------------------------------------
14578
14579             $current_depth = $depth;
14580
14581             # handle comma-arrow
14582             if ( $type eq '=>' ) {
14583                 next if ( $last_nonblank_type eq '=>' );
14584                 next if $rOpts_break_at_old_comma_breakpoints;
14585                 next if $rOpts_comma_arrow_breakpoints == 3;
14586                 $want_comma_break[$depth]   = 1;
14587                 $index_before_arrow[$depth] = $i_last_nonblank_token;
14588                 next;
14589             }
14590
14591             elsif ( $type eq '.' ) {
14592                 $last_dot_index[$depth] = $i;
14593             }
14594
14595             # Turn off alignment if we are sure that this is not a list
14596             # environment.  To be safe, we will do this if we see certain
14597             # non-list tokens, such as ';', and also the environment is
14598             # not a list.  Note that '=' could be in any of the = operators
14599             # (lextest.t). We can't just use the reported environment
14600             # because it can be incorrect in some cases.
14601             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
14602                 && $container_environment_to_go[$i] ne 'LIST' )
14603             {
14604                 $dont_align[$depth]         = 1;
14605                 $want_comma_break[$depth]   = 0;
14606                 $index_before_arrow[$depth] = -1;
14607             }
14608
14609             # now just handle any commas
14610             next unless ( $type eq ',' );
14611
14612             $last_dot_index[$depth]   = undef;
14613             $last_comma_index[$depth] = $i;
14614
14615             # break here if this comma follows a '=>'
14616             # but not if there is a side comment after the comma
14617             if ( $want_comma_break[$depth] ) {
14618
14619                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
14620                     $want_comma_break[$depth]   = 0;
14621                     $index_before_arrow[$depth] = -1;
14622                     next;
14623                 }
14624
14625                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14626
14627                 # break before the previous token if it looks safe
14628                 # Example of something that we will not try to break before:
14629                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
14630                 # Also we don't want to break at a binary operator (like +):
14631                 # $c->createOval(
14632                 #    $x + $R, $y +
14633                 #    $R => $x - $R,
14634                 #    $y - $R, -fill   => 'black',
14635                 # );
14636                 my $ibreak = $index_before_arrow[$depth] - 1;
14637                 if (   $ibreak > 0
14638                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
14639                 {
14640                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
14641                     if ( $types_to_go[$ibreak]  eq 'b' ) { $ibreak-- }
14642                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
14643
14644                         # don't break pointer calls, such as the following:
14645                         #  File::Spec->curdir  => 1,
14646                         # (This is tokenized as adjacent 'w' tokens)
14647                         if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
14648                             set_forced_breakpoint($ibreak);
14649                         }
14650                     }
14651                 }
14652
14653                 $want_comma_break[$depth]   = 0;
14654                 $index_before_arrow[$depth] = -1;
14655
14656                 # handle list which mixes '=>'s and ','s:
14657                 # treat any list items so far as an interrupted list
14658                 $interrupted_list[$depth] = 1;
14659                 next;
14660             }
14661
14662             # break after all commas above starting depth
14663             if ( $depth < $starting_depth && !$dont_align[$depth] ) {
14664                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14665                 next;
14666             }
14667
14668             # add this comma to the list..
14669             my $item_count = $item_count_stack[$depth];
14670             if ( $item_count == 0 ) {
14671
14672                 # but do not form a list with no opening structure
14673                 # for example:
14674
14675                 #            open INFILE_COPY, ">$input_file_copy"
14676                 #              or die ("very long message");
14677
14678                 if ( ( $opening_structure_index_stack[$depth] < 0 )
14679                     && $container_environment_to_go[$i] eq 'BLOCK' )
14680                 {
14681                     $dont_align[$depth] = 1;
14682                 }
14683             }
14684
14685             $comma_index[$depth][$item_count] = $i;
14686             ++$item_count_stack[$depth];
14687             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
14688                 $identifier_count_stack[$depth]++;
14689             }
14690         }
14691
14692         #-------------------------------------------
14693         # end of loop over all tokens in this batch
14694         #-------------------------------------------
14695
14696         # set breaks for any unfinished lists ..
14697         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
14698
14699             $interrupted_list[$dd] = 1;
14700             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
14701             set_comma_breakpoints($dd);
14702             set_logical_breakpoints($dd)
14703               if ( $has_old_logical_breakpoints[$dd] );
14704             set_for_semicolon_breakpoints($dd);
14705
14706             # break open container...
14707             my $i_opening = $opening_structure_index_stack[$dd];
14708             set_forced_breakpoint($i_opening)
14709               unless (
14710                 is_unbreakable_container($dd)
14711
14712                 # Avoid a break which would place an isolated ' or "
14713                 # on a line
14714                 || (   $type eq 'Q'
14715                     && $i_opening >= $max_index_to_go - 2
14716                     && $token =~ /^['"]$/ )
14717               );
14718         }
14719
14720         # Return a flag indicating if the input file had some good breakpoints.
14721         # This flag will be used to force a break in a line shorter than the
14722         # allowed line length.
14723         if ( $has_old_logical_breakpoints[$current_depth] ) {
14724             $saw_good_breakpoint = 1;
14725         }
14726         return $saw_good_breakpoint;
14727     }
14728 }    # end scan_list
14729
sub find_token_starting_list {

    # When testing to see if a block will fit on one line, some
    # previous token(s) may also need to be on the line; particularly
    # if this is a sub call.  So we will look back at least one
    # token. NOTE: This isn't perfect, but not critical, because
    # if we mis-identify a block, it will be wrapped and therefore
    # fixed the next time it is formatted.
    #
    # Given:
    #   $i_opening_paren = index of an opening container token in the
    #     current batch (as indexed by @tokens_to_go and @types_to_go)
    # Returns:
    #   the index at which a length estimate for the container should
    #   start.  For a non-negative input this is in the range
    #   0 .. $i_opening_paren.
    my $i_opening_paren = shift;
    my $i_opening_minus = $i_opening_paren;
    my $im1             = $i_opening_paren - 1;
    my $im2             = $i_opening_paren - 2;

    # Positions before the start of the batch are treated as blanks.
    # Without these guards a negative subscript would silently wrap
    # around and pick up the type of a token at the END of the array,
    # which could send us down the wrong branch below (and even return
    # an index of -1 if that token happened to be a keyword).
    my $typem1 = $im1 >= 0 ? $types_to_go[$im1] : 'b';
    my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';

    # the container follows a comma: it is a list item which starts
    # at the opening token itself
    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
        $i_opening_minus = $i_opening_paren;
    }

    # an opening paren: include whatever term precedes it, such as a
    # function name
    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
        $i_opening_minus = $im1 if $im1 >= 0;

        # walk back to improve length estimate; stop at any container
        # token, blank, or comma
        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
            $i_opening_minus = $j;
        }

        # do not start the estimate on a blank
        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
    }

    # other container types: include an immediately preceding keyword,
    # with or without an intervening blank
    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
    elsif ( $typem1 eq 'b' && $typem2 eq 'k' ) {
        $i_opening_minus = $im2;
    }
    return $i_opening_minus;
}
14764
14765 {    # begin set_comma_breakpoints_do
14766
14767     my %is_keyword_with_special_leading_term;
14768
14769     BEGIN {
14770
14771         # These keywords have prototypes which allow a special leading item
14772         # followed by a list
14773         @_ =
14774           qw(formline grep kill map printf sprintf push chmod join pack unshift);
14775         @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
14776     }
14777
14778     sub set_comma_breakpoints_do {
14779
14780         # Given a list with some commas, set breakpoints at some of the
14781         # commas, if necessary, to make it easy to read.  This list is
14782         # an example:
14783         my (
14784             $depth,               $i_opening_paren,  $i_closing_paren,
14785             $item_count,          $identifier_count, $rcomma_index,
14786             $next_nonblank_type,  $list_type,        $interrupted,
14787             $rdo_not_break_apart, $must_break_open,
14788         ) = @_;
14789
14790         # nothing to do if no commas seen
14791         return if ( $item_count < 1 );
14792         my $i_first_comma     = $$rcomma_index[0];
14793         my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
14794         my $i_last_comma      = $i_true_last_comma;
14795         if ( $i_last_comma >= $max_index_to_go ) {
14796             $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
14797             return if ( $item_count < 1 );
14798         }
14799
14800         #---------------------------------------------------------------
14801         # find lengths of all items in the list to calculate page layout
14802         #---------------------------------------------------------------
14803         my $comma_count = $item_count;
14804         my @item_lengths;
14805         my @i_term_begin;
14806         my @i_term_end;
14807         my @i_term_comma;
14808         my $i_prev_plus;
14809         my @max_length = ( 0, 0 );
14810         my $first_term_length;
14811         my $i      = $i_opening_paren;
14812         my $is_odd = 1;
14813
14814         for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
14815             $is_odd      = 1 - $is_odd;
14816             $i_prev_plus = $i + 1;
14817             $i           = $$rcomma_index[$j];
14818
14819             my $i_term_end =
14820               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
14821             my $i_term_begin =
14822               ( $types_to_go[$i_prev_plus] eq 'b' )
14823               ? $i_prev_plus + 1
14824               : $i_prev_plus;
14825             push @i_term_begin, $i_term_begin;
14826             push @i_term_end,   $i_term_end;
14827             push @i_term_comma, $i;
14828
14829             # note: currently adding 2 to all lengths (for comma and space)
14830             my $length =
14831               2 + token_sequence_length( $i_term_begin, $i_term_end );
14832             push @item_lengths, $length;
14833
14834             if ( $j == 0 ) {
14835                 $first_term_length = $length;
14836             }
14837             else {
14838
14839                 if ( $length > $max_length[$is_odd] ) {
14840                     $max_length[$is_odd] = $length;
14841                 }
14842             }
14843         }
14844
14845         # now we have to make a distinction between the comma count and item
14846         # count, because the item count will be one greater than the comma
14847         # count if the last item is not terminated with a comma
14848         my $i_b =
14849           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
14850           ? $i_last_comma + 1
14851           : $i_last_comma;
14852         my $i_e =
14853           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
14854           ? $i_closing_paren - 2
14855           : $i_closing_paren - 1;
14856         my $i_effective_last_comma = $i_last_comma;
14857
14858         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
14859
14860         if ( $last_item_length > 0 ) {
14861
14862             # add 2 to length because other lengths include a comma and a blank
14863             $last_item_length += 2;
14864             push @item_lengths, $last_item_length;
14865             push @i_term_begin, $i_b + 1;
14866             push @i_term_end,   $i_e;
14867             push @i_term_comma, undef;
14868
14869             my $i_odd = $item_count % 2;
14870
14871             if ( $last_item_length > $max_length[$i_odd] ) {
14872                 $max_length[$i_odd] = $last_item_length;
14873             }
14874
14875             $item_count++;
14876             $i_effective_last_comma = $i_e + 1;
14877
14878             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
14879                 $identifier_count++;
14880             }
14881         }
14882
14883         #---------------------------------------------------------------
14884         # End of length calculations
14885         #---------------------------------------------------------------
14886
14887         #---------------------------------------------------------------
14888         # Compound List Rule 1:
14889         # Break at (almost) every comma for a list containing a broken
14890         # sublist.  This has higher priority than the Interrupted List
14891         # Rule.
14892         #---------------------------------------------------------------
14893         if ( $has_broken_sublist[$depth] ) {
14894
14895             # Break at every comma except for a comma between two
14896             # simple, small terms.  This prevents long vertical
14897             # columns of, say, just 0's.
14898             my $small_length = 10;    # 2 + actual maximum length wanted
14899
14900             # We'll insert a break in long runs of small terms to
14901             # allow alignment in uniform tables.
14902             my $skipped_count = 0;
14903             my $columns       = table_columns_available($i_first_comma);
14904             my $fields        = int( $columns / $small_length );
14905             if (   $rOpts_maximum_fields_per_table
14906                 && $fields > $rOpts_maximum_fields_per_table )
14907             {
14908                 $fields = $rOpts_maximum_fields_per_table;
14909             }
14910             my $max_skipped_count = $fields - 1;
14911
14912             my $is_simple_last_term = 0;
14913             my $is_simple_next_term = 0;
14914             foreach my $j ( 0 .. $item_count ) {
14915                 $is_simple_last_term = $is_simple_next_term;
14916                 $is_simple_next_term = 0;
14917                 if (   $j < $item_count
14918                     && $i_term_end[$j] == $i_term_begin[$j]
14919                     && $item_lengths[$j] <= $small_length )
14920                 {
14921                     $is_simple_next_term = 1;
14922                 }
14923                 next if $j == 0;
14924                 if (   $is_simple_last_term
14925                     && $is_simple_next_term
14926                     && $skipped_count < $max_skipped_count )
14927                 {
14928                     $skipped_count++;
14929                 }
14930                 else {
14931                     $skipped_count = 0;
14932                     my $i = $i_term_comma[ $j - 1 ];
14933                     last unless defined $i;
14934                     set_forced_breakpoint($i);
14935                 }
14936             }
14937
14938             # always break at the last comma if this list is
14939             # interrupted; we wouldn't want to leave a terminal '{', for
14940             # example.
14941             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14942             return;
14943         }
14944
14945 #my ( $a, $b, $c ) = caller();
14946 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14947 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
14948 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14949
14950         #---------------------------------------------------------------
14951         # Interrupted List Rule:
14952         # A list is forced to use old breakpoints if it was interrupted
14953         # by side comments or blank lines, or requested by user.
14954         #---------------------------------------------------------------
14955         if (   $rOpts_break_at_old_comma_breakpoints
14956             || $interrupted
14957             || $i_opening_paren < 0 )
14958         {
14959             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14960             return;
14961         }
14962
14963         #---------------------------------------------------------------
14964         # Looks like a list of items.  We have to look at it and size it up.
14965         #---------------------------------------------------------------
14966
14967         my $opening_token = $tokens_to_go[$i_opening_paren];
14968         my $opening_environment =
14969           $container_environment_to_go[$i_opening_paren];
14970
14971         #-------------------------------------------------------------------
14972         # Return if this will fit on one line
14973         #-------------------------------------------------------------------
14974
14975         my $i_opening_minus = find_token_starting_list($i_opening_paren);
14976         return
14977           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14978
14979         #-------------------------------------------------------------------
14980         # Now we know that this block spans multiple lines; we have to set
14981         # at least one breakpoint -- real or fake -- as a signal to break
14982         # open any outer containers.
14983         #-------------------------------------------------------------------
14984         set_fake_breakpoint();
14985
14986         # be sure we do not extend beyond the current list length
14987         if ( $i_effective_last_comma >= $max_index_to_go ) {
14988             $i_effective_last_comma = $max_index_to_go - 1;
14989         }
14990
14991         # Set a flag indicating if we need to break open to keep -lp
14992         # items aligned.  This is necessary if any of the list terms
14993         # exceeds the available space after the '('.
14994         my $need_lp_break_open = $must_break_open;
14995         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14996             my $columns_if_unbroken = $rOpts_maximum_line_length -
14997               total_line_length( $i_opening_minus, $i_opening_paren );
14998             $need_lp_break_open =
14999                  ( $max_length[0] > $columns_if_unbroken )
15000               || ( $max_length[1] > $columns_if_unbroken )
15001               || ( $first_term_length > $columns_if_unbroken );
15002         }
15003
15004         # Specify if the list must have an even number of fields or not.
15005         # It is generally safest to assume an even number, because the
15006         # list items might be a hash list.  But if we can be sure that
15007         # it is not a hash, then we can allow an odd number for more
15008         # flexibility.
15009         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
15010
15011         if (   $identifier_count >= $item_count - 1
15012             || $is_assignment{$next_nonblank_type}
15013             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
15014           )
15015         {
15016             $odd_or_even = 1;
15017         }
15018
15019         # do we have a long first term which should be
15020         # left on a line by itself?
15021         my $use_separate_first_term = (
15022             $odd_or_even == 1       # only if we can use 1 field/line
15023               && $item_count > 3    # need several items
15024               && $first_term_length >
15025               2 * $max_length[0] - 2    # need long first term
15026               && $first_term_length >
15027               2 * $max_length[1] - 2    # need long first term
15028         );
15029
15030         # or do we know from the type of list that the first term should
15031         # be placed alone?
15032         if ( !$use_separate_first_term ) {
15033             if ( $is_keyword_with_special_leading_term{$list_type} ) {
15034                 $use_separate_first_term = 1;
15035
15036                 # should the container be broken open?
15037                 if ( $item_count < 3 ) {
15038                     if ( $i_first_comma - $i_opening_paren < 4 ) {
15039                         $$rdo_not_break_apart = 1;
15040                     }
15041                 }
15042                 elsif ($first_term_length < 20
15043                     && $i_first_comma - $i_opening_paren < 4 )
15044                 {
15045                     my $columns = table_columns_available($i_first_comma);
15046                     if ( $first_term_length < $columns ) {
15047                         $$rdo_not_break_apart = 1;
15048                     }
15049                 }
15050             }
15051         }
15052
15053         # if so,
15054         if ($use_separate_first_term) {
15055
15056             # ..set a break and update starting values
15057             $use_separate_first_term = 1;
15058             set_forced_breakpoint($i_first_comma);
15059             $i_opening_paren = $i_first_comma;
15060             $i_first_comma   = $$rcomma_index[1];
15061             $item_count--;
15062             return if $comma_count == 1;
15063             shift @item_lengths;
15064             shift @i_term_begin;
15065             shift @i_term_end;
15066             shift @i_term_comma;
15067         }
15068
15069         # if not, update the metrics to include the first term
15070         else {
15071             if ( $first_term_length > $max_length[0] ) {
15072                 $max_length[0] = $first_term_length;
15073             }
15074         }
15075
15076         # Field width parameters
15077         my $pair_width = ( $max_length[0] + $max_length[1] );
15078         my $max_width =
15079           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
15080
15081         # Number of free columns across the page width for laying out tables
15082         my $columns = table_columns_available($i_first_comma);
15083
15084         # Estimated maximum number of fields which fit this space
15085         # This will be our first guess
15086         my $number_of_fields_max =
15087           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
15088             $pair_width );
15089         my $number_of_fields = $number_of_fields_max;
15090
15091         # Find the best-looking number of fields
15092         # and make this our second guess if possible
15093         my ( $number_of_fields_best, $ri_ragged_break_list,
15094             $new_identifier_count )
15095           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
15096             $max_width );
15097
15098         if (   $number_of_fields_best != 0
15099             && $number_of_fields_best < $number_of_fields_max )
15100         {
15101             $number_of_fields = $number_of_fields_best;
15102         }
15103
15104         # ----------------------------------------------------------------------
15105         # If we are crowded and the -lp option is being used, try to
15106         # undo some indentation
15107         # ----------------------------------------------------------------------
15108         if (
15109             $rOpts_line_up_parentheses
15110             && (
15111                 $number_of_fields == 0
15112                 || (   $number_of_fields == 1
15113                     && $number_of_fields != $number_of_fields_best )
15114             )
15115           )
15116         {
15117             my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
15118             if ( $available_spaces > 0 ) {
15119
15120                 my $spaces_wanted = $max_width - $columns;    # for 1 field
15121
15122                 if ( $number_of_fields_best == 0 ) {
15123                     $number_of_fields_best =
15124                       get_maximum_fields_wanted( \@item_lengths );
15125                 }
15126
15127                 if ( $number_of_fields_best != 1 ) {
15128                     my $spaces_wanted_2 =
15129                       1 + $pair_width - $columns;             # for 2 fields
15130                     if ( $available_spaces > $spaces_wanted_2 ) {
15131                         $spaces_wanted = $spaces_wanted_2;
15132                     }
15133                 }
15134
15135                 if ( $spaces_wanted > 0 ) {
15136                     my $deleted_spaces =
15137                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
15138
15139                     # redo the math
15140                     if ( $deleted_spaces > 0 ) {
15141                         $columns = table_columns_available($i_first_comma);
15142                         $number_of_fields_max =
15143                           maximum_number_of_fields( $columns, $odd_or_even,
15144                             $max_width, $pair_width );
15145                         $number_of_fields = $number_of_fields_max;
15146
15147                         if (   $number_of_fields_best == 1
15148                             && $number_of_fields >= 1 )
15149                         {
15150                             $number_of_fields = $number_of_fields_best;
15151                         }
15152                     }
15153                 }
15154             }
15155         }
15156
15157         # try for one column if two won't work
15158         if ( $number_of_fields <= 0 ) {
15159             $number_of_fields = int( $columns / $max_width );
15160         }
15161
15162         # The user can place an upper bound on the number of fields,
15163         # which can be useful for doing maintenance on tables
15164         if (   $rOpts_maximum_fields_per_table
15165             && $number_of_fields > $rOpts_maximum_fields_per_table )
15166         {
15167             $number_of_fields = $rOpts_maximum_fields_per_table;
15168         }
15169
15170         # How many columns (characters) and lines would this container take
15171         # if no additional whitespace were added?
15172         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
15173             $i_effective_last_comma + 1 );
15174         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
15175         my $packed_lines = 1 + int( $packed_columns / $columns );
15176
15177         # are we an item contained in an outer list?
15178         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
15179
15180         if ( $number_of_fields <= 0 ) {
15181
15182 #         #---------------------------------------------------------------
15183 #         # We're in trouble.  We can't find a single field width that works.
15184 #         # There is no simple answer here; we may have a single long list
15185 #         # item, or many.
15186 #         #---------------------------------------------------------------
15187 #
15188 #         In many cases, it may be best to not force a break if there is just one
15189 #         comma, because the standard continuation break logic will do a better
15190 #         job without it.
15191 #
15192 #         In the common case that all but one of the terms can fit
15193 #         on a single line, it may look better not to break open the
15194 #         containing parens.  Consider, for example
15195 #
15196 #             $color =
15197 #               join ( '/',
15198 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
15199 #                 keys %colors );
15200 #
15201 #         which will look like this with the container broken:
15202 #
15203 #             $color = join (
15204 #                 '/',
15205 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
15206 #             );
15207 #
15208 #         Here is an example of this rule for a long last term:
15209 #
15210 #             log_message( 0, 256, 128,
15211 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
15212 #
15213 #         And here is an example with a long first term:
15214 #
15215 #         $s = sprintf(
15216 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
15217 #             $r, $pu, $ps, $cu, $cs, $tt
15218 #           )
15219 #           if $style eq 'all';
15220
15221             my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
15222             my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
15223             my $long_first_term =
15224               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
15225
15226             # break at every comma ...
15227             if (
15228
15229                 # if requested by user or is best looking
15230                 $number_of_fields_best == 1
15231
15232                 # or if this is a sublist of a larger list
15233                 || $in_hierarchical_list
15234
15235                 # or if multiple commas and we dont have a long first or last
15236                 # term
15237                 || ( $comma_count > 1
15238                     && !( $long_last_term || $long_first_term ) )
15239               )
15240             {
15241                 foreach ( 0 .. $comma_count - 1 ) {
15242                     set_forced_breakpoint( $$rcomma_index[$_] );
15243                 }
15244             }
15245             elsif ($long_last_term) {
15246
15247                 set_forced_breakpoint($i_last_comma);
15248                 $$rdo_not_break_apart = 1 unless $must_break_open;
15249             }
15250             elsif ($long_first_term) {
15251
15252                 set_forced_breakpoint($i_first_comma);
15253             }
15254             else {
15255
15256                 # let breaks be defined by default bond strength logic
15257             }
15258             return;
15259         }
15260
15261         # --------------------------------------------------------
15262         # We have a tentative field count that seems to work.
15263         # How many lines will this require?
15264         # --------------------------------------------------------
15265         my $formatted_lines = $item_count / ($number_of_fields);
15266         if ( $formatted_lines != int $formatted_lines ) {
15267             $formatted_lines = 1 + int $formatted_lines;
15268         }
15269
15270         # So far we've been trying to fill out to the right margin.  But
15271         # compact tables are easier to read, so let's see if we can use fewer
15272         # fields without increasing the number of lines.
15273         $number_of_fields =
15274           compactify_table( $item_count, $number_of_fields, $formatted_lines,
15275             $odd_or_even );
15276
15277         # How many spaces across the page will we fill?
15278         my $columns_per_line =
15279           ( int $number_of_fields / 2 ) * $pair_width +
15280           ( $number_of_fields % 2 ) * $max_width;
15281
15282         my $formatted_columns;
15283
15284         if ( $number_of_fields > 1 ) {
15285             $formatted_columns =
15286               ( $pair_width * ( int( $item_count / 2 ) ) +
15287                   ( $item_count % 2 ) * $max_width );
15288         }
15289         else {
15290             $formatted_columns = $max_width * $item_count;
15291         }
15292         if ( $formatted_columns < $packed_columns ) {
15293             $formatted_columns = $packed_columns;
15294         }
15295
15296         my $unused_columns = $formatted_columns - $packed_columns;
15297
15298         # set some empirical parameters to help decide if we should try to
15299         # align; high sparsity does not look good, especially with few lines
15300         my $sparsity = ($unused_columns) / ($formatted_columns);
15301         my $max_allowed_sparsity =
15302             ( $item_count < 3 )    ? 0.1
15303           : ( $packed_lines == 1 ) ? 0.15
15304           : ( $packed_lines == 2 ) ? 0.4
15305           :                          0.7;
15306
15307         # Begin check for shortcut methods, which avoid treating a list
15308         # as a table for relatively small parenthesized lists.  These
15309         # are usually easier to read if not formatted as tables.
15310         if (
15311             $packed_lines <= 2    # probably can fit in 2 lines
15312             && $item_count < 9    # doesn't have too many items
15313             && $opening_environment eq 'BLOCK'    # not a sub-container
15314             && $opening_token       eq '('        # is paren list
15315           )
15316         {
15317
15318             # Shortcut method 1: for -lp and just one comma:
15319             # This is a no-brainer, just break at the comma.
15320             if (
15321                 $rOpts_line_up_parentheses        # -lp
15322                 && $item_count == 2               # two items, one comma
15323                 && !$must_break_open
15324               )
15325             {
15326                 my $i_break = $$rcomma_index[0];
15327                 set_forced_breakpoint($i_break);
15328                 $$rdo_not_break_apart = 1;
15329                 set_non_alignment_flags( $comma_count, $rcomma_index );
15330                 return;
15331
15332             }
15333
15334             # method 2 is for most small ragged lists which might look
15335             # best if not displayed as a table.
15336             if (
15337                 ( $number_of_fields == 2 && $item_count == 3 )
15338                 || (
15339                     $new_identifier_count > 0    # isn't all quotes
15340                     && $sparsity > 0.15
15341                 )    # would be fairly spaced gaps if aligned
15342               )
15343             {
15344
15345                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15346                     $ri_ragged_break_list );
15347                 ++$break_count if ($use_separate_first_term);
15348
15349                 # NOTE: we should really use the true break count here,
15350                 # which can be greater if there are large terms and
15351                 # little space, but usually this will work well enough.
15352                 unless ($must_break_open) {
15353
15354                     if ( $break_count <= 1 ) {
15355                         $$rdo_not_break_apart = 1;
15356                     }
15357                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15358                     {
15359                         $$rdo_not_break_apart = 1;
15360                     }
15361                 }
15362                 set_non_alignment_flags( $comma_count, $rcomma_index );
15363                 return;
15364             }
15365
15366         }    # end shortcut methods
15367
15368         # debug stuff
15369
15370         FORMATTER_DEBUG_FLAG_SPARSE && do {
15371             print
15372 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
15373
15374         };
15375
15376         #---------------------------------------------------------------
15377         # Compound List Rule 2:
15378         # If this list is too long for one line, and it is an item of a
15379         # larger list, then we must format it, regardless of sparsity
15380         # (ian.t).  One reason that we have to do this is to trigger
15381         # Compound List Rule 1, above, which causes breaks at all commas of
15382         # all outer lists.  In this way, the structure will be properly
15383         # displayed.
15384         #---------------------------------------------------------------
15385
15386         # Decide if this list is too long for one line unless broken
15387         my $total_columns = table_columns_available($i_opening_paren);
15388         my $too_long      = $packed_columns > $total_columns;
15389
15390         # For a paren list, include the length of the token just before the
15391         # '(' because this is likely a sub call, and we would have to
15392         # include the sub name on the same line as the list.  This is still
15393         # imprecise, but not too bad.  (steve.t)
15394         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
15395
15396             $too_long = excess_line_length( $i_opening_minus,
15397                 $i_effective_last_comma + 1 ) > 0;
15398         }
15399
15400         # FIXME: For an item after a '=>', try to include the length of the
15401         # thing before the '=>'.  This is crude and should be improved by
15402         # actually looking back token by token.
15403         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
15404             my $i_opening_minus = $i_opening_paren - 4;
15405             if ( $i_opening_minus >= 0 ) {
15406                 $too_long = excess_line_length( $i_opening_minus,
15407                     $i_effective_last_comma + 1 ) > 0;
15408             }
15409         }
15410
15411         # Always break lists contained in '[' and '{' if too long for 1 line,
15412         # and always break lists which are too long and part of a more complex
15413         # structure.
15414         my $must_break_open_container = $must_break_open
15415           || ( $too_long
15416             && ( $in_hierarchical_list || $opening_token ne '(' ) );
15417
15418 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
15419
15420         #---------------------------------------------------------------
15421         # The main decision:
15422         # Now decide if we will align the data into aligned columns.  Do not
15423         # attempt to align columns if this is a tiny table or it would be
15424         # too spaced.  It seems that the more packed lines we have, the
15425         # sparser the list that can be allowed and still look ok.
15426         #---------------------------------------------------------------
15427
15428         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
15429             || ( $formatted_lines < 2 )
15430             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
15431           )
15432         {
15433
15434             #---------------------------------------------------------------
15435             # too sparse: would look ugly if aligned in a table;
15436             #---------------------------------------------------------------
15437
15438             # use old breakpoints if this is a 'big' list
15439             # FIXME: goal is to improve set_ragged_breakpoints so that
15440             # this is not necessary.
15441             if ( $packed_lines > 2 && $item_count > 10 ) {
15442                 write_logfile_entry("List sparse: using old breakpoints\n");
15443                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
15444             }
15445
15446             # let the continuation logic handle it if 2 lines
15447             else {
15448
15449                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15450                     $ri_ragged_break_list );
15451                 ++$break_count if ($use_separate_first_term);
15452
15453                 unless ($must_break_open_container) {
15454                     if ( $break_count <= 1 ) {
15455                         $$rdo_not_break_apart = 1;
15456                     }
15457                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15458                     {
15459                         $$rdo_not_break_apart = 1;
15460                     }
15461                 }
15462                 set_non_alignment_flags( $comma_count, $rcomma_index );
15463             }
15464             return;
15465         }
15466
15467         #---------------------------------------------------------------
15468         # go ahead and format as a table
15469         #---------------------------------------------------------------
15470         write_logfile_entry(
15471             "List: auto formatting with $number_of_fields fields/row\n");
15472
15473         my $j_first_break =
15474           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15475
15476         for (
15477             my $j = $j_first_break ;
15478             $j < $comma_count ;
15479             $j += $number_of_fields
15480           )
15481         {
15482             my $i = $$rcomma_index[$j];
15483             set_forced_breakpoint($i);
15484         }
15485         return;
15486     }
15487 }
15488
sub set_non_alignment_flags {

    # Flag the commas of a list so that they will not be aligned.
    #
    #   $comma_count  = number of commas to flag
    #   $rcomma_index = ref to array holding the token index of each comma
    #
    # The flag is a 1 stored in @matching_token_to_go at each comma index.
    my ( $comma_count, $rcomma_index ) = @_;
    for my $n ( 0 .. $comma_count - 1 ) {
        my $i_comma = $rcomma_index->[$n];
        $matching_token_to_go[$i_comma] = 1;
    }
}
15498
sub study_list_complexity {

    # Scan the terms of a list for signs that it is too complex to look
    # good as a multi-column table.
    #
    # Input:
    #   $ri_term_begin = ref to array of the first token index of each term
    #   $ri_term_end   = ref to array of the last token index of each term
    #   $ritem_lengths = ref to array of the length of each item
    #   $max_width     = item width supplied by the caller; it is only
    #                    compared against a threshold here
    #
    # Returns the list:
    #   $number_of_fields_best = suggested number of fields based on
    #     complexity.  It starts as $rOpts_maximum_fields_per_table and is
    #     forced to 1 when most terms are complex.
    #   \@i_ragged_break_list  = item numbers after which a break helps to
    #     avoid lines which are hard to read
    #   $identifier_count      = number of terms whose first token is not
    #     of type 'q', 'Q', 'w' or '-'
    my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;

    # empirical limits on the weighted length of a single term
    my $definitely_complex = 30;
    my $definitely_simple  = 12;

    my $item_count = @{$ri_term_begin};
    my $i_max      = @{$ritem_lengths} - 1;

    my $number_of_fields_best = $rOpts_maximum_fields_per_table;
    my $complex_item_count    = 0;
    my $quote_count           = 0;

    # item numbers of the two most recent ragged breaks; the starting
    # values can never match a real item number
    my $i_last_break      = -2;
    my $i_last_last_break = -3;
    my @i_ragged_break_list;

    foreach my $i ( 0 .. $i_max ) {
        my $ib         = $ri_term_begin->[$i];
        my $ie         = $ri_term_end->[$i];
        my $first_type = $types_to_go[$ib];

        # the complexity measure starts from the actual term length
        my $weighted_length = $ritem_lengths->[$i] - 2;

        # terms starting with type 'w' or '-' are counted together with
        # the quotes, but only true quotes set $is_quote
        my $is_quote = ( $first_type =~ /^[qQ]$/ ) ? 1 : 0;
        if ( $is_quote || $first_type =~ /^[w\-]$/ ) {
            $quote_count++;
        }

        if ( $ib ne $ie ) {

            # multiple tokens: a blank anywhere in the term makes it
            # complex and doubles its weight
            if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }

            # a range operator adds a fixed penalty
            if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
                $weighted_length += 4;
            }
        }
        elsif ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {

            # single token: only a quote containing whitespace is complex
            $complex_item_count++;
            $weighted_length *= 2;
        }

        # every token beyond the first adds weight
        $weighted_length += 2 * ( $ie - $ib );

        if ( $weighted_length >= $definitely_complex ) {

            # This term is 'long and complex', so break after it.  If the
            # previous term was also broken after, but the one before
            # that was not, isolate the previous term by breaking before
            # it as well.
            if (   $i > 1
                && $i_last_break == $i - 1
                && $i_last_last_break != $i - 2 )
            {

                ## FIXME: don't strand a small term
                pop @i_ragged_break_list;
                push @i_ragged_break_list, $i - 2, $i - 1;
            }

            push @i_ragged_break_list, $i;
            ( $i_last_last_break, $i_last_break ) = ( $i_last_break, $i );
        }
        elsif ($i == $i_max
            && $i_last_break == $i - 1
            && $weighted_length <= $definitely_simple )
        {

            # A small last term would not look good on a line by itself,
            # so remove the break in front of it.
            pop @i_ragged_break_list;
        }
    }

    my $identifier_count = $i_max + 1 - $quote_count;

    # Need more tuning here..
    # Recommend one field per line when more than half of the items are
    # complex, unless the starting value was exactly 2.
    if (   $max_width > 12
        && $complex_item_count > $item_count / 2
        && $number_of_fields_best != 2 )
    {
        $number_of_fields_best = 1;
    }

    return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
}
15613
sub get_maximum_fields_wanted {

    # Decide whether a table should be restricted to a single field.
    # Not all tables look good with more than one field of items.
    # This coding is still under development.
    #
    #   $ritem_lengths = ref to array of item lengths
    #
    # Returns 1 if a single field is suggested, or 0 if there is no
    # preference.
    my ($ritem_lengths) = @_;
    my $item_count = @{$ritem_lengths};

    # For just a few items, we tentatively assume just 1 field.
    return (1) if ( $item_count <= 5 );

    # For larger tables compare two measures of raggedness:
    #   variation 1 = sum of |length change| between consecutive items,
    #                 as seen when the items are stacked in one field
    #   variation 2 = the same sum taken separately over the even and the
    #                 odd numbered items, as seen with two fields
    my $total_variation_1 = 0;
    my @column_variation  = ( 0, 0 );
    my $previous_length;
    my @previous_in_column;

    foreach my $j ( 0 .. $item_count - 1 ) {
        my $length = $ritem_lengths->[$j];
        my $column = $j % 2;

        if ( defined($previous_length) ) {
            $total_variation_1 += abs( $length - $previous_length );
        }
        $previous_length = $length;

        my $length_above = $previous_in_column[$column];
        if ( defined($length_above) ) {
            $column_variation[$column] += abs( $length - $length_above );
        }
        $previous_in_column[$column] = $length;
    }
    my $total_variation_2 = $column_variation[0] + $column_variation[1];

    # Two fields are acceptable only if they are sufficiently smoother
    # than one; tables of 6 to 10 items must pass a stricter test.
    my $factor = ( $item_count > 10 ) ? 1 : 0.75;
    my $two_fields_look_ok = $total_variation_2 < $factor * $total_variation_1;

    return ( $two_fields_look_ok ? 0 : 1 );
}
15674
sub table_columns_available {

    # Return the number of columns available for a table, measured from
    # the leading spaces of the token at index $i_first_comma to the
    # maximum line length.
    my ($i_first_comma) = @_;
    my $leading_spaces = leading_spaces_to_go($i_first_comma);

    # Patch: one column is held back.  The vertical formatter does not
    # line up lines whose lengths exactly equal the available line length,
    # because of allowances that must be made for side comments.
    return $rOpts_maximum_line_length - $leading_spaces - 1;
}
15687
sub maximum_number_of_fields {

    # How many fields will fit in the available space?
    #
    #   $columns     = number of columns available
    #   $odd_or_even = if 1, one unpaired field may follow the pairs
    #   $max_width   = width needed by the unpaired field
    #   $pair_width  = width needed by one pair of fields
    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;

    # start with as many complete pairs as will fit
    my $max_pairs  = int( $columns / $pair_width );
    my $pairs_only = 2 * $max_pairs;
    return $pairs_only unless ( $odd_or_even == 1 );

    # then see if there is room left over for one more field
    my $width_with_extra = $max_pairs * $pair_width + $max_width;
    return ( $width_with_extra <= $columns ) ? $pairs_only + 1 : $pairs_only;
}
15701
sub compactify_table {

    # Given a table with a certain number of fields and a certain number
    # of lines, return the smallest field count, reached from
    # $number_of_fields in steps of $odd_or_even, which still holds all
    # of the items in the same number of lines.
    #
    #   $item_count       = number of items in the table
    #   $number_of_fields = tentative number of fields per line
    #   $formatted_lines  = number of lines needed with that many fields
    #   $odd_or_even      = step size used when reducing the field count
    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;

    # nothing to do unless at least one reduction step is possible
    return $number_of_fields
      if ( $formatted_lines <= 0 || $number_of_fields < 2 * $odd_or_even );

    my $trial_fields = $number_of_fields;
    while ($trial_fields >= $odd_or_even
        && $trial_fields * $formatted_lines >= $item_count )
    {
        $number_of_fields = $trial_fields;
        $trial_fields -= $odd_or_even;
    }
    return $number_of_fields;
}
15723
sub set_ragged_breakpoints {

    # Set breakpoints in a list that cannot be formatted nicely as a
    # table.
    #
    #   $ri_term_comma        = ref to array giving, for each item number,
    #                           the token index at which to break
    #   $ri_ragged_break_list = ref to array of the item numbers wanted
    #
    # Returns the number of breakpoints requested.  Items whose entry in
    # $ri_term_comma is missing or zero are skipped and not counted.
    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;

    my $break_count = 0;
    foreach my $item ( @{$ri_ragged_break_list} ) {
        my $i_comma = $ri_term_comma->[$item];
        next unless ($i_comma);
        set_forced_breakpoint($i_comma);
        $break_count++;
    }
    return $break_count;
}
15740
sub copy_old_breakpoints {

    # Request a forced breakpoint at every token index in the range
    # $i_first_comma .. $i_last_comma which is flagged in
    # @old_breakpoint_to_go.
    my ( $i_first_comma, $i_last_comma ) = @_;
    foreach my $i ( $i_first_comma .. $i_last_comma ) {
        next unless ( $old_breakpoint_to_go[$i] );
        set_forced_breakpoint($i);
    }
}
15749
sub set_nobreaks {

    # Set the nobreak flag for every token index from $i to $j inclusive.
    # The request is ignored unless 0 <= $i <= $j <= $max_index_to_go.
    my ( $i, $j ) = @_;

    my $range_is_ok = ( $i >= 0 && $i <= $j && $j <= $max_index_to_go );

    # shouldn't happen; non-critical error
    unless ($range_is_ok) {
        FORMATTER_DEBUG_FLAG_NOBREAK && do {
            my ( $a, $b, $c ) = caller();
            print(
"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
            );
        };
        return;
    }

    FORMATTER_DEBUG_FLAG_NOBREAK && do {
        my ( $a, $b, $c ) = caller();
        print(
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
        );
    };

    foreach my $k ( $i .. $j ) {
        $nobreak_to_go[$k] = 1;
    }
}
15774
sub set_fake_breakpoint {

    # Signal that there are breaks without choosing where they go: only
    # the count of forced breakpoints is bumped up.  This is useful if we
    # have breaks but may want to postpone deciding where to make them.
    return $forced_breakpoint_count++;
}
15782
sub set_forced_breakpoint {

    # Request a forced line break at token index $i.
    #
    # The request is ignored if $i is undefined, negative or greater than
    # $max_index_to_go, or if the nobreak flag is set at the final
    # location.  An accepted break is flagged in @forced_breakpoint_to_go,
    # counted, and recorded on the undo stack.
    my ($i) = @_;

    return if ( !defined($i) || $i < 0 );

    my $token = $tokens_to_go[$i];

    # For certain operator tokens the user's break-before setting decides
    # whether the break goes before or after the token; breaks are always
    # forced before 'if' and 'unless'.  Breaking before the token is done
    # by moving the index back by one.
    my $break_before =
      ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ )
      ? $want_break_before{$token}
      : $is_if_unless{$token};
    if ($break_before) { $i-- }

    return unless ( $i >= 0 && $i <= $max_index_to_go );

    # the break is recorded at a nonblank token
    my $i_nonblank = ( $types_to_go[$i] eq 'b' ) ? $i - 1 : $i;

    FORMATTER_DEBUG_FLAG_FORCE && do {
        my ( $a, $b, $c ) = caller();
        print
"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
    };

    return unless ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 );

    $forced_breakpoint_to_go[$i_nonblank] = 1;
    $forced_breakpoint_count++;

    # remember the rightmost forced break
    if ( $index_max_forced_break < $i_nonblank ) {
        $index_max_forced_break = $i_nonblank;
    }

    # save the location so that the break can be undone later
    $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
      $i_nonblank;

    # if we break at an opening container..break at the closing
    if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
        set_closing_breakpoint($i_nonblank);
    }
}
15825
sub clear_breakpoint_undo_stack {

    # Empty the undo stack of forced breakpoints by resetting its entry
    # count to zero.
    return $forced_breakpoint_undo_count = 0;
}
15829
sub undo_forced_breakpoint_stack {

    # Undo forced breakpoints, most recent first, until only the first
    # $i_start entries remain on the undo stack.
    #
    #   $i_start = number of undo stack entries to keep.  A negative value
    #              is reported as a program bug and treated as 0.
    #
    # Each undone break has its flag in @forced_breakpoint_to_go cleared
    # and the count of forced breakpoints reduced by one.
    my $i_start = shift;
    if ( $i_start < 0 ) {

        # Report the bad value before clamping it; otherwise the message
        # would always show i=0.
        my ( $package, $filename, $line ) = caller();
        warning(
"Program Bug: undo_forced_breakpoint_stack from $package $line has i=$i_start "
        );
        $i_start = 0;
    }

    while ( $forced_breakpoint_undo_count > $i_start ) {
        my $i =
          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
        if ( $i >= 0 && $i <= $max_index_to_go ) {
            $forced_breakpoint_to_go[$i] = 0;
            $forced_breakpoint_count--;

            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $package, $filename, $line ) = caller();
                print(
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $package $line max=$max_index_to_go\n"
                );
            };
        }

        # shouldn't happen, but not a critical error
        else {
            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $package, $filename, $line ) = caller();
                print(
"Program Bug: undo_forced_breakpoint from $package $line has i=$i but max=$max_index_to_go"
                );
            };
        }
    }
}
15867
15868 {    # begin recombine_breakpoints
15869
15870     my %is_amp_amp;
15871     my %is_ternary;
15872     my %is_math_op;
15873
15874     BEGIN {
15875
15876         @_ = qw( && || );
15877         @is_amp_amp{@_} = (1) x scalar(@_);
15878
15879         @_ = qw( ? : );
15880         @is_ternary{@_} = (1) x scalar(@_);
15881
15882         @_ = qw( + - * / );
15883         @is_math_op{@_} = (1) x scalar(@_);
15884     }
15885
15886     sub recombine_breakpoints {
15887
15888         # sub set_continuation_breaks is very liberal in setting line breaks
15889         # for long lines, always setting breaks at good breakpoints, even
15890         # when that creates small lines.  Occasionally small line fragments
15891         # are produced which would look better if they were combined.
15892         # That's the task of this routine, recombine_breakpoints.
15893         #
15894         # $ri_beg = ref to array of BEGinning indexes of each line
15895         # $ri_end = ref to array of ENDing indexes of each line
15896         my ( $ri_beg, $ri_end ) = @_;
15897
15898         my $more_to_do = 1;
15899
15900         # We keep looping over all of the lines of this batch
15901         # until there are no more possible recombinations
15902         my $nmax_last = @$ri_end;
15903         while ($more_to_do) {
15904             my $n_best = 0;
15905             my $bs_best;
15906             my $n;
15907             my $nmax = @$ri_end - 1;
15908
15909             # safety check for infinite loop
15910             unless ( $nmax < $nmax_last ) {
15911
15912             # shouldn't happen because splice below decreases nmax on each pass:
15913             # but i get paranoid sometimes
15914                 die "Program bug-infinite loop in recombine breakpoints\n";
15915             }
15916             $nmax_last  = $nmax;
15917             $more_to_do = 0;
15918             my $previous_outdentable_closing_paren;
15919             my $leading_amp_count = 0;
15920             my $this_line_is_semicolon_terminated;
15921
15922             # loop over all remaining lines in this batch
15923             for $n ( 1 .. $nmax ) {
15924
15925                 #----------------------------------------------------------
15926                 # If we join the current pair of lines,
15927                 # line $n-1 will become the left part of the joined line
15928                 # line $n will become the right part of the joined line
15929                 #
15930                 # Here are Indexes of the endpoint tokens of the two lines:
15931                 #
15932                 #  -----line $n-1--- | -----line $n-----
15933                 #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
15934                 #                    ^
15935                 #                    |
15936                 # We want to decide if we should remove the line break
15937                 # between the tokens at $iend_1 and $ibeg_2
15938                 #
15939                 # We will apply a number of ad-hoc tests to see if joining
15940                 # here will look ok.  The code will just issue a 'next'
15941                 # command if the join doesn't look good.  If we get through
15942                 # the gauntlet of tests, the lines will be recombined.
15943                 #----------------------------------------------------------
15944                 #
15945                 # beginning and ending tokens of the lines we are working on
15946                 my $ibeg_1 = $$ri_beg[ $n - 1 ];
15947                 my $iend_1 = $$ri_end[ $n - 1 ];
15948                 my $iend_2 = $$ri_end[$n];
15949                 my $ibeg_2 = $$ri_beg[$n];
15950
15951                 my $ibeg_nmax = $$ri_beg[$nmax];
15952
15953                 # some beginning indexes of other lines, which may not exist
15954                 my $ibeg_0 = $n > 1          ? $$ri_beg[ $n - 2 ] : -1;
15955                 my $ibeg_3 = $n < $nmax      ? $$ri_beg[ $n + 1 ] : -1;
15956                 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
15957
15958                 my $bs_tweak = 0;
15959
15960                 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15961                 #        $nesting_depth_to_go[$ibeg_1] );
15962
15963 ##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
15964
15965                 # If line $n is the last line, we set some flags and
15966                 # do any special checks for it
15967                 if ( $n == $nmax ) {
15968
15969                     # a terminal '{' should stay where it is
15970                     next if $types_to_go[$ibeg_2] eq '{';
15971
15972                     # set flag if statement $n ends in ';'
15973                     $this_line_is_semicolon_terminated =
15974                       $types_to_go[$iend_2] eq ';'
15975
15976                       # with possible side comment
15977                       || ( $types_to_go[$iend_2] eq '#'
15978                         && $iend_2 - $ibeg_2 >= 2
15979                         && $types_to_go[ $iend_2 - 2 ] eq ';'
15980                         && $types_to_go[ $iend_2 - 1 ] eq 'b' );
15981                 }
15982
15983                 #----------------------------------------------------------
15984                 # Section 1: examine token at $iend_1 (right end of first line
15985                 # of pair)
15986                 #----------------------------------------------------------
15987
15988                 # an isolated '}' may join with a ';' terminated segment
15989                 if ( $types_to_go[$iend_1] eq '}' ) {
15990
15991                     # Check for cases where combining a semicolon terminated
15992                     # statement with a previous isolated closing paren will
15993                     # allow the combined line to be outdented.  This is
15994                     # generally a good move.  For example, we can join up
15995                     # the last two lines here:
15996                     #  (
15997                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15998                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
15999                     #    )
16000                     #    = stat($file);
16001                     #
16002                     # to get:
16003                     #  (
16004                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
16005                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
16006                     #  ) = stat($file);
16007                     #
16008                     # which makes the parens line up.
16009                     #
16010                     # Another example, from Joe Matarazzo, probably looks best
16011                     # with the 'or' clause appended to the trailing paren:
16012                     #  $self->some_method(
16013                     #      PARAM1 => 'foo',
16014                     #      PARAM2 => 'bar'
16015                     #  ) or die "Some_method didn't work";
16016                     #
16017                     $previous_outdentable_closing_paren =
16018                       $this_line_is_semicolon_terminated    # ends in ';'
16019                       && $ibeg_1 == $iend_1    # only one token on last line
16020                       && $tokens_to_go[$iend_1] eq
16021                       ')'                      # must be structural paren
16022
16023                       # only &&, ||, and : if no others seen
16024                       # (but note: our count made below could be wrong
16025                       # due to intervening comments)
16026                       && ( $leading_amp_count == 0
16027                         || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
16028
16029                       # but leading colons probably line up with a
16030                       # previous colon or question (count could be wrong).
16031                       && $types_to_go[$ibeg_2] ne ':'
16032
16033                       # only one step in depth allowed.  this line must not
16034                       # begin with a ')' itself.
16035                       && ( $nesting_depth_to_go[$iend_1] ==
16036                         $nesting_depth_to_go[$iend_2] + 1 );
16037
16038                     # YVES patch 2 of 2:
16039                     # Allow cuddled eval chains, like this:
16040                     #   eval {
16041                     #       #STUFF;
16042                     #       1; # return true
16043                     #   } or do {
16044                     #       #handle error
16045                     #   };
16046                     # This patch works together with a patch in
16047                     # setting adjusted indentation (where the closing eval
16048                     # brace is outdented if possible).
16049                     # The problem is that an 'eval' block has continuation
16050                     # indentation and it looks better to undo it in some
16051                     # cases.  If we do not use this patch we would get:
16052                     #   eval {
16053                     #       #STUFF;
16054                     #       1; # return true
16055                     #       }
16056                     #       or do {
16057                     #       #handle error
16058                     #     };
16059                     # The alternative, for uncuddled style, is to create
16060                     # a patch in set_adjusted_indentation which undoes
16061                     # the indentation of a leading line like 'or do {'.
16062                     # This doesn't work well with -icb though
16063                     if (
16064                            $block_type_to_go[$iend_1] eq 'eval'
16065                         && !$rOpts->{'line-up-parentheses'}
16066                         && !$rOpts->{'indent-closing-brace'}
16067                         && $tokens_to_go[$iend_2] eq '{'
16068                         && (
16069                             ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ )
16070                             || (   $types_to_go[$ibeg_2] eq 'k'
16071                                 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
16072                             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
16073                         )
16074                       )
16075                     {
16076                         $previous_outdentable_closing_paren ||= 1;
16077                     }
16078
16079                     next
16080                       unless (
16081                         $previous_outdentable_closing_paren
16082
16083                         # handle '.' and '?' specially below
16084                         || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
16085                       );
16086                 }
16087
16088                 # YVES
16089                 # honor breaks at opening brace
16090                 # Added to prevent recombining something like this:
16091                 #  } || eval { package main;
16092                 elsif ( $types_to_go[$iend_1] eq '{' ) {
16093                     next if $forced_breakpoint_to_go[$iend_1];
16094                 }
16095
16096                 # do not recombine lines with ending &&, ||,
16097                 elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
16098                     next unless $want_break_before{ $types_to_go[$iend_1] };
16099                 }
16100
16101                 # keep a terminal colon
16102                 elsif ( $types_to_go[$iend_1] eq ':' ) {
16103                     next unless $want_break_before{ $types_to_go[$iend_1] };
16104                 }
16105
16106                 # Identify and recombine a broken ?/: chain
16107                 elsif ( $types_to_go[$iend_1] eq '?' ) {
16108
16109                     # Do not recombine different levels
16110                     next
16111                       if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
16112
16113                     # do not recombine unless next line ends in :
16114                     next unless $types_to_go[$iend_2] eq ':';
16115                 }
16116
16117                 # for lines ending in a comma...
16118                 elsif ( $types_to_go[$iend_1] eq ',' ) {
16119
16120                     # Do not recombine at comma which is following the
16121                     # input bias.
16122                     # TODO: might be best to make a special flag
16123                     next if ( $old_breakpoint_to_go[$iend_1] );
16124
16125                  # an isolated '},' may join with an identifier + ';'
16126                  # this is useful for the class of a 'bless' statement (bless.t)
16127                     if (   $types_to_go[$ibeg_1] eq '}'
16128                         && $types_to_go[$ibeg_2] eq 'i' )
16129                     {
16130                         next
16131                           unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
16132                             && ( $iend_2 == ( $ibeg_2 + 1 ) )
16133                             && $this_line_is_semicolon_terminated );
16134
16135                         # override breakpoint
16136                         $forced_breakpoint_to_go[$iend_1] = 0;
16137                     }
16138
16139                     # but otherwise ..
16140                     else {
16141
16142                         # do not recombine after a comma unless this will leave
16143                         # just 1 more line
16144                         next unless ( $n + 1 >= $nmax );
16145
16146                     # do not recombine if there is a change in indentation depth
16147                         next
16148                           if (
16149                             $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
16150
16151                         # do not recombine a "complex expression" after a
16152                         # comma.  "complex" here means line $n contains a '('.
16153                         my $saw_paren;
16154                         foreach my $ii ( $ibeg_2 .. $iend_2 ) {
16155                             if ( $tokens_to_go[$ii] eq '(' ) {
16156                                 $saw_paren = 1;
16157                                 last;
16158                             }
16159                         }
16160                         next if $saw_paren;
16161                     }
16162                 }
16163
16164                 # opening paren..
16165                 elsif ( $types_to_go[$iend_1] eq '(' ) {
16166
16167                     # No longer doing this
16168                 }
16169
16170                 elsif ( $types_to_go[$iend_1] eq ')' ) {
16171
16172                     # No longer doing this
16173                 }
16174
16175                 # keep a terminal for-semicolon
16176                 elsif ( $types_to_go[$iend_1] eq 'f' ) {
16177                     next;
16178                 }
16179
16180                 # if '=' at end of line ...
16181                 elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
16182
16183                     my $is_short_quote =
16184                       (      $types_to_go[$ibeg_2] eq 'Q'
16185                           && $ibeg_2 == $iend_2
16186                           && length( $tokens_to_go[$ibeg_2] ) <
16187                           $rOpts_short_concatenation_item_length );
16188                     my $is_ternary =
16189                       ( $types_to_go[$ibeg_1] eq '?'
16190                           && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
16191
16192                     # always join an isolated '=', a short quote, or if this
16193                     # will put ?/: at start of adjacent lines
16194                     if (   $ibeg_1 != $iend_1
16195                         && !$is_short_quote
16196                         && !$is_ternary )
16197                     {
16198                         next
16199                           unless (
16200                             (
16201
16202                                 # unless we can reduce this to two lines
16203                                 $nmax < $n + 2
16204
16205                              # or three lines, the last with a leading semicolon
16206                                 || (   $nmax == $n + 2
16207                                     && $types_to_go[$ibeg_nmax] eq ';' )
16208
16209                                 # or the next line ends with a here doc
16210                                 || $types_to_go[$iend_2] eq 'h'
16211
16212                                # or the next line ends in an open paren or brace
16213                                # and the break hasn't been forced [dima.t]
16214                                 || (  !$forced_breakpoint_to_go[$iend_1]
16215                                     && $types_to_go[$iend_2] eq '{' )
16216                             )
16217
16218                             # do not recombine if the two lines might align well
16219                             # this is a very approximate test for this
16220                             && (   $ibeg_3 >= 0
16221                                 && $types_to_go[$ibeg_2] ne
16222                                 $types_to_go[$ibeg_3] )
16223                           );
16224
16225                         # -lp users often prefer this:
16226                         #  my $title = function($env, $env, $sysarea,
16227                         #                       "bubba Borrower Entry");
16228                         #  so we will recombine if -lp is used and we have an
16229                         #  ending comma
16230                         if (  !$rOpts_line_up_parentheses
16231                             || $types_to_go[$iend_2] ne ',' )
16232                         {
16233
16234                            # otherwise, scan the rhs line up to last token for
16235                            # complexity.  Note that we are not counting the last
16236                            # token in case it is an opening paren.
16237                             my $tv    = 0;
16238                             my $depth = $nesting_depth_to_go[$ibeg_2];
16239                             for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
16240                                 if ( $nesting_depth_to_go[$i] != $depth ) {
16241                                     $tv++;
16242                                     last if ( $tv > 1 );
16243                                 }
16244                                 $depth = $nesting_depth_to_go[$i];
16245                             }
16246
16247                          # ok to recombine if no level changes before last token
16248                             if ( $tv > 0 ) {
16249
16250                                 # otherwise, do not recombine if more than two
16251                                 # level changes.
16252                                 next if ( $tv > 1 );
16253
16254                               # check total complexity of the two adjacent lines
16255                               # that will occur if we do this join
16256                                 my $istop =
16257                                   ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
16258                                 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
16259                                     if ( $nesting_depth_to_go[$i] != $depth ) {
16260                                         $tv++;
16261                                         last if ( $tv > 2 );
16262                                     }
16263                                     $depth = $nesting_depth_to_go[$i];
16264                                 }
16265
16266                         # do not recombine if total is more than 2 level changes
16267                                 next if ( $tv > 2 );
16268                             }
16269                         }
16270                     }
16271
16272                     unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16273                         $forced_breakpoint_to_go[$iend_1] = 0;
16274                     }
16275                 }
16276
16277                 # for keywords..
16278                 elsif ( $types_to_go[$iend_1] eq 'k' ) {
16279
16280                     # make major control keywords stand out
16281                     # (recombine.t)
16282                     next
16283                       if (
16284
16285                         #/^(last|next|redo|return)$/
16286                         $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16287
16288                         # but only if followed by multiple lines
16289                         && $n < $nmax
16290                       );
16291
16292                     if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16293                         next
16294                           unless $want_break_before{ $tokens_to_go[$iend_1] };
16295                     }
16296                 }
16297
16298                 # handle trailing + - * /
16299                 elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
16300
16301                     # combine lines if next line has single number
16302                     # or a short term followed by same operator
16303                     my $i_next_nonblank = $ibeg_2;
16304                     my $i_next_next     = $i_next_nonblank + 1;
16305                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16306                     my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
16307                       && (
16308                         $i_next_nonblank == $iend_2
16309                         || (   $i_next_next == $iend_2
16310                             && $is_math_op{ $types_to_go[$i_next_next] } )
16311                         || $types_to_go[$i_next_next] eq ';'
16312                       );
16313
16314                     # find token before last operator of previous line
16315                     my $iend_1_minus = $iend_1;
16316                     $iend_1_minus--
16317                       if ( $iend_1_minus > $ibeg_1 );
16318                     $iend_1_minus--
16319                       if ( $types_to_go[$iend_1_minus] eq 'b'
16320                         && $iend_1_minus > $ibeg_1 );
16321
16322                     my $short_term_follows =
16323                       (      $types_to_go[$iend_2] eq $types_to_go[$iend_1]
16324                           && $types_to_go[$iend_1_minus] =~ /^[in]$/
16325                           && $iend_2 <= $ibeg_2 + 2
16326                           && length( $tokens_to_go[$ibeg_2] ) <
16327                           $rOpts_short_concatenation_item_length );
16328
16329                     next
16330                       unless ( $number_follows || $short_term_follows );
16331                 }
16332
16333                 #----------------------------------------------------------
16334                 # Section 2: Now examine token at $ibeg_2 (left end of second
16335                 # line of pair)
16336                 #----------------------------------------------------------
16337
16338                 # join lines identified above as capable of
16339                 # causing an outdented line with leading closing paren
16340                 if ($previous_outdentable_closing_paren) {
16341                     $forced_breakpoint_to_go[$iend_1] = 0;
16342                 }
16343
16344                 # do not recombine lines with leading :
16345                 elsif ( $types_to_go[$ibeg_2] eq ':' ) {
16346                     $leading_amp_count++;
16347                     next if $want_break_before{ $types_to_go[$ibeg_2] };
16348                 }
16349
16350                 # handle lines with leading &&, ||
16351                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16352
16353                     $leading_amp_count++;
16354
16355                     # ok to recombine if it follows a ? or :
16356                     # and is followed by an open paren..
16357                     my $ok =
16358                       (      $is_ternary{ $types_to_go[$ibeg_1] }
16359                           && $tokens_to_go[$iend_2] eq '(' )
16360
16361                     # or is followed by a ? or : at same depth
16362                     #
16363                     # We are looking for something like this. We can
16364                     # recombine the && line with the line above to make the
16365                     # structure more clear:
16366                     #  return
16367                     #    exists $G->{Attr}->{V}
16368                     #    && exists $G->{Attr}->{V}->{$u}
16369                     #    ? %{ $G->{Attr}->{V}->{$u} }
16370                     #    : ();
16371                     #
16372                     # We should probably leave something like this alone:
16373                     #  return
16374                     #       exists $G->{Attr}->{E}
16375                     #    && exists $G->{Attr}->{E}->{$u}
16376                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
16377                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16378                     #    : ();
16379                     # so that we either have all of the &&'s (or ||'s)
16380                     # on one line, as in the first example, or break at
16381                     # each one as in the second example.  However, it
16382                     # sometimes makes things worse to check for this because
16383                     # it prevents multiple recombinations.  So this is not done.
16384                       || ( $ibeg_3 >= 0
16385                         && $is_ternary{ $types_to_go[$ibeg_3] }
16386                         && $nesting_depth_to_go[$ibeg_3] ==
16387                         $nesting_depth_to_go[$ibeg_2] );
16388
16389                     next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
16390                     $forced_breakpoint_to_go[$iend_1] = 0;
16391
16392                     # tweak the bond strength to give this joint priority
16393                     # over ? and :
16394                     $bs_tweak = 0.25;
16395                 }
16396
16397                 # Identify and recombine a broken ?/: chain
16398                 elsif ( $types_to_go[$ibeg_2] eq '?' ) {
16399
16400                     # Do not recombine different levels
16401                     my $lev = $levels_to_go[$ibeg_2];
16402                     next if ( $lev ne $levels_to_go[$ibeg_1] );
16403
16404                     # Do not recombine a '?' if either next line or
16405                     # previous line does not start with a ':'.  The reasons
16406                     # are that (1) no alignment of the ? will be possible
16407                     # and (2) the expression is somewhat complex, so the
16408                     # '?' is harder to see in the interior of the line.
16409                     my $follows_colon =
16410                       $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
16411                     my $precedes_colon =
16412                       $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16413                     next unless ( $follows_colon || $precedes_colon );
16414
16415                     # we will always combine a ? line following a : line
16416                     if ( !$follows_colon ) {
16417
16418                         # ...otherwise recombine only if it looks like a chain.
16419                         # we will just look at a few nearby lines to see if
16420                         # this looks like a chain.
16421                         my $local_count = 0;
16422                         foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
16423                             $local_count++
16424                               if $ii >= 0
16425                                   && $types_to_go[$ii] eq ':'
16426                                   && $levels_to_go[$ii] == $lev;
16427                         }
16428                         next unless ( $local_count > 1 );
16429                     }
16430                     $forced_breakpoint_to_go[$iend_1] = 0;
16431                 }
16432
16433                 # do not recombine lines with leading '.'
16434                 elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
16435                     my $i_next_nonblank = $ibeg_2 + 1;
16436                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16437                         $i_next_nonblank++;
16438                     }
16439
16440                     next
16441                       unless (
16442
16443                    # ... unless there is just one and we can reduce
16444                    # this to two lines if we do.  For example, this
16445                    #
16446                    #
16447                    #  $bodyA .=
16448                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16449                    #
16450                    #  looks better than this:
16451                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16452                    #    . '$args .= $pat;'
16453
16454                         (
16455                                $n == 2
16456                             && $n == $nmax
16457                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
16458                         )
16459
16460                         #  ... or this would strand a short quote , like this
16461                         #                . "some long quote"
16462                         #                . "\n";
16463                         || (   $types_to_go[$i_next_nonblank] eq 'Q'
16464                             && $i_next_nonblank >= $iend_2 - 1
16465                             && length( $tokens_to_go[$i_next_nonblank] ) <
16466                             $rOpts_short_concatenation_item_length )
16467                       );
16468                 }
16469
16470                 # handle leading keyword..
16471                 elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
16472
16473                     # handle leading "or"
16474                     if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16475                         next
16476                           unless (
16477                             $this_line_is_semicolon_terminated
16478                             && (
16479
16480                                 # following 'if' or 'unless' or 'or'
16481                                 $types_to_go[$ibeg_1] eq 'k'
16482                                 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16483
16484                                 # important: only combine a very simple or
16485                                 # statement because the step below may have
16486                                 # combined a trailing 'and' with this or,
16487                                 # and we do not want to then combine
16488                                 # everything together
16489                                 && ( $iend_2 - $ibeg_2 <= 7 )
16490                             )
16491                           );
16492                     }
16493
16494                     # handle leading 'and'
16495                     elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16496
16497                         # Decide if we will combine a single terminal 'and'
16498                         # after an 'if' or 'unless'.
16499
16500                         #     This looks best with the 'and' on the same
16501                         #     line as the 'if':
16502                         #
16503                         #         $a = 1
16504                         #           if $seconds and $nu < 2;
16505                         #
16506                         #     But this looks better as shown:
16507                         #
16508                         #         $a = 1
16509                         #           if !$this->{Parents}{$_}
16510                         #           or $this->{Parents}{$_} eq $_;
16511                         #
16512                         next
16513                           unless (
16514                             $this_line_is_semicolon_terminated
16515                             && (
16516
16517                                 # following 'if' or 'unless' or 'or'
16518                                 $types_to_go[$ibeg_1] eq 'k'
16519                                 && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
16520                                     || $tokens_to_go[$ibeg_1] eq 'or' )
16521                             )
16522                           );
16523                     }
16524
16525                     # handle leading "if" and "unless"
16526                     elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16527
16528                       # FIXME: This is still experimental..may not be too useful
16529                         next
16530                           unless (
16531                             $this_line_is_semicolon_terminated
16532
16533                             #  previous line begins with 'and' or 'or'
16534                             && $types_to_go[$ibeg_1] eq 'k'
16535                             && $is_and_or{ $tokens_to_go[$ibeg_1] }
16536
16537                           );
16538                     }
16539
16540                     # handle all other leading keywords
16541                     else {
16542
16543                         # keywords look best at start of lines,
16544                         # but combine things like "1 while"
16545                         unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
16546                             next
16547                               if ( ( $types_to_go[$iend_1] ne 'k' )
16548                                 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16549                         }
16550                     }
16551                 }
16552
16553                 # similar treatment of && and || as above for 'and' and 'or':
16554                 # NOTE: This block of code is currently bypassed because
16555                 # of a previous block but is retained for possible future use.
16556                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16557
16558                     # maybe looking at something like:
16559                     # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16560
16561                     next
16562                       unless (
16563                         $this_line_is_semicolon_terminated
16564
16565                         # previous line begins with an 'if' or 'unless' keyword
16566                         && $types_to_go[$ibeg_1] eq 'k'
16567                         && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16568
16569                       );
16570                 }
16571
16572                 # handle leading + - * /
16573                 elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
16574                     my $i_next_nonblank = $ibeg_2 + 1;
16575                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16576                         $i_next_nonblank++;
16577                     }
16578
16579                     my $i_next_next = $i_next_nonblank + 1;
16580                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16581
16582                     my $is_number = (
16583                         $types_to_go[$i_next_nonblank] eq 'n'
16584                           && ( $i_next_nonblank >= $iend_2 - 1
16585                             || $types_to_go[$i_next_next] eq ';' )
16586                     );
16587
16588                     my $iend_1_nonblank =
16589                       $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
16590                     my $iend_2_nonblank =
16591                       $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
16592
16593                     my $is_short_term =
16594                       (      $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
16595                           && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
16596                           && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
16597                           && $iend_2_nonblank <= $ibeg_2 + 2
16598                           && length( $tokens_to_go[$iend_2_nonblank] ) <
16599                           $rOpts_short_concatenation_item_length );
16600
16601                     # Combine these lines if this line is a single
16602                     # number, or if it is a short term with same
16603                     # operator as the previous line.  For example, in
16604                     # the following code we will combine all of the
16605                     # short terms $A, $B, $C, $D, $E, $F, together
16606                     # instead of leaving them one per line:
16607                     #  my $time =
16608                     #    $A * $B * $C * $D * $E * $F *
16609                     #    ( 2. * $eps * $sigma * $area ) *
16610                     #    ( 1. / $tcold**3 - 1. / $thot**3 );
16611                     # This can be important in math-intensive code.
16612                     next
16613                       unless (
16614                            $is_number
16615                         || $is_short_term
16616
16617                         # or if we can reduce this to two lines if we do.
16618                         || (   $n == 2
16619                             && $n == $nmax
16620                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
16621                       );
16622                 }
16623
16624                 # handle line with leading = or similar
16625                 elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
16626                     next unless $n == 1;
16627                     next
16628                       unless (
16629
16630                         # unless we can reduce this to two lines
16631                         $nmax == 2
16632
16633                         # or three lines, the last with a leading semicolon
16634                         || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16635
16636                         # or the next line ends with a here doc
16637                         || $types_to_go[$iend_2] eq 'h'
16638                       );
16639                 }
16640
16641                 #----------------------------------------------------------
16642                 # Section 3:
16643                 # Combine the lines if we arrive here and it is possible
16644                 #----------------------------------------------------------
16645
16646                 # honor hard breakpoints
16647                 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16648
16649                 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16650
16651                 # combined line cannot be too long
16652                 next
16653                   if excess_line_length( $ibeg_1, $iend_2 ) > 0;
16654
16655                 # do not recombine if we would skip in indentation levels
16656                 if ( $n < $nmax ) {
16657                     my $if_next = $$ri_beg[ $n + 1 ];
16658                     next
16659                       if (
16660                            $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16661                         && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16662
16663                         # but an isolated 'if (' is undesirable
16664                         && !(
16665                                $n == 1
16666                             && $iend_1 - $ibeg_1 <= 2
16667                             && $types_to_go[$ibeg_1]  eq 'k'
16668                             && $tokens_to_go[$ibeg_1] eq 'if'
16669                             && $tokens_to_go[$iend_1] ne '('
16670                         )
16671                       );
16672                 }
16673
16674                 # honor no-break's
16675                 next if ( $bs == NO_BREAK );
16676
16677                 # remember the pair with the greatest bond strength
16678                 if ( !$n_best ) {
16679                     $n_best  = $n;
16680                     $bs_best = $bs;
16681                 }
16682                 else {
16683
16684                     if ( $bs > $bs_best ) {
16685                         $n_best  = $n;
16686                         $bs_best = $bs;
16687                     }
16688                 }
16689             }
16690
16691             # recombine the pair with the greatest bond strength
16692             if ($n_best) {
16693                 splice @$ri_beg, $n_best, 1;
16694                 splice @$ri_end, $n_best - 1, 1;
16695
16696                 # keep going if we are still making progress
16697                 $more_to_do++;
16698             }
16699         }
16700         return ( $ri_beg, $ri_end );
16701     }
16702 }    # end recombine_breakpoints
16703
sub break_all_chain_tokens {

    # Scan the current breakpoints looking for breaks at certain "chain
    # operators" (. : && || + etc) which often occur repeatedly in a long
    # statement.  If we see a break at any one, break at all similar tokens
    # within the same container.
    #
    # Given:
    #   $ri_left  = ref to array holding, for each line, the index of its
    #               first (left end) token
    #   $ri_right = ref to array holding, for each line, the index of its
    #               last (right end) token
    #
    # Any new break points are collected in a list and handed, together
    # with these two array refs, to insert_additional_breaks.  Nothing is
    # done if no chain operator sits at a line end, or if no interior
    # token of a matching type is found.  Callers should not rely on a
    # return value.
    my ( $ri_left, $ri_right ) = @_;

    my %saw_chain_type;         # chain types seen at any line end
    my %left_chain_type;        # type => list of left-end token indexes
    my %right_chain_type;       # type => list of right-end token indexes
    my %interior_chain_type;    # type => list of interior token indexes
    my $nmax = @$ri_right - 1;

    # scan the left and right end tokens of all lines
    my $count = 0;
    for my $n ( 0 .. $nmax ) {
        my $il    = $$ri_left[$n];
        my $ir    = $$ri_right[$n];
        my $typel = $types_to_go[$il];
        my $typer = $types_to_go[$ir];
        $typel = '+' if ( $typel eq '-' );    # treat + and - the same
        $typer = '+' if ( $typer eq '-' );
        $typel = '*' if ( $typel eq '/' );    # treat * and / the same
        $typer = '*' if ( $typer eq '/' );
        my $tokenl = $tokens_to_go[$il];
        my $tokenr = $tokens_to_go[$ir];

        # A leading chain operator only counts if breaks are wanted before
        # that type.  Note that the 'next' for a '?' leaves this line
        # entirely, so its right end is not examined either.
        if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
            next if ( $typel eq '?' );
            push @{ $left_chain_type{$typel} }, $il;
            $saw_chain_type{$typel} = 1;
            $count++;
        }

        # A trailing chain operator only counts if breaks are wanted after
        # that type.
        if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
            next if ( $typer eq '?' );
            push @{ $right_chain_type{$typer} }, $ir;
            $saw_chain_type{$typer} = 1;
            $count++;
        }
    }
    return unless $count;

    # now look for any interior tokens of the same types
    $count = 0;
    for my $n ( 0 .. $nmax ) {
        my $il = $$ri_left[$n];
        my $ir = $$ri_right[$n];
        for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
            my $type = $types_to_go[$i];
            $type = '+' if ( $type eq '-' );
            $type = '*' if ( $type eq '/' );
            if ( $saw_chain_type{$type} ) {
                push @{ $interior_chain_type{$type} }, $i;
                $count++;
            }
        }
    }
    return unless $count;

    # now make a list of all new break points
    my @insert_list;

    # Loop over all chain types.  The keys are visited in sorted order
    # because the 'last' below abandons every remaining type: with an
    # unsorted 'keys' the set of types processed before the early exit
    # would depend on hash iteration order, which can differ from run
    # to run.
    foreach my $type ( sort keys %saw_chain_type ) {

        # quit if just ONE continuation line with leading .  For example--
        # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
        #  . $contents;
        last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );

        # loop over all interior chain tokens
        foreach my $itest ( @{ $interior_chain_type{$type} } ) {

            # loop over all left end tokens of same type; a new break
            # before $itest is recorded as a break after $itest-1.
            # Note that this 'next' moves on to the next interior token,
            # so the right end test below is skipped as well.
            if ( $left_chain_type{$type} ) {
                next if $nobreak_to_go[ $itest - 1 ];
                foreach my $i ( @{ $left_chain_type{$type} } ) {
                    next unless in_same_container( $i, $itest );
                    push @insert_list, $itest - 1;

                    # Break at matching ? if this : is at a different level.
                    # For example, the ? before $THRf_DEAD in the following
                    # should get a break if its : gets a break.
                    #
                    # my $flags =
                    #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
                    #   : ( $_ & 4 ) ? $THRf_R_DETACHED
                    #   :              $THRf_R_JOINABLE;
                    if (   $type eq ':'
                        && $levels_to_go[$i] != $levels_to_go[$itest] )
                    {
                        my $i_question = $mate_index_to_go[$itest];
                        if ( $i_question > 0 ) {
                            push @insert_list, $i_question - 1;
                        }
                    }

                    # one matching line end is enough for this token
                    last;
                }
            }

            # loop over all right end tokens of same type; here the new
            # break goes after $itest itself
            if ( $right_chain_type{$type} ) {
                next if $nobreak_to_go[$itest];
                foreach my $i ( @{ $right_chain_type{$type} } ) {
                    next unless in_same_container( $i, $itest );
                    push @insert_list, $itest;

                    # break at matching ? if this : is at a different level
                    if (   $type eq ':'
                        && $levels_to_go[$i] != $levels_to_go[$itest] )
                    {
                        my $i_question = $mate_index_to_go[$itest];
                        if ( $i_question >= 0 ) {
                            push @insert_list, $i_question;
                        }
                    }

                    # one matching line end is enough for this token
                    last;
                }
            }
        }
    }

    # insert any new break points
    if (@insert_list) {
        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
    }
}
16833
sub break_equals {

    # Look for assignment operators in the first line that would make a
    # good breakpoint when the following lines form an operator chain.
    # For example, the snippet
    #
    #    $HOME = $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # reads a little better with a break at the '=':
    #
    #    $HOME =
    #         $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # The logic here follows the logic in set_logical_padding, which
    # will add the padding in the second line to improve alignment.
    #
    # Given:
    #   $ri_left  = ref to array of the index of the first token of each line
    #   $ri_right = ref to array of the index of the last token of each line
    # Any new breaks are passed to insert_additional_breaks.
    my ( $ri_left, $ri_right ) = @_;

    # at least three lines are required
    return unless ( @$ri_right - 1 >= 2 );

    # The second and third lines must each begin with a chain operator.
    # Tokens starting with a word character (such as 'and', 'or') are
    # looked up by token text; all others are looked up by token type.
    my ( $i_beg_2, $i_beg_3 ) = @{$ri_left}[ 1, 2 ];
    foreach my $i_lead ( $i_beg_2, $i_beg_3 ) {
        my $tok_lead = $tokens_to_go[$i_lead];
        my $key =
          ( $tok_lead =~ /^\w/ ) ? $tok_lead : $types_to_go[$i_lead];
        return unless ( $is_chain_operator{$key} );
    }

    # ... and it must be the same operator at the same depth on both lines
    my $depth_chain = $nesting_depth_to_go[$i_beg_2];
    return
      unless ( $tokens_to_go[$i_beg_3] eq $tokens_to_go[$i_beg_2]
        && $nesting_depth_to_go[$i_beg_3] eq $depth_chain );

    # Be sure there are no assignments at the chain depth after the leading
    # token of the second and third lines.  We want to avoid breaking at
    # an = to make something like this:
    #    unless ( $icon =
    #           $html_icons{"$type-$state"}
    #        or $icon = $html_icons{$type}
    #        or $icon = $html_icons{$state} )
    foreach my $n ( 1, 2 ) {
        foreach my $i ( $ri_left->[$n] + 1 .. $ri_right->[$n] ) {
            next unless ( $is_assignment{ $types_to_go[$i] } );
            return if ( $nesting_depth_to_go[$i] eq $depth_chain );
        }
    }

    # Collect a break for each assignment operator strictly inside the
    # first line which is at the chain depth, working from right to left.
    # A break before a token at $i is recorded as a break after $i-1.
    my $i_first = $ri_left->[0];
    my $i_last  = $ri_right->[0];
    my @insert_list =
      map { $want_break_before{ $types_to_go[$_] } ? $_ - 1 : $_ }
      grep {
             $is_assignment{ $types_to_go[$_] }
          && $nesting_depth_to_go[$_] eq $depth_chain
      } reverse( $i_first + 1 .. $i_last - 1 );

    # Break after a 'return' followed by a chain of operators
    #  return ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    # To give:
    #  return
    #       ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    # Note that it is the token at index 0 which is tested here.
    my $is_leading_return =
         $types_to_go[0] eq 'k'
      && $tokens_to_go[0] eq 'return'
      && $i_last > $i_first
      && $nesting_depth_to_go[0] eq $depth_chain;
    push @insert_list, 0 if ($is_leading_return);

    # ok, insert any new break points
    return unless (@insert_list);
    insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
16944
sub insert_final_breaks {

    # For long ternary chains: find the first ':' which begins or ends a
    # line.  If no line up to that point begins or ends with a '?', then
    # look backward from the '?' which is the mate of that ':' for a good
    # place to add one more break.
    #
    # Given:
    #   $ri_left  = ref to array of the index of the first token of each line
    #   $ri_right = ref to array of the index of the last token of each line
    # At most one new break is passed to insert_additional_breaks.
    my ( $ri_left, $ri_right ) = @_;

    # scan the left and right end tokens of the lines, in order, stopping
    # at the first line with a ':' at either end
    my $i_first_colon = -1;
    foreach my $n ( 0 .. $#{$ri_right} ) {
        my $i_beg    = $ri_left->[$n];
        my $i_end    = $ri_right->[$n];
        my $type_beg = $types_to_go[$i_beg];
        my $type_end = $types_to_go[$i_end];

        # give up if a '?' is already at a line end
        return if ( $type_beg eq '?' || $type_end eq '?' );

        # the left end takes precedence over the right end
        if    ( $type_beg eq ':' ) { $i_first_colon = $i_beg }
        elsif ( $type_end eq ':' ) { $i_first_colon = $i_end }
        else                       { next }
        last;
    }
    return unless ( $i_first_colon > 0 );

    # the '?' which goes with this ':'
    my $i_question = $mate_index_to_go[$i_first_colon];
    return unless ( $i_question > 0 );

    # Walk backward from the '?' to the nearest good break.
    # For now, a good break is either a comma or a 'return'.
    my $i_break;
    my $ii = $i_question;
    while ( --$ii >= 0 ) {
        my $type = $types_to_go[$ii];
        my $is_candidate = $type eq ','
          || ( $type eq 'k' && $tokens_to_go[$ii] eq 'return' );
        next unless ($is_candidate);
        next unless ( in_same_container( $ii, $i_question ) );
        $i_break = $ii;
        last;
    }

    # insert the new break point, if any
    return unless ( defined($i_break) );
    insert_additional_breaks( [$i_break], $ri_left, $ri_right );
}
16993
sub in_same_container {

    # Return 1 if the tokens at indexes $i1 and $i2 are at the same
    # nesting depth with no separator token between them at that depth;
    # otherwise return nothing.  The arguments may be given in either
    # order, but which tokens count as separators is decided by the type
    # of the token at the first argument, $i1:
    #   - if it is a ':', only a comma (or '=>') separates
    #   - otherwise any of  ,  =>  :  ?  ||  or  separates
    my ( $i1, $i2 ) = @_;

    # these come from the first argument as given
    my $is_colon  = $types_to_go[$i1] eq ':';
    my $depth_ref = $nesting_depth_to_go[$i1];
    if ( $nesting_depth_to_go[$i2] != $depth_ref ) { return }

    # put the end points in increasing order
    my ( $i_lo, $i_hi ) = ( $i2 < $i1 ) ? ( $i2, $i1 ) : ( $i1, $i2 );

    ###########################################################
    # This is potentially a very slow routine and not critical.
    # For safety just give up for large differences.
    # See test file 'infinite_loop.txt'
    # TODO: replace this loop with a data structure
    ###########################################################
    if ( $i_hi - $i_lo > 200 ) { return }

    # examine the tokens strictly between the end points
    foreach my $i ( $i_lo + 1 .. $i_hi - 1 ) {
        my $depth = $nesting_depth_to_go[$i];

        # skip over anything more deeply nested; dropping below the
        # starting depth means we have left the container
        next if ( $depth > $depth_ref );
        return if ( $depth < $depth_ref );

        # treat => same as ,
        my $tok = $tokens_to_go[$i];
        if ( $tok eq '=>' ) { $tok = ',' }

        if ($is_colon) {
            return if ( $tok =~ /^[\,]$/ );
        }

        # Example: we would not want to break at any of these .'s
        #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
        else {
            return
              if ( $tok =~ /^[\,\:\?]$/
                || $tok eq '||'
                || $tok eq 'or' );
        }
    }
    return 1;
}
17030
17031 sub set_continuation_breaks {
17032
17033     # Define an array of indexes for inserting newline characters to
17034     # keep the line lengths below the maximum desired length.  There is
17035     # an implied break after the last token, so it need not be included.
17036
17037     # Method:
17038     # This routine is part of a series of routines which adjust line
17039     # lengths.  It is only called if a statement is longer than the
17040     # maximum line length, or if a preliminary scanning located
17041     # desirable break points.   Sub scan_list has already looked at
17042     # these tokens and set breakpoints (in array
17043     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
17044     # after commas, after opening parens, and before closing parens).
17045     # This routine will honor these breakpoints and also add additional
17046     # breakpoints as necessary to keep the line length below the maximum
17047     # requested.  It bases its decision on where the 'bond strength' is
17048     # lowest.
17049
17050     # Output: returns references to the arrays:
17051     #  @i_first
17052     #  @i_last
17053     # which contain the indexes $i of the first and last tokens on each
17054     # line.
17055
17056     # In addition, the array:
17057     #   $forced_breakpoint_to_go[$i]
17058     # may be updated to be =1 for any index $i after which there must be
17059     # a break.  This signals later routines not to undo the breakpoint.
17060
17061     my $saw_good_break = shift;
17062     my @i_first        = ();      # the first index to output
17063     my @i_last         = ();      # the last index to output
17064     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
17065     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
17066
17067     set_bond_strengths();
17068
17069     my $imin = 0;
17070     my $imax = $max_index_to_go;
17071     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
17072     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
17073     my $i_begin = $imin;          # index for starting next iteration
17074
17075     my $leading_spaces          = leading_spaces_to_go($imin);
17076     my $line_count              = 0;
17077     my $last_break_strength     = NO_BREAK;
17078     my $i_last_break            = -1;
17079     my $max_bias                = 0.001;
17080     my $tiny_bias               = 0.0001;
17081     my $leading_alignment_token = "";
17082     my $leading_alignment_type  = "";
17083
17084     # see if any ?/:'s are in order
17085     my $colons_in_order = 1;
17086     my $last_tok        = "";
17087     my @colon_list  = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
17088     my $colon_count = @colon_list;
17089     foreach (@colon_list) {
17090         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
17091         $last_tok = $_;
17092     }
17093
17094     # This is a sufficient but not necessary condition for colon chain
17095     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
17096
17097     #-------------------------------------------------------
17098     # BEGINNING of main loop to set continuation breakpoints
17099     # Keep iterating until we reach the end
17100     #-------------------------------------------------------
17101     while ( $i_begin <= $imax ) {
17102         my $lowest_strength        = NO_BREAK;
17103         my $starting_sum           = $lengths_to_go[$i_begin];
17104         my $i_lowest               = -1;
17105         my $i_test                 = -1;
17106         my $lowest_next_token      = '';
17107         my $lowest_next_type       = 'b';
17108         my $i_lowest_next_nonblank = -1;
17109
17110         #-------------------------------------------------------
17111         # BEGINNING of inner loop to find the best next breakpoint
17112         #-------------------------------------------------------
17113         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
17114             my $type       = $types_to_go[$i_test];
17115             my $token      = $tokens_to_go[$i_test];
17116             my $next_type  = $types_to_go[ $i_test + 1 ];
17117             my $next_token = $tokens_to_go[ $i_test + 1 ];
17118             my $i_next_nonblank =
17119               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
17120             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
17121             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
17122             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17123             my $strength                 = $bond_strength_to_go[$i_test];
17124             my $must_break               = 0;
17125
17126             # FIXME: TESTING: Might want to be able to break after these
17127             # force an immediate break at certain operators
17128             # with lower level than the start of the line
17129             if (
17130                 (
17131                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
17132                     || (   $next_nonblank_type eq 'k'
17133                         && $next_nonblank_token =~ /^(and|or)$/ )
17134                 )
17135                 && ( $nesting_depth_to_go[$i_begin] >
17136                     $nesting_depth_to_go[$i_next_nonblank] )
17137               )
17138             {
17139                 set_forced_breakpoint($i_next_nonblank);
17140             }
17141
17142             if (
17143
17144                 # Try to put a break where requested by scan_list
17145                 $forced_breakpoint_to_go[$i_test]
17146
17147                 # break between ) { in a continued line so that the '{' can
17148                 # be outdented
17149                 # See similar logic in scan_list which catches instances
17150                 # where a line is just something like ') {'
17151                 || (   $line_count
17152                     && ( $token              eq ')' )
17153                     && ( $next_nonblank_type eq '{' )
17154                     && ($next_nonblank_block_type)
17155                     && !$rOpts->{'opening-brace-always-on-right'} )
17156
17157                 # There is an implied forced break at a terminal opening brace
17158                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
17159               )
17160             {
17161
17162                 # Forced breakpoints must sometimes be overridden, for example
17163                 # because of a side comment causing a NO_BREAK.  It is easier
17164                 # to catch this here than when they are set.
17165                 if ( $strength < NO_BREAK ) {
17166                     $strength   = $lowest_strength - $tiny_bias;
17167                     $must_break = 1;
17168                 }
17169             }
17170
17171             # quit if a break here would put a good terminal token on
17172             # the next line and we already have a possible break
17173             if (
17174                    !$must_break
17175                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
17176                 && (
17177                     (
17178                         $leading_spaces +
17179                         $lengths_to_go[ $i_next_nonblank + 1 ] -
17180                         $starting_sum
17181                     ) > $rOpts_maximum_line_length
17182                 )
17183               )
17184             {
17185                 last if ( $i_lowest >= 0 );
17186             }
17187
17188             # Avoid a break which would strand a single punctuation
17189             # token.  For example, we do not want to strand a leading
17190             # '.' which is followed by a long quoted string.
17191             if (
17192                    !$must_break
17193                 && ( $i_test == $i_begin )
17194                 && ( $i_test < $imax )
17195                 && ( $token eq $type )
17196                 && (
17197                     (
17198                         $leading_spaces +
17199                         $lengths_to_go[ $i_test + 1 ] -
17200                         $starting_sum
17201                     ) <= $rOpts_maximum_line_length
17202                 )
17203               )
17204             {
17205                 $i_test++;
17206
17207                 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
17208                     $i_test++;
17209                 }
17210                 redo;
17211             }
17212
17213             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17214             {
17215
17216                 # break at previous best break if it would have produced
17217                 # a leading alignment of certain common tokens, and it
17218                 # is different from the latest candidate break
17219                 last
17220                   if ($leading_alignment_type);
17221
17222                 # Force at least one breakpoint if old code had good
17223                 # break.  It is only called if a breakpoint is required or
17224                 # desired.  This will probably need some adjustments
17225                 # over time.  A goal is to try to be sure that, if a new
17226                 # side comment is introduced into formatted text, then
17227                 # the same breakpoints will occur.  scbreak.t
17228                 last
17229                   if (
17230                     $i_test == $imax                # we are at the end
17231                     && !$forced_breakpoint_count    #
17232                     && $saw_good_break              # old line had good break
17233                     && $type =~ /^[#;\{]$/          # and this line ends in
17234                                                     # ';' or side comment
17235                     && $i_last_break < 0        # and we haven't made a break
17236                     && $i_lowest > 0            # and we saw a possible break
17237                     && $i_lowest < $imax - 1    # (but not just before this ;)
17238                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17239                   );
17240
17241                 $lowest_strength        = $strength;
17242                 $i_lowest               = $i_test;
17243                 $lowest_next_token      = $next_nonblank_token;
17244                 $lowest_next_type       = $next_nonblank_type;
17245                 $i_lowest_next_nonblank = $i_next_nonblank;
17246                 last if $must_break;
17247
17248                 # set flags to remember if a break here will produce a
17249                 # leading alignment of certain common tokens
17250                 if (   $line_count > 0
17251                     && $i_test < $imax
17252                     && ( $lowest_strength - $last_break_strength <= $max_bias )
17253                   )
17254                 {
17255                     my $i_last_end = $i_begin - 1;
17256                     if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
17257                     my $tok_beg  = $tokens_to_go[$i_begin];
17258                     my $type_beg = $types_to_go[$i_begin];
17259                     if (
17260
17261                         # check for leading alignment of certain tokens
17262                         (
17263                                $tok_beg eq $next_nonblank_token
17264                             && $is_chain_operator{$tok_beg}
17265                             && (   $type_beg eq 'k'
17266                                 || $type_beg eq $tok_beg )
17267                             && $nesting_depth_to_go[$i_begin] >=
17268                             $nesting_depth_to_go[$i_next_nonblank]
17269                         )
17270
17271                         || (   $tokens_to_go[$i_last_end] eq $token
17272                             && $is_chain_operator{$token}
17273                             && ( $type eq 'k' || $type eq $token )
17274                             && $nesting_depth_to_go[$i_last_end] >=
17275                             $nesting_depth_to_go[$i_test] )
17276                       )
17277                     {
17278                         $leading_alignment_token = $next_nonblank_token;
17279                         $leading_alignment_type  = $next_nonblank_type;
17280                     }
17281                 }
17282             }
17283
17284             my $too_long =
17285               ( $i_test >= $imax )
17286               ? 1
17287               : (
17288                 (
17289                     $leading_spaces +
17290                       $lengths_to_go[ $i_test + 2 ] -
17291                       $starting_sum
17292                 ) > $rOpts_maximum_line_length
17293               );
17294
17295             FORMATTER_DEBUG_FLAG_BREAK
17296               && print
17297 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
17298
17299             # allow one extra terminal token after exceeding line length
17300             # if it would strand this token.
17301             if (   $rOpts_fuzzy_line_length
17302                 && $too_long
17303                 && ( $i_lowest == $i_test )
17304                 && ( length($token) > 1 )
17305                 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
17306             {
17307                 $too_long = 0;
17308             }
17309
17310             last
17311               if (
17312                 ( $i_test == $imax )    # we're done if no more tokens,
17313                 || (
17314                     ( $i_lowest >= 0 )    # or no more space and we have a break
17315                     && $too_long
17316                 )
17317               );
17318         }
17319
17320         #-------------------------------------------------------
17321         # END of inner loop to find the best next breakpoint
17322         # Now decide exactly where to put the breakpoint
17323         #-------------------------------------------------------
17324
17325         # it's always ok to break at imax if no other break was found
17326         if ( $i_lowest < 0 ) { $i_lowest = $imax }
17327
17328         # semi-final index calculation
17329         my $i_next_nonblank = (
17330             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17331             ? $i_lowest + 2
17332             : $i_lowest + 1
17333         );
17334         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17335         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17336
17337         #-------------------------------------------------------
17338         # ?/: rule 1 : if a break here will separate a '?' on this
17339         # line from its closing ':', then break at the '?' instead.
17340         #-------------------------------------------------------
17341         my $i;
17342         foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17343             next unless ( $tokens_to_go[$i] eq '?' );
17344
17345             # do not break if probable sequence of ?/: statements
17346             next if ($is_colon_chain);
17347
17348             # do not break if statement is broken by side comment
17349             next
17350               if (
17351                 $tokens_to_go[$max_index_to_go] eq '#'
17352                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
17353                     $max_index_to_go ) !~ /^[\;\}]$/
17354               );
17355
17356             # no break needed if matching : is also on the line
17357             next
17358               if ( $mate_index_to_go[$i] >= 0
17359                 && $mate_index_to_go[$i] <= $i_next_nonblank );
17360
17361             $i_lowest = $i;
17362             if ( $want_break_before{'?'} ) { $i_lowest-- }
17363             last;
17364         }
17365
17366         #-------------------------------------------------------
17367         # END of inner loop to find the best next breakpoint:
17368         # Break the line after the token with index i=$i_lowest
17369         #-------------------------------------------------------
17370
17371         # final index calculation
17372         $i_next_nonblank = (
17373             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17374             ? $i_lowest + 2
17375             : $i_lowest + 1
17376         );
17377         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17378         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17379
17380         FORMATTER_DEBUG_FLAG_BREAK
17381           && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
17382
17383         #-------------------------------------------------------
17384         # ?/: rule 2 : if we break at a '?', then break at its ':'
17385         #
17386         # Note: this rule is also in sub scan_list to handle a break
17387         # at the start and end of a line (in case breaks are dictated
17388         # by side comments).
17389         #-------------------------------------------------------
17390         if ( $next_nonblank_type eq '?' ) {
17391             set_closing_breakpoint($i_next_nonblank);
17392         }
17393         elsif ( $types_to_go[$i_lowest] eq '?' ) {
17394             set_closing_breakpoint($i_lowest);
17395         }
17396
17397         #-------------------------------------------------------
17398         # ?/: rule 3 : if we break at a ':' then we save
17399         # its location for further work below.  We may need to go
17400         # back and break at its '?'.
17401         #-------------------------------------------------------
17402         if ( $next_nonblank_type eq ':' ) {
17403             push @i_colon_breaks, $i_next_nonblank;
17404         }
17405         elsif ( $types_to_go[$i_lowest] eq ':' ) {
17406             push @i_colon_breaks, $i_lowest;
17407         }
17408
17409         # here we should set breaks for all '?'/':' pairs which are
17410         # separated by this line
17411
17412         $line_count++;
17413
17414         # save this line segment, after trimming blanks at the ends
17415         push( @i_first,
17416             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17417         push( @i_last,
17418             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17419
17420         # set a forced breakpoint at a container opening, if necessary, to
17421         # signal a break at a closing container.  Excepting '(' for now.
17422         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
17423             && !$forced_breakpoint_to_go[$i_lowest] )
17424         {
17425             set_closing_breakpoint($i_lowest);
17426         }
17427
17428         # get ready to go again
17429         $i_begin                 = $i_lowest + 1;
17430         $last_break_strength     = $lowest_strength;
17431         $i_last_break            = $i_lowest;
17432         $leading_alignment_token = "";
17433         $leading_alignment_type  = "";
17434         $lowest_next_token       = '';
17435         $lowest_next_type        = 'b';
17436
17437         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17438             $i_begin++;
17439         }
17440
17441         # update indentation size
17442         if ( $i_begin <= $imax ) {
17443             $leading_spaces = leading_spaces_to_go($i_begin);
17444         }
17445     }
17446
17447     #-------------------------------------------------------
17448     # END of main loop to set continuation breakpoints
17449     # Now go back and make any necessary corrections
17450     #-------------------------------------------------------
17451
17452     #-------------------------------------------------------
17453     # ?/: rule 4 -- if we broke at a ':', then break at
17454     # corresponding '?' unless this is a chain of ?: expressions
17455     #-------------------------------------------------------
17456     if (@i_colon_breaks) {
17457
17458         # using a simple method for deciding if we are in a ?/: chain --
17459         # this is a chain if it has multiple ?/: pairs all in order;
17460         # otherwise not.
17461         # Note that if line starts in a ':' we count that above as a break
17462         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17463
17464         unless ($is_chain) {
17465             my @insert_list = ();
17466             foreach (@i_colon_breaks) {
17467                 my $i_question = $mate_index_to_go[$_];
17468                 if ( $i_question >= 0 ) {
17469                     if ( $want_break_before{'?'} ) {
17470                         $i_question--;
17471                         if (   $i_question > 0
17472                             && $types_to_go[$i_question] eq 'b' )
17473                         {
17474                             $i_question--;
17475                         }
17476                     }
17477
17478                     if ( $i_question >= 0 ) {
17479                         push @insert_list, $i_question;
17480                     }
17481                 }
17482                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
17483             }
17484         }
17485     }
17486     return ( \@i_first, \@i_last, $colon_count );
17487 }
17488
sub insert_additional_breaks {

    # Split already-chosen output lines at some extra break locations.
    # This is called after sub set_continuation_breaks has made its
    # preliminary breaks.
    #
    #   $ri_break_list - ref to a list of token indexes; a new break is
    #                    wanted just after each of these tokens
    #   $ri_first      - ref to the list of first token indexes of the lines
    #   $ri_last       - ref to the list of last token indexes of the lines
    #
    # The two line lists are updated in place: a line which contains a
    # requested break is replaced by the two pieces on either side of it.

    my ( $ri_break_list, $ri_first, $ri_last ) = @_;

    my $line_number   = 0;
    my @wanted_breaks = sort { $a <=> $b } @$ri_break_list;

    foreach my $i_break_left (@wanted_breaks) {

        # Step forward to the first line which ends beyond this token.
        # The requests are handled in increasing order, so the search
        # resumes from the line reached for the previous request.
        until ( $i_break_left < $ri_last->[$line_number] ) {
            $line_number++;

            # shouldn't happen unless caller passes bad indexes
            if ( $line_number >= @$ri_last ) {
                warning(
"Non-fatal program bug: couldn't set break at $i_break_left\n"
                );
                report_definite_bug();
                return;
            }
        }
        my $i_f = $ri_first->[$line_number];
        my $i_l = $ri_last->[$line_number];

        # the second piece starts at the next token, skipping one blank
        my $i_break_right = $i_break_left + 1;
        $i_break_right++ if ( $types_to_go[$i_break_right] eq 'b' );

        # only split when both new end points lie within this line
        my $left_is_inside =
          ( $i_break_left >= $i_f && $i_break_left < $i_l );
        my $right_is_inside =
          ( $i_break_right > $i_f && $i_break_right <= $i_l );

        if ( $left_is_inside && $right_is_inside ) {
            splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
            splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
        }
    }
}
17531
sub set_closing_breakpoint {

    # Arrange for a line break at the closing token which is the mate of
    # the token at index $i_break.  Callers pass the index of a '?' (so
    # that a break is made at its ':') or of an opening '{' or '[' at
    # which a line has been broken.
    #
    # - If $mate_index_to_go[$i_break] is non-negative, a forced
    #   breakpoint is set at the mate (for a '?') or at the token just
    #   before it (for other tokens), provided the mate is more than two
    #   tokens away.
    # - Otherwise, if the token has a type sequence number, the request
    #   is recorded in %postponed_breakpoint under that number.
    my $i_break = shift;

    if ( $mate_index_to_go[$i_break] >= 0 ) {

        # CAUTION: infinite recursion possible here:
        #   set_closing_breakpoint calls set_forced_breakpoint, and
        #   set_forced_breakpoint call set_closing_breakpoint
        #   ( test files attrib.t, BasicLyx.pm.html).
        # Don't reduce the '2' in the statement below
        if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {

            # break before } ] and ), but sub set_forced_breakpoint will decide
            # to break before or after a ? and :
            my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
            set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
        }
    }
    else {

        # only the type sequence number is needed to record the request
        my $type_sequence = $type_sequence_to_go[$i_break];
        if ($type_sequence) {
            $postponed_breakpoint{$type_sequence} = 1;
        }
    }
}
17561
# Compare the indentation level implied by the leading whitespace of an
# input line with the level found from the code structure, and note in
# the logfile where a run of disagreeing lines starts and ends.
# This can be very useful for debugging a script which has an extra
# or missing brace.
sub compare_indentation_levels {

    my ( $python_indentation_level, $structural_indentation_level ) = @_;

    my $levels_agree =
      ( $python_indentation_level eq $structural_indentation_level );

    if ($levels_agree) {

        # agreement: close out any disagreement which is in progress
        if ($in_tabbing_disagreement) {

            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
                write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
                );

                # say so when the message limit has just been reached
                if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
                    write_logfile_entry(
                        "No further tabbing disagreements will be noted\n");
                }
            }
            $in_tabbing_disagreement = 0;
        }
    }
    else {

        # disagreement: always remember the most recent line
        $last_tabbing_disagreement = $input_line_number;

        # and start a new run unless one is already in progress
        unless ($in_tabbing_disagreement) {
            $tabbing_disagreement_count++;

            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
                write_logfile_entry(
"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
                );
            }
            $in_tabbing_disagreement = $input_line_number;

            # latch the line number of the very first disagreement
            $first_tabbing_disagreement ||= $in_tabbing_disagreement;
        }
    }
}
17604
17605 #####################################################################
17606 #
17607 # the Perl::Tidy::IndentationItem class supplies items which contain
17608 # how much whitespace should be used at the start of a line
17609 #
17610 #####################################################################
17611
package Perl::Tidy::IndentationItem;

# An indentation item is a blessed anonymous array.  These constants
# name the array slot used for each stored quantity.
use constant SPACES             => 0;     # total leading white spaces
use constant LEVEL              => 1;     # the indentation 'level'
use constant CI_LEVEL           => 2;     # the 'continuation level'
use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
                                          # for this level
use constant CLOSED             => 4;     # index where we saw closing '}'
use constant COMMA_COUNT        => 5;     # how many commas at this level?
use constant SEQUENCE_NUMBER    => 6;     # output batch number
use constant INDEX              => 7;     # index in output batch list
use constant HAVE_CHILD         => 8;     # any dependents?
use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
                                          # we would like to move to get
                                          # alignment (negative if left)
use constant ALIGN_PAREN        => 10;    # do we want to try to align
                                          # with an opening structure?
use constant MARKED             => 11;    # if visited by corrector logic
use constant STACK_DEPTH        => 12;    # indentation nesting depth
use constant STARTING_INDEX     => 13;    # first token index of this level
use constant ARROW_COUNT        => 14;    # how many =>'s

sub new {

    # Build an 'indentation_item', which describes one level of leading
    # whitespace when the '-lp' indentation is used.  The result is a
    # reference to an anonymous array, blessed into $class, whose slots
    # are named by the constants above.
    my $class = shift;
    my (
        $spaces,      $level,               $ci_level,
        $available_spaces, $index,          $gnu_sequence_number,
        $align_paren, $stack_depth,         $starting_index,
    ) = @_;

    my @item;

    # values supplied by the caller
    $item[SPACES]           = $spaces;
    $item[LEVEL]            = $level;
    $item[CI_LEVEL]         = $ci_level;
    $item[AVAILABLE_SPACES] = $available_spaces;
    $item[INDEX]            = $index;
    $item[SEQUENCE_NUMBER]  = $gnu_sequence_number;
    $item[ALIGN_PAREN]      = $align_paren;
    $item[STACK_DEPTH]      = $stack_depth;
    $item[STARTING_INDEX]   = $starting_index;

    # values which always start out the same
    $item[CLOSED]             = -1;
    $item[COMMA_COUNT]        = 0;
    $item[ARROW_COUNT]        = 0;
    $item[HAVE_CHILD]         = 0;
    $item[RECOVERABLE_SPACES] = 0;
    $item[MARKED]             = 0;

    return bless \@item, $class;
}

#--------------------------------------------------------------
# Methods which give up some of the available indentation
#--------------------------------------------------------------

sub permanently_decrease_AVAILABLE_SPACES {

    # Permanently remove up to $spaces_needed spaces of the available
    # indentation of one item; the number actually removed is returned.
    # The recoverable space count is reset to zero.  NOTE: if there are
    # child nodes, their total SPACES must be reduced by the caller.

    my ( $item, $spaces_needed ) = @_;

    # we cannot remove more than is available
    my $deleted_spaces = $item->get_AVAILABLE_SPACES();
    if ( $deleted_spaces > $spaces_needed ) {
        $deleted_spaces = $spaces_needed;
    }

    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
    $item->decrease_SPACES($deleted_spaces);
    $item->set_RECOVERABLE_SPACES(0);

    return $deleted_spaces;
}

sub tentatively_decrease_AVAILABLE_SPACES {

    # Tentatively remove up to $spaces_needed spaces of the available
    # indentation of one item; the number actually removed is returned.
    # The removed amount is added to the recoverable space count because
    # we may want to undo this later.  NOTE: if there are child nodes,
    # their total SPACES must be reduced by the caller.

    my ( $item, $spaces_needed ) = @_;

    # we cannot remove more than is available
    my $deleted_spaces = $item->get_AVAILABLE_SPACES();
    if ( $deleted_spaces > $spaces_needed ) {
        $deleted_spaces = $spaces_needed;
    }

    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
    $item->decrease_SPACES($deleted_spaces);
    $item->increase_RECOVERABLE_SPACES($deleted_spaces);

    return $deleted_spaces;
}

#--------------------------------------------------------------
# Read-only access to the slots
#--------------------------------------------------------------

sub get_SPACES {
    my ($self) = @_;
    return $self->[SPACES];
}

sub get_LEVEL {
    my ($self) = @_;
    return $self->[LEVEL];
}

sub get_CI_LEVEL {
    my ($self) = @_;
    return $self->[CI_LEVEL];
}

sub get_AVAILABLE_SPACES {
    my ($self) = @_;
    return $self->[AVAILABLE_SPACES];
}

sub get_CLOSED {
    my ($self) = @_;
    return $self->[CLOSED];
}

sub get_COMMA_COUNT {
    my ($self) = @_;
    return $self->[COMMA_COUNT];
}

sub get_SEQUENCE_NUMBER {
    my ($self) = @_;
    return $self->[SEQUENCE_NUMBER];
}

sub get_INDEX {
    my ($self) = @_;
    return $self->[INDEX];
}

sub get_HAVE_CHILD {
    my ($self) = @_;
    return $self->[HAVE_CHILD];
}

sub get_RECOVERABLE_SPACES {
    my ($self) = @_;
    return $self->[RECOVERABLE_SPACES];
}

sub get_ALIGN_PAREN {
    my ($self) = @_;
    return $self->[ALIGN_PAREN];
}

sub get_MARKED {
    my ($self) = @_;
    return $self->[MARKED];
}

sub get_STACK_DEPTH {
    my ($self) = @_;
    return $self->[STACK_DEPTH];
}

sub get_STARTING_INDEX {
    my ($self) = @_;
    return $self->[STARTING_INDEX];
}

sub get_ARROW_COUNT {
    my ($self) = @_;
    return $self->[ARROW_COUNT];
}

#--------------------------------------------------------------
# Methods which change a slot.  Each one leaves the slot alone if
# no defined value is given, and returns the resulting slot value.
#--------------------------------------------------------------

sub set_MARKED {
    my ( $self, $value ) = @_;
    $self->[MARKED] = $value if ( defined($value) );
    return $self->[MARKED];
}

sub set_CLOSED {
    my ( $self, $value ) = @_;
    $self->[CLOSED] = $value if ( defined($value) );
    return $self->[CLOSED];
}

sub set_COMMA_COUNT {
    my ( $self, $value ) = @_;
    $self->[COMMA_COUNT] = $value if ( defined($value) );
    return $self->[COMMA_COUNT];
}

sub set_ARROW_COUNT {
    my ( $self, $value ) = @_;
    $self->[ARROW_COUNT] = $value if ( defined($value) );
    return $self->[ARROW_COUNT];
}

sub set_HAVE_CHILD {
    my ( $self, $value ) = @_;
    $self->[HAVE_CHILD] = $value if ( defined($value) );
    return $self->[HAVE_CHILD];
}

sub set_RECOVERABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[RECOVERABLE_SPACES] = $value if ( defined($value) );
    return $self->[RECOVERABLE_SPACES];
}

sub increase_RECOVERABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[RECOVERABLE_SPACES] += $value if ( defined($value) );
    return $self->[RECOVERABLE_SPACES];
}

sub decrease_SPACES {
    my ( $self, $value ) = @_;
    $self->[SPACES] -= $value if ( defined($value) );
    return $self->[SPACES];
}

sub decrease_AVAILABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[AVAILABLE_SPACES] -= $value if ( defined($value) );
    return $self->[AVAILABLE_SPACES];
}

17845
17846 #####################################################################
17847 #
17848 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
17849 # contain a single output line
17850 #
17851 #####################################################################
17852
package Perl::Tidy::VerticalAligner::Line;

{

    use strict;
    use Carp;

    # A Line object is a blessed anonymous array; these constants name
    # its slots.
    use constant JMAX                      => 0;
    use constant JMAX_ORIGINAL_LINE        => 1;
    use constant RTOKENS                   => 2;
    use constant RFIELDS                   => 3;
    use constant RPATTERNS                 => 4;
    use constant INDENTATION               => 5;
    use constant LEADING_SPACE_COUNT       => 6;
    use constant OUTDENT_LONG_LINES        => 7;
    use constant LIST_TYPE                 => 8;
    use constant IS_HANGING_SIDE_COMMENT   => 9;
    use constant RALIGNMENTS               => 10;
    use constant MAXIMUM_LINE_LENGTH       => 11;
    use constant RVERTICAL_TIGHTNESS_FLAGS => 12;

    # Correspondence between constructor argument names and slots
    my %_index_map = (
        jmax                      => JMAX,
        jmax_original_line        => JMAX_ORIGINAL_LINE,
        rtokens                   => RTOKENS,
        rfields                   => RFIELDS,
        rpatterns                 => RPATTERNS,
        indentation               => INDENTATION,
        leading_space_count       => LEADING_SPACE_COUNT,
        outdent_long_lines        => OUTDENT_LONG_LINES,
        list_type                 => LIST_TYPE,
        is_hanging_side_comment   => IS_HANGING_SIDE_COMMENT,
        ralignments               => RALIGNMENTS,
        maximum_line_length       => MAXIMUM_LINE_LENGTH,
        rvertical_tightness_flags => RVERTICAL_TIGHTNESS_FLAGS,
    );

    # Default slot values: everything is undefined except the alignment
    # list.  Note that this one default array reference is handed to
    # every object which is created from the class name without a
    # 'ralignments' argument.
    my @_default_data = ();
    foreach my $slot ( values %_index_map ) {
        $_default_data[$slot] = undef;
    }
    $_default_data[RALIGNMENTS] = [];

    {

        # object population counter and its access methods
        my $_count = 0;
        sub get_count        { return $_count }
        sub _increment_count { return ++$_count }
        sub _decrement_count { return --$_count }
    }

    # Constructor.  It takes name => value pairs using the names in
    # %_index_map.  It may be called as a class method, in which case
    # missing values come from the defaults, or as an object method, in
    # which case missing values are copied from that object.
    sub new {
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $class         = $caller_is_obj || $caller;
        my $self          = bless [], $class;

        $self->[RALIGNMENTS] = [];

        foreach my $name ( keys %_index_map ) {
            my $index = $_index_map{$name};
            $self->[$index] =
                exists $arg{$name} ? $arg{$name}
              : $caller_is_obj     ? $caller->[$index]
              :                      $_default_data[$index];
        }

        $self->_increment_count();
        return $self;
    }

    # keep the population count up to date
    sub DESTROY {
        my ($self) = @_;
        $self->_decrement_count();
    }

    # simple slot readers
    sub get_jmax               { my ($self) = @_; $self->[JMAX] }
    sub get_jmax_original_line { my ($self) = @_; $self->[JMAX_ORIGINAL_LINE] }
    sub get_rtokens            { my ($self) = @_; $self->[RTOKENS] }
    sub get_rfields            { my ($self) = @_; $self->[RFIELDS] }
    sub get_rpatterns          { my ($self) = @_; $self->[RPATTERNS] }
    sub get_indentation        { my ($self) = @_; $self->[INDENTATION] }

    sub get_leading_space_count {
        my ($self) = @_;
        $self->[LEADING_SPACE_COUNT];
    }
    sub get_outdent_long_lines { my ($self) = @_; $self->[OUTDENT_LONG_LINES] }
    sub get_list_type          { my ($self) = @_; $self->[LIST_TYPE] }

    sub get_is_hanging_side_comment {
        my ($self) = @_;
        $self->[IS_HANGING_SIDE_COMMENT];
    }

    sub get_rvertical_tightness_flags {
        my ($self) = @_;
        $self->[RVERTICAL_TIGHTNESS_FLAGS];
    }

    # access to the alignment objects; $j is an index into the list of
    # alignments and the column requests are passed on to alignment $j
    sub set_column {
        my ( $self, $j, $column ) = @_;
        $self->[RALIGNMENTS]->[$j]->set_column($column);
    }

    sub get_alignment {
        my ( $self, $j ) = @_;
        $self->[RALIGNMENTS]->[$j];
    }

    sub get_alignments {
        my ($self) = @_;
        @{ $self->[RALIGNMENTS] };
    }

    sub get_column {
        my ( $self, $j ) = @_;
        $self->[RALIGNMENTS]->[$j]->get_column();
    }

    sub get_starting_column {
        my ( $self, $j ) = @_;
        $self->[RALIGNMENTS]->[$j]->get_starting_column();
    }

    sub increment_column {
        my ( $self, $j, $pad ) = @_;
        $self->[RALIGNMENTS]->[$j]->increment_column($pad);
    }

    # replace the contents of the alignment list (the list itself is
    # kept, so anything else holding the same list sees the change)
    sub set_alignments {
        my ( $self, @alignments ) = @_;
        @{ $self->[RALIGNMENTS] } = @alignments;
    }

    # width of field $j: its column minus the column of the field before
    sub current_field_width {
        my ( $self, $j ) = @_;
        my $width = $self->get_column($j);
        if ( $j != 0 ) {
            $width = $width - $self->get_column( $j - 1 );
        }
        return $width;
    }

    # how far the column of field $j has moved from its starting column
    sub field_width_growth {
        my ( $self, $j ) = @_;
        my $column_now      = $self->get_column($j);
        my $column_at_start = $self->get_starting_column($j);
        return $column_now - $column_at_start;
    }

    # same as current_field_width, but using the starting columns
    sub starting_field_width {
        my ( $self, $j ) = @_;
        my $width = $self->get_starting_column($j);
        if ( $j != 0 ) {
            $width = $width - $self->get_starting_column( $j - 1 );
        }
        return $width;
    }

    # widen field $j by $pad: its column and all columns to its right,
    # up to index jmax, are moved by $pad
    sub increase_field_width {
        my ( $self, $j, $pad ) = @_;
        foreach my $k ( $j .. $self->get_jmax() ) {
            $self->increment_column( $k, $pad );
        }
    }

    # room remaining between the column at index jmax and the maximum
    # line length
    sub get_available_space_on_right {
        my ($self) = @_;
        my $last_column = $self->get_column( $self->get_jmax() );
        return $self->[MAXIMUM_LINE_LENGTH] - $last_column;
    }

    # simple slot writers
    sub set_jmax      { my ( $self, $v ) = @_; $self->[JMAX]      = $v }
    sub set_rtokens   { my ( $self, $v ) = @_; $self->[RTOKENS]   = $v }
    sub set_rfields   { my ( $self, $v ) = @_; $self->[RFIELDS]   = $v }
    sub set_rpatterns { my ( $self, $v ) = @_; $self->[RPATTERNS] = $v }
    sub set_list_type { my ( $self, $v ) = @_; $self->[LIST_TYPE] = $v }

    sub set_jmax_original_line {
        my ( $self, $v ) = @_;
        $self->[JMAX_ORIGINAL_LINE] = $v;
    }

    sub set_indentation {
        my ( $self, $v ) = @_;
        $self->[INDENTATION] = $v;
    }

    sub set_leading_space_count {
        my ( $self, $v ) = @_;
        $self->[LEADING_SPACE_COUNT] = $v;
    }

    sub set_outdent_long_lines {
        my ( $self, $v ) = @_;
        $self->[OUTDENT_LONG_LINES] = $v;
    }

    sub set_is_hanging_side_comment {
        my ( $self, $v ) = @_;
        $self->[IS_HANGING_SIDE_COMMENT] = $v;
    }

    sub set_alignment {
        my ( $self, $j, $alignment ) = @_;
        $self->[RALIGNMENTS]->[$j] = $alignment;
    }

}
18023
18024 #####################################################################
18025 #
18026 # the Perl::Tidy::VerticalAligner::Alignment class holds information
18027 # on a single column being aligned
18028 #
18029 #####################################################################
package Perl::Tidy::VerticalAligner::Alignment;

# An Alignment object describes one column being aligned.  Objects are
# blessed array references; the slots are addressed with the symbolic
# indexes declared below and are read and written through the accessor
# methods at the end of this block.

{

    use strict;

    #use Carp;

    # Symbolic array indexes
    use constant COLUMN          => 0;    # the current column number
    use constant STARTING_COLUMN => 1;    # column number when created
    use constant MATCHING_TOKEN  => 2;    # what token we are matching
    use constant STARTING_LINE   => 3;    # the line index of creation
    use constant ENDING_LINE     => 4;    # the most recent line to use it
    use constant SAVED_COLUMN    => 5;    # copy of COLUMN made by save_column
    use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
                                          # (just its index in an array)

    # Correspondence between variables and array indexes
    my %_index_map;
    $_index_map{column}          = COLUMN;
    $_index_map{starting_column} = STARTING_COLUMN;
    $_index_map{matching_token}  = MATCHING_TOKEN;
    $_index_map{starting_line}   = STARTING_LINE;
    $_index_map{ending_line}     = ENDING_LINE;
    $_index_map{saved_column}    = SAVED_COLUMN;
    $_index_map{serial_number}   = SERIAL_NUMBER;

    # Values used for any slot which is neither named in the constructor
    # call nor inherited from a calling object
    my @_default_data = ();
    $_default_data[COLUMN]          = undef;
    $_default_data[STARTING_COLUMN] = undef;
    $_default_data[MATCHING_TOKEN]  = undef;
    $_default_data[STARTING_LINE]   = undef;
    $_default_data[ENDING_LINE]     = undef;
    $_default_data[SAVED_COLUMN]    = undef;
    $_default_data[SERIAL_NUMBER]   = undef;

    # class population count
    {
        my $_count = 0;
        sub get_count        { $_count; }
        sub _increment_count { ++$_count }
        sub _decrement_count { --$_count }
    }

    # constructor
    #
    # May be called on the class name or on an existing object.  Arguments
    # are name => value pairs using the keys of %_index_map.  For each slot
    # the value is taken, in order of preference, from the named argument,
    # from the calling object (when called on an object), or from the
    # defaults.  Returns the new object and increments the population count.
    sub new {
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $class         = $caller_is_obj || $caller;

        # no symbolic references are used here, so strict stays fully on
        my $self = bless [], $class;

        foreach my $name ( keys %_index_map ) {
            my $index = $_index_map{$name};
            if    ( exists $arg{$name} ) { $self->[$index] = $arg{$name} }
            elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
            else { $self->[$index] = $_default_data[$index] }
        }
        $self->_increment_count();
        return $self;
    }

    # keep the population count in step when an object goes away
    sub DESTROY {
        $_[0]->_decrement_count();
    }

    # read accessors
    sub get_column          { return $_[0]->[COLUMN] }
    sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
    sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
    sub get_starting_line   { return $_[0]->[STARTING_LINE] }
    sub get_ending_line     { return $_[0]->[ENDING_LINE] }
    sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }

    # write accessors; increment_column adds its argument to the column
    sub set_column          { $_[0]->[COLUMN]          = $_[1] }
    sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
    sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
    sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
    sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
    sub increment_column { $_[0]->[COLUMN] += $_[1] }

    # copy the current column into, or back out of, the SAVED_COLUMN slot
    sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
    sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }

}
18115
18116 package Perl::Tidy::VerticalAligner;
18117
18118 # The Perl::Tidy::VerticalAligner package collects output lines and
18119 # attempts to line up certain common tokens, such as => and #, which are
18120 # identified by the calling routine.
18121 #
18122 # There are two main routines: append_line and flush.  Append acts as a
18123 # storage buffer, collecting lines into a group which can be vertically
18124 # aligned.  When alignment is no longer possible or desirable, it dumps
18125 # the group to flush.
18126 #
18127 #     append_line -----> flush
18128 #
18129 #     collects          writes
18130 #     vertical          one
18131 #     groups            group
18132
BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts

    use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
    use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;

    # Print a notice for a debug flag which has been left switched on
    my $debug_warning = sub {
        print "VALIGN_DEBUGGING with key $_[0]\n";
    };

    # every flag declared above is announced when it is nonzero
    VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
    VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
    VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');

}
18150
18151 use vars qw(
18152   $vertical_aligner_self
18153   $current_line
18154   $maximum_alignment_index
18155   $ralignment_list
18156   $maximum_jmax_seen
18157   $minimum_jmax_seen
18158   $previous_minimum_jmax_seen
18159   $previous_maximum_jmax_seen
18160   $maximum_line_index
18161   $group_level
18162   $group_type
18163   $group_maximum_gap
18164   $marginal_match
18165   $last_group_level_written
18166   $last_leading_space_count
18167   $extra_indent_ok
18168   $zero_count
18169   @group_lines
18170   $last_comment_column
18171   $last_side_comment_line_number
18172   $last_side_comment_length
18173   $last_side_comment_level
18174   $outdented_line_count
18175   $first_outdented_line_at
18176   $last_outdented_line_at
18177   $diagnostics_object
18178   $logger_object
18179   $file_writer_object
18180   @side_comment_history
18181   $comment_leading_space_count
18182   $is_matching_terminal_line
18183
18184   $cached_line_text
18185   $cached_line_type
18186   $cached_line_flag
18187   $cached_seqno
18188   $cached_line_valid
18189   $cached_line_leading_space_count
18190   $cached_seqno_string
18191
18192   $seqno_string
18193   $last_nonblank_seqno_string
18194
18195   $rOpts
18196
18197   $rOpts_maximum_line_length
18198   $rOpts_continuation_indentation
18199   $rOpts_indent_columns
18200   $rOpts_tabs
18201   $rOpts_entab_leading_whitespace
18202   $rOpts_valign
18203
18204   $rOpts_fixed_position_side_comment
18205   $rOpts_minimum_space_to_comment
18206
18207 );
18208
sub initialize {

    # Prepare the vertical aligner for use.  Called as a class method with
    # the option hash reference, the file writer object, the logger object
    # and the diagnostics object; these are stored in package variables,
    # all aligner state is reset, and a new (empty) blessed hash is
    # returned as the aligner handle.
    my $class = shift;
    ( $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) = @_;

    # frequently used parameters
    $rOpts_valign                   = $rOpts->{'valign'};
    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
    $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
    $rOpts_fixed_position_side_comment =
      $rOpts->{'fixed-position-side-comment'};
    $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
    $rOpts_tabs                     = $rOpts->{'tabs'};
    $rOpts_indent_columns           = $rOpts->{'indent-columns'};

    # variables describing the entire space group:
    $ralignment_list          = [];
    $group_level              = 0;
    $last_group_level_written = -1;

    # can we move all lines to the right?
    $extra_indent_ok = 0;

    $last_side_comment_length = 0;
    $maximum_jmax_seen = $minimum_jmax_seen = 0;
    $previous_minimum_jmax_seen = $previous_maximum_jmax_seen = 0;

    # list of all lines in group
    @group_lines = ();

    # outdented line statistics
    $outdented_line_count = 0;
    $first_outdented_line_at = $last_outdented_line_at = 0;

    # side comment tracking
    $last_side_comment_line_number = 0;
    $last_side_comment_level       = -1;
    $is_matching_terminal_line     = 0;

    # most recent 3 side comments; [ line number, column ]
    @side_comment_history[ 0 .. 2 ] =
      ( [ -300, 0 ], [ -200, 0 ], [ -100, 0 ] );

    # write_leader_and_string cache:
    $cached_line_text = $cached_seqno_string = "";
    $cached_line_type = $cached_line_flag = 0;
    $cached_seqno = $cached_line_valid = 0;
    $cached_line_leading_space_count = 0;

    # string of sequence numbers joined together
    $seqno_string = $last_nonblank_seqno_string = "";

    forget_side_comment();

    initialize_for_new_group();

    $vertical_aligner_self = bless {}, $class;
    return $vertical_aligner_self;
}
18273
sub initialize_for_new_group {

    # Reset the bookkeeping which describes the group being collected

    # no lines and no alignments are stored in the current group
    $maximum_line_index = $maximum_alignment_index = -1;

    # there is no line being matched for alignment, and no group type
    $current_line = undef;
    $group_type   = "";

    # count of consecutive lines without tokens, largest gap introduced,
    # and the marginal match flag
    $zero_count = $group_maximum_gap = $marginal_match = 0;

    # leading space counts
    $comment_leading_space_count = $last_leading_space_count = 0;
}
18285
# interface to Perl::Tidy::Diagnostics routines:
# pass the arguments on to the diagnostics object, if there is one
sub write_diagnostics {
    return $diagnostics_object
      && $diagnostics_object->write_diagnostics(@_);
}
18292
# interface to Perl::Tidy::Logger routines:
# pass a warning on to the logger object, if there is one
sub warning {
    return $logger_object ? $logger_object->warning(@_) : $logger_object;
}
18299
sub write_logfile_entry {

    # pass a log file entry on to the logger object, if there is one
    return $logger_object
      && $logger_object->write_logfile_entry(@_);
}
18305
sub report_definite_bug {

    # tell the logger object, if there is one, that a definite bug was
    # found; no arguments are passed along
    return $logger_object ? $logger_object->report_definite_bug()
      :                     $logger_object;
}
18311
sub get_SPACES {

    # Return the number of leading spaces for an indentation item.  The
    # item is either a plain number of spaces, which is returned as it
    # is, or an object, which is asked through its get_SPACES method.
    my ($indentation) = @_;
    return $indentation unless ref($indentation);
    return $indentation->get_SPACES();
}
18320
sub get_RECOVERABLE_SPACES {

    # Return the number of spaces (+ means shift right, - means shift
    # left) that we would like to shift a group of lines with the same
    # indentation to get them to line up with their opening parens.
    # Only an indentation object can supply this; the result is 0 for a
    # plain number.
    my ($indentation) = @_;
    return 0 unless ref($indentation);
    return $indentation->get_RECOVERABLE_SPACES();
}
18329
sub get_STACK_DEPTH {

    # Return the value of the get_STACK_DEPTH method of an indentation
    # object, or 0 if the indentation is not a reference
    my ($indentation) = @_;
    return 0 unless ref($indentation);
    return $indentation->get_STACK_DEPTH();
}
18335
sub make_alignment {

    # Make one new alignment at column $col which aligns token $token.
    #
    # The new alignment starts and ends on the most recent line of the
    # current group ($maximum_line_index).  Its serial number is its index
    # in $ralignment_list, where it is stored.  Returns the new object.
    my ( $col, $token ) = @_;

    ++$maximum_alignment_index;

    # A direct class method call is used here; the indirect object form
    # 'new Class(...)' depends on how the parser guesses at the bareword.
    my $alignment = Perl::Tidy::VerticalAligner::Alignment->new(
        column          => $col,
        starting_column => $col,
        matching_token  => $token,
        starting_line   => $maximum_line_index,
        ending_line     => $maximum_line_index,
        serial_number   => $maximum_alignment_index,
    );
    $ralignment_list->[$maximum_alignment_index] = $alignment;
    return $alignment;
}
18352
sub dump_alignments {

    # Debugging aid: print a heading line followed by one tab-separated
    # line for each alignment with index 0 through $maximum_alignment_index
    print
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
    foreach my $i ( 0 .. $maximum_alignment_index ) {
        my $alignment       = $ralignment_list->[$i];
        my $column          = $alignment->get_column();
        my $starting_column = $alignment->get_starting_column();
        my $matching_token  = $alignment->get_matching_token();
        my $starting_line   = $alignment->get_starting_line();
        my $ending_line     = $alignment->get_ending_line();

        # the values appear in the same order as the heading names
        my @cells = (
            $i, $matching_token, $starting_column,
            $column, $starting_line, $ending_line
        );
        print join( "\t", @cells ) . "\n";
    }
}
18366
sub save_alignment_columns {

    # call save_column on each alignment of the current group, in order
    $ralignment_list->[$_]->save_column() for 0 .. $maximum_alignment_index;
}
18372
sub restore_alignment_columns {

    # call restore_column on each alignment of the current group, in order
    $ralignment_list->[$_]->restore_column()
      for 0 .. $maximum_alignment_index;
}
18378
sub forget_side_comment {

    # reset the remembered side comment column to 0
    return ( $last_comment_column = 0 );
}
18382
sub append_line {

    # sub append_line is called to place one line in the current vertical
    # group.
    #
    # The input parameters are:
    #     $level = indentation level of this line
    #     $level_end = passed on to make_side_comment
    #     $indentation = either a number of leading spaces or an object
    #                    with a get_SPACES method
    #     $rfields = reference to array of fields
    #     $rtokens   = reference to array of tokens starting fields 1,2,..
    #     $rpatterns = reference to array of patterns, one per field
    #     $is_forced_break = if true, sub decide_if_list is called for
    #                    the new line
    #     $outdent_long_lines = stored with the line and passed on to
    #                    write_leader_and_string; also needed for a
    #                    block comment to start or join a COMMENT group
    #     $is_terminal_ternary = if true, and there is a current line,
    #                    dummy fields are added by fix_terminal_ternary
    #     $is_terminal_statement = used to set $extra_indent_ok when the
    #                    level changes
    #     $do_not_pad = if true, the current group is flushed first
    #     $rvertical_tightness_flags = reference to an array of flags
    #                    which is used to validate a cached line and is
    #                    then stored with the line
    #     $level_jump = a nonzero value keeps a cached opening block
    #                    brace from being joined (unless the cached flag
    #                    is 2) and turns off the special handling of
    #                    'else'
    #
    # Here is an example of what this package does.  In this example,
    # we are trying to line up both the '=>' and the '#'.
    #
    #         '18' => 'grave',    #   \`
    #         '19' => 'acute',    #   `'
    #         '20' => 'caron',    #   \v
    # <-tabs-><f1-><--field 2 ---><-f3->
    # |            |              |    |
    # |            |              |    |
    # col1        col2         col3 col4
    #
    # The calling routine has already broken the entire line into 3 fields as
    # indicated.  (So the work of identifying promising common tokens has
    # already been done).
    #
    # In this example, there will be 2 tokens being matched: '=>' and '#'.
    # They are the leading parts of fields 2 and 3, but we do need to know
    # what they are so that we can dump a group of lines when these tokens
    # change.
    #
    # The fields contain the actual characters of each field.  The patterns
    # are like the fields, but they contain mainly token types instead
    # of tokens, so they have fewer characters.  They are used to be
    # sure we are matching fields of similar type.
    #
    # In this example, there will be 4 column indexes being adjusted.  The
    # first one is always at zero.  The interior columns are at the start of
    # the matching tokens, and the last one tracks the maximum line length.
    #
    # Basically, each time a new line comes in, it joins the current vertical
    # group if possible.  Otherwise it causes the current group to be dumped
    # and a new group is started.
    #
    # For each new group member, the column locations are increased, as
    # necessary, to make room for the new fields.  When the group is finally
    # output, these column numbers are used to compute the amount of spaces of
    # padding needed for each field.
    #
    # Programming note: the fields are assumed not to have any tab characters.
    # Tabs have been previously removed except for tabs in quoted strings and
    # side comments.  Tabs in these fields can mess up the column counting.
    # The log file warns the user if there are any such tabs.

    my (
        $level,               $level_end,
        $indentation,         $rfields,
        $rtokens,             $rpatterns,
        $is_forced_break,     $outdent_long_lines,
        $is_terminal_ternary, $is_terminal_statement,
        $do_not_pad,          $rvertical_tightness_flags,
        $level_jump,
    ) = @_;

    # $jmax is the index of the last field, so there are $jmax+1 fields;
    # the index of the last token between fields should be $jmax-1
    my $jmax = $#{$rfields};

    my $leading_space_count = get_SPACES($indentation);

    # set outdented flag to be sure we either align within statements or
    # across statement boundaries, but not both.
    my $is_outdented = $last_leading_space_count > $leading_space_count;
    $last_leading_space_count = $leading_space_count;

    # Patch: undo for hanging side comment
    my $is_hanging_side_comment =
      ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
    $is_outdented = 0 if $is_hanging_side_comment;

    VALIGN_DEBUG_FLAG_APPEND0 && do {
        print
"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
    };

    # Validate cached line if necessary: If we can produce a container
    # with just 2 lines total by combining an existing cached opening
    # token with the closing token to follow, then we will mark both
    # cached flags as valid.
    if ($rvertical_tightness_flags) {
        if (   $maximum_line_index <= 0
            && $cached_line_type
            && $cached_seqno
            && $rvertical_tightness_flags->[2]
            && $rvertical_tightness_flags->[2] == $cached_seqno )
        {
            $rvertical_tightness_flags->[3] ||= 1;
            $cached_line_valid ||= 1;
        }
    }

    # do not join an opening block brace with an unbalanced line
    # unless requested with a flag value of 2
    if (   $cached_line_type == 3
        && $maximum_line_index < 0
        && $cached_line_flag < 2
        && $level_jump != 0 )
    {
        $cached_line_valid = 0;
    }

    # patch until new aligner is finished
    if ($do_not_pad) { my_flush() }

    # shouldn't happen:
    if ( $level < 0 ) { $level = 0 }

    # do not align code across indentation level changes
    # or if vertical alignment is turned off for debugging
    if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {

        # we are allowed to shift a group of lines to the right if its
        # level is greater than the previous and next group
        $extra_indent_ok =
          ( $level < $group_level && $last_group_level_written < $group_level );

        my_flush();

        # If we know that this line will get flushed out by itself because
        # of level changes, we can leave the extra_indent_ok flag set.
        # That way, if we get an external flush call, we will still be
        # able to do some -lp alignment if necessary.
        $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );

        $group_level = $level;

        # wait until after the above flush to get the leading space
        # count because it may have been changed if the -icp flag is in
        # effect
        $leading_space_count = get_SPACES($indentation);

    }

    # --------------------------------------------------------------------
    # Patch to collect outdentable block COMMENTS
    # --------------------------------------------------------------------

    # NOTE: $is_blank_line is never set to a true value in this sub, so
    # only the block comment test below can keep a line in a COMMENT group
    my $is_blank_line = "";
    my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
    if ( $group_type eq 'COMMENT' ) {
        if (
            (
                   $is_block_comment
                && $outdent_long_lines
                && $leading_space_count == $comment_leading_space_count
            )
            || $is_blank_line
          )
        {

            # a COMMENT group stores the comment text itself, not an object
            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
            return;
        }
        else {
            my_flush();
        }
    }

    # --------------------------------------------------------------------
    # add dummy fields for terminal ternary
    # --------------------------------------------------------------------
    my $j_terminal_match;
    if ( $is_terminal_ternary && $current_line ) {
        $j_terminal_match =
          fix_terminal_ternary( $rfields, $rtokens, $rpatterns );

        # the number of fields may have been changed by the call
        $jmax = @{$rfields} - 1;
    }

    # --------------------------------------------------------------------
    # add dummy fields for else statement
    # --------------------------------------------------------------------
    if (   $rfields->[0] =~ /^else\s*$/
        && $current_line
        && $level_jump == 0 )
    {
        $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );

        # the number of fields may have been changed by the call
        $jmax = @{$rfields} - 1;
    }

    # --------------------------------------------------------------------
    # Step 1. Handle simple line of code with no fields to match.
    # --------------------------------------------------------------------
    if ( $jmax <= 0 ) {
        $zero_count++;

        if ( $maximum_line_index >= 0
            && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
        {

            # flush the current group if it has some aligned columns..
            if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }

            # flush current group if we are just collecting side comments..
            elsif (

                # ...and we haven't seen a comment lately
                ( $zero_count > 3 )

                # ..or if this new line doesn't fit to the left of the comments
                || ( ( $leading_space_count + length( $rfields->[0] ) ) >
                    $group_lines[0]->get_column(0) )
              )
            {
                my_flush();
            }
        }

        # patch to start new COMMENT group if this comment may be outdented
        if (   $is_block_comment
            && $outdent_long_lines
            && $maximum_line_index < 0 )
        {
            $group_type                           = 'COMMENT';
            $comment_leading_space_count          = $leading_space_count;
            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
            return;
        }

        # just write this line directly if no current group, no side comment,
        # and no space recovery is needed.
        if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
        {
            write_leader_and_string( $leading_space_count, $rfields->[0], 0,
                $outdent_long_lines, $rvertical_tightness_flags );
            return;
        }
    }
    else {
        $zero_count = 0;
    }

    # programming check: (shouldn't happen)
    # an error here implies an incorrect call was made
    if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
        warning(
"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
        );
        report_definite_bug();
    }

    # --------------------------------------------------------------------
    # create an object to hold this line
    # --------------------------------------------------------------------

    # A direct class method call is used here; the indirect object form
    # 'new Class(...)' depends on how the parser guesses at the bareword.
    my $new_line = Perl::Tidy::VerticalAligner::Line->new(
        jmax                      => $jmax,
        jmax_original_line        => $jmax,
        rtokens                   => $rtokens,
        rfields                   => $rfields,
        rpatterns                 => $rpatterns,
        indentation               => $indentation,
        leading_space_count       => $leading_space_count,
        outdent_long_lines        => $outdent_long_lines,
        list_type                 => "",
        is_hanging_side_comment   => $is_hanging_side_comment,
        maximum_line_length       => $rOpts->{'maximum-line-length'},
        rvertical_tightness_flags => $rvertical_tightness_flags,
    );

    # Initialize a global flag saying if the last line of the group should
    # match end of group and also terminate the group.  There should be no
    # returns between here and where the flag is handled at the bottom.
    my $col_matching_terminal = 0;
    if ( defined($j_terminal_match) ) {

        # remember the column of the terminal ? or { to match with
        $col_matching_terminal = $current_line->get_column($j_terminal_match);

        # set global flag for sub decide_if_aligned
        $is_matching_terminal_line = 1;
    }

    # --------------------------------------------------------------------
    # It simplifies things to create a zero length side comment
    # if none exists.
    # --------------------------------------------------------------------
    make_side_comment( $new_line, $level_end );

    # --------------------------------------------------------------------
    # Decide if this is a simple list of items.
    # There are 3 list types: none, comma, comma-arrow.
    # We use this below to be less restrictive in deciding what to align.
    # --------------------------------------------------------------------
    if ($is_forced_break) {
        decide_if_list($new_line);
    }

    if ($current_line) {

        # --------------------------------------------------------------------
        # Allow hanging side comment to join current group, if any
        # This will help keep side comments aligned, because otherwise we
        # will have to start a new group, making alignment less likely.
        # --------------------------------------------------------------------
        join_hanging_comment( $new_line, $current_line )
          if $is_hanging_side_comment;

        # --------------------------------------------------------------------
        # If there is just one previous line, and it has more fields
        # than the new line, try to join fields together to get a match with
        # the new line.  At the present time, only a single leading '=' is
        # allowed to be compressed out.  This is useful in rare cases where
        # a table is forced to use old breakpoints because of side comments,
        # and the table starts out something like this:
        #   my %MonthChars = ('0', 'Jan',   # side comment
        #                     '1', 'Feb',
        #                     '2', 'Mar',
        # Eliminating the '=' field will allow the remaining fields to line up.
        # This situation does not occur if there are no side comments
        # because scan_list would put a break after the opening '('.
        # --------------------------------------------------------------------
        eliminate_old_fields( $new_line, $current_line );

        # --------------------------------------------------------------------
        # If the new line has more fields than the current group,
        # see if we can match the first fields and combine the remaining
        # fields of the new line.
        # --------------------------------------------------------------------
        eliminate_new_fields( $new_line, $current_line );

        # --------------------------------------------------------------------
        # Flush previous group unless all common tokens and patterns match..
        # --------------------------------------------------------------------
        check_match( $new_line, $current_line );

        # --------------------------------------------------------------------
        # See if there is space for this line in the current group (if any)
        # --------------------------------------------------------------------

        # $current_line is a package variable and is tested again here
        # because the call above may have flushed the group
        if ($current_line) {
            check_fit( $new_line, $current_line );
        }
    }

    # --------------------------------------------------------------------
    # Append this line to the current group (or start new group)
    # --------------------------------------------------------------------
    accept_line($new_line);

    # Future update to allow this to vary:
    $current_line = $new_line if ( $maximum_line_index == 0 );

    # output this group if it ends in a terminal else or ternary line
    if ( defined($j_terminal_match) ) {

        # if there is only one line in the group (maybe due to failure to match
        # perfectly with previous lines), then align the ? or { of this
        # terminal line with the previous one unless that would make the line
        # too long
        if ( $maximum_line_index == 0 ) {
            my $col_now = $current_line->get_column($j_terminal_match);
            my $pad     = $col_matching_terminal - $col_now;
            my $padding_available =
              $current_line->get_available_space_on_right();
            if ( $pad > 0 && $pad <= $padding_available ) {
                $current_line->increase_field_width( $j_terminal_match, $pad );
            }
        }
        my_flush();
        $is_matching_terminal_line = 0;
    }

    # --------------------------------------------------------------------
    # Step 8. Some old debugging stuff
    # --------------------------------------------------------------------
    VALIGN_DEBUG_FLAG_APPEND && do {
        print "APPEND fields:";
        dump_array(@$rfields);
        print "APPEND tokens:";
        dump_array(@$rtokens);
        print "APPEND patterns:";
        dump_array(@$rpatterns);
        dump_alignments();
    };

    return;
}
18765
sub join_hanging_comment {

    # Try to give a hanging side comment line ($line) the same number of
    # fields as an older line ($old_line) by moving its comment into the
    # last field and filling the fields in between with single blanks.
    # Returns 1 if $line was changed, and 0 if it was left alone.
    my ( $line, $old_line ) = @_;

    # must be 2 fields
    my $jmax = $line->get_jmax();
    return 0 if $jmax != 1;

    # the second field is a comment..
    my $rtokens = $line->get_rtokens();
    return 0 if $rtokens->[0] ne '#';

    # the first field is empty...
    my $rfields = $line->get_rfields();
    return 0 if $rfields->[0] !~ /^\s*$/;

    # the current line has more fields
    my $maximum_field_index = $old_line->get_jmax();
    return 0 if $maximum_field_index <= $jmax;

    my $rpatterns = $line->get_rpatterns();
    my $j_last    = $maximum_field_index;

    $line->set_is_hanging_side_comment(1);
    $line->set_jmax($j_last);

    # move the comment and its token and pattern to the end; this must be
    # done before the original slots are overwritten in the loop below
    $rfields->[$j_last]       = $rfields->[1];
    $rtokens->[ $j_last - 1 ] = $rtokens->[0];
    $rpatterns->[ $j_last - 1 ] = $rpatterns->[0];

    foreach my $j ( 1 .. $j_last - 1 ) {

        # NOTE: caused glitch unless 1 blank, why?
        $rfields->[$j] = " ";
        $rtokens->[ $j - 1 ]   = "";
        $rpatterns->[ $j - 1 ] = "";
    }
    return 1;
}
18794
sub eliminate_old_fields {

    # Try to make the single line already in the group compatible with a
    # new line that has fewer fields, by merging fields of the old line
    # so that its alignment tokens are exactly those of the new line.
    #
    # Parameters:
    #   $new_line - line object about to be added to the group
    #   $old_line - line object already in the group
    #
    # Side effects:
    #   - always folds the new line's field count into the file-level
    #     $maximum_jmax_seen / $minimum_jmax_seen
    #   - on success, replaces the old line's alignments, jmax, tokens,
    #     fields and patterns; on failure the old line is left untouched
    #
    # The return value is not meaningful.

    my $new_line = shift;
    my $jmax     = $new_line->get_jmax();
    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }

    # there must be one previous line
    return unless ( $maximum_line_index == 0 );

    my $old_line            = shift;
    my $maximum_field_index = $old_line->get_jmax();

    ###############################################
    # this line must have fewer fields
    return unless $maximum_field_index > $jmax;
    ###############################################

    # Identify specific cases where field elimination is allowed:
    # case=1: both lines have comma-separated lists, and the first
    #         line has an equals
    # case=2: both lines have leading equals

    # case 1 is the default
    my $case = 1;

    # See if case 2: both lines have leading '='
    # We'll require similar leading patterns in this case
    my $old_rtokens   = $old_line->get_rtokens();
    my $rtokens       = $new_line->get_rtokens();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();
    if (   $rtokens->[0] =~ /^=\d*$/
        && $old_rtokens->[0] eq $rtokens->[0]
        && $old_rpatterns->[0] eq $rpatterns->[0] )
    {
        $case = 2;
    }

    # not too many fewer fields in new line for case 1
    return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );

    # case 1 must have side comment (a non-empty last field on the old line)
    my $old_rfields = $old_line->get_rfields();
    return
      if ( $case == 1
        && length( $old_rfields->[$maximum_field_index] ) == 0 );

    my $hid_equals = 0;

    my @new_alignments      = ();
    my @new_fields          = ();
    my @new_matching_tokens = ();

    # $j indexes the new line's tokens; $current_field accumulates the
    # text of old fields that are being merged into one
    my $j             = 0;
    my $current_field = '';

    # loop over all old tokens
    my $in_match = 0;
    for my $k ( 0 .. $maximum_field_index - 1 ) {
        $current_field .= $old_rfields->[$k];
        last if ( $j > $jmax - 1 );

        if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {

            # this old token survives; close off the merged field
            $in_match                = 1;
            $new_fields[$j]          = $current_field;
            $current_field           = '';
            $new_matching_tokens[$j] = $old_rtokens->[$k];
            $new_alignments[$j]      = $old_line->get_alignment($k);
            $j++;
        }
        else {

            if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
                last if ( $case == 2 );    # avoid problems with stuff
                                           # like:   $a=$b=$c=$d;
                $hid_equals = 1;
            }

            # disallow gaps in matching field types in case 1
            last if ( $in_match && $case == 1 );
        }
    }

    # Modify the current state if we are successful.
    # We must exactly reach the ends of both lists for success.
    if (   ( $j == $jmax )
        && ( $current_field eq '' )
        && ( $case != 1 || $hid_equals ) )
    {

        # carry the old line's last field and its alignment over unchanged
        $new_fields[$j]     = $current_field . $old_rfields->[$maximum_field_index];
        $new_alignments[$j] = $old_line->get_alignment($maximum_field_index);

        $old_line->set_alignments(@new_alignments);
        $old_line->set_jmax($jmax);
        $old_line->set_rtokens( \@new_matching_tokens );
        $old_line->set_rfields( \@new_fields );

        # Note: the old line takes on the NEW line's pattern array here;
        # merged old patterns are not used.
        $old_line->set_rpatterns( \@{$rpatterns} );
    }
}
18909
# Make sure a line ends with a side comment field, creating an empty one
# if necessary, and keep track of where side comments were last seen.
sub make_side_comment {
    my ( $new_line, $level_end ) = @_;
    my $jmax    = $new_line->get_jmax();
    my $rtokens = $new_line->get_rtokens();

    # The line already has a side comment when its last token is '#'
    if ( $jmax != 0 && $rtokens->[ $jmax - 1 ] eq '#' ) {

        my $line_number = $vertical_aligner_self->get_output_line_number();
        my $rfields     = $new_line->get_rfields();

        # The remembered side comment location goes stale after more
        # than 12 output lines ...
        my $too_far_back = $line_number - $last_side_comment_line_number > 12;

        # ... and is also dropped when a line starting with a closing
        # brace ends at a shallower level than the last side comment
        if (
            $too_far_back
            || (   $level_end < $last_side_comment_level
                && $rfields->[0] =~ /^}/ )
          )
        {
            forget_side_comment();
        }

        $last_side_comment_line_number = $line_number;
        $last_side_comment_level       = $level_end;
    }

    # No side comment yet: append a '#' token, an empty field and a '#'
    # pattern, and record the enlarged field count on the line
    else {
        my $rfields   = $new_line->get_rfields();
        my $rpatterns = $new_line->get_rpatterns();
        my $jnew      = $jmax + 1;

        $rtokens->[$jmax]   = '#';
        $rfields->[$jnew]   = '';
        $rpatterns->[$jnew] = '#';

        $new_line->set_jmax($jnew);
        $new_line->set_jmax_original_line($jnew);
    }
}
18947
sub decide_if_list {

    # Mark a line as a list when every field separator (other than the
    # trailing '#') is a comma or comma-arrow.  Separator tokens look
    # like ',3' or '=>2', where the trailing digit is the nesting depth.
    # Opening braces are tolerated so that nested list items still count.
    #
    # The list type stored on the line is the first token when the line
    # qualifies, or an empty string when a later token disqualifies it.
    # Nothing is stored if the first token is not a list separator.
    my ($line) = @_;

    my $rtokens     = $line->get_rtokens();
    my $first_token = $rtokens->[0];

    if ( $first_token =~ /^(\,|=>)/ ) {
        my $jmax      = $line->get_jmax();
        my $list_type = $first_token;

        # check the remaining separators, stopping at the first misfit
      TOKEN:
        for my $j ( 1 .. $jmax - 2 ) {
            next TOKEN if ( $rtokens->[$j] =~ /^(\,|=>|\{)/ );
            $list_type = "";
            last TOKEN;
        }

        $line->set_list_type($list_type);
    }
}
18974
sub eliminate_new_fields {

    # When a new line has more fields than the previous line of the
    # group, try to fold its extra trailing fields into one so that the
    # two lines have the same number of fields.
    #
    # Parameters: ( $new_line, $old_line ) line objects.
    # Does nothing unless a group exists ( $maximum_line_index >= 0 ).
    # Once past the early-return guards, the new line's jmax is always
    # (re)set, whether or not any fields were combined.

    return unless ( $maximum_line_index >= 0 );
    my ( $new_line, $old_line ) = @_;
    my $jmax = $new_line->get_jmax();

    # both lines starting with the same '=' token counts as an assignment
    my $old_rtokens = $old_line->get_rtokens();
    my $rtokens     = $new_line->get_rtokens();
    my $is_assignment =
      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );

    # except for assignments, field counts must vary monotonically
    return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );

    # the new line must have more fields than the old line
    my $maximum_field_index = $old_line->get_jmax();
    return unless ( $maximum_field_index < $jmax );

    if ( !$is_assignment ) {

        # only if monotonic
        return
          if ( $old_line->get_jmax_original_line() != $minimum_jmax_seen );

        # never combine fields of a comma list
        return
          if ( $maximum_field_index <= 1
            || $new_line->get_list_type() =~ /^,/ );
    }

    my $rfields       = $new_line->get_rfields();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();

    # index of the field which will absorb the extra new fields
    my $jlast = $maximum_field_index - 1;

    # All OLD tokens and patterns, except the one before the side
    # comment, must agree with the new line
    my $match = 1;
    for my $k ( 0 .. $jlast - 1 ) {
        next
          if ( $old_rtokens->[$k] eq $rtokens->[$k]
            && $old_rpatterns->[$k] eq $rpatterns->[$k] );
        $match = 0;
        last;
    }

    # leading tokens agree, so combine the extra new tokens
    if ($match) {
        foreach my $k ( $maximum_field_index .. $jmax - 1 ) {
            $rfields->[$jlast] .= $rfields->[$k];
            $rfields->[$k] = "";
            $rpatterns->[$jlast] .= $rpatterns->[$k];
            $rpatterns->[$k] = "";
        }

        # the merged field is followed directly by the side comment
        $rtokens->[$jlast]                 = '#';
        $rfields->[$maximum_field_index]   = $rfields->[$jmax];
        $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax];
        $jmax                              = $maximum_field_index;
    }
    $new_line->set_jmax($jmax);
}
19037
sub fix_terminal_ternary {

    # Add empty fields as necessary to align a ternary term
    # like this:
    #
    #  my $leapyear =
    #      $year % 4   ? 0
    #    : $year % 100 ? 1
    #    : $year % 400 ? 0
    #    :               1;
    #
    # Parameters:
    #   $rfields, $rtokens, $rpatterns - references to the field, token
    #   and pattern arrays of the terminal line.  They are modified in
    #   place, but only if the whole adjustment succeeds.
    #
    # Returns the index of the matching '?' token in the previous line of
    # the group (which may be 0) when the arrays were adjusted, and
    # nothing (undef in scalar context) when no adjustment was made.

    my ( $rfields, $rtokens, $rpatterns ) = @_;

    my $jmax        = @{$rfields} - 1;
    my $old_line    = $group_lines[$maximum_line_index];
    my $rfields_old = $old_line->get_rfields();

    my $rpatterns_old       = $old_line->get_rpatterns();
    my $rtokens_old         = $old_line->get_rtokens();
    my $maximum_field_index = $old_line->get_jmax();

    # look for the question mark after the :
    my ($jquestion);
    my $depth_question;
    my $pad = "";
    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok =~ /^\?(\d+)$/ ) {
            $depth_question = $1;

            # depth must be correct
            next unless ( $depth_question eq $group_level );

            $jquestion = $j;

            # the padding is as wide as the '?' plus the space after it
            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
                $pad = " " x length($1);
            }
            else {
                return;    # shouldn't happen
            }
            last;
        }
    }
    return unless ( defined($jquestion) );    # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the else line to insure a match.  Add empty fields
    # as necessary.
    my $jadd = $jquestion;

    # Work on copies of the actual arrays in case we have
    # to return due to an error
    my @fields   = @{$rfields};
    my @patterns = @{$rpatterns};
    my @tokens   = @{$rtokens};

    VALIGN_DEBUG_FLAG_TERNARY && do {
        local $" = '><';
        print "CURRENT FIELDS=<@{$rfields_old}>\n";
        print "CURRENT TOKENS=<@{$rtokens_old}>\n";
        print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
        print "UNMODIFIED FIELDS=<@{$rfields}>\n";
        print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
        print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
    };

    # handle cases of leading colon on this line
    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {

        my ( $colon, $therest ) = ( $1, $2 );

        # Handle sub-case of first field with leading colon plus additional code
        # This is the usual situation as at the '1' below:
        #  ...
        #  : $year % 400 ? 0
        #  :               1;
        #
        # Test the length rather than the truth of the remainder, so
        # that a terminal value of exactly '0' is handled here too.
        if ( length($therest) ) {

            # Split the first field after the leading colon and insert padding.
            # Note that this padding will remain even if the terminal value goes
            # out on a separate line.  This does not seem to look too bad, so no
            # mechanism has been included to undo it.
            shift @fields;
            unshift @fields, ( $colon, $pad . $therest );

            # change the leading pattern from : to ?
            return unless ( $patterns[0] =~ s/^\:/?/ );

            # install leading tokens and patterns of existing line
            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

            # insert appropriate number of empty fields
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }

        # handle sub-case of first field just equal to leading colon.
        # This can happen for example in the example below where
        # the leading '(' would create a new alignment token
        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
        # :                        ( $mname = $name . '->' );
        else {

            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen

            # prepend a leading ? onto the second pattern
            $patterns[1] = "?b" . $patterns[1];

            # pad the second field
            $fields[1] = $pad . $fields[1];

            # install leading tokens and patterns of existing line, replacing
            # leading token and inserting appropriate number of empty fields
            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }
    }

    # Handle case of no leading colon on this line.  This will
    # be the case when -wba=':' is used.  For example,
    #  $year % 400 ? 0 :
    #                1;
    else {

        # install leading tokens and patterns of existing line
        $patterns[0] = '?' . 'b' . $patterns[0];
        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

        # insert appropriate number of empty fields
        $jadd = $jquestion + 1;
        $fields[0] = $pad . $fields[0];
        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
    }

    VALIGN_DEBUG_FLAG_TERNARY && do {
        local $" = '><';
        print "MODIFIED TOKENS=<@tokens>\n";
        print "MODIFIED PATTERNS=<@patterns>\n";
        print "MODIFIED FIELDS=<@fields>\n";
    };

    # all ok .. update the arrays
    @{$rfields}   = @fields;
    @{$rtokens}   = @tokens;
    @{$rpatterns} = @patterns;

    # Hand back the '?' index.  Per the original note the caller should
    # force a flush after this line - confirm at the call site.
    return $jquestion;
}
19191
sub fix_terminal_else {

    # Add empty fields as necessary to align a balanced terminal
    # else block to a previous if/elsif/unless block,
    # like this:
    #
    #  if   ( 1 || $x ) { print "ok 13\n"; }
    #  else             { print "not ok 13\n"; }
    #
    # Parameters:
    #   $rfields, $rtokens, $rpatterns - references to the field, token
    #   and pattern arrays of the 'else' line; all three are modified in
    #   place when the adjustment is made.
    #
    # Returns the index of the matching opening block brace token in the
    # previous line ($current_line) when the arrays were adjusted, and
    # nothing (undef in scalar context) when no adjustment was made.
    #
    my ( $rfields, $rtokens, $rpatterns ) = @_;
    my $jmax = @{$rfields} - 1;
    return unless ( $jmax > 0 );

    # check for balanced else block following if/elsif/unless
    my $rfields_old = $current_line->get_rfields();

    # TBD: add handling for 'case'
    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );

    # look for the opening brace after the else, and extract the depth
    my $tok_brace = $rtokens->[0];
    my $depth_brace;
    if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }

    # probably:  "else # side_comment"
    else { return }

    my $rpatterns_old       = $current_line->get_rpatterns();
    my $rtokens_old         = $current_line->get_rtokens();
    my $maximum_field_index = $current_line->get_jmax();

    # be sure the previous if/elsif is followed by an opening paren
    # at the same depth as the else's brace
    my $jparen    = 0;
    my $tok_paren = '(' . $depth_brace;
    my $tok_test  = $rtokens_old->[$jparen];
    return unless ( $tok_test eq $tok_paren );    # shouldn't happen

    # Now find the opening block brace
    my ($jbrace);
    for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok eq $tok_brace ) {
            $jbrace = $j;
            last;
        }
    }
    return unless ( defined($jbrace) );           # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the else line to insure a match.  Add empty fields
    # as necessary.
    my $jadd = $jbrace - $jparen;
    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
    splice( @{$rfields}, 1, 0, ('') x $jadd );

    # Hand back the brace index.  Per the original note the caller should
    # force a flush after this line - confirm at the call site.  The
    # previous line is known to start with if/elsif/unless at this point,
    # so no 'case' exception is needed here.
    return $jbrace;
}
19254
{    # sub check_match

    # Alignment tokens on which vertical alignment is usually okay, so
    # that marginal cases can be treated less restrictively.
    my %is_good_alignment;

    BEGIN {
        my @good_tokens = ( qw( { ? => = ), ',' );
        @is_good_alignment{@good_tokens} = (1) x scalar(@good_tokens);
    }

    sub check_match {

        # Decide whether $new_line fits the current vertical alignment
        # group, represented by $old_line.  If it does not fit, the
        # current group is flushed with my_flush().
        #
        # Parameters: ( $new_line, $old_line ) line objects.
        #
        # File-level variables used:
        #   $previous_minimum_jmax_seen
        #   $maximum_jmax_seen
        #   $maximum_line_index
        #   $marginal_match   (may be set to 0, 1 or 2 here)
        #
        # On a match in which the new line has fewer fields than the
        # group, the new line's token, field and pattern arrays are
        # extended with the group's remaining entries.
        my ( $new_line, $old_line ) = @_;

        my $jmax                = $new_line->get_jmax();
        my $maximum_field_index = $old_line->get_jmax();

        # No match if this line has too many fields
        if ( $jmax > $maximum_field_index ) {
            my_flush();
            return;
        }

        # Neither field count changes below, so decide this just once
        my $new_has_fewer_fields = $maximum_field_index > $jmax;

        # No match if adding a shorter line would make the sequence of
        # field counts non-monotonic
        if ($new_has_fewer_fields) {
            if (   $previous_minimum_jmax_seen < $jmax
                || $old_line->get_jmax_original_line() != $maximum_jmax_seen )
            {
                my_flush();
                return;
            }
        }

        # Otherwise compare this line with the current group
        my $jmax_original_line      = $new_line->get_jmax_original_line();
        my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
        my $rtokens                 = $new_line->get_rtokens();
        my $rfields                 = $new_line->get_rfields();
        my $rpatterns               = $new_line->get_rpatterns();
        my $list_type               = $new_line->get_list_type();

        my $group_list_type = $old_line->get_list_type();
        my $old_rpatterns   = $old_line->get_rpatterns();
        my $old_rtokens     = $old_line->get_rtokens();

        # Index of the last token to be compared
        my $jlimit = $jmax - 1;
        if ($new_has_fewer_fields) {
            $jlimit = $jmax_original_line;
            if ( !length( $new_line->get_rfields()->[$jmax] ) ) { --$jlimit }
        }

        # Comma-separated lists of the same type always match ...
        if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
            foreach my $j ( 0 .. $jlimit ) {
                my $old_tok = $old_rtokens->[$j];
                next unless $old_tok;
                my $new_tok = $rtokens->[$j];
                next unless $new_tok;

                # ... unless that would align a '=>' with a ','
                if (   $old_tok =~ /^=>/ && $new_tok =~ /^,/
                    || $new_tok =~ /^=>/ && $old_tok =~ /^,/ )
                {
                    my_flush();
                    return;
                }
            }
        }

        # Everything else, apart from hanging side comments, gets a
        # token-by-token comparison
        elsif ( !$is_hanging_side_comment ) {

            my $leading_space_count = $new_line->get_leading_space_count();

            my $max_pad = 0;
            my $min_pad = 0;
            my $saw_good_alignment;

          TOKEN:
            foreach my $j ( 0 .. $jlimit ) {

                my $old_tok = $old_rtokens->[$j];
                my $new_tok = $rtokens->[$j];

                # How alignment tokens are encoded:
                # ---------------------------------
                # Each token carries extra "decoration" which helps to
                # avoid unwanted alignments.  Take these two lines:
                #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
                #   local ( $i, $f ) = &'bdiv( $xn, $xd );
                # Each has three alignment tokens: a comma, an '=' and
                # another comma.  For the first line they are encoded as
                #    ,4+local-18     =3      ,4+split-7
                # and for the second line as
                #    ,4+local-18     =3      ,4+&'bdiv-8
                # Every token has at least a name followed by a nesting
                # depth; here the '='s are at depth 3 and the commas at
                # depth 4, so tokens at different depths never align.
                # A comma is further decorated like this:
                # ,  {depth} + {container name} - {spaces to opening paren}
                # That lets the rightmost commas of the two lines above
                # be rejected, because they belong to different function
                # calls.  The encoding is produced in
                # 'sub send_lines_to_vertical_aligner'.

                # The bare token is everything before the first digit
                my $alignment_token = $new_tok;
                if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }

                # Decorated tokens match if they are identical, or, as a
                # special case for the terminal ':' of a ternary, if the
                # new container is prefixed by ':' and the old one by '?'
                my $tokens_match = $new_tok eq $old_tok
                  || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );

                if ( !$tokens_match ) {

                    # Differing tokens are tolerated only at the last
                    # position (the side comment), and only when there
                    # is at least one alignment token or this is a
                    # single item following a list.  The latter rule
                    # lets 'December' join the list below:
                    # my (@months) = (
                    #     '',       'January',   'February', 'March',
                    #     'April',  'May',       'June',     'July',
                    #     'August', 'September', 'October',  'November',
                    #     'December'
                    # );
                    # Otherwise the -lp formatting will fail.
                    my $is_tolerated =
                      $j == $jlimit && ( $j > 0 || $old_tok =~ /^,/ );

                    if ( !$is_tolerated ) {
                        my_flush();
                        return;
                    }

                    if ( $marginal_match == 0 && $maximum_line_index == 0 ) {
                        $marginal_match = 1;
                    }
                    last TOKEN;
                }

                # $pad is the number of spaces by which the current
                # field would have to grow to hold this line's field
                my $pad = length( $rfields->[$j] )
                  - $old_line->current_field_width($j);
                $pad += $leading_space_count if ( $j == 0 );

                # Track the extreme pads, ignoring the side comment, to
                # limit marginal cases
                if ( $alignment_token ne '#' ) {
                    $max_pad = $pad if ( $pad > $max_pad );
                    $min_pad = $pad if ( $pad < $min_pad );
                }
                $saw_good_alignment = 1
                  if ( $is_good_alignment{$alignment_token} );

                # Differing patterns call for extra care
                if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {

                    # the match is at best marginal
                    if ( $marginal_match == 0 && $maximum_line_index == 0 ) {
                        $marginal_match = 1;
                    }

                    # Commas: creating an alignment where none is needed
                    # can be worse than omitting one.  In this example
                    # the commas are not in named containers, and the
                    # first line should not match the next two:
                    #   ( $a, $b ) = ( $b, $r );
                    #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
                    #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
                    # So only commas in named containers may align.
                    if ( $alignment_token eq ',' ) {
                        if ( $new_tok !~ /[A-Za-z]/ ) {
                            my_flush();
                            return;
                        }
                    }

                    # Parens: large ugly spaces can occur in math
                    # expressions, so only allow a match which needs
                    # no padding at all.
                    elsif ( $alignment_token eq '(' ) {
                        if ( $pad != 0 ) {
                            my_flush();
                            return;
                        }
                    }

                    # Equals with different patterns to the left
                    elsif ( $alignment_token eq '=' ) {

                        # Be a little restrictive here.  These two lines
                        # will not be aligned:
                        #       my $variable=6;
                        #       $bb=4;
                        # One is a 'my' declaration and the other is
                        # not, so they are not very similar.  Comparing
                        # the first character of the patterns is a crude
                        # filter, but it works well enough.
                        if (
                            substr( $old_rpatterns->[$j], 0, 1 ) ne
                            substr( $rpatterns->[$j], 0, 1 ) )
                        {
                            my_flush();
                            return;
                        }

                        # Past that test this is a marginal match, as in:
                        #       $done{$$op} = 1;
                        #       $op         = compile_bblock($op);
                        # Both left sides are identifiers, but only one
                        # accesses a hash.  Treat it as a tentative
                        # match which is undone later unless the group
                        # grows beyond 2 lines.
                        if ( $maximum_line_index == 0 ) {

                            # =2 prevents being undone below
                            $marginal_match = 2;
                        }
                    }
                }

                # A line with fewer fields may not increase column
                # widths ( align3.t )
                if ( $new_has_fewer_fields && $pad > 0 ) {
                    my_flush();
                    return;
                }
            } ## end TOKEN: foreach my $j ( 0 .. $jlimit)

            # Possibly turn the "marginal match" flag back off.
            # A "marginal match" means the alignment tokens agree but
            # the other tokens (patterns) differ.  While the flag stays
            # set, alignment happens only if the group has more than two
            # lines.  The flag is cleared when the field counts agree
            # and either a good alignment token was seen or only a small
            # pad (2 spaces) is needed.  These rules came from
            # experimentation: tokens misaligned by just one or two
            # characters are annoying, but so are large gaps to less
            # important alignment tokens.
            if (   $marginal_match == 1
                && $jmax == $maximum_field_index
                && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
              )
            {
                $marginal_match = 0;
            }
        }

        # We have a match (even if marginal).
        # If this line has fewer fields than the group, copy the group's
        # remaining tokens and patterns and add empty fields.
        if ($new_has_fewer_fields) {
            my $comment = $rfields->[$jmax];
            foreach my $jj ( $jlimit .. $maximum_field_index ) {
                $rtokens->[$jj]         = $old_rtokens->[$jj];
                $rfields->[ $jj + 1 ]   = '';
                $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ];
            }

            # Note that $jmax still holds this line's own field count
            # here, so the saved text returns to its original slot and
            # the line's jmax is stored unchanged.
            $rfields->[$jmax] = $comment;
            $new_line->set_jmax($jmax);
        }
        return;
    }
}
19548
19549 sub check_fit {
19550     # Try to fit $new_line into the current group by widening fields of $old_line where needed.
19551     return unless ( $maximum_line_index >= 0 );    # no lines in the group yet
19552     my $new_line = shift;    # the line to be tested
19553     my $old_line = shift;    # the line whose field widths are compared and widened
19554
19555     my $jmax                    = $new_line->get_jmax();
19556     my $leading_space_count     = $new_line->get_leading_space_count();
19557     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19558     my $rtokens                 = $new_line->get_rtokens();
19559     my $rfields                 = $new_line->get_rfields();
19560     my $rpatterns               = $new_line->get_rpatterns();
19561     # NOTE(review): $is_hanging_side_comment, $rtokens and $rpatterns are not used again in this sub
19562     my $group_list_type = $group_lines[0]->get_list_type();    # NOTE(review): not used below
19563
19564     my $padding_so_far    = 0;    # NOTE(review): not used below
19565     my $padding_available = $old_line->get_available_space_on_right();
19566
19567     # save current columns in case this doesn't work
19568     save_alignment_columns();
19569
19570     my ( $j, $pad, $eight );    # NOTE(review): $eight is not used below
19571     my $maximum_field_index = $old_line->get_jmax();
19572     for $j ( 0 .. $jmax ) {
19573         # $pad is the length of the new field minus the current width of field $j in $old_line
19574         $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
19575         # the leading space count is counted with the first field
19576         if ( $j == 0 ) {
19577             $pad += $leading_space_count;
19578         }
19579
19580         # remember largest gap of the group, excluding gap to side comment
19581         if (   $pad < 0
19582             && $group_maximum_gap < -$pad
19583             && $j > 0
19584             && $j < $jmax - 1 )
19585         {
19586             $group_maximum_gap = -$pad;
19587         }
19588
19589         next if $pad < 0;    # the new field is narrower than the current width: nothing to widen
19590
19591         ## This patch helps sometimes, but it doesn't check to see if
19592         ## the line is too long even without the side comment.  It needs
19593         ## to be reworked.
19594         ##don't let a long token with no trailing side comment push
19595         ##side comments out, or end a group.  (sidecmt1.t)
19596         ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
19597
19598         # This line will need space; let's see if we want to accept it.
19599         if (
19600
19601             # not if this won't fit
19602             ( $pad > $padding_available )
19603
19604             # previously, there were upper bounds placed on padding here
19605             # (maximum_whitespace_columns), but they were not really helpful
19606
19607           )
19608         {
19609
19610             # revert to starting state then flush; things didn't work out
19611             restore_alignment_columns();
19612             my_flush();
19613             last;    # do not look at the remaining fields
19614         }
19615
19616         # patch to avoid excessive gaps in previous lines,
19617         # due to a line of fewer fields.
19618         #   return join( ".",
19619         #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
19620         #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
19621         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
19622
19623         # looks ok, squeeze this field in
19624         $old_line->increase_field_width( $j, $pad );
19625         $padding_available -= $pad;    # the space used here is no longer available to later fields
19626
19627         # remember largest gap of the group, excluding gap to side comment
19628         if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
19629             $group_maximum_gap = $pad;
19630         }
19631     }
19632 }
19633
19634 sub accept_line {
19635
19636     # Add a line to the alignment group being collected.  The first line
19637     # of a group gets newly made alignments, one per field; every later
19638     # line takes over the alignments of the line just before it.
19639     my ($line) = @_;
19640     $maximum_line_index++;
19641     $group_lines[$maximum_line_index] = $line;
19642
19643     if ( $maximum_line_index != 0 ) {
19644
19645         # continuing a group: use the alignments of the preceding line
19646         my $previous_line = $group_lines[ $maximum_line_index - 1 ];
19647         my @alignments    = $previous_line->get_alignments();
19648         $line->set_alignments(@alignments);
19649     }
19650     else {
19651
19652         # starting a group: the column of each alignment is the leading
19653         # space count plus the lengths of all fields up to and including
19654         # this one
19655         my $last_field = $line->get_jmax();
19656         my $fields     = $line->get_rfields();
19657         my $tokens     = $line->get_rtokens();
19658         my $column     = $line->get_leading_space_count();
19659
19660         foreach my $n ( 0 .. $last_field ) {
19661             $column += length( $fields->[$n] );
19662
19663             # the last field gets an empty alignment token
19664             my $token = ( $n < $last_field ) ? $tokens->[$n] : "";
19665             my $alignment = make_alignment( $column, $token );
19666             $line->set_alignment( $n, $alignment );
19667         }
19668         $maximum_jmax_seen = $minimum_jmax_seen = $last_field;
19669     }
19670
19671     # keep the jmax extremes of the group for the next call to append_line
19672     $previous_minimum_jmax_seen = $minimum_jmax_seen;
19673     $previous_maximum_jmax_seen = $maximum_jmax_seen;
19674 }
19675
19676 sub dump_array {
19677
19678     # debug routine: print the arguments on one line in the form (a)(b)(c)
19679     my $contents = join( ')(', @_ );
19680     print "($contents)\n";
19681 }
19682
19683 # flush() sends the current Perl::Tidy::VerticalAligner group down the
19684 # pipeline to Perl::Tidy::FileWriter.
19685
19686 # This is the external flush, which also empties the cache
19687 sub flush {
19688
19689     # A group is pending: hand it to the internal flush.
19690     return my_flush() unless ( $maximum_line_index < 0 );
19691
19692     # No group is pending, so just write out any cached line and
19693     # then reset the cache.
19694     if ($cached_line_type) {
19695         $seqno_string = $cached_seqno_string;
19696         entab_and_output(
19697             $cached_line_text, $cached_line_leading_space_count,
19698             $last_group_level_written
19699         );
19700         ( $cached_line_type, $cached_line_text ) = ( 0, "" );
19701         $cached_seqno_string = "";
19702     }
19703 }
19704
19705 # This is the internal flush, which leaves the cache intact
19706 sub my_flush {
19707     # Write out all lines of the current group (comment group or code group), then start a new group.
19708     return if ( $maximum_line_index < 0 );    # no lines in the group
19709
19710     # handle a group of comment lines
19711     if ( $group_type eq 'COMMENT' ) {
19712
19713         VALIGN_DEBUG_FLAG_APPEND0 && do {    # debug output, only when VALIGN_DEBUG_FLAG_APPEND0 is true
19714             my ( $a, $b, $c ) = caller();
19715             print
19716 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
19717
19718         };
19719         my $leading_space_count = $comment_leading_space_count;
19720         my $leading_string      = get_leading_string($leading_space_count);    # NOTE(review): $leading_string is not used below
19721
19722         # reduce the leading space count (but not below zero) if any lines are too long
19723         my $max_excess = 0;
19724         for my $i ( 0 .. $maximum_line_index ) {
19725             my $str = $group_lines[$i];    # entries of a COMMENT group are used directly as strings here
19726             my $excess =
19727               length($str) + $leading_space_count - $rOpts_maximum_line_length;
19728             if ( $excess > $max_excess ) {
19729                 $max_excess = $excess;
19730             }
19731         }
19732
19733         if ( $max_excess > 0 ) {
19734             $leading_space_count -= $max_excess;
19735             if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
19736             $last_outdented_line_at =
19737               $file_writer_object->get_output_line_number();
19738             unless ($outdented_line_count) {    # first outdented line seen so far
19739                 $first_outdented_line_at = $last_outdented_line_at;
19740             }
19741             $outdented_line_count += ( $maximum_line_index + 1 );    # every line of the group is counted
19742         }
19743
19744         # write the group of lines
19745         my $outdent_long_lines = 0;
19746         for my $i ( 0 .. $maximum_line_index ) {
19747             write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
19748                 $outdent_long_lines, "" );
19749         }
19750     }
19751
19752     # handle a group of code lines
19753     else {
19754
19755         VALIGN_DEBUG_FLAG_APPEND0 && do {    # debug output, only when VALIGN_DEBUG_FLAG_APPEND0 is true
19756             my $group_list_type = $group_lines[0]->get_list_type();
19757             my ( $a, $b, $c ) = caller();
19758             my $maximum_field_index = $group_lines[0]->get_jmax();
19759             print
19760 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
19761
19762         };
19763
19764         # some small groups are best left unaligned
19765         my $do_not_align = decide_if_aligned();
19766
19767         # optimize side comment location
19768         $do_not_align = adjust_side_comment($do_not_align);    # the flag may come back changed
19769
19770         # recover spaces for -lp option if possible
19771         my $extra_leading_spaces = get_extra_leading_spaces();
19772
19773         # all lines of this group have the same basic leading spacing
19774         my $group_leader_length = $group_lines[0]->get_leading_space_count();
19775
19776         # add extra leading spaces if helpful
19777         my $min_ci_gap = improve_continuation_indentation( $do_not_align,
19778             $group_leader_length );
19779
19780         # loop to output all lines
19781         for my $i ( 0 .. $maximum_line_index ) {
19782             my $line = $group_lines[$i];
19783             write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
19784                 $group_leader_length, $extra_leading_spaces );
19785         }
19786     }
19787     initialize_for_new_group();    # runs after either kind of group has been written
19788 }
19789
19790 sub decide_if_aligned {
19791
19792     # Decide whether a group of exactly two lines should be left unaligned
19793     # and return a true 'do not align' value if so.  Other group sizes and
19794     # groups with a matching terminal line get an empty return.
19795     return if ( $maximum_line_index != 1 || $is_matching_terminal_line );
19796
19797     my $first_line = $group_lines[0];
19798
19799     # lists are always aligned
19800     my $do_not_align = !$first_line->get_list_type();
19801
19802     # for anything else, leave the two lines unaligned if ...
19803     if ($do_not_align) {
19804         $do_not_align = (
19805
19806             # ... they were only a marginal match,
19807             $marginal_match
19808
19809               # ... or aligning would leave a big gap,
19810               || ( $group_maximum_gap > 12 )
19811
19812               # ... or they differ in the number of alignment tokens.
19813               # TODO: this could be improved.  It occasionally rejects
19814               # good matches.
19815               || ( $previous_maximum_jmax_seen != $previous_minimum_jmax_seen )
19816         );
19817     }
19818
19819     # An unaligned pair whose first line has a side comment is instead
19820     # reduced to two fields (code and side comment) and aligned that way.
19821     my $fields        = $first_line->get_rfields();
19822     my $comment_index = $first_line->get_jmax();
19823     if ( $do_not_align && $maximum_line_index > 0 ) {
19824         if ( length( $fields->[$comment_index] ) > 0 ) {
19825             combine_fields();
19826             $do_not_align = 0;
19827         }
19828     }
19829     return $do_not_align;
19830 }
19831
19832 sub adjust_side_comment {
19833     # Adjust the side comment column of the current group.  Takes the 'do not align' flag and returns it (it may be reset to 0).
19834     my $do_not_align = shift;
19835
19836     # let's see if we can move the side comment field out a little
19837     # to improve readability (the last field is always a side comment field)
19838     my $have_side_comment       = 0;
19839     my $first_side_comment_line = -1;
19840     my $maximum_field_index     = $group_lines[0]->get_jmax();
19841     for my $i ( 0 .. $maximum_line_index ) {
19842         my $line = $group_lines[$i];
19843         # stop at the first line of the group whose last field is not empty
19844         if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
19845             $have_side_comment       = 1;
19846             $first_side_comment_line = $i;
19847             last;
19848         }
19849     }
19850
19851     my $kmax = $maximum_field_index + 1;    # so $kmax - 2 below is the same index as $maximum_field_index - 1
19852
19853     if ($have_side_comment) {
19854
19855         my $line = $group_lines[0];    # all adjustments below are made through the first line of the group
19856
19857         # the maximum space without exceeding the line length:
19858         my $avail = $line->get_available_space_on_right();
19859
19860         # try to use the previous comment column
19861         my $side_comment_column = $line->get_column( $kmax - 2 );
19862         my $move                = $last_comment_column - $side_comment_column;
19863
19864 ##        my $sc_line0 = $side_comment_history[0]->[0];
19865 ##        my $sc_col0  = $side_comment_history[0]->[1];
19866 ##        my $sc_line1 = $side_comment_history[1]->[0];
19867 ##        my $sc_col1  = $side_comment_history[1]->[1];
19868 ##        my $sc_line2 = $side_comment_history[2]->[0];
19869 ##        my $sc_col2  = $side_comment_history[2]->[1];
19870 ##
19871 ##        # FUTURE UPDATES:
19872 ##        # Be sure to ignore 'do not align' and  '} # end comments'
19873 ##        # Find first $move > 0 and $move <= $avail as follows:
19874 ##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
19875 ##        # 2. try sc_col2 if (line-sc_line2) < 12
19876 ##        # 3. try min possible space, plus up to 8,
19877 ##        # 4. try min possible space
19878
19879         if ( $kmax > 0 && !$do_not_align ) {
19880
19881             # but if this doesn't work, give up and use the minimum space
19882             if ( $move > $avail ) {
19883                 $move = $rOpts_minimum_space_to_comment - 1;
19884             }
19885
19886             # but we want some minimum space to the comment
19887             my $min_move = $rOpts_minimum_space_to_comment - 1;
19888             if (   $move >= 0
19889                 && $last_side_comment_length > 0
19890                 && ( $first_side_comment_line == 0 )
19891                 && $group_level == $last_group_level_written )
19892             {    # no minimum is imposed when all four of these conditions hold
19893                 $min_move = 0;
19894             }
19895
19896             if ( $move < $min_move ) {
19897                 $move = $min_move;
19898             }
19899
19900             # previously, an upper bound was placed on $move here,
19901             # (maximum_space_to_comment), but it was not helpful
19902
19903             # don't exceed the available space
19904             if ( $move > $avail ) { $move = $avail }
19905
19906             # we can only increase space, never decrease
19907             if ( $move > 0 ) {
19908                 $line->increase_field_width( $maximum_field_index - 1, $move );
19909             }
19910
19911             # remember this column for the next group
19912             $last_comment_column = $line->get_column( $kmax - 2 );
19913         }
19914         else {
19915
19916             # try to at least line up the existing side comment location
19917             if ( $kmax > 0 && $move > 0 && $move < $avail ) {
19918                 $line->increase_field_width( $maximum_field_index - 1, $move );
19919                 $do_not_align = 0;    # the caller receives this changed flag
19920             }
19921
19922             # reset side comment column if we can't align
19923             else {
19924                 forget_side_comment();
19925             }
19926         }
19927     }
19928     return $do_not_align;
19929 }
19930
19931 sub improve_continuation_indentation {
19932     my ( $do_not_align, $group_leader_length ) = @_;
19933     # Returns the extra indentation ($min_ci_gap) for continuation lines; currently always 0 (see below).
19934     # See if we can increase the continuation indentation
19935     # to move all continuation lines closer to the next field
19936     # (unless it is a comment).
19937     #
19938     # '$min_ci_gap' is the extra indentation that we may need to introduce.
19939     # We will only introduce this to fields which already have some ci.
19940     # Without this variable, we would occasionally get something like this
19941     # (Complex.pm):
19942     #
19943     # use overload '+' => \&plus,
19944     #   '-'            => \&minus,
19945     #   '*'            => \&multiply,
19946     #   ...
19947     #   'tan'          => \&tan,
19948     #   'atan2'        => \&atan2,
19949     #
19950     # Whereas with this variable, we can shift variables over to get this:
19951     #
19952     # use overload '+' => \&plus,
19953     #          '-'     => \&minus,
19954     #          '*'     => \&multiply,
19955     #          ...
19956     #          'tan'   => \&tan,
19957     #          'atan2' => \&atan2,
19958
19959     ## BUB: Deactivated####################
19960     # The trouble with this patch is that it may, for example,
19961     # move in some 'or's  or ':'s, and leave some out, so that the
19962     # left edge alignment suffers.
19963     return 0;    # unconditional: everything after this line is unreachable
19964     ###########################################
19965     # (unreachable) find the smallest gap after the first field among lines indented more than the group leader
19966     my $maximum_field_index = $group_lines[0]->get_jmax();
19967
19968     my $min_ci_gap = $rOpts_maximum_line_length;    # starting value; reset to 0 below if no line lowers it
19969     if ( $maximum_field_index > 1 && !$do_not_align ) {
19970
19971         for my $i ( 0 .. $maximum_line_index ) {
19972             my $line                = $group_lines[$i];
19973             my $leading_space_count = $line->get_leading_space_count();
19974             my $rfields             = $line->get_rfields();
19975
19976             my $gap =
19977               $line->get_column(0) -
19978               $leading_space_count -
19979               length( $$rfields[0] );
19980
19981             if ( $leading_space_count > $group_leader_length ) {
19982                 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
19983             }
19984         }
19985
19986         if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
19987             $min_ci_gap = 0;
19988         }
19989     }
19990     else {
19991         $min_ci_gap = 0;
19992     }
19993     return $min_ci_gap;
19994 }
19995
19996 sub write_vertically_aligned_line {
19997     # Join the fields of one line, with padding between them, and pass the result to write_leader_and_string.
19998     my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
19999         $extra_leading_spaces )
20000       = @_;
20001     my $rfields                   = $line->get_rfields();
20002     my $leading_space_count       = $line->get_leading_space_count();
20003     my $outdent_long_lines        = $line->get_outdent_long_lines();
20004     my $maximum_field_index       = $line->get_jmax();
20005     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
20006
20007     # add any extra spaces
20008     if ( $leading_space_count > $group_leader_length ) {    # only lines indented more than the group leader
20009         $leading_space_count += $min_ci_gap;
20010     }
20011
20012     my $str = $$rfields[0];    # the output string starts with the first field
20013
20014     # loop to concatenate all fields of this line and needed padding
20015     my $total_pad_count = 0;    # NOTE(review): reset to 0 on every pass below, so it never carries over between fields
20016     my ( $j, $pad );
20017     for $j ( 1 .. $maximum_field_index ) {
20018
20019         # skip zero-length side comments
20020         last
20021           if ( ( $j == $maximum_field_index )
20022             && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
20023           );
20024
20025         # compute spaces of padding before this field
20026         my $col = $line->get_column( $j - 1 );
20027         $pad = $col - ( length($str) + $leading_space_count );
20028
20029         if ($do_not_align) {    # unaligned: no padding between fields, fixed spacing before the last field
20030             $pad =
20031               ( $j < $maximum_field_index )
20032               ? 0
20033               : $rOpts_minimum_space_to_comment - 1;
20034         }
20035
20036         # if the -fpsc flag is set, move the side comment to the selected
20037         # column if and only if it is possible, ignoring constraints on
20038         # line length and minimum space to comment
20039         if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
20040         {
20041             my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
20042             if ( $newpad >= 0 ) { $pad = $newpad; }
20043         }
20044
20045         # accumulate the padding
20046         if ( $pad > 0 ) { $total_pad_count += $pad; }
20047
20048         # add this field
20049         if ( !defined $$rfields[$j] ) {
20050             write_diagnostics("UNDEFined field at j=$j\n");
20051         }
20052
20053         # only add padding when we have a finite field;
20054         # this avoids extra terminal spaces if we have empty fields
20055         if ( length( $$rfields[$j] ) > 0 ) {
20056             $str .= ' ' x $total_pad_count;
20057             $total_pad_count = 0;
20058             $str .= $$rfields[$j];
20059         }
20060         else {
20061             $total_pad_count = 0;
20062         }
20063
20064         # update side comment history buffer
20065         if ( $j == $maximum_field_index ) {    # not reached for an empty last field because of the 'last' above
20066             my $lineno = $file_writer_object->get_output_line_number();
20067             shift @side_comment_history;    # drop the oldest entry, then add the newest
20068             push @side_comment_history, [ $lineno, $col ];
20069         }
20070     }
20071
20072     my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
20073
20074     # ship this line off
20075     write_leader_and_string( $leading_space_count + $extra_leading_spaces,
20076         $str, $side_comment_length, $outdent_long_lines,
20077         $rvertical_tightness_flags );
20078 }
20079
20080 sub get_extra_leading_spaces {
20081
20082     #----------------------------------------------------------
20083     # Return the number of extra leading spaces (for the -lp
20084     # option) to give to the lines of the current group.
20085     #
20086     # When a list has side comments, sub scan_list has to dump it
20087     # before seeing all of it.  It then falls back to the standard
20088     # indentation but records how many spaces it would have liked.
20089     # If the lines of the list have come back together in this
20090     # group, some or all of that space can be recovered here.
20091     #----------------------------------------------------------
20092
20093     # nothing to recover unless the group allows it ...
20094     return 0 unless ($extra_indent_ok);
20095
20096     # ... and the indentation of its first line is a reference
20097     my $indentation = $group_lines[0]->get_indentation();
20098     return 0 unless ( ref($indentation) );
20099
20100     my $spaces_wanted = get_RECOVERABLE_SPACES($indentation);
20101
20102     # every line of the group must have this same indentation object
20103     foreach my $n ( 1 .. $maximum_line_index ) {
20104         if ( $indentation != $group_lines[$n]->get_indentation() ) {
20105             $spaces_wanted = 0;
20106             last;
20107         }
20108     }
20109     return 0 unless ($spaces_wanted);
20110
20111     # take what is wanted, but no more than the maximum space
20112     # available without exceeding the line length
20113     my $avail = $group_lines[0]->get_available_space_on_right();
20114
20115     my $extra_leading_spaces = $spaces_wanted;
20116     unless ( $avail > $spaces_wanted ) {
20117         $extra_leading_spaces = $avail;
20118     }
20119
20120     # record the adjustment in the indentation object because with -icp
20121     # the terminal ');' will use the same adjustment
20122     $indentation->permanently_decrease_AVAILABLE_SPACES(
20123         -$extra_leading_spaces );
20124
20125     return $extra_leading_spaces;
20126 }
20127
20128 sub combine_fields {
20129
20130     # Reduce every line of the group to two fields: all fields before
20131     # the side comment joined into one, and the side comment field.
20132     # ( sidecmt.t )
20133     # Uses global variables:
20134     #  @group_lines
20135     #  $maximum_line_index
20136     my $comment_index = $group_lines[0]->get_jmax();
20137
20138     # First pass: merge the fields and zero both columns of each line.
20139     foreach my $n ( 0 .. $maximum_line_index ) {
20140         my $line   = $group_lines[$n];
20141         my $fields = $line->get_rfields();
20142         for my $k ( 1 .. $comment_index - 1 ) {
20143             $fields->[0] .= $fields->[$k];
20144         }
20145         $fields->[1] = $fields->[$comment_index];
20146
20147         $line->set_jmax(1);
20148         $line->set_column( 0, 0 );
20149         $line->set_column( 1, 0 );
20150     }
20151
20152     # Second pass: widen the two fields again wherever a line needs it.
20153     # The leading space count of a line counts toward its first field.
20154     foreach my $n ( 0 .. $maximum_line_index ) {
20155         my $line   = $group_lines[$n];
20156         my $fields = $line->get_rfields();
20157         for my $k ( 0, 1 ) {
20158             my $shortfall =
20159               length( $fields->[$k] ) - $line->current_field_width($k);
20160             $shortfall += $line->get_leading_space_count() if ( $k == 0 );
20161             $line->increase_field_width( $k, $shortfall ) if ( $shortfall > 0 );
20162         }
20163     }
20164 }
20165
20166 sub get_output_line_number {
20167
20168     # Lines already written plus lines still buffered in this group.
20169     my ($self) = @_;
20170     my $written_count = $file_writer_object->get_output_line_number();
20171     return $written_count + $maximum_line_index + 1;
20172 }
20173
20174 sub write_leader_and_string {
20175
20176     my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
20177         $rvertical_tightness_flags )
20178       = @_;
20179
20180     # handle outdenting of long lines:
20181     if ($outdent_long_lines) {
20182         my $excess =
20183           length($str) -
20184           $side_comment_length +
20185           $leading_space_count -
20186           $rOpts_maximum_line_length;
20187         if ( $excess > 0 ) {
20188             $leading_space_count = 0;
20189             $last_outdented_line_at =
20190               $file_writer_object->get_output_line_number();
20191
20192             unless ($outdented_line_count) {
20193                 $first_outdented_line_at = $last_outdented_line_at;
20194             }
20195             $outdented_line_count++;
20196         }
20197     }
20198
20199     # Make preliminary leading whitespace.  It could get changed
20200     # later by entabbing, so we have to keep track of any changes
20201     # to the leading_space_count from here on.
20202     my $leading_string =
20203       $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
20204
20205     # Unpack any recombination data; it was packed by
20206     # sub send_lines_to_vertical_aligner. Contents:
20207     #
20208     #   [0] type: 1=opening  2=closing  3=opening block brace
20209     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
20210     #             if closing: spaces of padding to use
20211     #   [2] sequence number of container
20212     #   [3] valid flag: do not append if this flag is false
20213     #
20214     my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20215         $seqno_end );
20216     if ($rvertical_tightness_flags) {
20217         (
20218             $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20219             $seqno_end
20220         ) = @{$rvertical_tightness_flags};
20221     }
20222
20223     $seqno_string = $seqno_end;
20224
20225     # handle any cached line ..
20226     # either append this line to it or write it out
20227     if ( length($cached_line_text) ) {
20228
20229         if ( !$cached_line_valid ) {
20230             entab_and_output( $cached_line_text,
20231                 $cached_line_leading_space_count,
20232                 $last_group_level_written );
20233         }
20234
20235         # handle cached line with opening container token
20236         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
20237
20238             my $gap = $leading_space_count - length($cached_line_text);
20239
20240             # handle option of just one tight opening per line:
20241             if ( $cached_line_flag == 1 ) {
20242                 if ( defined($open_or_close) && $open_or_close == 1 ) {
20243                     $gap = -1;
20244                 }
20245             }
20246
20247             if ( $gap >= 0 ) {
20248                 $leading_string      = $cached_line_text . ' ' x $gap;
20249                 $leading_space_count = $cached_line_leading_space_count;
20250                 $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
20251             }
20252             else {
20253                 entab_and_output( $cached_line_text,
20254                     $cached_line_leading_space_count,
20255                     $last_group_level_written );
20256             }
20257         }
20258
20259         # handle cached line to place before this closing container token
20260         else {
20261             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
20262
20263             if ( length($test_line) <= $rOpts_maximum_line_length ) {
20264
20265                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
20266
20267                 # Patch to outdent closing tokens ending in ');'
20268                 # If we are joining a line like ');' to a previous stacked
20269                 # set of closing tokens, then decide if we may outdent the
20270                 # combined stack to the indentation of the ');'.  Since we
20271                 # should not normally outdent any of the other tokens more than
20272                 # the indentation of the lines that contained them, we will
20273                 # only do this if all of the corresponding opening
20274                 # tokens were on the same line.  This can happen with
20275                 # -sot and -sct.  For example, it is ok here:
20276                 #   __PACKAGE__->load_components( qw(
20277                 #         PK::Auto
20278                 #         Core
20279                 #   ));
20280                 #
20281                 #   But, for example, we do not outdent in this example because
20282                 #   that would put the closing sub brace out farther than the
20283                 #   opening sub brace:
20284                 #
20285                 #   perltidy -sot -sct
20286                 #   $c->Tk::bind(
20287                 #       '<Control-f>' => sub {
20288                 #           my ($c) = @_;
20289                 #           my $e = $c->XEvent;
20290                 #           itemsUnderArea $c;
20291                 #       } );
20292                 #
20293                 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
20294
20295                     # The way to tell this is if the stacked sequence numbers
20296                     # of this output line are the reverse of the stacked
20297                     # sequence numbers of the previous non-blank line of
20298                     # sequence numbers.  So we can join if the previous
20299                     # nonblank string of tokens is the mirror image.  For
20300                     # example if stack )}] is 13:8:6 then we are looking for a
20301                     # leading stack like [{( which is 6:8:13 We only need to
20302                     # check the two ends, because the intermediate tokens must
20303                     # fall in order.  Note on speed: having to split on colons
20304                     # and eliminate multiple colons might appear to be slow,
20305                     # but it's not an issue because we almost never come
20306                     # through here.  In a typical file we don't.
20307                     $seqno_string               =~ s/^:+//;
20308                     $last_nonblank_seqno_string =~ s/^:+//;
20309                     $seqno_string               =~ s/:+/:/g;
20310                     $last_nonblank_seqno_string =~ s/:+/:/g;
20311
20312                     # how many spaces can we outdent?
20313                     my $diff =
20314                       $cached_line_leading_space_count - $leading_space_count;
20315                     if (   $diff > 0
20316                         && length($seqno_string)
20317                         && length($last_nonblank_seqno_string) ==
20318                         length($seqno_string) )
20319                     {
20320                         my @seqno_last =
20321                           ( split ':', $last_nonblank_seqno_string );
20322                         my @seqno_now = ( split ':', $seqno_string );
20323                         if (   $seqno_now[-1] == $seqno_last[0]
20324                             && $seqno_now[0] == $seqno_last[-1] )
20325                         {
20326
20327                             # OK to outdent ..
20328                             # for absolute safety, be sure we only remove
20329                             # whitespace
20330                             my $ws = substr( $test_line, 0, $diff );
20331                             if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
20332
20333                                 $test_line = substr( $test_line, $diff );
20334                                 $cached_line_leading_space_count -= $diff;
20335                             }
20336
20337                             # shouldn't happen, but not critical:
20338                             ##else {
20339                             ## ERROR transferring indentation here
20340                             ##}
20341                         }
20342                     }
20343                 }
20344
20345                 $str                 = $test_line;
20346                 $leading_string      = "";
20347                 $leading_space_count = $cached_line_leading_space_count;
20348             }
20349             else {
20350                 entab_and_output( $cached_line_text,
20351                     $cached_line_leading_space_count,
20352                     $last_group_level_written );
20353             }
20354         }
20355     }
20356     $cached_line_type = 0;
20357     $cached_line_text = "";
20358
20359     # make the line to be written
20360     my $line = $leading_string . $str;
20361
20362     # write or cache this line
20363     if ( !$open_or_close || $side_comment_length > 0 ) {
20364         entab_and_output( $line, $leading_space_count, $group_level );
20365     }
20366     else {
20367         $cached_line_text                = $line;
20368         $cached_line_type                = $open_or_close;
20369         $cached_line_flag                = $tightness_flag;
20370         $cached_seqno                    = $seqno;
20371         $cached_line_valid               = $valid;
20372         $cached_line_leading_space_count = $leading_space_count;
20373         $cached_seqno_string             = $seqno_string;
20374     }
20375
20376     $last_group_level_written = $group_level;
20377     $last_side_comment_length = $side_comment_length;
20378     $extra_indent_ok          = 0;
20379 }
20380
sub entab_and_output {

    # Replace the leading whitespace of an output line with tabs, if
    # tabbing has been requested, and then write the line.
    #
    # Given:
    #   $line                = the text of the line (a newline is appended
    #                          here when the line is written)
    #   $leading_space_count = the number of leading whitespace characters
    #   $level               = the indentation level to use when writing
    #                          one tab per level
    my ( $line, $leading_space_count, $level ) = @_;

    # The line is currently correct if there is no tabbing (recommended!)
    # We may have to lop off some leading spaces and replace with tabs.
    if (   $leading_space_count > 0
        && ( $rOpts_tabs || $rOpts_entab_leading_whitespace )
        && $rOpts_indent_columns > 0 )
    {
        my $leading_string;

        # Handle entab option
        if ($rOpts_entab_leading_whitespace) {
            my $space_count =
              $leading_space_count % $rOpts_entab_leading_whitespace;
            my $tab_count =
              int( $leading_space_count / $rOpts_entab_leading_whitespace );
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }

        # Handle option of one tab per level
        else {
            my $space_count =
              $leading_space_count - $level * $rOpts_indent_columns;

            # shouldn't happen; report the level which was actually used
            # in the calculation and fall back to plain spaces
            if ( $space_count < 0 ) {
                warning(
"Error entabbing in entab_and_output: for level=$level count=$leading_space_count\n"
                );
                $leading_string = ( ' ' x $leading_space_count );
            }
            else {
                $leading_string = ( "\t" x $level ) . ( ' ' x $space_count );
            }
        }

        # for safety, only replace the leading characters if they really
        # are whitespace
        if ( $line =~ /^\s{$leading_space_count}/ ) {
            substr( $line, 0, $leading_space_count ) = $leading_string;
        }
        else {

            # REMOVE AFTER TESTING
            # shouldn't happen - program error counting whitespace
            # we'll skip entabbing
            warning(
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
            );
        }
    }
    $file_writer_object->write_code_line( $line . "\n" );

    # remember the sequence number string of the last line which had one
    if ($seqno_string) {
        $last_nonblank_seqno_string = $seqno_string;
    }
}
20452
{    # begin get_leading_string

    # Cache of leading strings, indexed by whitespace count.  Only strings
    # which depend on nothing but the whitespace count are stored here.
    my @leading_string_cache;

    sub get_leading_string {

        # define the leading whitespace string for this line..
        # Given: the number of leading whitespace columns wanted
        # Returns: a string of tabs and/or spaces, or "" for a count <= 0
        my $leading_whitespace_count = shift;

        # Handle case of zero whitespace, which includes multi-line quotes
        # (which may have a finite level; this prevents tab problems)
        if ( $leading_whitespace_count <= 0 ) {
            return "";
        }

        my $want_tabs = ( $rOpts_tabs || $rOpts_entab_leading_whitespace )
          && $rOpts_indent_columns > 0;

        # Handle option of one tab per level.  The result depends on the
        # current $group_level as well as the whitespace count, so it
        # cannot be looked up in (or stored in) the cache, which is indexed
        # by whitespace count alone.
        if ( $want_tabs && !$rOpts_entab_leading_whitespace ) {
            my $space_count =
              $leading_whitespace_count - $group_level * $rOpts_indent_columns;

            # shouldn't happen:
            if ( $space_count < 0 ) {
                warning(
"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
                );
                return ( ' ' x $leading_whitespace_count );
            }
            return ( "\t" x $group_level ) . ( ' ' x $space_count );
        }

        # look for previous result
        if ( $leading_string_cache[$leading_whitespace_count] ) {
            return $leading_string_cache[$leading_whitespace_count];
        }

        # must compute a string for this number of spaces
        my $leading_string;

        # Handle simple case of no tabs
        if ( !$want_tabs ) {
            $leading_string = ( ' ' x $leading_whitespace_count );
        }

        # Handle entab option
        else {
            my $space_count =
              $leading_whitespace_count % $rOpts_entab_leading_whitespace;
            my $tab_count = int(
                $leading_whitespace_count / $rOpts_entab_leading_whitespace );
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }
        $leading_string_cache[$leading_whitespace_count] = $leading_string;
        return $leading_string;
    }
}    # end get_leading_string
20513
sub report_anything_unusual {

    # Write a note to the log file about any long lines which had to
    # be outdented
    my $self = shift;
    if ( $outdented_line_count > 0 ) {

        # collect the messages in the order they are to be written
        my @notes = (
            "$outdented_line_count long lines were outdented:\n",
            "  First at output line $first_outdented_line_at\n",
        );
        if ( $outdented_line_count > 1 ) {
            push @notes, "   Last at output line $last_outdented_line_at\n";
        }
        push @notes,
          "  use -noll to prevent outdenting, -l=n to increase line length\n";

        foreach my $note (@notes) {
            write_logfile_entry($note);
        }
        write_logfile_entry("\n");
    }
}
20532
20533 #####################################################################
20534 #
20535 # the Perl::Tidy::FileWriter class writes the output file
20536 #
20537 #####################################################################
20538
20539 package Perl::Tidy::FileWriter;
20540
20541 # Maximum number of little messages; probably need not be changed.
20542 use constant MAX_NAG_MESSAGES => 6;
20543
sub write_logfile_entry {

    # Forward a message to the logger object, if we were given one
    my ( $self, @message ) = @_;
    my $logger = $self->{_logger_object};
    return $logger && $logger->write_logfile_entry(@message);
}
20551
sub new {

    # Create a FileWriter.
    # Given: the object which receives output lines, the options hash
    # ref, and the logger object
    my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;

    my %self = (
        _line_sink_object   => $line_sink_object,
        _logger_object      => $logger_object,
        _rOpts              => $rOpts,
        _output_line_number => 1,
    );

    # all counters and line length statistics start at zero
    foreach my $key (
        qw(
        _consecutive_blank_lines
        _consecutive_nonblank_lines
        _first_line_length_error
        _max_line_length_error
        _last_line_length_error
        _first_line_length_error_at
        _max_line_length_error_at
        _last_line_length_error_at
        _line_length_error_count
        _max_output_line_length
        _max_output_line_length_at
        )
      )
    {
        $self{$key} = 0;
    }

    return bless \%self, $class;
}
20574
sub tee_on {

    # pass a 'tee_on' request along to the line sink
    my ($self) = @_;
    my $line_sink_object = $self->{_line_sink_object};
    $line_sink_object->tee_on();
}
20579
sub tee_off {

    # pass a 'tee_off' request along to the line sink
    my ($self) = @_;
    my $line_sink_object = $self->{_line_sink_object};
    $line_sink_object->tee_off();
}
20584
sub get_output_line_number {

    # return the stored output line number
    my ($self) = @_;
    return $self->{_output_line_number};
}
20589
sub decrement_output_line_number {

    # reduce the stored output line number by one
    my ($self) = @_;
    return $self->{_output_line_number}--;
}
20594
sub get_consecutive_nonblank_lines {

    # return the current count of consecutive nonblank lines
    my ($self) = @_;
    return $self->{_consecutive_nonblank_lines};
}
20599
sub reset_consecutive_blank_lines {

    # start counting consecutive blank lines from zero again
    my ($self) = @_;
    return $self->{_consecutive_blank_lines} = 0;
}
20604
sub want_blank_line {

    # write a blank line unless the previous output line was blank
    my ($self) = @_;
    if ( !$self->{_consecutive_blank_lines} ) {
        $self->write_blank_code_line();
    }
}
20611
sub write_blank_code_line {

    # Write a blank line.  Unless $forced is true, the line is skipped
    # when the maximum number of consecutive blank lines has been reached.
    my ( $self, $forced ) = @_;

    unless ($forced) {
        my $max_blanks =
          $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
        return if ( $self->{_consecutive_blank_lines} >= $max_blanks );
    }

    $self->{_consecutive_nonblank_lines} = 0;
    $self->{_consecutive_blank_lines}++;
    $self->write_line("\n");
}
20624
sub write_code_line {

    # Write a line of code, keeping the blank and nonblank line counters
    # up to date.  A whitespace-only line is dropped if the maximum number
    # of consecutive blank lines has already been written.
    my ( $self, $text ) = @_;

    if ( $text !~ /^\s*$/ ) {
        $self->{_consecutive_blank_lines} = 0;
        $self->{_consecutive_nonblank_lines}++;
    }
    else {
        my $max_blanks =
          $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
        return if ( $self->{_consecutive_blank_lines} >= $max_blanks );
        $self->{_consecutive_blank_lines}++;
        $self->{_consecutive_nonblank_lines} = 0;
    }
    $self->write_line($text);
}
20643
sub write_line {

    # Send one line of text to the line sink, and update the output line
    # number and the line length statistics
    my ( $self, $text ) = @_;

    # TODO: go through and see if the test is necessary here
    $self->{_output_line_number}++ if ( $text =~ /\n$/ );

    $self->{_line_sink_object}->write_line($text);

    my $rOpts      = $self->{_rOpts};
    my $char_count = length($text);

    # This calculation of excess line length ignores any internal tabs;
    # each leading tab is counted as one full indentation
    my $exceed = $char_count - $rOpts->{'maximum-line-length'} - 1;
    if ( $text =~ /^(\t+)/ ) {
        $exceed += length($1) * ( $rOpts->{'indent-columns'} - 1 );
    }

    # Note that we just incremented output line number to future value
    # so we must subtract 1 for current line number
    if ( $char_count > 1 + $self->{_max_output_line_length} ) {
        $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
        $self->{_max_output_line_length}    = $char_count - 1;
    }

    if ( $exceed > 0 ) {
        my $error_at = $self->{_output_line_number} - 1;

        # the first error is only recorded once
        if ( $self->{_line_length_error_count} == 0 ) {
            $self->{_first_line_length_error}    = $exceed;
            $self->{_first_line_length_error_at} = $error_at;
        }

        # the last error is always this one
        $self->{_last_line_length_error}    = $exceed;
        $self->{_last_line_length_error_at} = $error_at;

        if ( $exceed > $self->{_max_line_length_error} ) {
            $self->{_max_line_length_error}    = $exceed;
            $self->{_max_line_length_error_at} = $error_at;
        }

        # only the first few errors get an individual message
        if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
            $self->write_logfile_entry(
                "Line length exceeded by $exceed characters\n");
        }
        $self->{_line_length_error_count}++;
    }

}
20691
sub report_line_length_errors {

    # Write a summary of the line length statistics to the log file
    my ($self) = @_;
    my $rOpts                   = $self->{_rOpts};
    my $line_length_error_count = $self->{_line_length_error_count};

    if ( $line_length_error_count != 0 ) {

        my $is_plural = ( $line_length_error_count > 1 );

        # heading, with the plural form if needed
        my $word = $is_plural ? "s" : "";
        $self->write_logfile_entry(
"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
        );

        # the first (or only) error
        $word = $is_plural ? "First" : "";
        my ( $first_line_length_error, $first_line_length_error_at ) =
          @{$self}{qw(_first_line_length_error _first_line_length_error_at)};
        $self->write_logfile_entry(
" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
        );

        # the maximum and last errors are only of interest if there
        # was more than one
        if ($is_plural) {
            my (
                $max_line_length_error,  $max_line_length_error_at,
                $last_line_length_error, $last_line_length_error_at
              )
              = @{$self}{
                qw(
                  _max_line_length_error
                  _max_line_length_error_at
                  _last_line_length_error
                  _last_line_length_error_at
                  )
              };
            $self->write_logfile_entry(
" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
            );
            $self->write_logfile_entry(
" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
            );
        }
    }
    else {

        # no errors; just report the longest line
        $self->write_logfile_entry(
            "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
        my ( $max_output_line_length, $max_output_line_length_at ) =
          @{$self}{qw(_max_output_line_length _max_output_line_length_at)};
        $self->write_logfile_entry(
"  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
        );
    }
}
20734
20735 #####################################################################
20736 #
20737 # The Perl::Tidy::Debugger class shows line tokenization
20738 #
20739 #####################################################################
20740
20741 package Perl::Tidy::Debugger;
20742
sub new {

    # Create a Debugger for the named debug file.  The file is not
    # opened here.
    my $class    = shift;
    my $filename = shift;

    my $self = {
        _debug_file        => $filename,
        _debug_file_opened => 0,
        _fh                => undef,
    };
    return bless $self, $class;
}
20753
sub really_open_debug_file {

    # Open the debug file for writing and write a heading line to it.
    # The file handle is stored in $self->{_fh}; it will be undefined if
    # the file could not be opened, in which case a warning is issued.

    my $self       = shift;
    my $debug_file = $self->{_debug_file};

    # Give the mode as a separate argument so that no characters in the
    # file name can be taken as part of the open mode
    my $fh = IO::File->new( $debug_file, 'w' );
    unless ($fh) {
        warn("can't open $debug_file: $!\n");
    }

    # The file is marked as 'opened' even on failure so that we only make
    # one attempt to open it
    $self->{_debug_file_opened} = 1;
    $self->{_fh}                = $fh;

    # we cannot print the heading without a file handle
    return unless ($fh);

    print $fh
      "Use -dump-token-types (-dtt) to get a list of token type codes\n";
}
20767
sub close_debug_file {

    # Close the debug file if it was opened.  Any error while closing
    # is trapped and ignored.

    my ($self) = @_;
    if ( $self->{_debug_file_opened} ) {
        my $fh = $self->{_fh};
        eval { $fh->close() };
    }
}
20777
sub write_debug_entry {

    # This is a debug dump routine which may be modified as necessary
    # to dump tokens on a line-by-line basis.  The output will be written
    # to the .DEBUG file when the -D flag is entered.
    #
    # Given:
    #   $line_of_tokens = a hash ref for one tokenized line; the keys
    #     read here are _line_text, _rtoken_type, _rtokens, _rlevels,
    #     _rslevels, _rblock_type, _line_number and _line_type
    #
    # As written, two lines are output for each input line: the line
    # rebuilt from its tokens, and beneath it a line in which each token
    # is replaced by its type, repeated to the length of the token when
    # the type is a single character.  Both are prefixed with the input
    # line number.  Nothing is written if there is no debug file handle.
    my $self           = shift;
    my $line_of_tokens = shift;

    my $input_line        = $line_of_tokens->{_line_text};
    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
    my $rtokens           = $line_of_tokens->{_rtokens};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $rslevels          = $line_of_tokens->{_rslevels};
    my $rblock_type       = $line_of_tokens->{_rblock_type};
    my $input_line_number = $line_of_tokens->{_line_number};
    my $line_type         = $line_of_tokens->{_line_type};

    my ( $j, $num );

    my $token_str              = "$input_line_number: ";
    my $reconstructed_original = "$input_line_number: ";
    my $block_str              = "$input_line_number: ";

    #$token_str .= "$line_type: ";
    #$reconstructed_original .= "$line_type: ";

    my $pattern   = "";
    my @next_char = ( '"', '"' );
    my $i_next    = 0;
    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
    my $fh = $self->{_fh};

    # We cannot write anything if there is no file handle (for example,
    # if the debug file could not be opened)
    return unless ($fh);

    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {

        # testing patterns
        if ( $$rtoken_type[$j] eq 'k' ) {
            $pattern .= $$rtokens[$j];
        }
        else {
            $pattern .= $$rtoken_type[$j];
        }
        $reconstructed_original .= $$rtokens[$j];
        $block_str .= "($$rblock_type[$j])";
        $num = length( $$rtokens[$j] );
        my $type_str = $$rtoken_type[$j];

        # be sure there are no blank tokens (shouldn't happen)
        # This can only happen if a programming error has been made
        # because all valid tokens are non-blank
        if ( $type_str eq ' ' ) {
            print $fh "BLANK TOKEN on the next line\n";
            $type_str = $next_char[$i_next];
            $i_next   = 1 - $i_next;
        }

        # stretch a one-character type to the width of its token so that
        # the two output lines stay aligned
        if ( length($type_str) == 1 ) {
            $type_str = $type_str x $num;
        }
        $token_str .= $type_str;
    }

    # Write what you want here ...
    # print $fh "$input_line\n";
    # print $fh "$pattern\n";
    print $fh "$reconstructed_original\n";
    print $fh "$token_str\n";

    #print $fh "$block_str\n";
}
20847
20848 #####################################################################
20849 #
20850 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
20851 # method for returning the next line to be parsed, as well as a
20852 # 'peek_ahead()' method
20853 #
20854 # The input parameter is an object with a 'get_line()' method
20855 # which returns the next line to be parsed
20856 #
20857 #####################################################################
20858
20859 package Perl::Tidy::LineBuffer;
20860
sub new {

    # Create a LineBuffer which draws its lines from the given source
    # object.  The lookahead buffer starts out empty.

    my ( $class, $line_source_object ) = @_;

    my $self = {
        _line_source_object => $line_source_object,
        _rlookahead_buffer  => [],
    };
    return bless $self, $class;
}
20871
sub peek_ahead {

    # Return a line which is ahead of the current read position, without
    # removing it from the input stream.
    #
    # Given:
    #   $buffer_index = how far ahead to look; 0 is the line which the
    #                   next call to get_line() will return
    # Returns the line, or undef if the source has no such line.
    my $self               = shift;
    my $buffer_index       = shift;
    my $line_source_object = $self->{_line_source_object};
    my $rlookahead_buffer  = $self->{_rlookahead_buffer};

    # Read as many lines as needed to reach the requested index.  Each
    # line read, including a final undef from the source, is saved in the
    # lookahead buffer so that get_line() will return it later.
    while ( $buffer_index >= scalar(@$rlookahead_buffer) ) {
        my $line = $line_source_object->get_line();
        push( @$rlookahead_buffer, $line );
        last unless ( defined($line) );
    }
    return $$rlookahead_buffer[$buffer_index];
}
20887
sub get_line {

    # Return the next line: the oldest line in the lookahead buffer if
    # there is one, otherwise a new line from the source object
    my ($self) = @_;
    my $rlookahead_buffer = $self->{_rlookahead_buffer};

    my $line =
      scalar(@$rlookahead_buffer)
      ? shift @$rlookahead_buffer
      : $self->{_line_source_object}->get_line();
    return $line;
}
20902
20903 ########################################################################
20904 #
20905 # the Perl::Tidy::Tokenizer package is essentially a filter which
20906 # reads lines of perl source code from a source object and provides
20907 # corresponding tokenized lines through its get_line() method.  Lines
20908 # flow from the source_object to the caller like this:
20909 #
20910 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
20911 #   get_line()         get_line()           get_line()     line_of_tokens
20912 #
20913 # The source object can be any object with a get_line() method which
# supplies one line (a character string) per call.
20915 # The LineBuffer object is created by the Tokenizer.
20916 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
20917 # containing one tokenized line for each call to its get_line() method.
20918 #
# WARNING: This is not a real class yet.  Only one tokenizer may be used.
20920 #
20921 ########################################################################
20922
20923 package Perl::Tidy::Tokenizer;
20924
BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts

    use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
    use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
    use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
    use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
    use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;

    # Print a reminder for each debug flag which has been switched on
    my @flag_settings = (
        [ 'EXPECT',   TOKENIZER_DEBUG_FLAG_EXPECT ],
        [ 'NSCAN',    TOKENIZER_DEBUG_FLAG_NSCAN ],
        [ 'QUOTE',    TOKENIZER_DEBUG_FLAG_QUOTE ],
        [ 'SCAN_ID',  TOKENIZER_DEBUG_FLAG_SCAN_ID ],
        [ 'TOKENIZE', TOKENIZER_DEBUG_FLAG_TOKENIZE ],
    );
    foreach my $setting (@flag_settings) {
        my ( $key, $is_on ) = @$setting;
        if ($is_on) {
            print "TOKENIZER_DEBUGGING with key $key\n";
        }
    }

}
20947
20948 use Carp;
20949
# PACKAGE VARIABLES for processing an entire FILE.
# These are declared as package variables of Perl::Tidy::Tokenizer, so
# they are shared by all of the routines in this package.
use vars qw{
  $tokenizer_self

  $last_nonblank_token
  $last_nonblank_type
  $last_nonblank_block_type
  $statement_type
  $in_attribute_list
  $current_package
  $context

  %is_constant
  %is_user_function
  %user_function_prototype
  %is_block_function
  %is_block_list_function
  %saw_function_definition

  $brace_depth
  $paren_depth
  $square_bracket_depth

  @current_depth
  @total_depth
  $total_depth
  @nesting_sequence_number
  @current_sequence_number
  @paren_type
  @paren_semicolon_count
  @paren_structural_type
  @brace_type
  @brace_structural_type
  @brace_statement_type
  @brace_context
  @brace_package
  @square_bracket_type
  @square_bracket_structural_type
  @depth_array
  @nested_ternary_flag
  @starting_line_of_current_depth
};

# GLOBAL CONSTANTS for routines in this package
# (these are only declared here; they are given their values elsewhere)
use vars qw{
  %is_indirect_object_taker
  %is_block_operator
  %expecting_operator_token
  %expecting_operator_types
  %expecting_term_types
  %expecting_term_token
  %is_digraph
  %is_file_test_operator
  %is_trigraph
  %is_valid_token_type
  %is_keyword
  %is_code_block_token
  %really_want_term
  @opening_brace_names
  @closing_brace_names
  %is_keyword_taking_list
  %is_q_qq_qw_qx_qr_s_y_tr_m
};

# possible values of operator_expected()
use constant TERM     => -1;
use constant UNKNOWN  => 0;
use constant OPERATOR => 1;

# possible values of context
use constant SCALAR_CONTEXT  => -1;
use constant UNKNOWN_CONTEXT => 0;
use constant LIST_CONTEXT    => 1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;
21026
{

    # methods to count instances; the counter is private to this block
    my $instance_count = 0;

    sub get_count {
        return $instance_count;
    }

    sub _increment_count {
        $instance_count += 1;
        return $instance_count;
    }

    sub _decrement_count {
        $instance_count -= 1;
        return $instance_count;
    }
}
21035
sub DESTROY {

    # an object is going away, so reduce the instance count
    my $self = shift;
    $self->_decrement_count();
}
21039
sub new {

    # Create the tokenizer object.
    #
    # The arguments after the class name are key => value pairs.  The
    # recognized keys and their default values are listed in %defaults
    # below; any pairs given by the caller override the defaults.
    #
    # The new object is also stored in the package variable
    # $tokenizer_self.  Only one object may exist at a time: an attempt to
    # create a second one is a fatal error (see the end of this routine).

    my $class = shift;

    # Note: 'tabs' and 'indent_columns' are temporary and should be
    # removed asap
    my %defaults = (
        source_object        => undef,
        debugger_object      => undef,
        diagnostics_object   => undef,
        logger_object        => undef,
        starting_level       => undef,
        indent_columns       => 4,
        tabs                 => 0,
        entab_leading_space  => undef,
        look_for_hash_bang   => 0,
        trim_qw              => 1,
        look_for_autoloader  => 1,
        look_for_selfloader  => 1,
        starting_line_number => 1,
    );

    # caller's values come last in the list and so replace the defaults
    my %args = ( %defaults, @_ );

    # we are given an object with a get_line() method to supply source lines
    my $source_object = $args{source_object};

    # we create another object with a get_line() and peek_ahead() method
    my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);

    # Tokenizer state data is as follows:
    # _rhere_target_list    reference to list of here-doc targets
    # _here_doc_target      the target string for a here document
    # _here_quote_character the type of here-doc quoting (" ' ` or none)
    #                       to determine if interpolation is done
    # _quote_target         character we seek if chasing a quote
    # _line_start_quote     line where we started looking for a long quote
    # _in_here_doc          flag indicating if we are in a here-doc
    # _in_pod               flag set if we are in pod documentation
    # _in_error             flag set if we saw severe error (binary in script)
    # _in_data              flag set if we are in __DATA__ section
    # _in_end               flag set if we are in __END__ section
    # _in_format            flag set if we are in a format description
    # _in_attribute_list    flag telling if we are looking for attributes
    # _in_quote             flag telling if we are chasing a quote
    # _starting_level       indentation level of first line
    # _input_tabstr         string denoting one indentation level of input file
    # _know_input_tabstr    flag indicating if we know _input_tabstr
    # _line_buffer_object   object with get_line() method to supply source code
    # _diagnostics_object   place to write debugging information
    # _unexpected_error_count  error count used to limit output
    # _lower_case_labels_at  line numbers where lower case labels seen
    $tokenizer_self = {
        _rhere_target_list                  => [],
        _in_here_doc                        => 0,
        _here_doc_target                    => "",
        _here_quote_character               => "",
        _in_data                            => 0,
        _in_end                             => 0,
        _in_format                          => 0,
        _in_error                           => 0,
        _in_pod                             => 0,
        _in_attribute_list                  => 0,
        _in_quote                           => 0,
        _quote_target                       => "",
        _line_start_quote                   => -1,
        _starting_level                     => $args{starting_level},
        _know_starting_level                => defined( $args{starting_level} ),
        _tabs                               => $args{tabs},
        _entab_leading_space                => $args{entab_leading_space},
        _indent_columns                     => $args{indent_columns},
        _look_for_hash_bang                 => $args{look_for_hash_bang},
        _trim_qw                            => $args{trim_qw},
        _input_tabstr                       => "",
        _know_input_tabstr                  => -1,
        _last_line_number                   => $args{starting_line_number} - 1,
        _saw_perl_dash_P                    => 0,
        _saw_perl_dash_w                    => 0,
        _saw_use_strict                     => 0,
        _saw_v_string                       => 0,
        _look_for_autoloader                => $args{look_for_autoloader},
        _look_for_selfloader                => $args{look_for_selfloader},
        _saw_autoloader                     => 0,
        _saw_selfloader                     => 0,
        _saw_hash_bang                      => 0,
        _saw_end                            => 0,
        _saw_data                           => 0,
        _saw_negative_indentation           => 0,
        _started_tokenizing                 => 0,
        _line_buffer_object                 => $line_buffer_object,
        _debugger_object                    => $args{debugger_object},
        _diagnostics_object                 => $args{diagnostics_object},
        _logger_object                      => $args{logger_object},
        _unexpected_error_count             => 0,
        _started_looking_for_here_target_at => 0,
        _nearly_matched_here_target_at      => undef,
        _line_text                          => "",
        _rlower_case_labels_at              => undef,
    };

    # These two routines are defined elsewhere in this file and take no
    # arguments.  NOTE(review): presumably they work through the package
    # variable $tokenizer_self, so they must be called after it has been
    # set above - confirm before reordering.
    prepare_for_a_new_file();
    find_starting_indentation_level();

    bless $tokenizer_self, $class;

    # This is not a full class yet, so die if an attempt is made to
    # create more than one object.

    if ( _increment_count() > 1 ) {
        confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
    }

    return $tokenizer_self;

}
21155
21156 # interface to Perl::Tidy::Logger routines
sub warning {

    # Forward a warning message to the attached logger object.
    # All arguments are handed to the logger's 'warning' method unchanged.
    # Nothing happens when no logger object has been supplied.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->warning(@_) if $logger;
}
21163
sub complain {

    # Forward a complaint message to the attached logger object.
    # All arguments are handed to the logger's 'complain' method unchanged.
    # Nothing happens when no logger object has been supplied.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->complain(@_) if $logger;
}
21170
sub write_logfile_entry {

    # Forward text to the attached logger object for the logfile.
    # All arguments are handed to the logger's 'write_logfile_entry'
    # method unchanged.  Nothing happens when there is no logger object.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->write_logfile_entry(@_) if $logger;
}
21177
sub interrupt_logfile {

    # Call the logger's 'interrupt_logfile' method (no arguments are
    # passed).  Nothing happens when there is no logger object.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->interrupt_logfile() if $logger;
}
21184
sub resume_logfile {

    # Call the logger's 'resume_logfile' method (no arguments are
    # passed).  Nothing happens when there is no logger object.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->resume_logfile() if $logger;
}
21191
sub increment_brace_error {

    # Call the logger's 'increment_brace_error' method (no arguments are
    # passed).  Nothing happens when there is no logger object.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->increment_brace_error() if $logger;
}
21198
sub report_definite_bug {

    # Call the logger's 'report_definite_bug' method (no arguments are
    # passed).  Nothing happens when there is no logger object.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->report_definite_bug() if $logger;
}
21205
sub brace_warning {

    # Forward a brace-related warning to the attached logger object.
    # All arguments are handed to the logger's 'brace_warning' method
    # unchanged.  Nothing happens when there is no logger object.
    my $logger = $tokenizer_self->{_logger_object};
    $logger->brace_warning(@_) if $logger;
}
21212
sub get_saw_brace_error {

    # Return whatever the logger's 'get_saw_brace_error' method reports,
    # or 0 when there is no logger object to ask.
    my $logger = $tokenizer_self->{_logger_object};
    return $logger ? $logger->get_saw_brace_error() : 0;
}
21222
21223 # interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {

    # Forward text to the attached diagnostics object.
    # All arguments are handed to its 'write_diagnostics' method unchanged.
    # Nothing happens when no diagnostics object has been supplied.
    my $diagnostics = $tokenizer_self->{_diagnostics_object};
    $diagnostics->write_diagnostics(@_) if $diagnostics;
}
21229
sub report_tokenization_errors {

    # Report, through the logger interface routines, any tokenizer state
    # that is still open (indentation level, nesting depths, hash-bang
    # search, format, pod, here-document, quote) and write a few style
    # suggestions to the logfile.  The messages assume that the end of the
    # input has been reached.
    #
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $self = shift;

    my $final_level = get_indentation_level();
    warning("final indentation level: $final_level\n")
      if ( $final_level != $tokenizer_self->{_starting_level} );

    check_final_nesting_depths();

    # a hash-bang line was requested but never found
    if (  !$tokenizer_self->{_saw_hash_bang}
        && $tokenizer_self->{_look_for_hash_bang} )
    {
        warning(
            "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
    }

    warning("hit EOF while in format description\n")
      if ( $tokenizer_self->{_in_format} );

    if ( $tokenizer_self->{_in_pod} ) {
        my $pod_msg =
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n";

        # Just write a log entry if this is after __END__ or __DATA__
        # because this happens too often, and it is not likely to be
        # a parsing error.
        if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
            write_logfile_entry($pod_msg);
        }
        else {
            complain($pod_msg);
        }
    }

    if ( $tokenizer_self->{_in_here_doc} ) {
        my $target     = $tokenizer_self->{_here_doc_target};
        my $start_line = $tokenizer_self->{_started_looking_for_here_target_at};
        warning(
            $target
            ? "hit EOF in here document starting at line $start_line with target: $target\n"
            : "hit EOF in here document starting at line $start_line with empty target string\n"
        );

        # mention a line which matched the target except for whitespace
        my $near_miss_line = $tokenizer_self->{_nearly_matched_here_target_at};
        warning(
"NOTE: almost matched at input line $near_miss_line except for whitespace\n"
        ) if ($near_miss_line);
    }

    if ( $tokenizer_self->{_in_quote} ) {
        my $what = "quote/pattern";
        $what = "attribute list" if ( $tokenizer_self->{_in_attribute_list} );
        my $start_line = $tokenizer_self->{_line_start_quote};
        my $terminator = $tokenizer_self->{_quote_target};
        warning(
"hit EOF seeking end of $what starting at line $start_line ending in $terminator\n"
        );
    }

    # the wording of the warnings suggestion depends on the running perl
    if ( !$tokenizer_self->{_saw_perl_dash_w} ) {
        my $suggestion =
          ( $] < 5.006 )
          ? "Suggest including '-w parameter'\n"
          : "Suggest including 'use warnings;'\n";
        write_logfile_entry($suggestion);
    }

    write_logfile_entry("Use of -P parameter for defines is discouraged\n")
      if ( $tokenizer_self->{_saw_perl_dash_P} );

    write_logfile_entry("Suggest including 'use strict;'\n")
      if ( !$tokenizer_self->{_saw_use_strict} );

    # it is suggested that labels have at least one upper case character
    # for legibility and to avoid code breakage as new keywords are introduced
    if ( my $rlabel_lines = $tokenizer_self->{_rlower_case_labels_at} ) {
        write_logfile_entry(
            "Suggest using upper case characters in label(s)\n");

        # join the line numbers as (n1)(n2)...
        local $" = ')(';
        write_logfile_entry("  defined at line(s): (@$rlabel_lines)\n");
    }
}
21334
sub report_v_string {

    # Note that a v-string was seen, and warn if the running perl is too
    # old to handle v-strings.
    #
    #   $tok - text of the v-string token (used in the warning message)
    #
    # The line number of the first v-string seen is kept in
    # $tokenizer_self->{_saw_v_string}; later calls leave it alone.
    my $tok = shift;

    $tokenizer_self->{_saw_v_string} ||= $tokenizer_self->{_last_line_number};

    warning(
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
    ) if ( $] < 5.006 );
}
21348
sub get_input_line_number {

    # Return the number of the input line most recently read by get_line
    # (the tokenizer's running line counter).
    my $line_number = $tokenizer_self->{_last_line_number};
    return $line_number;
}
21352
21353 # returns the next tokenized line
sub get_line {

    # Read the next input line from the line buffer, classify it, and
    # return a reference to a hash describing it.  Returns undef when the
    # line buffer has nothing more to give (it returned a false value).
    #
    # The routine is driven by state flags kept in $tokenizer_self
    # (_in_here_doc, _in_format, _in_pod, _in_error, _in_data, _in_end,
    # _in_quote).  Lines inside here-documents, format sections, pod, or
    # after __DATA__/__END__ are returned without being tokenized; all
    # other lines are passed to tokenize_this_line.
    #
    # The order of the tests below matters: each early return reflects the
    # state left behind by the previous line.

    my $self = shift;

    # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
    # $square_bracket_depth, $paren_depth

    my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
    $tokenizer_self->{_line_text} = $input_line;

    return undef unless ($input_line);

    my $input_line_number = ++$tokenizer_self->{_last_line_number};

    # Remove the characters which terminate this line: first the input
    # record separator, then any trailing run of control characters
    # (carriage return, \035, \032).
    # TODO: what other characters should be included here?
    chomp $input_line;
    $input_line =~ s/(?:\r|\035|\032)+$//;

    # for backwards compatibility we keep the line text terminated with
    # a newline character
    $input_line .= "\n";
    $tokenizer_self->{_line_text} = $input_line;    # update

    # create a data structure describing this line which will be
    # returned to the caller.

    # _line_type codes are:
    #   SYSTEM         - system-specific code before hash-bang line
    #   CODE           - line of perl code (including comments)
    #   POD_START      - line starting pod, such as '=head'
    #   POD            - pod documentation text
    #   POD_END        - last line of pod section, '=cut'
    #   HERE           - text of here-document
    #   HERE_END       - last line of here-doc (target word)
    #   FORMAT         - format section
    #   FORMAT_END     - last line of format section, '.'
    #   DATA_START     - __DATA__ line
    #   DATA           - unidentified text following __DATA__
    #   END_START      - __END__ line
    #   END            - unidentified text following __END__
    #   ERROR          - we are in big trouble, probably not a perl script
    # ('EOF' is only the initial placeholder; every return path below
    # replaces it.)

    # Other variables:
    #   _curly_brace_depth     - depth of curly braces at start of line
    #   _square_bracket_depth  - depth of square brackets at start of line
    #   _paren_depth           - depth of parens at start of line
    #   _starting_in_quote     - this line continues a multi-line quote
    #                            (so don't trim leading blanks!)
    #   _ending_in_quote       - this line ends in a multi-line quote
    #                            (so don't trim trailing blanks!)
    my $line_of_tokens = {
        _line_type                => 'EOF',
        _line_text                => $input_line,
        _line_number              => $input_line_number,
        _rtoken_type              => undef,
        _rtokens                  => undef,
        _rlevels                  => undef,
        _rslevels                 => undef,
        _rblock_type              => undef,
        _rcontainer_type          => undef,
        _rcontainer_environment   => undef,
        _rtype_sequence           => undef,
        _rnesting_tokens          => undef,
        _rci_levels               => undef,
        _rnesting_blocks          => undef,
        _python_indentation_level => -1,                   ## 0,
        _starting_in_quote    => 0,                    # to be set by subroutine
        _ending_in_quote      => 0,
        _curly_brace_depth    => $brace_depth,
        _square_bracket_depth => $square_bracket_depth,
        _paren_depth          => $paren_depth,
        _quote_character      => '',
    };

    # must print line unchanged if we are in a here document
    if ( $tokenizer_self->{_in_here_doc} ) {

        $line_of_tokens->{_line_type} = 'HERE';
        my $here_doc_target      = $tokenizer_self->{_here_doc_target};
        my $here_quote_character = $tokenizer_self->{_here_quote_character};
        my $candidate_target     = $input_line;
        chomp $candidate_target;
        if ( $candidate_target eq $here_doc_target ) {
            $tokenizer_self->{_nearly_matched_here_target_at} = undef;
            $line_of_tokens->{_line_type}                     = 'HERE_END';
            write_logfile_entry("Exiting HERE document $here_doc_target\n");

            # if more targets were queued on the same source line, start
            # looking for the next one right away
            my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
            if (@$rhere_target_list) {    # there can be multiple here targets
                ( $here_doc_target, $here_quote_character ) =
                  @{ shift @$rhere_target_list };
                $tokenizer_self->{_here_doc_target} = $here_doc_target;
                $tokenizer_self->{_here_quote_character} =
                  $here_quote_character;
                write_logfile_entry(
                    "Entering HERE document $here_doc_target\n");
                $tokenizer_self->{_nearly_matched_here_target_at} = undef;
                $tokenizer_self->{_started_looking_for_here_target_at} =
                  $input_line_number;
            }
            else {
                $tokenizer_self->{_in_here_doc}          = 0;
                $tokenizer_self->{_here_doc_target}      = "";
                $tokenizer_self->{_here_quote_character} = "";
            }
        }

        # check for error of extra whitespace
        # note for PERL6: leading whitespace is allowed
        else {
            $candidate_target =~ s/\s*$//;
            $candidate_target =~ s/^\s*//;
            if ( $candidate_target eq $here_doc_target ) {

                # remembered so that an EOF report can mention the near miss
                $tokenizer_self->{_nearly_matched_here_target_at} =
                  $input_line_number;
            }
        }
        return $line_of_tokens;
    }

    # must print line unchanged if we are in a format section
    elsif ( $tokenizer_self->{_in_format} ) {

        # a line with just a '.' (optionally followed by blanks or '#')
        # ends the format
        if ( $input_line =~ /^\.[\s#]*$/ ) {
            write_logfile_entry("Exiting format section\n");
            $tokenizer_self->{_in_format} = 0;
            $line_of_tokens->{_line_type} = 'FORMAT_END';
        }
        else {
            $line_of_tokens->{_line_type} = 'FORMAT';
        }
        return $line_of_tokens;
    }

    # must print line unchanged if we are in pod documentation
    elsif ( $tokenizer_self->{_in_pod} ) {

        $line_of_tokens->{_line_type} = 'POD';
        if ( $input_line =~ /^=cut/ ) {
            $line_of_tokens->{_line_type} = 'POD_END';
            write_logfile_entry("Exiting POD section\n");
            $tokenizer_self->{_in_pod} = 0;
        }
        if ( $input_line =~ /^\#\!.*perl\b/ ) {
            warning(
                "Hash-bang in pod can cause older versions of perl to fail! \n"
            );
        }

        return $line_of_tokens;
    }

    # must print line unchanged if we have seen a severe error (i.e., we
    # are seeing illegal tokens and cannot continue.  Syntax errors do
    # not pass this route).  Calling routine can decide what to do, but
    # the default can be to just pass all lines as if they were after __END__
    elsif ( $tokenizer_self->{_in_error} ) {
        $line_of_tokens->{_line_type} = 'ERROR';
        return $line_of_tokens;
    }

    # print line unchanged if we are in __DATA__ section
    elsif ( $tokenizer_self->{_in_data} ) {

        # ...but look for POD
        # Note that the _in_data and _in_end flags remain set
        # so that we return to that state after seeing the
        # end of a pod section
        if ( $input_line =~ /^=(?!cut)/ ) {
            $line_of_tokens->{_line_type} = 'POD_START';
            write_logfile_entry("Entering POD section\n");
            $tokenizer_self->{_in_pod} = 1;
            return $line_of_tokens;
        }
        else {
            $line_of_tokens->{_line_type} = 'DATA';
            return $line_of_tokens;
        }
    }

    # print line unchanged if we are in __END__ section
    elsif ( $tokenizer_self->{_in_end} ) {

        # ...but look for POD
        # Note that the _in_data and _in_end flags remain set
        # so that we return to that state after seeing the
        # end of a pod section
        if ( $input_line =~ /^=(?!cut)/ ) {
            $line_of_tokens->{_line_type} = 'POD_START';
            write_logfile_entry("Entering POD section\n");
            $tokenizer_self->{_in_pod} = 1;
            return $line_of_tokens;
        }
        else {
            $line_of_tokens->{_line_type} = 'END';
            return $line_of_tokens;
        }
    }

    # check for a hash-bang line if we haven't seen one
    if ( !$tokenizer_self->{_saw_hash_bang} ) {
        if ( $input_line =~ /^\#\!.*perl\b/ ) {
            $tokenizer_self->{_saw_hash_bang} = $input_line_number;

            # check for -w and -P flags
            if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
                $tokenizer_self->{_saw_perl_dash_P} = 1;
            }

            if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
                $tokenizer_self->{_saw_perl_dash_w} = 1;
            }

            if (   ( $input_line_number > 1 )
                && ( !$tokenizer_self->{_look_for_hash_bang} ) )
            {

                # this is helpful for VMS systems; we may have accidentally
                # tokenized some DCL commands
                if ( $tokenizer_self->{_started_tokenizing} ) {
                    warning(
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
                    );
                }
                else {
                    complain("Useless hash-bang after line 1\n");
                }
            }

            # Report the leading hash-bang as a system line
            # This will prevent -dac from deleting it
            else {
                $line_of_tokens->{_line_type} = 'SYSTEM';
                return $line_of_tokens;
            }
        }
    }

    # wait for a hash-bang before parsing if the user invoked us with -x
    if ( $tokenizer_self->{_look_for_hash_bang}
        && !$tokenizer_self->{_saw_hash_bang} )
    {
        $line_of_tokens->{_line_type} = 'SYSTEM';
        return $line_of_tokens;
    }

    # a first line of the form ': #' will be marked as SYSTEM
    # since lines of this form may be used by tcsh
    if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
        $line_of_tokens->{_line_type} = 'SYSTEM';
        return $line_of_tokens;
    }

    # now we know that it is ok to tokenize the line...
    # the line tokenizer will modify any of these private variables:
    #        _rhere_target_list
    #        _in_data
    #        _in_end
    #        _in_format
    #        _in_error
    #        _in_pod
    #        _in_quote
    tokenize_this_line($line_of_tokens);

    # Now finish defining the return structure and return it
    $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};

    # handle severe error (binary data in script)
    if ( $tokenizer_self->{_in_error} ) {
        $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
        warning("Giving up after error\n");
        $line_of_tokens->{_line_type} = 'ERROR';
        reset_indentation_level(0);          # avoid error messages
        return $line_of_tokens;
    }

    # handle start of pod documentation
    if ( $tokenizer_self->{_in_pod} ) {

        # This gets tricky..above a __DATA__ or __END__ section, perl
        # accepts '=cut' as the start of pod section. But afterwards,
        # only pod utilities see it and they may ignore an =cut without
        # leading =head.  In any case, this isn't good.
        if ( $input_line =~ /^=cut\b/ ) {
            if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
                complain("=cut while not in pod ignored\n");
                $tokenizer_self->{_in_pod}    = 0;
                $line_of_tokens->{_line_type} = 'POD_END';
            }
            else {
                $line_of_tokens->{_line_type} = 'POD_START';
                complain(
"=cut starts a pod section .. this can fool pod utilities.\n"
                );
                write_logfile_entry("Entering POD section\n");
            }
        }

        else {
            $line_of_tokens->{_line_type} = 'POD_START';
            write_logfile_entry("Entering POD section\n");
        }

        return $line_of_tokens;
    }

    # update indentation levels for log messages
    if ( $input_line !~ /^\s*$/ ) {
        my $rlevels                      = $line_of_tokens->{_rlevels};
        my $structural_indentation_level = $$rlevels[0];
        my ( $python_indentation_level, $msg ) =
          find_indentation_level( $input_line, $structural_indentation_level );
        if ($msg) { write_logfile_entry("$msg") }

        # only report the level once the input tabbing scheme is known
        if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
            $line_of_tokens->{_python_indentation_level} =
              $python_indentation_level;
        }
    }

    # see if this line contains here doc targets
    my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
    if (@$rhere_target_list) {

        my ( $here_doc_target, $here_quote_character ) =
          @{ shift @$rhere_target_list };
        $tokenizer_self->{_in_here_doc}          = 1;
        $tokenizer_self->{_here_doc_target}      = $here_doc_target;
        $tokenizer_self->{_here_quote_character} = $here_quote_character;
        write_logfile_entry("Entering HERE document $here_doc_target\n");
        $tokenizer_self->{_started_looking_for_here_target_at} =
          $input_line_number;
    }

    # NOTE: __END__ and __DATA__ statements are written unformatted
    # because they can theoretically contain additional characters
    # which are not tokenized (and cannot be read with <DATA> either!).
    if ( $tokenizer_self->{_in_data} ) {
        $line_of_tokens->{_line_type} = 'DATA_START';
        write_logfile_entry("Starting __DATA__ section\n");
        $tokenizer_self->{_saw_data} = 1;

        # keep parsing after __DATA__ if use SelfLoader was seen
        if ( $tokenizer_self->{_saw_selfloader} ) {
            $tokenizer_self->{_in_data} = 0;
            write_logfile_entry(
                "SelfLoader seen, continuing; -nlsl deactivates\n");
        }

        return $line_of_tokens;
    }

    elsif ( $tokenizer_self->{_in_end} ) {
        $line_of_tokens->{_line_type} = 'END_START';
        write_logfile_entry("Starting __END__ section\n");
        $tokenizer_self->{_saw_end} = 1;

        # keep parsing after __END__ if use AutoLoader was seen
        if ( $tokenizer_self->{_saw_autoloader} ) {
            $tokenizer_self->{_in_end} = 0;
            write_logfile_entry(
                "AutoLoader seen, continuing; -nlal deactivates\n");
        }
        return $line_of_tokens;
    }

    # now, finally, we know that this line is type 'CODE'
    $line_of_tokens->{_line_type} = 'CODE';

    # remember if we have seen any real code
    if (  !$tokenizer_self->{_started_tokenizing}
        && $input_line !~ /^\s*$/
        && $input_line !~ /^\s*#/ )
    {
        $tokenizer_self->{_started_tokenizing} = 1;
    }

    if ( $tokenizer_self->{_debugger_object} ) {
        $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
    }

    # Note: if keyword 'format' occurs in this line code, it is still CODE
    # (keyword 'format' need not start a line)
    if ( $tokenizer_self->{_in_format} ) {
        write_logfile_entry("Entering format section\n");
    }

    # _line_start_quote is negative while no multi-line quote is open;
    # record where one starts and note in the log when it ends
    if ( $tokenizer_self->{_in_quote}
        and ( $tokenizer_self->{_line_start_quote} < 0 ) )
    {

        #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
        if (
            ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
        {
            $tokenizer_self->{_line_start_quote} = $input_line_number;
            write_logfile_entry(
                "Start multi-line quote or pattern ending in $quote_target\n");
        }
    }
    elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
        and !$tokenizer_self->{_in_quote} )
    {
        $tokenizer_self->{_line_start_quote} = -1;
        write_logfile_entry("End of multi-line quote or pattern\n");
    }

    # we are returning a line of CODE
    return $line_of_tokens;
}
21770
sub find_starting_indentation_level {

    # Establish the indentation level at which the input begins, store it
    # in $tokenizer_self->{_starting_level}, and reset the running
    # indentation level to it.
    #
    # The level is taken, in order of preference, from:
    #   - a value already marked as known (_know_starting_level),
    #   - zero, if a hash-bang line is to be searched for,
    #   - a guess based on the first line of code in the input.
    #
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $level = 0;

    if ( $tokenizer_self->{_know_starting_level} ) {

        # use value if given as parameter
        $level = $tokenizer_self->{_starting_level};
    }
    elsif ( $tokenizer_self->{_look_for_hash_bang} ) {

        # if we know there is a hash_bang line, the level must be zero
        $tokenizer_self->{_know_starting_level} = 1;
    }
    else {

        # otherwise figure it out from the input file: keep peeking at
        # lines until we find a hash bang or a piece of code
        my $buffer    = $tokenizer_self->{_line_buffer_object};
        my $peek_count = 0;
        while ( my $text = $buffer->peek_ahead( $peek_count++ ) ) {

            # if first line is #! then assume starting level is zero
            if ( $peek_count == 1 && $text =~ /^\#\!/ ) {
                $level = 0;
                last;
            }

            # skip past comments and blank lines
            next if ( $text =~ /^\s*#/ );
            next if ( $text =~ /^\s*$/ );

            # a negative structural level asks find_indentation_level to
            # guess the input tabbing
            my $note;
            ( $level, $note ) = find_indentation_level( $text, -1 );
            write_logfile_entry("$note") if ($note);
            last;
        }

        my $summary =
          "Line $peek_count implies starting-indentation-level = $level\n";

        # explain which tabbing guess produced a nonzero level
        if ( $level > 0 ) {
            my $tabstr = $tokenizer_self->{_input_tabstr};
            if ( $tabstr ne "\t" ) {
                my $cols = length($tabstr);
                $summary .=
                  "by guessing input tabbing uses $cols blanks per level\n";
            }
            else {
                $summary .= "by guessing input tabbing uses 1 tab per level\n";
            }
        }
        write_logfile_entry("$summary");
    }

    $tokenizer_self->{_starting_level} = $level;
    reset_indentation_level($level);
}
21830
21831 # Find indentation level given a input line.  At the same time, try to
21832 # figure out the input tabbing scheme.
21833 #
21834 # There are two types of calls:
21835 #
21836 # Type 1: $structural_indentation_level < 0
21837 #  In this case we have to guess $input_tabstr to figure out the level.
21838 #
21839 # Type 2: $structural_indentation_level >= 0
21840 #  In this case the level of this line is known, and this routine can
21841 #  update the tabbing string, if still unknown, to make the level correct.
21842
sub find_indentation_level {

    # Work out the indentation level of an input line from its leading
    # whitespace and, along the way, refine the guess of which string the
    # input uses for one level of indentation.
    #
    #   $line                         - text of the input line
    #   $structural_indentation_level - known level of this line, or a
    #                                   negative value if it is not known
    #
    # Returns the list ( $level, $msg ); $msg is text for the logfile and
    # may be empty.
    #
    # Reads and updates $tokenizer_self->{_input_tabstr} and
    # $tokenizer_self->{_know_input_tabstr}.  The latter is negative before
    # any guess has been made, 0 after a first guess, and 1 once a line of
    # known, nonzero level has fixed the tabbing string.
    #
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $line, $structural_indentation_level ) = @_;

    my $level = 0;
    my $msg   = "";

    # number of columns assumed for a tab when detabbing
    my $tab_columns = 8;

    my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
    my $input_tabstr      = $tokenizer_self->{_input_tabstr};

    # find leading whitespace
    my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";

    # make first guess at input tabbing scheme if necessary
    if ( $know_input_tabstr < 0 ) {

        $know_input_tabstr = 0;

        # When -et=n is used for the output formatting, we will assume that
        # tabs in the input formatting were also produced with -et=n.  This may
        # not be true, but it is the best guess because it will keep leading
        # whitespace unchanged on repeated formatting on small pieces of code
        # when -et=n is used.  Thanks to Sam Kington for this patch.
        if ( my $tabsize = $tokenizer_self->{_entab_leading_space} ) {
            $leading_whitespace =~ s{^ (\t*) }
           { " " x (length($1) * $tabsize) }xe;
            $input_tabstr = " " x $tokenizer_self->{_indent_columns};
        }
        elsif ( $tokenizer_self->{_tabs} ) {
            $input_tabstr = "\t";

            # leading blanks without any tab: guess blanks instead, using
            # at most the width of this line's indentation
            if ( length($leading_whitespace) > 0
                && $leading_whitespace !~ /\t/ )
            {
                my $cols = $tokenizer_self->{_indent_columns};
                if ( length($leading_whitespace) < $cols ) {
                    $cols = length($leading_whitespace);
                }
                $input_tabstr = " " x $cols;
            }
        }
        else {
            $input_tabstr = " " x $tokenizer_self->{_indent_columns};

            if ( length($leading_whitespace) > 0
                && $leading_whitespace =~ /^\t/ )
            {
                $input_tabstr = "\t";
            }
        }
        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
        $tokenizer_self->{_input_tabstr}      = $input_tabstr;
    }

    # determine the input tabbing scheme if possible
    if (   ( $know_input_tabstr == 0 )
        && ( length($leading_whitespace) > 0 )
        && ( $structural_indentation_level > 0 ) )
    {
        my $saved_input_tabstr = $input_tabstr;

        # check for common case of one tab per indentation level
        if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
            $input_tabstr = "\t";
            $msg          = "Guessing old indentation was tab character\n";
        }

        else {

            # detab any leading tabs based on 8 blanks per tab; every tab
            # in the run is expanded, not just the run as a whole
            my $entabbed = "";
            if ( $leading_whitespace =~
                s/^(\t+)/" " x ( $tab_columns * length($1) )/e )
            {
                $entabbed = "entabbed";
            }

            # now compute tabbing from number of spaces
            my $columns =
              length($leading_whitespace) / $structural_indentation_level;
            if ( $columns == int $columns ) {
                $msg =
                  "Guessing old indentation was $columns $entabbed spaces\n";
            }
            else {

                # not a whole number of columns per level: round down
                $columns = int $columns;
                $msg =
"old indentation is unclear, using $columns $entabbed spaces\n";
            }
            $input_tabstr = " " x $columns;
        }
        $know_input_tabstr                    = 1;
        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
        $tokenizer_self->{_input_tabstr}      = $input_tabstr;

        # see if mistakes were made: the starting level was guessed with a
        # tabbing string which has now turned out to be different
        if ( ( $tokenizer_self->{_starting_level} > 0 )
            && !$tokenizer_self->{_know_starting_level} )
        {

            if ( $input_tabstr ne $saved_input_tabstr ) {
                complain(
"I made a bad starting level guess; rerun with a value for -sil \n"
                );
            }
        }
    }

    # use current guess at input tabbing to get input indentation level
    #
    # Patch to handle a common case of entabbed leading whitespace
    # If the leading whitespace equals 4 spaces and we also have
    # tabs, detab the input whitespace assuming 8 spaces per tab.
    if ( length($input_tabstr) == 4 ) {
        $leading_whitespace =~
          s/^(\t+)/" " x ( $tab_columns * length($1) )/e;
    }

    # count how many copies of the tabbing string start the whitespace
    if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
        my $pos = 0;

        while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
        {
            $pos += $len_tab;
            $level++;
        }
    }
    return ( $level, $msg );
}
21972
21973 # Debug aid, currently unused: print the contents of the hashes
21974 # %is_user_function and %is_constant to STDOUT, one package at a time.
21975 sub dump_functions {
21976
21977     my $out = *STDOUT;
21978
21979     for my $package ( keys %is_user_function ) {
21980         print $out "\nnon-constant subs in package $package\n";
21981
21982         for my $name ( keys %{ $is_user_function{$package} } ) {
21983
21984             # a 'block' flag overrides a 'block_list' flag
21985             my $kind =
21986               $is_block_list_function{$package}{$name} ? 'block_list' : "";
21987             $kind = 'block' if ( $is_block_function{$package}{$name} );
21988
21989             print $out "$name $kind\n";
21990         }
21991     }
21992
21993     # constants and constant subs are kept in a separate hash
21994     for my $package ( keys %is_constant ) {
21995         print $out "\nconstants and constant subs in package $package\n";
21996
21997         for my $name ( keys %{ $is_constant{$package} } ) {
21998             print $out "$name\n";
21999         }
22000     }
22001 }
22002
22003 sub ones_count {
22004
22005     # Return the number of '1' characters in a string of 1's and 0's;
22006     # for example, ones_count("010101010101") returns 6.
22007     my ($bit_string) = @_;
22008     return ( $bit_string =~ tr/1// ); }
22009
22010 sub prepare_for_a_new_file {
22011     # Reset all package-level tokenizer state to its start-of-file values
22012
22013     # previous tokens needed to determine what to expect next
22014     $last_nonblank_token      = ';';    # the only possible starting state which
22015     $last_nonblank_type       = ';';    # will make a leading brace a code block
22016     $last_nonblank_block_type = '';
22017
22018     # scalars for remembering statement types across multiple lines
22019     $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
22020     $in_attribute_list = 0;
22021
22022     # scalars for remembering where we are in the file
22023     $current_package = "main";
22024     $context         = UNKNOWN_CONTEXT;
22025
22026     # hashes used to remember function information
22027     %is_constant             = ();      # user-defined constants
22028     %is_user_function        = ();      # user-defined functions
22029     %user_function_prototype = ();      # their prototypes
22030     %is_block_function       = ();
22031     %is_block_list_function  = ();
22032     %saw_function_definition = ();
22033
22034     # container depths (for nesting error reports); each stack gets its
22035     # depth-0 entry, indexed by its own depth counter
22036     $paren_depth          = 0;
22037     $brace_depth          = 0;
22038     $square_bracket_depth = 0;
22039     @current_depth[ 0 .. $#closing_brace_names ] =
22040       (0) x scalar @closing_brace_names;
22041     $total_depth = 0;
22042     @total_depth = ();
22043     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
22044       ( 0 .. $#closing_brace_names );
22045     @current_sequence_number             = ();
22046     $paren_type[$paren_depth]            = '';
22047     $paren_semicolon_count[$paren_depth] = 0;
22048     $paren_structural_type[$paren_depth] = '';
22049     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
22050     $brace_structural_type[$brace_depth]                   = '';
22051     $brace_statement_type[$brace_depth]                    = "";
22052     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
22053     $brace_package[$brace_depth]                           = $current_package;
22054     $square_bracket_type[$square_bracket_depth]            = '';
22055     $square_bracket_structural_type[$square_bracket_depth] = '';
22056     initialize_tokenizer_state();    # reset the lexical tokenizer state too
22057 }
22058
22059 {                                       # begin tokenize_this_line
22060
22061     use constant BRACE          => 0;    # codes identifying container types,
22062     use constant SQUARE_BRACKET => 1;    # e.g. passed to increase_nesting_depth
22063     use constant PAREN          => 2;    # and decrease_nesting_depth, and used
22064     use constant QUESTION_COLON => 3;    # as indexes into @depth_array
22065
22066     # TV1: scalars for processing one LINE.  Re-initialized on each entry
22067     # to sub tokenize_this_line.  save_tokenizer_state lists them in this order.
22068     my (
22069         $block_type,        $container_type,    $expecting,
22070         $i,                 $i_tok,             $input_line,
22071         $input_line_number, $last_nonblank_i,   $max_token_index,
22072         $next_tok,          $next_type,         $peeked_ahead,
22073         $prototype,         $rhere_target_list, $rtoken_map,
22074         $rtoken_type,       $rtokens,           $tok,
22075         $type,              $type_sequence,     $indent_flag,
22076     );
22077
22078     # TV2: refs to ARRAYS for processing one LINE
22079     # Re-initialized on each call.
22080     my $routput_token_list     = [];    # stack of output token indexes
22081     my $routput_token_type     = [];    # token types
22082     my $routput_block_type     = [];    # types of code block
22083     my $routput_container_type = [];    # paren types, such as if, elsif, ..
22084     my $routput_type_sequence  = [];    # nesting sequential number
22085     my $routput_indent_flag    = [];    # indent flags (presumably per token)
22086
22087     # TV3: SCALARS for quote variables.  These are initialized with a
22088     # subroutine call and continually updated as lines are processed.
22089     my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22090         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
22091
22092     # TV4: SCALARS for multi-line identifiers and
22093     # statements. These are initialized with a subroutine call
22094     # and continually updated as lines are processed.
22095     my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
22096
22097     # TV5: SCALARS for tracking indentation level.
22098     # Initialized once (by sub initialize_tokenizer_state) and
22099     # continually updated as lines are processed.
22100     my (
22101         $nesting_token_string,      $nesting_type_string,
22102         $nesting_block_string,      $nesting_block_flag,
22103         $nesting_list_string,       $nesting_list_flag,
22104         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22105         $in_statement_continuation, $level_in_tokenizer,
22106         $slevel_in_tokenizer,       $rslevel_stack,
22107     );
22108
22109     # TV6: SCALARS for remembering several previous
22110     # tokens. Initialized once (by sub initialize_tokenizer_state) and
22111     # continually updated as lines are processed.
22112     my (
22113         $last_nonblank_container_type,     $last_nonblank_type_sequence,
22114         $last_last_nonblank_token,         $last_last_nonblank_type,
22115         $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
22116         $last_last_nonblank_type_sequence, $last_nonblank_prototype,
22117     );
22118
22119     # ----------------------------------------------------------------
22120     # beginning of tokenizer variable access and manipulation routines
22121     # ----------------------------------------------------------------
22122
22123     sub initialize_tokenizer_state {
22124
22125         # Put the persistent lexical tokenizer variables (groups TV3 to
22126         # TV6) into their starting state.  Groups TV1 and TV2 are skipped
22127         # because they are initialized on each call.
22128
22129         # TV3: quote variables
22130         $in_quote   = $quote_pos = $quote_depth = 0;
22131         $quote_type = 'Q';
22132         $quote_character = $quoted_string_1 = $quoted_string_2 = "";
22133         $allowed_quote_modifiers = "";
22134
22135         # TV4: multi-line identifier and statement variables
22136         $id_scan_state     = $identifier = '';
22137         $want_paren        = "";
22138         $indented_if_level = 0;
22139
22140         # TV5: the nesting strings start out empty
22141         $nesting_token_string   = $nesting_type_string = "";
22142         $ci_string_in_tokenizer = "";
22143
22144         # TV5: initially in a block
22145         $nesting_block_string = '1';
22146         $nesting_block_flag   = 1;
22147
22148         # TV5: initially not in a list and not in a continued statement
22149         $nesting_list_string              = '0';
22150         $continuation_string_in_tokenizer = "0";
22151         $nesting_list_flag = $in_statement_continuation = 0;
22152
22153         # TV5: both levels start at zero with an empty level stack
22154         $level_in_tokenizer = $slevel_in_tokenizer = 0;
22155         $rslevel_stack      = [];
22156
22157         # TV6: the token before the previous one starts out as ';'
22158         $last_last_nonblank_token = $last_last_nonblank_type = ';';
22159
22160         # TV6: everything else remembered about previous tokens is empty
22161         $last_nonblank_container_type = $last_nonblank_type_sequence = '';
22162         $last_last_nonblank_block_type     = '';
22163         $last_last_nonblank_container_type = '';
22164         $last_last_nonblank_type_sequence  = '';
22165         $last_nonblank_prototype           = "";
22166     }
22167
22168     sub save_tokenizer_state {
22169
22170         # Return a reference to a list of six array references, one for each
22171         # of the variable groups TV1 to TV6, holding the current values in
22172         # declaration order.  restore_tokenizer_state takes this as its argument.
22173
22174         return [
22175             [    # TV1: scalars for the current line
22176                 $block_type,        $container_type,    $expecting,
22177                 $i,                 $i_tok,             $input_line,
22178                 $input_line_number, $last_nonblank_i,   $max_token_index,
22179                 $next_tok,          $next_type,         $peeked_ahead,
22180                 $prototype,         $rhere_target_list, $rtoken_map,
22181                 $rtoken_type,       $rtokens,           $tok,
22182                 $type,              $type_sequence,     $indent_flag,
22183             ],
22184             [    # TV2: references to the output arrays for the current line
22185                 $routput_token_list,    $routput_token_type,
22186                 $routput_block_type,    $routput_container_type,
22187                 $routput_type_sequence, $routput_indent_flag,
22188             ],
22189             [    # TV3: quote variables
22190                 $in_quote,        $quote_type,
22191                 $quote_character, $quote_pos,
22192                 $quote_depth,     $quoted_string_1,
22193                 $quoted_string_2, $allowed_quote_modifiers,
22194             ],
22195             # TV4: multi-line identifier and statement variables
22196             [ $id_scan_state, $identifier, $want_paren, $indented_if_level ],
22197             [    # TV5: indentation level variables
22198                 $nesting_token_string,      $nesting_type_string,
22199                 $nesting_block_string,      $nesting_block_flag,
22200                 $nesting_list_string,       $nesting_list_flag,
22201                 $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22202                 $in_statement_continuation, $level_in_tokenizer,
22203                 $slevel_in_tokenizer,       $rslevel_stack,
22204             ],
22205             [    # TV6: memory of previous tokens
22206                 $last_nonblank_container_type,
22207                 $last_nonblank_type_sequence,
22208                 $last_last_nonblank_token,
22209                 $last_last_nonblank_type,
22210                 $last_last_nonblank_block_type,
22211                 $last_last_nonblank_container_type,
22212                 $last_last_nonblank_type_sequence,
22213                 $last_nonblank_prototype,
22214             ],
22215         ];
22216     }
22217
22218     sub restore_tokenizer_state {
22219         # Given the reference made by save_tokenizer_state, reload TV1 to TV6
22220         my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{ $_[0] };
22221         (
22222             $block_type,        $container_type,    $expecting,
22223             $i,                 $i_tok,             $input_line,
22224             $input_line_number, $last_nonblank_i,   $max_token_index,
22225             $next_tok,          $next_type,         $peeked_ahead,
22226             $prototype,         $rhere_target_list, $rtoken_map,
22227             $rtoken_type,       $rtokens,           $tok,
22228             $type,              $type_sequence,     $indent_flag,
22229         ) = @{$rTV1};    # TV1: scalars for the current line
22230
22231         (
22232             $routput_token_list,    $routput_token_type,
22233             $routput_block_type,    $routput_container_type,
22234             $routput_type_sequence, $routput_indent_flag,
22235         ) = @{$rTV2};    # TV2: each array reference returns to its own variable
22236
22237         (
22238             $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22239             $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
22240         ) = @{$rTV3};    # TV3: quote variables
22241
22242         ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
22243           @{$rTV4};      # TV4: multi-line identifier and statement variables
22244
22245         (
22246             $nesting_token_string,      $nesting_type_string,
22247             $nesting_block_string,      $nesting_block_flag,
22248             $nesting_list_string,       $nesting_list_flag,
22249             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22250             $in_statement_continuation, $level_in_tokenizer,
22251             $slevel_in_tokenizer,       $rslevel_stack,
22252         ) = @{$rTV5};    # TV5: indentation level variables
22253
22254         (
22255             $last_nonblank_container_type,
22256             $last_nonblank_type_sequence,
22257             $last_last_nonblank_token,
22258             $last_last_nonblank_type,
22259             $last_last_nonblank_block_type,
22260             $last_last_nonblank_container_type,
22261             $last_last_nonblank_type_sequence,
22262             $last_nonblank_prototype,
22263         ) = @{$rTV6};    # TV6: memory of previous tokens
22264     }
22265
22266     sub get_indentation_level {
22267         # one less inside an indented if, so an unterminated one is no error
22268         return $indented_if_level
22269           ? $level_in_tokenizer - 1
22270           : $level_in_tokenizer;
22271     }
22272
22273     sub reset_indentation_level {
22274         # force both level counters to the given value, then stack that value
22275         $level_in_tokenizer = $slevel_in_tokenizer = $_[0];
22276         push @{$rslevel_stack}, $_[0];
22277     }
22278
22279     sub peeked_ahead {    # store a defined argument; return the flag
22280         return defined( $_[0] ) ? ( $peeked_ahead = $_[0] ) : $peeked_ahead;
22281     }
22282
22283     # ------------------------------------------------------------
22284     # end of tokenizer variable access and manipulation routines
22285     # ------------------------------------------------------------
22286
22287     # ------------------------------------------------------------
22288     # beginning of various scanner interface routines
22289     # ------------------------------------------------------------
22290     sub scan_replacement_text {
22291
22292         # Check for here-docs in the replacement text of a substitution
22293         # operator which has the executable modifier 'e', by running a
22294         # separate tokenizer over that text.
22295         # given:
22296         #  $replacement_text = the text to be scanned
22297         # return:
22298         #  $rht = reference to a list of any here-doc targets, else undef
22299         my ($replacement_text) = @_;
22300
22301         # quick check: nothing to do unless the text contains '<<'
22302         return undef unless ( $replacement_text =~ /<</ );
22303
22304         write_logfile_entry("scanning replacement text for here-doc targets\n");
22305
22306         # save the logger object now, before $tokenizer_self is localized
22307         my $logger_object = $tokenizer_self->{_logger_object};
22308
22309         # localize all package variables; they revert when this sub returns
22310         local (
22311             $tokenizer_self,          $last_nonblank_token,
22312             $last_nonblank_type,      $last_nonblank_block_type,
22313             $statement_type,          $in_attribute_list,
22314             $current_package,         $context,
22315             %is_constant,             %is_user_function,
22316             %user_function_prototype, %is_block_function,
22317             %is_block_list_function,  %saw_function_definition,
22318             $brace_depth,             $paren_depth,
22319             $square_bracket_depth,    @current_depth,
22320             @total_depth,             $total_depth,
22321             @nesting_sequence_number, @current_sequence_number,
22322             @paren_type,              @paren_semicolon_count,
22323             @paren_structural_type,   @brace_type,
22324             @brace_structural_type,   @brace_statement_type,
22325             @brace_context,           @brace_package,
22326             @square_bracket_type,     @square_bracket_structural_type,
22327             @depth_array,             @starting_line_of_current_depth,
22328             @nested_ternary_flag,
22329         );
22330
22331         # save all lexical tokenizer variables (groups TV1 to TV6)
22332         my $rstate = save_tokenizer_state();
22333         _decrement_count();    # avoid error check for multiple tokenizers
22334
22335         # make a new tokenizer which reads the replacement text as its source
22336         my $rOpts = {};
22337         my $rpending_logfile_message;
22338         my $source_object =
22339           Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
22340             $rpending_logfile_message );
22341         my $tokenizer = Perl::Tidy::Tokenizer->new(
22342             source_object        => $source_object,
22343             logger_object        => $logger_object,
22344             starting_line_number => $input_line_number,
22345         );
22346
22347         # scan the replacement text; the lines returned are not needed
22348         1 while ( $tokenizer->get_line() );
22349         # collect and clear any here doc targets still pending after the scan
22350         # NOTE(review): assumes 'new' set $tokenizer_self to the new tokenizer
22351         my $rht = undef;
22352         if ( $tokenizer_self->{_in_here_doc} ) {
22353             $rht = [];
22354             push @{$rht},
22355               [
22356                 $tokenizer_self->{_here_doc_target},
22357                 $tokenizer_self->{_here_quote_character}
22358               ];
22359             if ( $tokenizer_self->{_rhere_target_list} ) {
22360                 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
22361                 $tokenizer_self->{_rhere_target_list} = undef;
22362             }
22363             $tokenizer_self->{_in_here_doc} = undef;
22364         }
22365
22366         # now it's safe to report errors
22367         $tokenizer->report_tokenization_errors();
22368
22369         # restore all tokenizer lexical variables
22370         restore_tokenizer_state($rstate);
22371
22372         # return the here doc targets
22373         return $rht;
22374     }
22375
22376     sub scan_bare_identifier {    # interface to scan_bare_identifier_do:
22377         ( $i, $tok, $type, $prototype ) =    # updated from its return list
22378           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
22379             $rtoken_map, $max_token_index );
22380     }
22381
22382     sub scan_identifier {    # interface to scan_identifier_do:
22383         ( $i, $tok, $type, $id_scan_state, $identifier ) =    # updated here
22384           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
22385             $max_token_index, $expecting );
22386     }
22387
22388     sub scan_id {    # interface to scan_id_do:
22389         ( $i, $tok, $type, $id_scan_state ) =    # updated from its return list
22390           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
22391             $id_scan_state, $max_token_index );
22392     }
22393
22394     sub scan_number {    # interface to scan_number_do
22395         my $number;
22396         ( $i, $type, $number ) =    # $i and $type are updated as well
22397           scan_number_do( $input_line, $i, $rtoken_map, $type,
22398             $max_token_index );
22399         return $number;    # only the number itself goes back to the caller
22400     }
22401
22402     # Complain if the current token is found where a term is really
22403     # wanted.  The last value evaluated is true only after a complaint.
22404     sub error_if_expecting_TERM {
22405         if ( $expecting == TERM && $really_want_term{$last_nonblank_type} )
22406         {
22407             unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
22408                 $rtoken_type, $input_line );
22409             1;
22410         }
22411     }
22412
22413     # Complain if a token is found where an operator is expected; a
22414     # defined argument replaces the token text in the message
22415     sub error_if_expecting_OPERATOR {
22416         if ( $expecting == OPERATOR ) {
22417             my $what = defined( $_[0] ) ? shift : $tok;
22418             unexpected( $what, "operator", $i_tok, $last_nonblank_i,
22419                 $rtoken_map, $rtoken_type, $input_line );
22420             if ( $i_tok == 0 ) {    # first token of the line
22421                 interrupt_logfile();
22422                 warning("Missing ';' above?\n");
22423                 resume_logfile();
22424             }
22425             1;
22426         } }
22427
22428     # ------------------------------------------------------------
22429     # end scanner interfaces
22430     # ------------------------------------------------------------
22431
22432     # keyword lookup tables; @_ serves as scratch space for each word list
22433     @_ = qw(for foreach);
22434     my %is_for_foreach = map { $_ => 1 } @_;
22435
22436     # the words 'my' and 'our'
22437     @_ = qw(my our);
22438     my %is_my_our = map { $_ => 1 } @_;
22439
22440     # These keywords may introduce blocks after parenthesized expressions,
22441     # in the form:
22442     # keyword ( .... ) { BLOCK }
22443     # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
22444     @_ = qw(if elsif unless while until for foreach switch case given when);
22445     my %is_blocktype_with_paren = map { $_ => 1 } @_;
22446
22447
22448     # ------------------------------------------------------------
22449     # begin hash of code for handling most token types
22450     # ------------------------------------------------------------
22451     my $tokenization_code = {
22452
22453         # no special code for these types yet, but syntax checks
22454         # could be added
22455
22456 ##      '!'   => undef,
22457 ##      '!='  => undef,
22458 ##      '!~'  => undef,
22459 ##      '%='  => undef,
22460 ##      '&&=' => undef,
22461 ##      '&='  => undef,
22462 ##      '+='  => undef,
22463 ##      '-='  => undef,
22464 ##      '..'  => undef,
22465 ##      '..'  => undef,
22466 ##      '...' => undef,
22467 ##      '.='  => undef,
22468 ##      '<<=' => undef,
22469 ##      '<='  => undef,
22470 ##      '<=>' => undef,
22471 ##      '<>'  => undef,
22472 ##      '='   => undef,
22473 ##      '=='  => undef,
22474 ##      '=~'  => undef,
22475 ##      '>='  => undef,
22476 ##      '>>'  => undef,
22477 ##      '>>=' => undef,
22478 ##      '\\'  => undef,
22479 ##      '^='  => undef,
22480 ##      '|='  => undef,
22481 ##      '||=' => undef,
22482 ##      '//=' => undef,
22483 ##      '~'   => undef,
22484 ##      '~~'  => undef,
22485 ##      '!~~'  => undef,
22486
22487         '>' => sub {
22488             # complain if a term was wanted here
22489             if ( $expecting == TERM ) { error_if_expecting_TERM() }
22490         },
22491         '|' => sub {
22492             # complain if a term was wanted here
22493             if ( $expecting == TERM ) { error_if_expecting_TERM() }
22494         },
22495         '$' => sub {
22496             # start looking for a scalar
22497             if ( $expecting == OPERATOR ) {
22498                 error_if_expecting_OPERATOR("Scalar");
22499             }
22500             scan_identifier();
22501
22502             # remember any use of the variable $^W
22503             $tokenizer_self->{_saw_perl_dash_w} = 1 if ( $identifier eq '$^W' );
22504
22505             # Check for identifier in indirect object slot
22506             # (vorboard.pl, sort.t).  Something like:
22507             #   /^(print|printf|sort|exec|system)$/
22508             if ( $is_indirect_object_taker{$last_nonblank_token} ) {
22509                 $type = 'Z';
22510             }
22511             elsif (( $last_nonblank_token eq '(' )
22512                 && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
22513             {
22514                 $type = 'Z';
22515             }
22516             elsif ( $last_nonblank_type =~ /^[Uw]$/ ) {    # possible object
22517                 $type = 'Z';
22518             }
22519         },
22520         '(' => sub {
22521             # start a new paren level with a zero semicolon count
22522             ++$paren_depth;
22523             $paren_semicolon_count[$paren_depth] = 0;
22524             if ($want_paren) {
22525                 $container_type = $want_paren;
22526                 $want_paren     = "";    # a pending type is used only once
22527             }
22528             else {
22529                 $container_type = $last_nonblank_token;
22530
22531                 # We can check for a syntax error here of unexpected '(',
22532                 # but this is going to get messy...
22533                 if (
22534                     $expecting == OPERATOR
22535
22536                     # be sure this is not a method call of the form
22537                     # &method(...), $method->(..), &{method}(...),
22538                     # $ref[2](list) is ok & short for $ref[2]->(list)
22539                     # NOTE: at present, braces in something like &{ xxx }
22540                     # are not marked as a block, we might have a method call
22541                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
22542
22543                   )
22544                 {
22545
22546                     # ref: camel 3 p 703.
22547                     if ( $last_last_nonblank_token eq 'do' ) {
22548                         complain(
22549 "do SUBROUTINE is deprecated; consider & or -> notation\n"
22550                         );
22551                     }
22552                     else {
22553
22554                         # if this is an empty list, (), then it is not an
22555                         # error; for example, we might have a constant pi and
22556                         # invoke it with pi() or just pi;
22557                         my ( $next_nonblank_token, $i_next ) =
22558                           find_next_nonblank_token( $i, $rtokens,
22559                             $max_token_index );
22560                         if ( $next_nonblank_token ne ')' ) {
22561                             my $hint;    # optional extra advice for the user
22562                             error_if_expecting_OPERATOR('(');
22563
22564                             if ( $last_nonblank_type eq 'C' ) {
22565                                 $hint =
22566                                   "$last_nonblank_token has a void prototype\n";
22567                             }
22568                             elsif ( $last_nonblank_type eq 'i' ) {
22569                                 if (   $i_tok > 0
22570                                     && $last_nonblank_token =~ /^\$/ )
22571                                 {
22572                                     $hint =
22573 "Do you mean '$last_nonblank_token->(' ?\n";
22574                                 }
22575                             }
22576                             if ($hint) {
22577                                 interrupt_logfile();
22578                                 warning($hint);
22579                                 resume_logfile();
22580                             }
22581                         } ## end if ( $next_nonblank_token...
22582                     } ## end else [ if ( $last_last_nonblank_token...
22583                 } ## end if ( $expecting == OPERATOR...
22584             }
22585             $paren_type[$paren_depth] = $container_type;
22586             ( $type_sequence, $indent_flag ) =
22587               increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22588
22589             # propagate types down through nested parens
22590             # for example: the second paren in 'if ((' would be structural
22591             # since the first is.
22592
22593             if ( $last_nonblank_token eq '(' ) {
22594                 $type = $last_nonblank_type;
22595             }
22596
22597             #     We exclude parens as structural after a ',' because it
22598             #     causes subtle problems with continuation indentation for
22599             #     something like this, where the first 'or' will not get
22600             #     indented.
22601             #
22602             #         assert(
22603             #             __LINE__,
22604             #             ( not defined $check )
22605             #               or ref $check
22606             #               or $check eq "new"
22607             #               or $check eq "old",
22608             #         );
22609             #
22610             #     Likewise, we exclude parens where a statement can start
22611             #     because of problems with continuation indentation, like
22612             #     these:
22613             #
22614             #         ($firstline =~ /^#\!.*perl/)
22615             #         and (print $File::Find::name, "\n")
22616             #           and (return 1);
22617             #
22618             #         (ref($usage_fref) =~ /CODE/)
22619             #         ? &$usage_fref
22620             #           : (&blast_usage, &blast_params, &blast_general_params);
22621
22622             else {
22623                 $type = '{';
22624             }
22625             # an opening paren right after a token of type ')' draws a warning
22626             if ( $last_nonblank_type eq ')' ) {
22627                 warning(
22628                     "Syntax error? found token '$last_nonblank_type' then '('\n"
22629                 );
22630             }
22631             $paren_structural_type[$paren_depth] = $type;    # read at the ')'
22632
22633         },
22634         ')' => sub {
22635             ( $type_sequence, $indent_flag ) =
22636               decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
22637
22638             # take the type and container type recorded for this paren level
22639             $type = '}' if ( $paren_structural_type[$paren_depth] eq '{' );
22640             $container_type = $paren_type[$paren_depth];
22641
22642             # a for or foreach paren should hold either no semicolons or two
22643             #    /^(for|foreach)$/
22644             if ( $is_for_foreach{$container_type} ) {
22645                 my $num_sc = $paren_semicolon_count[$paren_depth];
22646                 if ( $num_sc > 0 && $num_sc != 2 ) {
22647                     warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
22648                 }
22649             }
22650
22651             # never let the paren depth go negative
22652             $paren_depth-- if ( $paren_depth > 0 );
22653         },
22654         ',' => sub {
22655             # a comma directly after a comma draws a complaint
22656             complain("Repeated ','s \n") if ( $last_nonblank_type eq ',' );
22657
22658             # patch for operator_expected: note if we are in the list
22659             # of a 'use' statement (use.t)
22660             $statement_type = '_use' if ( $statement_type eq 'use' );
22661 ##                FIXME: need to move this elsewhere, perhaps check after a '('
22662 ##                elsif ($last_nonblank_token eq '(') {
22663 ##                    warning("Leading ','s illegal in some versions of perl\n");
22664 ##                }
22665         },
22666         ';' => sub {
22667
22668             # the statement type and context are reset at a semicolon
22669             $statement_type = '';
22670             $context        = UNKNOWN_CONTEXT;
22671
22672             # Mark a semicolon which lies in the paren of a for loop:
22673             #    /^(for|foreach)$/
22674             # Be careful: we do not want a semicolon such as the
22675             # following to be included:
22676             #
22677             #    for (sort {strcoll($a,$b);} keys %investments) {
22678             #
22679             # so the brace and square bracket depths must also equal the
22680             # values held in @depth_array for this paren.
22681             my $in_for_paren = $is_for_foreach{ $paren_type[$paren_depth] };
22682             if (   $in_for_paren
22683                 && $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
22684                 && $square_bracket_depth ==
22685                 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
22686             {
22687                 $type = 'f';
22688                 $paren_semicolon_count[$paren_depth]++;
22689             } },
22690         '"' => sub {    # start of a double-quoted string
22691             error_if_expecting_OPERATOR("String")
22692               if ( $expecting == OPERATOR );
22693             $in_quote                = 1;
22694             $type                    = 'Q';
22695             $allowed_quote_modifiers = "";    # no trailing modifiers accepted
22696         },
22697         "'" => sub {    # start of a single-quoted string
22698             error_if_expecting_OPERATOR("String")
22699               if ( $expecting == OPERATOR );
22700             $in_quote                = 1;
22701             $type                    = 'Q';
22702             $allowed_quote_modifiers = "";    # no trailing modifiers accepted
22703         },
22704         '`' => sub {    # start of a backtick-quoted string
22705             error_if_expecting_OPERATOR("String")
22706               if ( $expecting == OPERATOR );
22707             $in_quote                = 1;
22708             $type                    = 'Q';
22709             $allowed_quote_modifiers = "";    # no trailing modifiers accepted
22710         },
22711         '/' => sub {    # start of a pattern, or a divide / divide-assign?
22712             my $is_pattern;
22713
22714             if ( $expecting == UNKNOWN ) {    # indeterminate, must guess..
22715                 my $msg;
22716                 ( $is_pattern, $msg ) =
22717                   guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
22718                     $max_token_index );
22719
22720                 if ($msg) {
22721                     write_diagnostics("DIVIDE:$msg\n");
22722                     write_logfile_entry($msg);
22723                 }
22724             }
22725             else { $is_pattern = ( $expecting == TERM ) }    # no guess needed
22726
22727             if ($is_pattern) {
22728                 $in_quote                = 1;
22729                 $type                    = 'Q';
22730                 $allowed_quote_modifiers = '[cgimosxp]';
22731             }
22732             else {    # not a pattern; check for a /= token
22733
22734                 if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
22735                     $i++;
22736                     $tok  = '/=';
22737                     $type = $tok;
22738                 }
22739
22740               #DEBUG - collecting info on what tokens follow a divide
22741               # for development of guessing algorithm
22742               #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
22743               #    #write_diagnostics( "DIVIDE? $input_line\n" );
22744               #}
22745             }
22746         },
22747         '{' => sub {    # opening curly: hash index, anonymous hash, or code block
22748
22749             # if we just saw a ')', we will label this block with
22750             # its type.  We need to do this to allow sub
22751             # code_block_type to determine if this brace starts a
22752             # code block or anonymous hash.  (The type of a paren
22753             # pair is the preceding token, such as 'if', 'else',
22754             # etc).
22755             $container_type = "";
22756
22757             # ATTRS: for a '{' following an attribute list, reset
22758             # things to look like we just saw the sub name
22759             if ( $statement_type =~ /^sub/ ) {
22760                 $last_nonblank_token = $statement_type;
22761                 $last_nonblank_type  = 'i';
22762                 $statement_type      = "";
22763             }
22764
22765             # patch for SWITCH/CASE: hide these keywords from an immediately
22766             # following opening brace
22767             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
22768                 && $statement_type eq $last_nonblank_token )
22769             {
22770                 $last_nonblank_token = ";";
22771             }
22772
22773             elsif ( $last_nonblank_token eq ')' ) {
22774                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
22775
22776                 # defensive move in case of a nesting error (pbug.t)
22777                 # in which this ')' had no previous '('
22778                 # this nesting error will have been caught
22779                 if ( !defined($last_nonblank_token) ) {
22780                     $last_nonblank_token = 'if';
22781                 }
22782
22783                 # check for syntax error here;
22784                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
22785                     my $list = join( ' ', sort keys %is_blocktype_with_paren );
22786                     warning(
22787                         "syntax error at ') {', didn't see one of: $list\n");
22788                 }
22789             }
22790
22791             # patch for paren-less for/foreach glitch, part 2.
22792             # see note below under 'qw'
22793             elsif ($last_nonblank_token eq 'qw'
22794                 && $is_for_foreach{$want_paren} )
22795             {
22796                 $last_nonblank_token = $want_paren;
22797                 if ( $last_last_nonblank_token eq $want_paren ) {
22798                     warning(
22799 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
22800                     );
22801
22802                 }
22803                 $want_paren = "";
22804             }
22805
22806             # now identify which of the three possible types of
22807             # curly braces we have: hash index container, anonymous
22808             # hash reference, or code block.
22809
22810             # non-structural (hash index) curly brace pair
22811             # get marked 'L' and 'R'
22812             if ( is_non_structural_brace() ) {
22813                 $type = 'L';
22814
22815                 # patch for SWITCH/CASE:
22816                 # allow paren-less identifier after 'when'
22817                 # if the brace is preceded by a space
22818                 if (   $statement_type eq 'when'
22819                     && $last_nonblank_type      eq 'i'
22820                     && $last_last_nonblank_type eq 'k'
22821                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
22822                 {
22823                     $type       = '{';
22824                     $block_type = $statement_type;
22825                 }
22826             }
22827
22828             # code and anonymous hash have the same type, '{', but are
22829             # distinguished by 'block_type',
22830             # which will be blank for an anonymous hash
22831             else {
22832
22833                 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
22834                     $max_token_index );
22835
22836                 # patch to promote bareword type to function taking block:
22837                 # the previous token's output type is changed from 'w' to 'G'
22838                 && $last_nonblank_type eq 'w'
22839                     && $last_nonblank_i >= 0 )
22840                 {
22841                     if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
22842                         $routput_token_type->[$last_nonblank_i] = 'G';
22843                     }
22844                 }
22845
22846                 # patch for SWITCH/CASE: if we find a stray opening block brace
22847                 # where we might accept a 'case' or 'when' block, then take it
22848                 if (   $statement_type eq 'case'
22849                     || $statement_type eq 'when' )
22850                 {
22851                     if ( !$block_type || $block_type eq '}' ) {
22852                         $block_type = $statement_type;
22853                     }
22854                 }
22855             }
22856             $brace_type[ ++$brace_depth ] = $block_type;    # push a new brace level
22857             $brace_package[$brace_depth] = $current_package;
22858             ( $type_sequence, $indent_flag ) =
22859               increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22860             $brace_structural_type[$brace_depth] = $type;
22861             $brace_context[$brace_depth]         = $context;
22862             $brace_statement_type[$brace_depth]  = $statement_type;
22863         },
22864         '}' => sub {    # closing curly: restore the state saved at the matching '{'
22865             $block_type = $brace_type[$brace_depth];
22866             if ($block_type) { $statement_type = '' }
22867             if ( defined( $brace_package[$brace_depth] ) ) {
22868                 $current_package = $brace_package[$brace_depth];
22869             }
22870
22871             # can happen on brace error (caught elsewhere)
22872             else {
22873             }
22874             ( $type_sequence, $indent_flag ) =
22875               decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22876
22877             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
22878                 $type = 'R';    # closes a non-structural (type 'L') brace
22879             }
22880
22881             # propagate type information for 'do' and 'eval' blocks.
22882             # This is necessary to enable us to know if an operator
22883             # or term is expected next
22884             if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
22885                 $tok = $brace_type[$brace_depth];
22886             }
22887
22888             $context        = $brace_context[$brace_depth];
22889             $statement_type = $brace_statement_type[$brace_depth];
22890             if ( $brace_depth > 0 ) { $brace_depth--; }    # never drop below 0
22891         },
22892         '&' => sub {    # maybe sub call? start looking
22893
22894             # We have to check for sub call unless we are sure we
22895             # are expecting an operator.  This example from s2p
22896             # got mistaken as a q operator in an early version:
22897             #   print BODY &q(<<'EOT');
22898             if ( $expecting != OPERATOR ) {
22899                 scan_identifier();
22900             }
22901             else {    # operator expected: nothing is changed here
22902             }
22903         },
22904         '<' => sub {    # angle operator or less than?
22905
22906             if ( $expecting != OPERATOR ) {
22907                 ( $i, $type ) =
22908                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
22909                     $expecting, $max_token_index );
22910
22911                 if ( $type eq '<' && $expecting == TERM ) {    # still a bare '<'
22912                     error_if_expecting_TERM();
22913                     interrupt_logfile();
22914                     warning("Unterminated <> operator?\n");
22915                     resume_logfile();
22916                 }
22917             }
22918             else {    # operator expected: nothing is changed here
22919             }
22920         },
22921         '?' => sub {    # ?: conditional or starting pattern?
22922
22923             my $is_pattern;
22924
22925             if ( $expecting == UNKNOWN ) {    # indeterminate, must guess
22926
22927                 my $msg;
22928                 ( $is_pattern, $msg ) =
22929                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
22930                     $max_token_index );
22931
22932                 if ($msg) { write_logfile_entry($msg) }
22933             }
22934             else { $is_pattern = ( $expecting == TERM ) }    # no guess needed
22935
22936             if ($is_pattern) {
22937                 $in_quote                = 1;
22938                 $type                    = 'Q';
22939                 $allowed_quote_modifiers = '[cgimosxp]';
22940             }
22941             else {    # not a pattern: opens a QUESTION_COLON nesting level
22942                 ( $type_sequence, $indent_flag ) =
22943                   increase_nesting_depth( QUESTION_COLON,
22944                     $$rtoken_map[$i_tok] );
22945             }
22946         },
22947         '*' => sub {    # typeglob, or multiply?
22948
22949             if ( $expecting == TERM ) {
22950                 scan_identifier();
22951             }
22952             else {    # otherwise join with following characters: *=  **  **=
22953
22954                 if ( $$rtokens[ $i + 1 ] eq '=' ) {
22955                     $tok  = '*=';
22956                     $type = $tok;
22957                     $i++;
22958                 }
22959                 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
22960                     $tok  = '**';
22961                     $type = $tok;
22962                     $i++;
22963                     if ( $$rtokens[ $i + 1 ] eq '=' ) {    # look one further
22964                         $tok  = '**=';
22965                         $type = $tok;
22966                         $i++;
22967                     }
22968                 }
22969             }
22970         },
22971         '.' => sub {    # what kind of . ?
22972
22973             if ( $expecting != OPERATOR ) {
22974                 scan_number();
22975                 if ( $type eq '.' ) {    # type is still '.' after scan_number()
22976                     error_if_expecting_TERM()
22977                       if ( $expecting == TERM );
22978                 }
22979             }
22980             else {    # operator expected: nothing is changed here
22981             }
22982         },
22983         ':' => sub {    # label, attribute-list introducer, or the ':' of ?:
22984
22985             # if this is the first nonblank character, call it a label
22986             # since perl seems to just swallow it
22987             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
22988                 $type = 'J';
22989             }
22990
22991             # ATTRS: check for a ':' which introduces an attribute list
22992             # (this might eventually get its own token type)
22993             elsif ( $statement_type =~ /^sub/ ) {
22994                 $type              = 'A';
22995                 $in_attribute_list = 1;
22996             }
22997
22998             # check for scalar attribute, such as
22999             # my $foo : shared = 1;
23000             elsif ($is_my_our{$statement_type}
23001                 && $current_depth[QUESTION_COLON] == 0 )
23002             {
23003                 $type              = 'A';
23004                 $in_attribute_list = 1;
23005             }
23006
23007             # otherwise, it should be part of a ?/: operator
23008             else {
23009                 ( $type_sequence, $indent_flag ) =
23010                   decrease_nesting_depth( QUESTION_COLON,
23011                     $$rtoken_map[$i_tok] );
23012                 if ( $last_nonblank_token eq '?' ) {    # nothing between ? and :
23013                     warning("Syntax error near ? :\n");
23014                 }
23015             }
23016         },
23017         '+' => sub {    # what kind of plus?
23018
23019             if ( $expecting == TERM ) {
23020                 my $number = scan_number();
23021
23022                 # unary plus is safest assumption if not a number
23023                 if ( !defined($number) ) { $type = 'p'; }
23024             }
23025             elsif ( $expecting == OPERATOR ) {    # nothing is changed here
23026             }
23027             else {    # expectation unknown: type 'p' only before a type 'w' token
23028                 if ( $next_type eq 'w' ) { $type = 'p' }
23029             }
23030         },
23031         '@' => sub {    # array sigil: always scanned as an identifier
23032
23033             error_if_expecting_OPERATOR("Array")
23034               if ( $expecting == OPERATOR );
23035             scan_identifier();
23036         },
23037         '%' => sub {    # hash or modulo?
23038
23039             # first guess is hash if no following blank
23040             if ( $expecting == UNKNOWN ) {
23041                 if ( $next_type ne 'b' ) { $expecting = TERM }
23042             }
23043             if ( $expecting == TERM ) {    # otherwise the token is left as is
23044                 scan_identifier();
23045             }
23046         },
23047         '[' => sub {    # opening square bracket: push a new bracket level
23048             $square_bracket_type[ ++$square_bracket_depth ] =
23049               $last_nonblank_token;
23050             ( $type_sequence, $indent_flag ) =
23051               increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23052
23053             # It may seem odd, but structural square brackets have
23054             # type '{' and '}'.  This simplifies the indentation logic.
23055             if ( !is_non_structural_brace() ) {
23056                 $type = '{';
23057             }
23058             $square_bracket_structural_type[$square_bracket_depth] = $type;
23059         },
23060         ']' => sub {    # closing square bracket: pop one bracket level
23061             ( $type_sequence, $indent_flag ) =
23062               decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
23063
23064             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
23065             {
23066                 $type = '}';    # mirror the type stored at the opening '['
23067             }
23068             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
23069         },
23070         '-' => sub {    # what kind of minus?
23071
23072             if ( ( $expecting != OPERATOR )
23073                 && $is_file_test_operator{$next_tok} )
23074             {
23075                 my ( $next_nonblank_token, $i_next ) =
23076                   find_next_nonblank_token( $i + 1, $rtokens,
23077                     $max_token_index );
23078
23079                 # check for a quoted word like "-w=>xx";
23080                 # it is sufficient to just check for a following '='
23081                 if ( $next_nonblank_token eq '=' ) {
23082                     $type = 'm';
23083                 }
23084                 else {    # file test: join '-' with the next token, type 'F'
23085                     $i++;
23086                     $tok .= $next_tok;
23087                     $type = 'F';
23088                 }
23089             }
23090             elsif ( $expecting == TERM ) {
23091                 my $number = scan_number();
23092
23093                 # maybe part of bareword token? unary is safest
23094                 if ( !defined($number) ) { $type = 'm'; }
23095
23096             }
23097             elsif ( $expecting == OPERATOR ) {    # nothing is changed here
23098             }
23099             else {    # expectation unknown: type 'm' only before a type 'w' token
23100
23101                 if ( $next_type eq 'w' ) {
23102                     $type = 'm';
23103                 }
23104             }
23105         },
23106
23107         '^' => sub {    # caret: only handled specially when a term is expected
23108
23109             # check for special variables like ${^WARNING_BITS}
23110             if ( $expecting == TERM ) {
23111
23112                 # FIXME: this should work but will not catch errors
23113                 # because we also have to be sure that previous token is
23114                 # a type character ($,@,%).
23115                 if ( $last_nonblank_token eq '{'
23116                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
23117                 {
23118
23119                     if ( $next_tok eq 'W' ) {
23120                         $tokenizer_self->{_saw_perl_dash_w} = 1;
23121                     }
23122                     $tok  = $tok . $next_tok;    # join '^' with the next token
23123                     $i    = $i + 1;
23124                     $type = 'w';
23125                 }
23126
23127                 else {
23128                     unless ( error_if_expecting_TERM() ) {
23129
23130                         # Something like this is valid but strange:
23131                         # undef ^I;
23132                         complain("The '^' seems unusual here\n");
23133                     }
23134                 }
23135             }
23136         },
23137
23138         '::' => sub {    # probably a sub call; scan it as a bare identifier
23139             scan_bare_identifier();
23140         },
23141         '<<' => sub {    # maybe a here-doc?
23142             return
23143               unless ( $i < $max_token_index )
23144               ;          # here-doc not possible if end of line
23145
23146             if ( $expecting != OPERATOR ) {
23147                 my ( $found_target, $here_doc_target, $here_quote_character,
23148                     $saw_error );
23149                 (
23150                     $found_target, $here_doc_target, $here_quote_character, $i,
23151                     $saw_error
23152                   )
23153                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
23154                     $max_token_index );
23155
23156                 if ($found_target) {
23157                     push @{$rhere_target_list},
23158                       [ $here_doc_target, $here_quote_character ];
23159                     $type = 'h';
23160                     if ( length($here_doc_target) > 80 ) {
23161                         my $truncated = substr( $here_doc_target, 0, 80 );
23162                         complain("Long here-target: '$truncated' ...\n");
23163                     }
23164                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
23165                         complain(
23166                             "Unconventional here-target: '$here_doc_target'\n"
23167                         );
23168                     }
23169                 }
23170                 elsif ( $expecting == TERM ) {
23171                     unless ($saw_error) {
23172
23173                         # shouldn't happen..
23174                         warning("Program bug; didn't find here doc target\n");
23175                         report_definite_bug();
23176                     }
23177                 }
23178             }
23179             else {    # operator expected: nothing is changed here
23180             }
23181         },
23182         '->' => sub {    # arrow: always followed by an identifier scan
23183
23184             # if -> points to a bare word, we must scan for an identifier,
23185             # otherwise something like ->y would look like the y operator
23186             scan_identifier();
23187         },
23188
23189         # type = 'pp' for pre-increment, '++' for post-increment
23190         '++' => sub {
23191             if ( $expecting == TERM ) { $type = 'pp' }
23192             elsif ( $expecting == UNKNOWN ) {    # guess from what follows
23193                 my ( $next_nonblank_token, $i_next ) =
23194                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23195                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
23196             }
23197         },
23198
23199         '=>' => sub {    # fat comma: complain if doubled, flag 'use' lists
23200             if ( $last_nonblank_type eq $tok ) {
23201                 complain("Repeated '=>'s \n");
23202             }
23203
23204             # patch for operator_expected: note if we are in the list (use.t)
23205             # TODO: make version numbers a new token type
23206             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
23207         },
23208
23209         # type = 'mm' for pre-decrement, '--' for post-decrement
23210         '--' => sub {
23211
23212             if ( $expecting == TERM ) { $type = 'mm' }
23213             elsif ( $expecting == UNKNOWN ) {    # guess from what follows
23214                 my ( $next_nonblank_token, $i_next ) =
23215                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23216                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
23217             }
23218         },
23219
23220         '&&' => sub {    # logical and: report an error if a term was expected
23221             error_if_expecting_TERM()
23222               if ( $expecting == TERM );
23223         },
23224
23225         '||' => sub {    # logical or: report an error if a term was expected
23226             error_if_expecting_TERM()
23227               if ( $expecting == TERM );
23228         },
23229
23230         '//' => sub {    # '//' token: report an error if a term was expected
23231             error_if_expecting_TERM()
23232               if ( $expecting == TERM );
23233         },
23234     };
23235
23236     # ------------------------------------------------------------
23237     # end hash of code for handling individual token types
23238     # ------------------------------------------------------------
23239
23240     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
23241
23242     # These block types terminate statements and do not need a trailing
23243     # semicolon.  (Each table below is filled by loading its words into
23244     # @_ and assigning 1 to every key through a hash slice.)
23245     my %is_zero_continuation_block_type;
23246     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
23247       if elsif else unless while until for foreach switch case given when);
23248     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
23249
23250     my %is_not_zero_continuation_block_type;
23251     @_ = qw(sort grep map do eval);
23252     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
23253
23254     my %is_logical_container;
23255     @_ = qw(if elsif unless while and or err not && !  || for foreach);
23256     @is_logical_container{@_} = (1) x scalar(@_);
23257
23258     my %is_binary_type;
23259     @_ = qw(|| &&);
23260     @is_binary_type{@_} = (1) x scalar(@_);
23261
23262     my %is_binary_keyword;
23263     @_ = qw(and or err eq ne cmp);
23264     @is_binary_keyword{@_} = (1) x scalar(@_);
23265
23266     # 'L' is token for opening { at hash key
23267     my %is_opening_type;
23268     @_ = qw" L { ( [ ";
23269     @is_opening_type{@_} = (1) x scalar(@_);
23270
23271     # 'R' is token for closing } at hash key
23272     my %is_closing_type;
23273     @_ = qw" R } ) ] ";
23274     @is_closing_type{@_} = (1) x scalar(@_);
23275
23276     my %is_redo_last_next_goto;
23277     @_ = qw(redo last next goto);
23278     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
23279
23280     my %is_use_require;
23281     @_ = qw(use require);
23282     @is_use_require{@_} = (1) x scalar(@_);
23283
23284     my %is_sub_package;
23285     @_ = qw(sub package);
23286     @is_sub_package{@_} = (1) x scalar(@_);
23287
23288     # This hash holds the hash key in $tokenizer_self for these keywords:
23289     my %is_format_END_DATA = (
23290         'format'   => '_in_format',
23291         '__END__'  => '_in_end',
23292         '__DATA__' => '_in_data',
23293     );
23294
23295     # Allowed trailing modifiers for each quote-like operator, stored as a
23296     # regex character class (empty string = none).  Ref: camel 3 p 147, but
23297     # perl may accept undocumented flags; perl 5.10 adds 'p' (preserve)
23298     my %quote_modifiers = (
23299         's'  => '[cegimosxp]',
23300         'y'  => '[cds]',
23301         'tr' => '[cds]',
23302         'm'  => '[cgimosxp]',
23303         'qr' => '[imosxp]',
23304         'q'  => "",
23305         'qq' => "",
23306         'qw' => "",
23307         'qx' => "",
23308     );
23309
23310     # table showing how many quoted things to look for after quote operator..
23311     # s, y, tr have 2 (pattern and replacement)
23312     # others have 1 (pattern only)
23313     my %quote_items = (
23314         's'  => 2,
23315         'y'  => 2,
23316         'tr' => 2,
23317         'm'  => 1,
23318         'qr' => 1,
23319         'q'  => 1,
23320         'qq' => 1,
23321         'qw' => 1,
23322         'qx' => 1,
23323     );
23324
23325     sub tokenize_this_line {
23326
23327   # This routine breaks a line of perl code into tokens which are of use in
23328   # indentation and reformatting.  One of my goals has been to define tokens
23329   # such that a newline may be inserted between any pair of tokens without
23330   # changing or invalidating the program. This version comes close to this,
23331   # although there are necessarily a few exceptions which must be caught by
23332   # the formatter.  Many of these involve the treatment of bare words.
23333   #
23334   # The tokens and their types are returned in arrays.  See previous
23335   # routine for their names.
23336   #
23337   # See also the array "valid_token_types" in the BEGIN section for an
23338   # up-to-date list.
23339   #
23340   # To simplify things, token types are either a single character, or they
23341   # are identical to the tokens themselves.
23342   #
23343   # As a debugging aid, the -D flag creates a file containing a side-by-side
23344   # comparison of the input string and its tokenization for each line of a file.
23345   # This is an invaluable debugging aid.
23346   #
23347   # In addition to tokens, and some associated quantities, the tokenizer
23348   # also returns flags indicating any special line types.  These include
23349   # quotes, here_docs, formats.
23350   #
23351   # -----------------------------------------------------------------------
23352   #
23353   # How to add NEW_TOKENS:
23354   #
23355   # New token types will undoubtedly be needed in the future both to keep up
23356   # with changes in perl and to help adapt the tokenizer to other applications.
23357   #
23358   # Here are some notes on the minimal steps.  I wrote these notes while
23359   # adding the 'v' token type for v-strings, which are things like version
23360   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
23361   # can use your editor to search for the string "NEW_TOKENS" to find the
23362   # appropriate sections to change):
23363   #
23364   # *. Try to talk somebody else into doing it!  If not, ..
23365   #
23366   # *. Make a backup of your current version in case things don't work out!
23367   #
23368   # *. Think of a new, unused character for the token type, and add to
23369   # the array @valid_token_types in the BEGIN section of this package.
23370   # For example, I used 'v' for v-strings.
23371   #
23372   # *. Implement coding to recognize the $type of the token in this routine.
23373   # This is the hardest part, and is best done by imitating or modifying
23374   # some of the existing coding.  For example, to recognize v-strings, I
23375   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
23376   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
23377   #
23378   # *. Update sub operator_expected.  This update is critically important but
23379   # the coding is trivial.  Look at the comments in that routine for help.
23380   # For v-strings, which should behave like numbers, I just added 'v' to the
23381   # regex used to handle numbers and strings (types 'n' and 'Q').
23382   #
23383   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
23384   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
23385   # skip this step and take the default at first, then adjust later to get
23386   # desired results.  For adding type 'v', I looked at sub bond_strength and
23387   # saw that number type 'n' was using default strengths, so I didn't do
23388   # anything.  I may tune it up someday if I don't like the way line
23389   # breaks with v-strings look.
23390   #
23391   # *. Implement a 'whitespace' rule in sub set_white_space_flag in
23392   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
23393   # and saw that type 'n' used spaces on both sides, so I just added 'v'
23394   # to the array @spaces_both_sides.
23395   #
23396   # *. Update HtmlWriter package so that users can colorize the token as
23397   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
23398   # that package.  For v-strings, I initially chose to use a default color
23399   # equal to the default for numbers, but it might be nice to change that
23400   # eventually.
23401   #
23402   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
23403   #
23404   # *. Run lots and lots of debug tests.  Start with special files designed
23405   # to test the new token type.  Run with the -D flag to create a .DEBUG
23406   # file which shows the tokenization.  When these work ok, test as many old
23407   # scripts as possible.  Start with all of the '.t' files in the 'test'
23408   # directory of the distribution file.  Compare .tdy output with previous
23409   # version and updated version to see the differences.  Then include as
23410   # many more files as possible. My own technique has been to collect a huge
23411   # number of perl scripts (thousands!) into one directory and run perltidy
23412   # *, then run diff between the output of the previous version and the
23413   # current version.
23414   #
23415   # *. For another example, search for the smartmatch operator '~~'
23416   # with your editor to see where updates were made for it.
23417   #
23418   # -----------------------------------------------------------------------
23419
23420         my $line_of_tokens = shift;
23421         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
23422
23423         # patch while coding change is underway
23424         # make callers private data to allow access
23425         # $tokenizer_self = $caller_tokenizer_self;
23426
23427         # extract line number for use in error messages
23428         $input_line_number = $line_of_tokens->{_line_number};
23429
23430         # reinitialize for multi-line quote
23431         $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
23432
23433         # check for pod documentation
23434         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
23435
23436             # must not be in multi-line quote
23437             # and must not be in an eqn
23438             if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
23439             {
23440                 $tokenizer_self->{_in_pod} = 1;
23441                 return;
23442             }
23443         }
23444
23445         $input_line = $untrimmed_input_line;
23446
23447         chomp $input_line;
23448
23449         # trim start of this line unless we are continuing a quoted line
23450         # do not trim end because we might end in a quote (test: deken4.pl)
23451         # Perl::Tidy::Formatter will delete needless trailing blanks
23452         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
23453             $input_line =~ s/^\s*//;    # trim left end
23454         }
23455
23456         # update the copy of the line for use in error messages
23457         # This must be exactly what we give the pre_tokenizer
23458         $tokenizer_self->{_line_text} = $input_line;
23459
23460         # re-initialize for the main loop
23461         $routput_token_list     = [];    # stack of output token indexes
23462         $routput_token_type     = [];    # token types
23463         $routput_block_type     = [];    # types of code block
23464         $routput_container_type = [];    # paren types, such as if, elsif, ..
23465         $routput_type_sequence  = [];    # nesting sequential number
23466
23467         $rhere_target_list = [];
23468
23469         $tok             = $last_nonblank_token;
23470         $type            = $last_nonblank_type;
23471         $prototype       = $last_nonblank_prototype;
23472         $last_nonblank_i = -1;
23473         $block_type      = $last_nonblank_block_type;
23474         $container_type  = $last_nonblank_container_type;
23475         $type_sequence   = $last_nonblank_type_sequence;
23476         $indent_flag     = 0;
23477         $peeked_ahead    = 0;
23478
23479         # tokenization is done in two stages..
23480         # stage 1 is a very simple pre-tokenization
23481         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
23482
23483         # a little optimization for a full-line comment
23484         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
23485             $max_tokens_wanted = 1    # no use tokenizing a comment
23486         }
23487
23488         # start by breaking the line into pre-tokens
23489         ( $rtokens, $rtoken_map, $rtoken_type ) =
23490           pre_tokenize( $input_line, $max_tokens_wanted );
23491
23492         $max_token_index = scalar(@$rtokens) - 1;
23493         push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
23494         push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
23495         push( @$rtoken_type, 'b', 'b', 'b' );
23496
23497         # initialize for main loop
23498         for $i ( 0 .. $max_token_index + 3 ) {
23499             $routput_token_type->[$i]     = "";
23500             $routput_block_type->[$i]     = "";
23501             $routput_container_type->[$i] = "";
23502             $routput_type_sequence->[$i]  = "";
23503             $routput_indent_flag->[$i]    = 0;
23504         }
23505         $i     = -1;
23506         $i_tok = -1;
23507
23508         # ------------------------------------------------------------
23509         # begin main tokenization loop
23510         # ------------------------------------------------------------
23511
23512         # we are looking at each pre-token of one line and combining them
23513         # into tokens
23514         while ( ++$i <= $max_token_index ) {
23515
23516             if ($in_quote) {    # continue looking for end of a quote
23517                 $type = $quote_type;
23518
23519                 unless ( @{$routput_token_list} )
23520                 {               # initialize if continuation line
23521                     push( @{$routput_token_list}, $i );
23522                     $routput_token_type->[$i] = $type;
23523
23524                 }
23525                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
23526
23527                 # scan for the end of the quote or pattern
23528                 (
23529                     $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23530                     $quoted_string_1, $quoted_string_2
23531                   )
23532                   = do_quote(
23533                     $i,               $in_quote,    $quote_character,
23534                     $quote_pos,       $quote_depth, $quoted_string_1,
23535                     $quoted_string_2, $rtokens,     $rtoken_map,
23536                     $max_token_index
23537                   );
23538
23539                 # all done if we didn't find it
23540                 last if ($in_quote);
23541
23542                 # save pattern and replacement text for rescanning
23543                 my $qs1 = $quoted_string_1;
23544                 my $qs2 = $quoted_string_2;
23545
23546                 # re-initialize for next search
23547                 $quote_character = '';
23548                 $quote_pos       = 0;
23549                 $quote_type      = 'Q';
23550                 $quoted_string_1 = "";
23551                 $quoted_string_2 = "";
23552                 last if ( ++$i > $max_token_index );
23553
23554                 # look for any modifiers
23555                 if ($allowed_quote_modifiers) {
23556
23557                     # check for exact quote modifiers
23558                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
23559                         my $str = $$rtokens[$i];
23560                         my $saw_modifier_e;
23561                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
23562                             my $pos = pos($str);
23563                             my $char = substr( $str, $pos - 1, 1 );
23564                             $saw_modifier_e ||= ( $char eq 'e' );
23565                         }
23566
23567                         # For an 'e' quote modifier we must scan the replacement
23568                         # text for here-doc targets.
23569                         if ($saw_modifier_e) {
23570
23571                             my $rht = scan_replacement_text($qs1);
23572
23573                             # Change type from 'Q' to 'h' for quotes with
23574                             # here-doc targets so that the formatter (see sub
23575                             # print_line_of_tokens) will not make any line
23576                             # breaks after this point.
23577                             if ($rht) {
23578                                 push @{$rhere_target_list}, @{$rht};
23579                                 $type = 'h';
23580                                 if ( $i_tok < 0 ) {
23581                                     my $ilast = $routput_token_list->[-1];
23582                                     $routput_token_type->[$ilast] = $type;
23583                                 }
23584                             }
23585                         }
23586
23587                         if ( defined( pos($str) ) ) {
23588
23589                             # matched
23590                             if ( pos($str) == length($str) ) {
23591                                 last if ( ++$i > $max_token_index );
23592                             }
23593
23594                             # Looks like a joined quote modifier
23595                             # and keyword, maybe something like
23596                             # s/xxx/yyy/gefor @k=...
23597                             # Example is "galgen.pl".  Would have to split
23598                             # the word and insert a new token in the
23599                             # pre-token list.  This is so rare that I haven't
23600                             # done it.  Will just issue a warning citation.
23601
23602                             # This error might also be triggered if my quote
23603                             # modifier characters are incomplete
23604                             else {
23605                                 warning(<<EOM);
23606
23607 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
23608 Please put a space between quote modifiers and trailing keywords.
23609 EOM
23610
23611                            # print "token $$rtokens[$i]\n";
23612                            # my $num = length($str) - pos($str);
23613                            # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
23614                            # print "continuing with new token $$rtokens[$i]\n";
23615
23616                                 # skipping past this token does least damage
23617                                 last if ( ++$i > $max_token_index );
23618                             }
23619                         }
23620                         else {
23621
23622                             # example file: rokicki4.pl
23623                             # This error might also be triggered if my quote
23624                             # modifier characters are incomplete
23625                             write_logfile_entry(
23626 "Note: found word $str at quote modifier location\n"
23627                             );
23628                         }
23629                     }
23630
23631                     # re-initialize
23632                     $allowed_quote_modifiers = "";
23633                 }
23634             }
23635
23636             unless ( $tok =~ /^\s*$/ ) {
23637
23638                 # try to catch some common errors
23639                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
23640
23641                     if ( $last_nonblank_token eq 'eq' ) {
23642                         complain("Should 'eq' be '==' here ?\n");
23643                     }
23644                     elsif ( $last_nonblank_token eq 'ne' ) {
23645                         complain("Should 'ne' be '!=' here ?\n");
23646                     }
23647                 }
23648
23649                 $last_last_nonblank_token      = $last_nonblank_token;
23650                 $last_last_nonblank_type       = $last_nonblank_type;
23651                 $last_last_nonblank_block_type = $last_nonblank_block_type;
23652                 $last_last_nonblank_container_type =
23653                   $last_nonblank_container_type;
23654                 $last_last_nonblank_type_sequence =
23655                   $last_nonblank_type_sequence;
23656                 $last_nonblank_token          = $tok;
23657                 $last_nonblank_type           = $type;
23658                 $last_nonblank_prototype      = $prototype;
23659                 $last_nonblank_block_type     = $block_type;
23660                 $last_nonblank_container_type = $container_type;
23661                 $last_nonblank_type_sequence  = $type_sequence;
23662                 $last_nonblank_i              = $i_tok;
23663             }
23664
23665             # store previous token type
23666             if ( $i_tok >= 0 ) {
23667                 $routput_token_type->[$i_tok]     = $type;
23668                 $routput_block_type->[$i_tok]     = $block_type;
23669                 $routput_container_type->[$i_tok] = $container_type;
23670                 $routput_type_sequence->[$i_tok]  = $type_sequence;
23671                 $routput_indent_flag->[$i_tok]    = $indent_flag;
23672             }
23673             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
23674             my $pre_type = $$rtoken_type[$i];    # and type
23675             $tok  = $pre_tok;
23676             $type = $pre_type;                   # to be modified as necessary
23677             $block_type = "";    # blank for all tokens except code block braces
23678             $container_type = "";    # blank for all tokens except some parens
23679             $type_sequence  = "";    # blank for all tokens except ?/:
23680             $indent_flag    = 0;
23681             $prototype = "";    # blank for all tokens except user defined subs
23682             $i_tok     = $i;
23683
23684             # this pre-token will start an output token
23685             push( @{$routput_token_list}, $i_tok );
23686
23687             # continue gathering identifier if necessary
23688             # but do not start on blanks and comments
23689             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
23690
23691                 if ( $id_scan_state =~ /^(sub|package)/ ) {
23692                     scan_id();
23693                 }
23694                 else {
23695                     scan_identifier();
23696                 }
23697
23698                 last if ($id_scan_state);
23699                 next if ( ( $i > 0 ) || $type );
23700
23701                 # didn't find any token; start over
23702                 $type = $pre_type;
23703                 $tok  = $pre_tok;
23704             }
23705
23706             # handle whitespace tokens..
23707             next if ( $type eq 'b' );
23708             my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
23709             my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
23710
23711             # Build larger tokens where possible, since we are not in a quote.
23712             #
23713             # First try to assemble digraphs.  The following tokens are
23714             # excluded and handled specially:
23715             # '/=' is excluded because the / might start a pattern.
23716             # 'x=' is excluded since it might be $x=, with $ on previous line
23717             # '**' and *= might be typeglobs of punctuation variables
23718             # I have allowed tokens starting with <, such as <=,
23719             # because I don't think these could be valid angle operators.
23720             # test file: storrs4.pl
23721             my $test_tok   = $tok . $$rtokens[ $i + 1 ];
23722             my $combine_ok = $is_digraph{$test_tok};
23723
23724             # check for special cases which cannot be combined
23725             if ($combine_ok) {
23726
23727                 # '//' must be defined_or operator if an operator is expected.
23728                 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
23729                 # could be migrated here for clarity
23730                 if ( $test_tok eq '//' ) {
23731                     my $next_type = $$rtokens[ $i + 1 ];
23732                     my $expecting =
23733                       operator_expected( $prev_type, $tok, $next_type );
23734                     $combine_ok = 0 unless ( $expecting == OPERATOR );
23735                 }
23736             }
23737
23738             if (
23739                 $combine_ok
23740                 && ( $test_tok ne '/=' )    # might be pattern
23741                 && ( $test_tok ne 'x=' )    # might be $x
23742                 && ( $test_tok ne '**' )    # typeglob?
23743                 && ( $test_tok ne '*=' )    # typeglob?
23744               )
23745             {
23746                 $tok = $test_tok;
23747                 $i++;
23748
23749                 # Now try to assemble trigraphs.  Note that all possible
23750                 # perl trigraphs can be constructed by appending a character
23751                 # to a digraph.
23752                 $test_tok = $tok . $$rtokens[ $i + 1 ];
23753
23754                 if ( $is_trigraph{$test_tok} ) {
23755                     $tok = $test_tok;
23756                     $i++;
23757                 }
23758             }
23759
23760             $type      = $tok;
23761             $next_tok  = $$rtokens[ $i + 1 ];
23762             $next_type = $$rtoken_type[ $i + 1 ];
23763
23764             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
23765                 local $" = ')(';
23766                 my @debug_list = (
23767                     $last_nonblank_token,      $tok,
23768                     $next_tok,                 $brace_depth,
23769                     $brace_type[$brace_depth], $paren_depth,
23770                     $paren_type[$paren_depth]
23771                 );
23772                 print "TOKENIZE:(@debug_list)\n";
23773             };
23774
23775             # turn off attribute list on first non-blank, non-bareword
23776             if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
23777
23778             ###############################################################
23779             # We have the next token, $tok.
23780             # Now we have to examine this token and decide what it is
23781             # and define its $type
23782             #
23783             # section 1: bare words
23784             ###############################################################
23785
23786             if ( $pre_type eq 'w' ) {
23787                 $expecting = operator_expected( $prev_type, $tok, $next_type );
23788                 my ( $next_nonblank_token, $i_next ) =
23789                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23790
23791                 # ATTRS: handle sub and variable attributes
23792                 if ($in_attribute_list) {
23793
23794                     # treat bare word followed by open paren like qw(
23795                     if ( $next_nonblank_token eq '(' ) {
23796                         $in_quote                = $quote_items{'q'};
23797                         $allowed_quote_modifiers = $quote_modifiers{'q'};
23798                         $type                    = 'q';
23799                         $quote_type              = 'q';
23800                         next;
23801                     }
23802
23803                     # handle bareword not followed by open paren
23804                     else {
23805                         $type = 'w';
23806                         next;
23807                     }
23808                 }
23809
23810                 # quote a word followed by => operator
23811                 if ( $next_nonblank_token eq '=' ) {
23812
23813                     if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
23814                         if ( $is_constant{$current_package}{$tok} ) {
23815                             $type = 'C';
23816                         }
23817                         elsif ( $is_user_function{$current_package}{$tok} ) {
23818                             $type = 'U';
23819                             $prototype =
23820                               $user_function_prototype{$current_package}{$tok};
23821                         }
23822                         elsif ( $tok =~ /^v\d+$/ ) {
23823                             $type = 'v';
23824                             report_v_string($tok);
23825                         }
23826                         else { $type = 'w' }
23827
23828                         next;
23829                     }
23830                 }
23831
23832      # quote a bare word within braces..like xxx->{s}; note that we
23833      # must be sure this is not a structural brace, to avoid
23834      # mistaking {s} in the following for a quoted bare word:
23835      #     for(@[){s}bla}BLA}
23836      # Also treat q in something like var{-q} as a bare word, not quote operator
23837                 ##if (   ( $last_nonblank_type eq 'L' )
23838                 ##    && ( $next_nonblank_token eq '}' ) )
23839                 if (
23840                     $next_nonblank_token eq '}'
23841                     && (
23842                         $last_nonblank_type eq 'L'
23843                         || (   $last_nonblank_type eq 'm'
23844                             && $last_last_nonblank_type eq 'L' )
23845                     )
23846                   )
23847                 {
23848                     $type = 'w';
23849                     next;
23850                 }
23851
23852                 # a bare word immediately followed by :: is not a keyword;
23853                 # use $tok_kw when testing for keywords to avoid a mistake
23854                 my $tok_kw = $tok;
23855                 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
23856                 {
23857                     $tok_kw .= '::';
23858                 }
23859
23860                 # handle operator x (now we know it isn't $x=)
23861                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
23862                     if ( $tok eq 'x' ) {
23863
23864                         if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
23865                             $tok  = 'x=';
23866                             $type = $tok;
23867                             $i++;
23868                         }
23869                         else {
23870                             $type = 'x';
23871                         }
23872                     }
23873
23874                     # FIXME: Patch: mark something like x4 as an integer for now
23875                     # It gets fixed downstream.  This is easier than
23876                     # splitting the pretoken.
23877                     else {
23878                         $type = 'n';
23879                     }
23880                 }
23881
23882                 elsif ( ( $tok eq 'strict' )
23883                     and ( $last_nonblank_token eq 'use' ) )
23884                 {
23885                     $tokenizer_self->{_saw_use_strict} = 1;
23886                     scan_bare_identifier();
23887                 }
23888
23889                 elsif ( ( $tok eq 'warnings' )
23890                     and ( $last_nonblank_token eq 'use' ) )
23891                 {
23892                     $tokenizer_self->{_saw_perl_dash_w} = 1;
23893
23894                     # scan as identifier, so that we pick up something like:
23895                     # use warnings::register
23896                     scan_bare_identifier();
23897                 }
23898
23899                 elsif (
23900                        $tok eq 'AutoLoader'
23901                     && $tokenizer_self->{_look_for_autoloader}
23902                     && (
23903                         $last_nonblank_token eq 'use'
23904
23905                         # these regexes are from AutoSplit.pm, which we want
23906                         # to mimic
23907                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
23908                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
23909                     )
23910                   )
23911                 {
23912                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
23913                     $tokenizer_self->{_saw_autoloader}      = 1;
23914                     $tokenizer_self->{_look_for_autoloader} = 0;
23915                     scan_bare_identifier();
23916                 }
23917
23918                 elsif (
23919                        $tok eq 'SelfLoader'
23920                     && $tokenizer_self->{_look_for_selfloader}
23921                     && (   $last_nonblank_token eq 'use'
23922                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
23923                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
23924                   )
23925                 {
23926                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
23927                     $tokenizer_self->{_saw_selfloader}      = 1;
23928                     $tokenizer_self->{_look_for_selfloader} = 0;
23929                     scan_bare_identifier();
23930                 }
23931
23932                 elsif ( ( $tok eq 'constant' )
23933                     and ( $last_nonblank_token eq 'use' ) )
23934                 {
23935                     scan_bare_identifier();
23936                     my ( $next_nonblank_token, $i_next ) =
23937                       find_next_nonblank_token( $i, $rtokens,
23938                         $max_token_index );
23939
23940                     if ($next_nonblank_token) {
23941
23942                         if ( $is_keyword{$next_nonblank_token} ) {
23943                             warning(
23944 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
23945                             );
23946                         }
23947
23948                         # FIXME: could check for error in which next token is
23949                         # not a word (number, punctuation, ..)
23950                         else {
23951                             $is_constant{$current_package}
23952                               {$next_nonblank_token} = 1;
23953                         }
23954                     }
23955                 }
23956
23957                 # various quote operators
23958                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
23959                     if ( $expecting == OPERATOR ) {
23960
23961                         # patch for paren-less for/foreach glitch, part 1
23962                         # perl will accept this construct as valid:
23963                         #
23964                         #    foreach my $key qw\Uno Due Tres Quadro\ {
23965                         #        print "Set $key\n";
23966                         #    }
23967                         unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
23968                         {
23969                             error_if_expecting_OPERATOR();
23970                         }
23971                     }
23972                     $in_quote                = $quote_items{$tok};
23973                     $allowed_quote_modifiers = $quote_modifiers{$tok};
23974
23975                    # All quote types are 'Q' except possibly qw quotes.
23976                    # qw quotes are special in that they may generally be trimmed
23977                    # of leading and trailing whitespace.  So they are given a
23978                    # separate type, 'q', unless requested otherwise.
23979                     $type =
23980                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
23981                       ? 'q'
23982                       : 'Q';
23983                     $quote_type = $type;
23984                 }
23985
23986                 # check for a statement label
23987                 elsif (
23988                        ( $next_nonblank_token eq ':' )
23989                     && ( $$rtokens[ $i_next + 1 ] ne ':' )
23990                     && ( $i_next <= $max_token_index )    # colon on same line
23991                     && label_ok()
23992                   )
23993                 {
23994                     if ( $tok !~ /[A-Z]/ ) {
23995                         push @{ $tokenizer_self->{_rlower_case_labels_at} },
23996                           $input_line_number;
23997                     }
23998                     $type = 'J';
23999                     $tok .= ':';
24000                     $i = $i_next;
24001                     next;
24002                 }
24003
24004                 #      'sub' || 'package'
24005                 elsif ( $is_sub_package{$tok_kw} ) {
24006                     error_if_expecting_OPERATOR()
24007                       if ( $expecting == OPERATOR );
24008                     scan_id();
24009                 }
24010
24011                 # Note on token types for format, __DATA__, __END__:
24012                 # It simplifies things to give these type ';', so that when we
24013                 # start rescanning we will be expecting a token of type TERM.
24014                 # We will switch to type 'k' before outputting the tokens.
24015                 elsif ( $is_format_END_DATA{$tok_kw} ) {
24016                     $type = ';';    # make tokenizer look for TERM next
24017                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
24018                     last;
24019                 }
24020
24021                 elsif ( $is_keyword{$tok_kw} ) {
24022                     $type = 'k';
24023
24024                     # Since for and foreach may not be followed immediately
24025                     # by an opening paren, we have to remember which keyword
24026                     # is associated with the next '('
24027                     if ( $is_for_foreach{$tok} ) {
24028                         if ( new_statement_ok() ) {
24029                             $want_paren = $tok;
24030                         }
24031                     }
24032
24033                     # recognize 'use' statements, which are special
24034                     elsif ( $is_use_require{$tok} ) {
24035                         $statement_type = $tok;
24036                         error_if_expecting_OPERATOR()
24037                           if ( $expecting == OPERATOR );
24038                     }
24039
24040                     # remember my and our to check for trailing ": shared"
24041                     elsif ( $is_my_our{$tok} ) {
24042                         $statement_type = $tok;
24043                     }
24044
24045                     # Check for misplaced 'elsif' and 'else', but allow isolated
24046                     # else or elsif blocks to be formatted.  This is indicated
24047                     # by a last nonblank token of ';'
24048                     elsif ( $tok eq 'elsif' ) {
24049                         if (   $last_nonblank_token ne ';'
24050                             && $last_nonblank_block_type !~
24051                             /^(if|elsif|unless)$/ )
24052                         {
24053                             warning(
24054 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
24055                             );
24056                         }
24057                     }
24058                     elsif ( $tok eq 'else' ) {
24059
24060                         # patched for SWITCH/CASE
24061                         if (   $last_nonblank_token ne ';'
24062                             && $last_nonblank_block_type !~
24063                             /^(if|elsif|unless|case|when)$/ )
24064                         {
24065                             warning(
24066 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
24067                             );
24068                         }
24069                     }
24070                     elsif ( $tok eq 'continue' ) {
24071                         if (   $last_nonblank_token ne ';'
24072                             && $last_nonblank_block_type !~
24073                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
24074                         {
24075
24076                             # note: ';' '{' and '}' in list above
24077                             # because continues can follow bare blocks;
24078                             # ':' is labeled block
24079                             #
24080                             ############################################
24081                             # NOTE: This check has been deactivated because
24082                             # continue has an alternative usage for given/when
24083                             # blocks in perl 5.10
24084                             ## warning("'$tok' should follow a block\n");
24085                             ############################################
24086                         }
24087                     }
24088
24089                     # patch for SWITCH/CASE if 'case' and 'when' are
24090                     # treated as keywords.
24091                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
24092                         $statement_type = $tok;    # next '{' is block
24093                     }
24094
24095                     # indent trailing if/unless/while/until
24096                     # outdenting will be handled by later indentation loop
24097                     if (   $tok =~ /^(if|unless|while|until)$/
24098                         && $next_nonblank_token ne '(' )
24099                     {
24100                         $indent_flag = 1;
24101                     }
24102                 }
24103
24104                 # check for inline label following
24105                 #         /^(redo|last|next|goto)$/
24106                 elsif (( $last_nonblank_type eq 'k' )
24107                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
24108                 {
24109                     $type = 'j';
24110                     next;
24111                 }
24112
24113                 # something else --
24114                 else {
24115
24116                     scan_bare_identifier();
24117                     if ( $type eq 'w' ) {
24118
24119                         if ( $expecting == OPERATOR ) {
24120
24121                             # don't complain about possible indirect object
24122                             # notation.
24123                             # For example:
24124                             #   package main;
24125                             #   sub new($) { ... }
24126                             #   $b = new A::;  # calls A::new
24127                             #   $c = new A;    # same thing but suspicious
24128                             # This will call A::new but we have a 'new' in
24129                             # main:: which looks like a constant.
24130                             #
24131                             if ( $last_nonblank_type eq 'C' ) {
24132                                 if ( $tok !~ /::$/ ) {
24133                                     complain(<<EOM);
24134 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
24135        Maybe indirectet object notation?
24136 EOM
24137                                 }
24138                             }
24139                             else {
24140                                 error_if_expecting_OPERATOR("bareword");
24141                             }
24142                         }
24143
24144                         # mark bare words immediately followed by a paren as
24145                         # functions
24146                         $next_tok = $$rtokens[ $i + 1 ];
24147                         if ( $next_tok eq '(' ) {
24148                             $type = 'U';
24149                         }
24150
24151                         # underscore after file test operator is file handle
24152                         if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
24153                             $type = 'Z';
24154                         }
24155
24156                         # patch for SWITCH/CASE if 'case' and 'when' are
24157                         # not treated as keywords:
24158                         if (
24159                             (
24160                                    $tok eq 'case'
24161                                 && $brace_type[$brace_depth] eq 'switch'
24162                             )
24163                             || (   $tok eq 'when'
24164                                 && $brace_type[$brace_depth] eq 'given' )
24165                           )
24166                         {
24167                             $statement_type = $tok;    # next '{' is block
24168                             $type = 'k';    # for keyword syntax coloring
24169                         }
24170
24171                         # patch for SWITCH/CASE if switch and given not keywords
24172                         # Switch is not a perl 5 keyword, but we will gamble
24173                         # and mark switch followed by paren as a keyword.  This
24174                         # is only necessary to get html syntax coloring nice,
24175                         # and does not commit this as being a switch/case.
24176                         if ( $next_nonblank_token eq '('
24177                             && ( $tok eq 'switch' || $tok eq 'given' ) )
24178                         {
24179                             $type = 'k';    # for keyword syntax coloring
24180                         }
24181                     }
24182                 }
24183             }
24184
24185             ###############################################################
24186             # section 2: strings of digits
24187             ###############################################################
24188             elsif ( $pre_type eq 'd' ) {
24189                 $expecting = operator_expected( $prev_type, $tok, $next_type );
24190                 error_if_expecting_OPERATOR("Number")
24191                   if ( $expecting == OPERATOR );
24192                 my $number = scan_number();
24193                 if ( !defined($number) ) {
24194
24195                     # shouldn't happen - we should always get a number
24196                     warning("non-number beginning with digit--program bug\n");
24197                     report_definite_bug();
24198                 }
24199             }
24200
24201             ###############################################################
24202             # section 3: all other tokens
24203             ###############################################################
24204
24205             else {
24206                 last if ( $tok eq '#' );
24207                 my $code = $tokenization_code->{$tok};
24208                 if ($code) {
24209                     $expecting =
24210                       operator_expected( $prev_type, $tok, $next_type );
24211                     $code->();
24212                     redo if $in_quote;
24213                 }
24214             }
24215         }
24216
24217         # -----------------------------
24218         # end of main tokenization loop
24219         # -----------------------------
24220
24221         if ( $i_tok >= 0 ) {
24222             $routput_token_type->[$i_tok]     = $type;
24223             $routput_block_type->[$i_tok]     = $block_type;
24224             $routput_container_type->[$i_tok] = $container_type;
24225             $routput_type_sequence->[$i_tok]  = $type_sequence;
24226             $routput_indent_flag->[$i_tok]    = $indent_flag;
24227         }
24228
24229         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
24230             $last_last_nonblank_token          = $last_nonblank_token;
24231             $last_last_nonblank_type           = $last_nonblank_type;
24232             $last_last_nonblank_block_type     = $last_nonblank_block_type;
24233             $last_last_nonblank_container_type = $last_nonblank_container_type;
24234             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
24235             $last_nonblank_token               = $tok;
24236             $last_nonblank_type                = $type;
24237             $last_nonblank_block_type          = $block_type;
24238             $last_nonblank_container_type      = $container_type;
24239             $last_nonblank_type_sequence       = $type_sequence;
24240             $last_nonblank_prototype           = $prototype;
24241         }
24242
24243         # reset indentation level if necessary at a sub or package
24244         # in an attempt to recover from a nesting error
24245         if ( $level_in_tokenizer < 0 ) {
24246             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
24247                 reset_indentation_level(0);
24248                 brace_warning("resetting level to 0 at $1 $2\n");
24249             }
24250         }
24251
24252         # all done tokenizing this line ...
24253         # now prepare the final list of tokens and types
24254
24255         my @token_type     = ();   # stack of output token types
24256         my @block_type     = ();   # stack of output code block types
24257         my @container_type = ();   # stack of output code container types
24258         my @type_sequence  = ();   # stack of output type sequence numbers
24259         my @tokens         = ();   # output tokens
24260         my @levels         = ();   # structural brace levels of output tokens
24261         my @slevels        = ();   # secondary nesting levels of output tokens
24262         my @nesting_tokens = ();   # string of tokens leading to this depth
24263         my @nesting_types  = ();   # string of token types leading to this depth
24264         my @nesting_blocks = ();   # string of block types leading to this depth
24265         my @nesting_lists  = ();   # string of list types leading to this depth
24266         my @ci_string = ();  # string needed to compute continuation indentation
24267         my @container_environment = ();    # BLOCK or LIST
24268         my $container_environment = '';
24269         my $im                    = -1;    # previous $i value
24270         my $num;
24271         my $ci_string_sum = ones_count($ci_string_in_tokenizer);
24272
24273 # Computing Token Indentation
24274 #
24275 #     The final section of the tokenizer forms tokens and also computes
24276 #     parameters needed to find indentation.  It is much easier to do it
24277 #     in the tokenizer than elsewhere.  Here is a brief description of how
24278 #     indentation is computed.  Perl::Tidy computes indentation as the sum
24279 #     of 2 terms:
24280 #
24281 #     (1) structural indentation, such as if/else/elsif blocks
24282 #     (2) continuation indentation, such as long parameter call lists.
24283 #
24284 #     These are occasionally called primary and secondary indentation.
24285 #
24286 #     Structural indentation is introduced by tokens of type '{', although
24287 #     the actual tokens might be '{', '(', or '['.  Structural indentation
24288 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
24289 #     is 4 characters if the standard indentation scheme is used.
24290 #
24291 #     Continuation indentation is introduced whenever a line at BLOCK level
24292 #     is broken before its termination.  Default continuation indentation
24293 #     is 2 characters in the standard indentation scheme.
24294 #
24295 #     Both types of indentation may be nested arbitrarily deep and
24296 #     interlaced.  The distinction between the two is somewhat arbitrary.
24297 #
24298 #     For each token, we will define two variables which would apply if
24299 #     the current statement were broken just before that token, so that
24300 #     that token started a new line:
24301 #
24302 #     $level = the structural indentation level,
24303 #     $ci_level = the continuation indentation level
24304 #
24305 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
24306 #     assuming defaults.  However, in some special cases it is customary
24307 #     to modify $ci_level from this strict value.
24308 #
24309 #     The total structural indentation is easy to compute by adding and
24310 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
24311 #     running value of this variable is $level_in_tokenizer.
24312 #
24313 #     The total continuation is much more difficult to compute, and requires
24314 #     several variables.  These variables are:
24315 #
24316 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
24317 #       each indentation level, if there are intervening open secondary
24318 #       structures just prior to that level.
24319 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
24320 #       if the last token at that level is "continued", meaning that it
24321 #       is not the first token of an expression.
24322 #     $nesting_block_string = a string of 1's and 0's indicating, for each
24323 #       indentation level, if the level is of type BLOCK or not.
24324 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
24325 #     $nesting_list_string = a string of 1's and 0's indicating, for each
24326 #       indentation level, if it is appropriate for list formatting.
24327 #       If so, continuation indentation is used to indent long list items.
24328 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
24329 #     @{$rslevel_stack} = a stack of total nesting depths at each
24330 #       structural indentation level, where "total nesting depth" means
24331 #       the nesting depth that would occur if every nesting token -- '{', '[',
24332 #       and '(' -- , regardless of context, is used to compute a nesting
24333 #       depth.
24334
24335         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
24336         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
24337
24338         my ( $ci_string_i, $level_i, $nesting_block_string_i,
24339             $nesting_list_string_i, $nesting_token_string_i,
24340             $nesting_type_string_i, );
24341
24342         foreach $i ( @{$routput_token_list} )
24343         {    # scan the list of pre-tokens indexes
24344
24345             # self-checking for valid token types
24346             my $type                    = $routput_token_type->[$i];
24347             my $forced_indentation_flag = $routput_indent_flag->[$i];
24348
24349             # See if we should undo the $forced_indentation_flag.
24350             # Forced indentation after 'if', 'unless', 'while' and 'until'
24351             # expressions without trailing parens is optional and doesn't
24352             # always look good.  It is usually okay for a trailing logical
24353             # expression, but if the expression is a function call, code block,
24354             # or some kind of list it puts in an unwanted extra indentation
24355             # level which is hard to remove.
24356             #
24357             # Example where extra indentation looks ok:
24358             # return 1
24359             #   if $det_a < 0 and $det_b > 0
24360             #       or $det_a > 0 and $det_b < 0;
24361             #
24362             # Example where extra indentation is not needed because
24363             # the eval brace also provides indentation:
24364             # print "not " if defined eval {
24365             #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
24366             # };
24367             #
24368             # The following rule works fairly well:
24369             #   Undo the flag if the end of this line, or start of the next
24370             #   line, is an opening container token or a comma.
24371             # This almost always works, but if not after another pass it will
24372             # be stable.
24373             if ( $forced_indentation_flag && $type eq 'k' ) {
24374                 my $ixlast  = -1;
24375                 my $ilast   = $routput_token_list->[$ixlast];
24376                 my $toklast = $routput_token_type->[$ilast];
24377                 if ( $toklast eq '#' ) {
24378                     $ixlast--;
24379                     $ilast   = $routput_token_list->[$ixlast];
24380                     $toklast = $routput_token_type->[$ilast];
24381                 }
24382                 if ( $toklast eq 'b' ) {
24383                     $ixlast--;
24384                     $ilast   = $routput_token_list->[$ixlast];
24385                     $toklast = $routput_token_type->[$ilast];
24386                 }
24387                 if ( $toklast =~ /^[\{,]$/ ) {
24388                     $forced_indentation_flag = 0;
24389                 }
24390                 else {
24391                     ( $toklast, my $i_next ) =
24392                       find_next_nonblank_token( $max_token_index, $rtokens,
24393                         $max_token_index );
24394                     if ( $toklast =~ /^[\{,]$/ ) {
24395                         $forced_indentation_flag = 0;
24396                     }
24397                 }
24398             }
24399
24400             # if we are already in an indented if, see if we should outdent
24401             if ($indented_if_level) {
24402
24403                 # don't try to nest trailing if's - shouldn't happen
24404                 if ( $type eq 'k' ) {
24405                     $forced_indentation_flag = 0;
24406                 }
24407
24408                 # check for the normal case - outdenting at next ';'
24409                 elsif ( $type eq ';' ) {
24410                     if ( $level_in_tokenizer == $indented_if_level ) {
24411                         $forced_indentation_flag = -1;
24412                         $indented_if_level       = 0;
24413                     }
24414                 }
24415
24416                 # handle case of missing semicolon
24417                 elsif ( $type eq '}' ) {
24418                     if ( $level_in_tokenizer == $indented_if_level ) {
24419                         $indented_if_level = 0;
24420
24421                         # TBD: This could be a subroutine call
24422                         $level_in_tokenizer--;
24423                         if ( @{$rslevel_stack} > 1 ) {
24424                             pop( @{$rslevel_stack} );
24425                         }
24426                         if ( length($nesting_block_string) > 1 )
24427                         {    # true for valid script
24428                             chop $nesting_block_string;
24429                             chop $nesting_list_string;
24430                         }
24431
24432                     }
24433                 }
24434             }
24435
24436             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
24437             $level_i = $level_in_tokenizer;
24438
24439             # This can happen by running perltidy on non-scripts
24440             # although it could also be a bug introduced by a programming change.
24441             # Perl silently accepts a 032 (^Z) and takes it as the end
24442             if ( !$is_valid_token_type{$type} ) {
24443                 my $val = ord($type);
24444                 warning(
24445                     "unexpected character decimal $val ($type) in script\n");
24446                 $tokenizer_self->{_in_error} = 1;
24447             }
24448
24449             # ----------------------------------------------------------------
24450             # TOKEN TYPE PATCHES
24451             #  output __END__, __DATA__, and format as type 'k' instead of ';'
24452             # to make html colors correct, etc.
24453             my $fix_type = $type;
24454             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
24455
24456             # output anonymous 'sub' as keyword
24457             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
24458
24459             # -----------------------------------------------------------------
24460
24461             $nesting_token_string_i = $nesting_token_string;
24462             $nesting_type_string_i  = $nesting_type_string;
24463             $nesting_block_string_i = $nesting_block_string;
24464             $nesting_list_string_i  = $nesting_list_string;
24465
24466             # set primary indentation levels based on structural braces
24467             # Note: these are set so that the leading braces have a HIGHER
24468             # level than their CONTENTS, which is convenient for indentation
24469             # Also, define continuation indentation for each token.
24470             if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
24471             {
24472
24473                 # use environment before updating
24474                 $container_environment =
24475                     $nesting_block_flag ? 'BLOCK'
24476                   : $nesting_list_flag  ? 'LIST'
24477                   :                       "";
24478
24479                 # if the difference between total nesting levels is not 1,
24480                 # there are intervening non-structural nesting types between
24481                 # this '{' and the previous unclosed '{'
24482                 my $intervening_secondary_structure = 0;
24483                 if ( @{$rslevel_stack} ) {
24484                     $intervening_secondary_structure =
24485                       $slevel_in_tokenizer - $rslevel_stack->[-1];
24486                 }
24487
24488      # Continuation Indentation
24489      #
24490      # Having tried setting continuation indentation both in the formatter and
24491      # in the tokenizer, I can say that setting it in the tokenizer is much,
24492      # much easier.  The formatter already has too much to do, and can't
24493      # make decisions on line breaks without knowing what 'ci' will be at
24494      # arbitrary locations.
24495      #
24496      # But a problem with setting the continuation indentation (ci) here
24497      # in the tokenizer is that we do not know where line breaks will actually
24498      # be.  As a result, we don't know if we should propagate continuation
24499      # indentation to higher levels of structure.
24500      #
24501      # For nesting of only structural indentation, we never need to do this.
24502      # For example, in a long if statement, like this
24503      #
24504      #   if ( !$output_block_type[$i]
24505      #     && ($in_statement_continuation) )
24506      #   {           <--outdented
24507      #       do_something();
24508      #   }
24509      #
24510      # the second line has ci but we do not normally give the lines within the BLOCK
24511      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
24512      #
24513      # But consider something like this, where we have created a break after
24514      # an opening paren on line 1, and the paren is not (currently) a
24515      # structural indentation token:
24516      #
24517      # my $file = $menubar->Menubutton(
24518      #   qw/-text File -underline 0 -menuitems/ => [
24519      #       [
24520      #           Cascade    => '~View',
24521      #           -menuitems => [
24522      #           ...
24523      #
24524      # The second line has ci, so it would seem reasonable to propagate it
24525      # down, giving the third line 1 ci + 1 indentation.  This suggests the
24526      # following rule, which is currently used to propagate ci down: if there
24527      # are any non-structural opening parens (or brackets, or braces), before
24528      # an opening structural brace, then ci is propagated down, and otherwise
24529      # not.  The variable $intervening_secondary_structure contains this
24530      # information for the current token, and the string
24531      # "$ci_string_in_tokenizer" is a stack of previous values of this
24532      # variable.
24533
24534                 # save the current states
24535                 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
24536                 $level_in_tokenizer++;
24537
24538                 if ($forced_indentation_flag) {
24539
24540                     # break BEFORE '?' when there is forced indentation
24541                     if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
24542                     if ( $type eq 'k' ) {
24543                         $indented_if_level = $level_in_tokenizer;
24544                     }
24545                 }
24546
24547                 if ( $routput_block_type->[$i] ) {
24548                     $nesting_block_flag = 1;
24549                     $nesting_block_string .= '1';
24550                 }
24551                 else {
24552                     $nesting_block_flag = 0;
24553                     $nesting_block_string .= '0';
24554                 }
24555
24556                 # we will use continuation indentation within containers
24557                 # which are not blocks and not logical expressions
24558                 my $bit = 0;
24559                 if ( !$routput_block_type->[$i] ) {
24560
24561                     # propagate flag down at nested open parens
24562                     if ( $routput_container_type->[$i] eq '(' ) {
24563                         $bit = 1 if $nesting_list_flag;
24564                     }
24565
24566                   # use list continuation if not a logical grouping
24567                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
24568                     else {
24569                         $bit = 1
24570                           unless
24571                             $is_logical_container{ $routput_container_type->[$i]
24572                               };
24573                     }
24574                 }
24575                 $nesting_list_string .= $bit;
24576                 $nesting_list_flag = $bit;
24577
24578                 $ci_string_in_tokenizer .=
24579                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
24580                 $ci_string_sum = ones_count($ci_string_in_tokenizer);
24581                 $continuation_string_in_tokenizer .=
24582                   ( $in_statement_continuation > 0 ) ? '1' : '0';
24583
24584    #  Sometimes we want to give an opening brace continuation indentation,
24585    #  and sometimes not.  For code blocks, we don't do it, so that the leading
24586    #  '{' gets outdented, like this:
24587    #
24588    #   if ( !$output_block_type[$i]
24589    #     && ($in_statement_continuation) )
24590    #   {           <--outdented
24591    #
24592    #  For other types, we will give them continuation indentation.  For example,
24593    #  here is how a list looks with the opening paren indented:
24594    #
24595    #     @LoL =
24596    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
24597    #         [ "homer", "marge", "bart" ], );
24598    #
24599    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
24600
24601                 my $total_ci = $ci_string_sum;
24602                 if (
24603                     !$routput_block_type->[$i]    # patch: skip for BLOCK
24604                     && ($in_statement_continuation)
24605                     && !( $forced_indentation_flag && $type eq ':' )
24606                   )
24607                 {
24608                     $total_ci += $in_statement_continuation
24609                       unless ( $ci_string_in_tokenizer =~ /1$/ );
24610                 }
24611
24612                 $ci_string_i               = $total_ci;
24613                 $in_statement_continuation = 0;
24614             }
24615
24616             elsif ($type eq '}'
24617                 || $type eq 'R'
24618                 || $forced_indentation_flag < 0 )
24619             {
24620
24621                 # only a nesting error in the script would prevent popping here
24622                 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
24623
24624                 $level_i = --$level_in_tokenizer;
24625
24626                 # restore previous level values
24627                 if ( length($nesting_block_string) > 1 )
24628                 {    # true for valid script
24629                     chop $nesting_block_string;
24630                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
24631                     chop $nesting_list_string;
24632                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
24633
24634                     chop $ci_string_in_tokenizer;
24635                     $ci_string_sum = ones_count($ci_string_in_tokenizer);
24636
24637                     $in_statement_continuation =
24638                       chop $continuation_string_in_tokenizer;
24639
24640                     # zero continuation flag at terminal BLOCK '}' which
24641                     # ends a statement.
24642                     if ( $routput_block_type->[$i] ) {
24643
24644                         # ...These include non-anonymous subs
24645                         # note: could be sub ::abc { or sub 'abc
24646                         if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
24647
24648                          # note: older versions of perl require the /gc modifier
24649                          # here or else the \G does not work.
24650                             if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
24651                             {
24652                                 $in_statement_continuation = 0;
24653                             }
24654                         }
24655
24656 # ...and include all block types except user subs with
24657 # block prototypes and these: (sort|grep|map|do|eval)
24658 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
24659                         elsif (
24660                             $is_zero_continuation_block_type{
24661                                 $routput_block_type->[$i] } )
24662                         {
24663                             $in_statement_continuation = 0;
24664                         }
24665
24666                         # ..but these are not terminal types:
24667                         #     /^(sort|grep|map|do|eval)$/ )
24668                         elsif (
24669                             $is_not_zero_continuation_block_type{
24670                                 $routput_block_type->[$i] } )
24671                         {
24672                         }
24673
24674                         # ..and a block introduced by a label
24675                         # /^\w+\s*:$/gc ) {
24676                         elsif ( $routput_block_type->[$i] =~ /:$/ ) {
24677                             $in_statement_continuation = 0;
24678                         }
24679
24680                         # user function with block prototype
24681                         else {
24682                             $in_statement_continuation = 0;
24683                         }
24684                     }
24685
24686                     # If we are in a list, then
24687                     # we must set continuation indentation at the closing
24688                     # paren of something like this (paren after $check):
24689                     #     assert(
24690                     #         __LINE__,
24691                     #         ( not defined $check )
24692                     #           or ref $check
24693                     #           or $check eq "new"
24694                     #           or $check eq "old",
24695                     #     );
24696                     elsif ( $tok eq ')' ) {
24697                         $in_statement_continuation = 1
24698                           if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
24699                     }
24700
24701                     elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
24702                 }
24703
24704                 # use environment after updating
24705                 $container_environment =
24706                     $nesting_block_flag ? 'BLOCK'
24707                   : $nesting_list_flag  ? 'LIST'
24708                   :                       "";
24709                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
24710                 $nesting_block_string_i = $nesting_block_string;
24711                 $nesting_list_string_i  = $nesting_list_string;
24712             }
24713
24714             # not a structural indentation type..
24715             else {
24716
24717                 $container_environment =
24718                     $nesting_block_flag ? 'BLOCK'
24719                   : $nesting_list_flag  ? 'LIST'
24720                   :                       "";
24721
24722                 # zero the continuation indentation at certain tokens so
24723                 # that they will be at the same level as its container.  For
24724                 # commas, this simplifies the -lp indentation logic, which
24725                 # counts commas.  For ?: it makes them stand out.
24726                 if ($nesting_list_flag) {
24727                     if ( $type =~ /^[,\?\:]$/ ) {
24728                         $in_statement_continuation = 0;
24729                     }
24730                 }
24731
24732                 # be sure binary operators get continuation indentation
24733                 if (
24734                     $container_environment
24735                     && (   $type eq 'k' && $is_binary_keyword{$tok}
24736                         || $is_binary_type{$type} )
24737                   )
24738                 {
24739                     $in_statement_continuation = 1;
24740                 }
24741
24742                 # continuation indentation is sum of any open ci from previous
24743                 # levels plus the current level
24744                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
24745
24746                 # update continuation flag ...
24747                 # if this isn't a blank or comment..
24748                 if ( $type ne 'b' && $type ne '#' ) {
24749
24750                     # and we are in a BLOCK
24751                     if ($nesting_block_flag) {
24752
24753                         # the next token after a ';' and label starts a new stmt
24754                         if ( $type eq ';' || $type eq 'J' ) {
24755                             $in_statement_continuation = 0;
24756                         }
24757
24758                         # otherwise, we are continuing the current statement
24759                         else {
24760                             $in_statement_continuation = 1;
24761                         }
24762                     }
24763
24764                     # if we are not in a BLOCK..
24765                     else {
24766
24767                         # do not use continuation indentation if not list
24768                         # environment (could be within if/elsif clause)
24769                         if ( !$nesting_list_flag ) {
24770                             $in_statement_continuation = 0;
24771                         }
24772
24773                        # otherwise, the next token after a ',' starts a new term
24774                         elsif ( $type eq ',' ) {
24775                             $in_statement_continuation = 0;
24776                         }
24777
24778                         # otherwise, we are continuing the current term
24779                         else {
24780                             $in_statement_continuation = 1;
24781                         }
24782                     }
24783                 }
24784             }
24785
24786             if ( $level_in_tokenizer < 0 ) {
24787                 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
24788                     $tokenizer_self->{_saw_negative_indentation} = 1;
24789                     warning("Starting negative indentation\n");
24790                 }
24791             }
24792
24793             # set secondary nesting levels based on all continment token types
24794             # Note: these are set so that the nesting depth is the depth
24795             # of the PREVIOUS TOKEN, which is convenient for setting
24796             # the stength of token bonds
24797             my $slevel_i = $slevel_in_tokenizer;
24798
24799             #    /^[L\{\(\[]$/
24800             if ( $is_opening_type{$type} ) {
24801                 $slevel_in_tokenizer++;
24802                 $nesting_token_string .= $tok;
24803                 $nesting_type_string  .= $type;
24804             }
24805
24806             #       /^[R\}\)\]]$/
24807             elsif ( $is_closing_type{$type} ) {
24808                 $slevel_in_tokenizer--;
24809                 my $char = chop $nesting_token_string;
24810
24811                 if ( $char ne $matching_start_token{$tok} ) {
24812                     $nesting_token_string .= $char . $tok;
24813                     $nesting_type_string  .= $type;
24814                 }
24815                 else {
24816                     chop $nesting_type_string;
24817                 }
24818             }
24819
24820             push( @block_type,            $routput_block_type->[$i] );
24821             push( @ci_string,             $ci_string_i );
24822             push( @container_environment, $container_environment );
24823             push( @container_type,        $routput_container_type->[$i] );
24824             push( @levels,                $level_i );
24825             push( @nesting_tokens,        $nesting_token_string_i );
24826             push( @nesting_types,         $nesting_type_string_i );
24827             push( @slevels,               $slevel_i );
24828             push( @token_type,            $fix_type );
24829             push( @type_sequence,         $routput_type_sequence->[$i] );
24830             push( @nesting_blocks,        $nesting_block_string );
24831             push( @nesting_lists,         $nesting_list_string );
24832
24833             # now form the previous token
24834             if ( $im >= 0 ) {
24835                 $num =
24836                   $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
24837
24838                 if ( $num > 0 ) {
24839                     push( @tokens,
24840                         substr( $input_line, $$rtoken_map[$im], $num ) );
24841                 }
24842             }
24843             $im = $i;
24844         }
24845
24846         $num = length($input_line) - $$rtoken_map[$im];    # make the last token
24847         if ( $num > 0 ) {
24848             push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
24849         }
24850
24851         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
24852         $tokenizer_self->{_in_quote}          = $in_quote;
24853         $tokenizer_self->{_quote_target} =
24854           $in_quote ? matching_end_token($quote_character) : "";
24855         $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
24856
24857         $line_of_tokens->{_rtoken_type}            = \@token_type;
24858         $line_of_tokens->{_rtokens}                = \@tokens;
24859         $line_of_tokens->{_rblock_type}            = \@block_type;
24860         $line_of_tokens->{_rcontainer_type}        = \@container_type;
24861         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
24862         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
24863         $line_of_tokens->{_rlevels}                = \@levels;
24864         $line_of_tokens->{_rslevels}               = \@slevels;
24865         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
24866         $line_of_tokens->{_rci_levels}             = \@ci_string;
24867         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
24868
24869         return;
24870     }
24871 }    # end tokenize_this_line
24872
24873 #########i#############################################################
24874 # Tokenizer routines which assist in identifying token types
24875 #######################################################################
24876
sub operator_expected {

    # Decide whether the tokenizer should expect an operator or a term at
    # the current token.
    #
    # Many perl symbols have two or more meanings.  For example, '<<'
    # can be a shift operator or a here-doc operator.  The
    # interpretation of these symbols depends on the current state of
    # the tokenizer, which may either be expecting a term or an
    # operator.  For this example, a << would be a shift if an operator
    # is expected, and a here-doc if a term is expected.  This routine
    # is called to make this decision for any current token.  It returns
    # one of three possible values:
    #
    #     OPERATOR - operator expected (or at least, not a term)
    #     UNKNOWN  - can't tell
    #     TERM     - a term is expected (or at least, not an operator)
    #
    # Parameters:
    #     $prev_type - type of the token just before $tok, as supplied by
    #                  the caller; only tested against 'b' (blank) here
    #     $tok       - the token being classified
    #     $next_type - type of the token just after $tok, as supplied by
    #                  the caller; tested against 'b' and '/' here
    #
    # The decision is based on what has been seen so far.  This
    # information is stored in the "$last_nonblank_type" and
    # "$last_nonblank_token" variables.  For example, if the
    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
    # if $last_nonblank_type is 'n' (numeric), we are expecting an
    # OPERATOR.
    #
    # If a UNKNOWN is returned, the calling routine must guess. A major
    # goal of this tokenizer is to minimize the possibility of returning
    # UNKNOWN, because a wrong guess can spoil the formatting of a
    # script.
    #
    # adding NEW_TOKENS: it is critically important that this routine be
    # updated to allow it to determine if an operator or term is to be
    # expected after the new token.  Doing this simply involves adding
    # the new token character to one of the regexes in this routine or
    # to one of the hash lists
    # that it uses, which are initialized in the BEGIN section.
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
    # $statement_type

    my ( $prev_type, $tok, $next_type ) = @_;

    my $op_expected = UNKNOWN;

#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";

# Note: function prototype is available for token type 'U' for future
# program development.  It contains the leading and trailing parens,
# and no blanks.  It might be used to eliminate token type 'C', for
# example (prototype = '()'). Thus:
# if ($last_nonblank_type eq 'U') {
#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
# }

    # A possible filehandle (or object) requires some care...
    if ( $last_nonblank_type eq 'Z' ) {

        # angle.t
        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
            $op_expected = UNKNOWN;
        }

        # For possible file handle like "$a", Perl uses weird parsing rules.
        # For example:
        # print $a/2,"/hi";   - division
        # print $a / 2,"/hi"; - division
        # print $a/ 2,"/hi";  - division
        # print $a /2,"/hi";  - pattern (and error)!
        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
            $op_expected = TERM;
        }

        # Note when an operation is being done where a
        # filehandle might be expected, since a change in whitespace
        # could change the interpretation of the statement.
        # (If $tok is not one of these operators the result stays UNKNOWN.)
        else {
            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
                complain("operator in print statement not recommended\n");
                $op_expected = OPERATOR;
            }
        }
    }

    # handle something after 'do' and 'eval'
    elsif ( $is_block_operator{$last_nonblank_token} ) {

        # something like $a = eval "expression";
        #                          ^
        if ( $last_nonblank_type eq 'k' ) {
            $op_expected = TERM;    # expression or list mode following keyword
        }

        # something like $a = do { BLOCK } / 2;
        #                                  ^
        else {
            $op_expected = OPERATOR;    # block mode following }
        }
    }

    # handle bare word..
    elsif ( $last_nonblank_type eq 'w' ) {

        # unfortunately, we can't tell what type of token to expect next
        # after most bare words
        $op_expected = UNKNOWN;
    }

    # operator, but not term possible after these types
    # Note: moved ')' from type to token because parens in list context
    # get marked as '{' '}' now.  This is a minor glitch in the following:
    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
    #
    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
    {
        $op_expected = OPERATOR;

        # in a 'use' statement, numbers and v-strings are not true
        # numbers, so to avoid incorrect error messages, we will
        # mark them as unknown for now (use.t)
        # TODO: it would be much nicer to create a new token V for VERSION
        # number in a use statement.  Then this could be a check on type V
        # and related patches which change $statement_type for '=>'
        # and ',' could be removed.  Further, it would clean things up to
        # scan the 'use' statement with a separate subroutine.
        if (   ( $statement_type eq 'use' )
            && ( $last_nonblank_type =~ /^[nv]$/ ) )
        {
            $op_expected = UNKNOWN;
        }
    }

    # no operator after many keywords, such as "die", "warn", etc
    elsif ( $expecting_term_token{$last_nonblank_token} ) {

        # patch for dor.t (defined or).
        # perl functions which may be unary operators
        # TODO: This list is incomplete, and these should be put
        # into a hash.
        # The alternation is grouped so that the anchors apply to every
        # keyword; ungrouped, /^eof|undef|shift|pop$/ would also match
        # tokens such as 'unshift' which merely contain one of the words.
        if (   $tok eq '/'
            && $next_type          eq '/'
            && $last_nonblank_type eq 'k'
            && $last_nonblank_token =~ /^(?:eof|undef|shift|pop)$/ )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # no operator after things like + - **  (i.e., other operators)
    elsif ( $expecting_term_types{$last_nonblank_type} ) {
        $op_expected = TERM;
    }

    # a few operators, like "time", have an empty prototype () and so
    # take no parameters but produce a value to operate on
    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
        $op_expected = OPERATOR;
    }

    # post-increment and decrement produce values to be operated on
    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
        $op_expected = OPERATOR;
    }

    # no value to operate on after sub block
    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }

    # a right brace here indicates the end of a simple block.
    # all non-structural right braces have type 'R'
    # all braces associated with block operator keywords have been given those
    # keywords as "last_nonblank_token" and caught above.
    # (This statement is order dependent, and must come after checking
    # $last_nonblank_token).
    elsif ( $last_nonblank_type eq '}' ) {

        # patch for dor.t (defined or).
        if (   $tok eq '/'
            && $next_type eq '/'
            && $last_nonblank_token eq ']' )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # something else..what did I forget?
    else {

        # collecting diagnostics on unknown operator types..see what was missed
        $op_expected = UNKNOWN;
        write_diagnostics(
"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
        );
    }

    TOKENIZER_DEBUG_FLAG_EXPECT && do {
        print
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
    };
    return $op_expected;
}
25079
sub new_statement_ok {

    # Return true if the current token can start a new statement.
    # That is the case wherever a label would be acceptable, and also
    # directly after a label (previous nonblank type 'J').
    # USES GLOBAL VARIABLES: $last_nonblank_type

    # anywhere a label could go, a statement can begin too
    my $label_allowed = label_ok();
    return $label_allowed if $label_allowed;

    # otherwise only if we immediately follow a label
    return $last_nonblank_type eq 'J';
}
25090
sub label_ok {

    # Decide whether a bare word followed by a colon at this point
    # would be a label.
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $brace_depth, @brace_type

    # Are we directly after a curly brace whose type equals its token?
    my $after_curly =
      ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
      && $last_nonblank_type eq $last_nonblank_token;

    # After such a brace the answer is the brace type recorded for the
    # current depth: a label is possible if and only if the curly
    # encloses a code block.
    return $brace_type[$brace_depth] if $after_curly;

    # In every other position a label must follow a ';' (real or fake).
    return ( $last_nonblank_type eq ';' );
}
25112
sub code_block_type {

    # Classify an opening '{': does it start a block of code or an
    # anonymous hash reference?
    # Must be called only when $type = $token = '{'.
    #
    # Returns "" if the brace does not start a code block.  Otherwise the
    # previous nonblank token is returned to name the kind of block (for
    # example 'if' for an if block, 'else' for an else block, etc).
    #
    # The four arguments are only handed on to sub decide_if_code_block
    # for the cases which need a look ahead.
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $last_nonblank_block_type, $brace_depth, @brace_type

# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";

    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;

    # true when the previous token's type is the token itself; this is
    # required of the previous curly brace in the '{{' and '}{' cases below
    my $type_is_token = $last_nonblank_type eq $last_nonblank_token;

    # case of multiple '{'s
    if ( $last_nonblank_token eq '{' && $type_is_token ) {

        # a code block cannot start within an anonymous hash
        return "" unless $brace_type[$brace_depth];

        # an opening brace where a statement may appear is probably
        # a code block but might be an anonymous hash reference
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # after a ';' a statement may appear, so this is probably a code
    # block but might be an anonymous hash reference
    if ( $last_nonblank_token eq ';' ) {
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # case of '}{'
    if ( $last_nonblank_token eq '}' && $type_is_token ) {

        # must be a block if it follows a closing hash reference
        return $last_nonblank_token unless $last_nonblank_block_type;

        # otherwise it could be a hash reference after a code block
        # (blktype1.t)
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such.  See also
    # sub is_non_structural_brace.
    # if ( $last_nonblank_type eq 't' ) {
    #    return $last_nonblank_token;
    # }

    # brace after label
    return $last_nonblank_token if $last_nonblank_type eq 'J';

    # A brace following any of these must be a code block:
    # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
    if ( $is_code_block_token{$last_nonblank_token} ) {

        # Bug Patch: the opening brace after the 'if' in the following
        # snippet is an anonymous hash ref and not a code block!
        #   print 'hi' if { x => 1, }->{x};
        # This situation is recognizable because the last nonblank type
        # will be a keyword (instead of a closing paren)
        my $follows_bare_conditional = $last_nonblank_token =~ /^(if|unless)$/
          && $last_nonblank_type eq 'k';
        return $follows_bare_conditional ? "" : $last_nonblank_token;
    }

    # a sub definition
    if ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
        && $last_nonblank_token =~ /^sub\b/ )
    {
        return $last_nonblank_token;
    }

    # user-defined subs with block parameters (like grep/map/eval)
    return $last_nonblank_token if $last_nonblank_type eq 'G';

    # a bareword needs a look ahead
    if ( $last_nonblank_type eq 'w' ) {
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # anything else must be an anonymous hash reference
    return "";
}
25228
sub decide_if_code_block {

    # We are at a '{' where a statement may appear.  Guess whether this
    # brace starts an anonymous hash reference or a code block.
    #
    # Parameters:
    #   $i               - index of the current token in the arrays below
    #   $rtokens         - reference to the array of tokens being scanned
    #   $rtoken_type     - reference to the parallel array of token types
    #   $max_token_index - index of the last token in those arrays
    #
    # Returns "" if anonymous hash, and $last_nonblank_token otherwise.
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;

    # Only the token itself is needed; the list assignment keeps the
    # call in list context.
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # Check for the common case of an empty anonymous hash reference:
    # Maybe something like sub { { } }
    return "" if $next_nonblank_token eq '}';

    # To guess if this '{' is an anonymous hash reference, look ahead
    # and test as follows:
    #
    # it is a hash reference if next come:
    #   - a string or digit followed by a comma or =>
    #   - bareword followed by =>
    # otherwise it is a code block
    #
    # Examples of anonymous hash ref:
    # {'aa',};
    # {1,2}
    #
    # Examples of code blocks:
    # {1; print "hello\n", 1;}
    # {$a,1};

    # We are only going to look ahead one more (nonblank/comment) line.
    # Strange formatting could cause a bad guess, but that's unlikely.
    my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
    my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];

    # 20 is arbitrary but generous, and prevents wasting lots of
    # time in mangled files
    my ( $rpre_tokens, $rpre_types ) =
      peek_ahead_for_n_nonblank_pre_tokens(20);
    if ( defined($rpre_types) && @$rpre_types ) {
        push @pre_types,  @$rpre_types;
        push @pre_tokens, @$rpre_tokens;
    }

    # Put a sentinel token at the end to simplify stopping the search.
    # It also guarantees that $pre_types[ $j + 1 ] exists whenever
    # $pre_types[$j] is a real (non-sentinel) entry.
    push @pre_types, '}';

    # skip a leading blank
    my $jbeg = $pre_types[0] eq 'b' ? 1 : 0;

    # First look for one of these, leaving $j just past it when found:
    #  - bareword
    #  - bareword with leading -
    #  - digit
    #  - quoted string
    my $j = $jbeg;
    if ( $pre_types[$j] =~ /^[\'\"]/ ) {

        # find the closing quote; don't worry about escapes
        my $quote_mark = $pre_types[$j];
        for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
            if ( $pre_types[$k] eq $quote_mark ) {
                $j = $k + 1;
                last;
            }
        }
    }
    elsif ( $pre_types[$j] eq 'd' ) {
        $j++;
    }
    elsif ( $pre_types[$j] eq 'w' ) {
        unless ( $is_keyword{ $pre_tokens[$j] } ) {
            $j++;
        }
    }

    # Advance only when the '-' really is followed by a bareword, so
    # that a lone '-' is not mistaken for a recognized hash key.
    elsif ( $pre_types[$j] eq '-' && $pre_types[ $j + 1 ] eq 'w' ) {
        $j += 2;
    }

    # nothing recognizable as a hash key was found: assume a code block
    return $last_nonblank_token unless ( $j > $jbeg );

    # skip a blank after the candidate key
    $j++ if $pre_types[$j] eq 'b';

    # it's a hash ref if a comma or => follow next
    if ( $pre_types[$j] eq ','
        || ( $pre_types[$j] eq '=' && $pre_types[ $j + 1 ] eq '>' ) )
    {
        return "";
    }

    return $last_nonblank_token;
}
25332
sub unexpected {

    # Report an unexpected token type and show where it is in the
    # input line.
    #
    # Parameters:
    #   $found           - description of what was found
    #   $expecting       - description of what was expected
    #   $i_tok           - index of the offending pretoken
    #   $last_nonblank_i - index of the previous nonblank pretoken; no
    #                      previous token is underlined if negative
    #   $rpretoken_map   - reference to array giving the position in
    #                      $input_line of each pretoken
    #   $rpretoken_type  - reference to array of pretoken types
    #   $input_line      - text of the line being reported
    # USES GLOBAL VARIABLES: $tokenizer_self
    my (
        $found,         $expecting,      $i_tok, $last_nonblank_i,
        $rpretoken_map, $rpretoken_type, $input_line
    ) = @_;

    # Every call is counted, but only the first MAX_NAG_MESSAGES are
    # written out.
    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
        my $pos = $rpretoken_map->[$i_tok];
        my $msg = "found $found where $expecting expected";

        interrupt_logfile();

        # build the numbered line and mark the offending token with '^'
        my ( $offset, $numbered_line, $underline ) =
          make_numbered_line( $tokenizer_self->{_last_line_number},
            $input_line, $pos );
        $underline = write_on_underline( $underline, $pos - $offset, '^' );

        # underline the previous nonblank token too, if there is one
        my $trailer = "";
        if ( $i_tok > 0 && $last_nonblank_i >= 0 ) {
            my $pos_prev = $rpretoken_map->[$last_nonblank_i];

            # stop the dashes at an intervening blank if there is one,
            # otherwise at the offending token itself
            my $pos_end =
              $rpretoken_type->[ $i_tok - 1 ] eq 'b'
              ? $rpretoken_map->[ $i_tok - 1 ]
              : $pos;
            my $num = $pos_end - $pos_prev;

            # never draw more than 40 dashes
            if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }

            $underline =
              write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
            $trailer = " (previous token underlined)";
        }

        warning("$numbered_line\n");
        warning("$underline\n");
        warning("$msg$trailer\n");
        resume_logfile();
    }
}
25372
sub is_non_structural_brace {

    # Decide if a brace or bracket is structural or non-structural
    # by looking at the previous token and type.  Returns true if it
    # is non-structural.
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token

    # EXPERIMENTAL (tentatively deactivated): marking slices as structural
    # was tried with the idea of improving formatting, but it caused the
    # wrong operator expectation for this code:
    #      $user = @vars[1] / 100;
    # Sub operator_expected must be updated before re-implementing:
    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
    #    return 0;
    # }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such.  See also
    # sub code_block_type
    # if ($last_nonblank_type eq 't') {return 0}

    # It is non-structural if it is decorated by type information.
    # For example, the '{' here is non-structural:   ${xxx}
    return 1 if $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/;

    # It is also non-structural if we follow a hash or array closing
    # curly brace or bracket.  For example, the second '{' in this is
    # non-structural: $a{'x'}{'y'}
    # because the first '}' would have been given type 'R'
    return $last_nonblank_type =~ /^([R\]])$/;
}
25405
25406 #########i#############################################################
25407 # Tokenizer routines for tracking container nesting depths
25408 #######################################################################
25409
25410 # The following routines keep track of nesting depths of the nesting
25411 # types, ( [ { and ?.  This is necessary for determining the indentation
25412 # level, and also for debugging programs.  Not only do they keep track of
25413 # nesting depths of the individual brace types, but they check that each
25414 # of the other brace types is balanced within matching pairs.  For
25415 # example, if the program sees this sequence:
25416 #
25417 #         {  ( ( ) }
25418 #
25419 # then it can determine that there is an extra left paren somewhere
25420 # between the { and the }.  And so on with every other possible
25421 # combination of outer and inner brace types.  For another
25422 # example:
25423 #
25424 #         ( [ ..... ]  ] )
25425 #
25426 # which has an extra ] within the parens.
25427 #
25428 # The brace types have indexes 0 .. 3 which are indexes into
25429 # the matrices.
25430 #
25431 # The pair ? : are treated as just another nesting type, with ? acting
25432 # as the opening brace and : acting as the closing brace.
25433 #
25434 # The matrix
25435 #
25436 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
25437 #
25438 # saves the nesting depth of brace type $b (where $b is either of the other
25439 # nesting types) when brace type $a enters a new depth.  When this depth
25440 # decreases, a check is made that the current depth of brace types $b is
25441 # unchanged, or otherwise there must have been an error.  This can
25442 # be very useful for localizing errors, particularly when perl runs to
25443 # the end of a large file (such as this one) and announces that there
25444 # is a problem somewhere.
25445 #
25446 # A numerical sequence number is maintained for every nesting type,
25447 # so that each matching pair can be uniquely identified in a simple
25448 # way.
25449
sub increase_nesting_depth {

    # Record the opening of one more container of nesting type $aa.
    #
    # Parameters:
    #   $aa  - index of the nesting type being opened
    #   $pos - saved, together with the current line number and line
    #          text, as the place where this depth started
    #
    # Returns the list ( $seqno, $indent ) where $seqno is the sequence
    # number given to the new container and $indent is 1 if this is a
    # nested ternary which should be indented, and 0 otherwise.
    #
    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    my ( $aa, $pos ) = @_;

    # one level deeper for this type and for all types together
    my $depth = ++$current_depth[$aa];
    $total_depth++;
    $total_depth[$aa][$depth] = $total_depth;

    # Sequence numbers increment by number of items.  This keeps
    # a unique set of numbers but still allows the relative location
    # of any type to be determined.
    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
    my $seqno = $nesting_sequence_number[$aa];
    $current_sequence_number[$aa][$depth] = $seqno;

    # remember where this depth began
    $starting_line_of_current_depth[$aa][$depth] = [
        $tokenizer_self->{_last_line_number},
        $tokenizer_self->{_line_text}, $pos
    ];

    # save the present depth of each of the other nesting types
    for my $other ( 0 .. $#closing_brace_names ) {
        next if ( $other == $aa );
        $depth_array[$aa][$other][$depth] = $current_depth[$other];
    }

    # set a flag for indenting a nested ternary statement
    my $indent = 0;
    if ( $aa == QUESTION_COLON ) {
        $nested_ternary_flag[$depth] = 0;

        # Indent only if there is an enclosing ternary whose flag is
        # still 0 and which was opened at the total depth just before
        # this one.
        if (   $depth > 1
            && $nested_ternary_flag[ $depth - 1 ] == 0
            && $total_depth[$aa][ $depth - 1 ] == $total_depth - 1 )
        {
            $indent = 1;
            $nested_ternary_flag[ $depth - 1 ] = -1;
        }
    }
    return ( $seqno, $indent );
}
25493
sub decrease_nesting_depth {

    my ( $aa, $pos ) = @_;

    # Record that a closing token of nesting type $aa (an index into
    # @closing_brace_names) was seen at position $pos of the current line,
    # and verify that every other nesting type is at the same depth it had
    # when the matching opening token was seen.
    #
    # Returns the list ( $seqno, $outdent ), where
    #   $seqno   = sequence number of the container being closed, or 0 if
    #              there was no open container of this type
    #   $outdent = for type QUESTION_COLON, the nested ternary flag saved
    #              for this depth; otherwise 0
    #
    # USES GLOBAL VARIABLES: $tokenizer_self, $total_depth, @current_depth,
    # @current_sequence_number, @depth_array, @nested_ternary_flag,
    # @starting_line_of_current_depth, @opening_brace_names,
    # @closing_brace_names
    my $seqno             = 0;
    my $outdent           = 0;
    my $input_line_number = $tokenizer_self->{_last_line_number};
    my $input_line        = $tokenizer_self->{_line_text};

    $total_depth--;

    # Nothing of this type is open, so this closing token has no partner
    unless ( $current_depth[$aa] > 0 ) {
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
            my $msg = <<"EOM";
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
        }
        increment_brace_error();
        return ( $seqno, $outdent );
    }

    my $depth = $current_depth[$aa];
    $seqno = $current_sequence_number[$aa][$depth];

    # pick up the flag for un-indenting after a nested ternary statement
    if ( $aa == QUESTION_COLON ) {
        $outdent = $nested_ternary_flag[$depth];
    }

    # check that any brace types $bb contained within are balanced
    foreach my $bb ( 0 .. $#closing_brace_names ) {
        next if ( $bb == $aa );
        next if ( $depth_array[$aa][$bb][$depth] == $current_depth[$bb] );

        # positive: extra opening tokens; negative: extra closing tokens
        my $diff = $current_depth[$bb] - $depth_array[$aa][$bb][$depth];

        # Do not complain too many times.  And if too many closing types
        # have occurred, we probably already caught this error.
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES
            && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) )
        {
            interrupt_logfile();

            # where the container now being closed was opened ..
            my $rsl = $starting_line_of_current_depth[$aa][$depth];
            my $sl  = $rsl->[0];

            # .. and where we are now
            my $rel = [ $input_line_number, $input_line, $pos ];
            my $el  = $rel->[0];

            my $ess = ( $diff == 1 || $diff == -1 ) ? '' : 's';
            my $bname;
            if   ( $diff > 0 ) { $bname = $opening_brace_names[$bb] }
            else               { $bname = $closing_brace_names[$bb] }

            write_error_indicator_pair( @$rsl, '^' );
            my $msg = <<"EOM";
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM

            # for extra opening tokens we can also show the latest one
            if ( $diff > 0 ) {
                my $rml =
                  $starting_line_of_current_depth[$bb][ $current_depth[$bb] ];
                my $ml = $rml->[0];
                $msg .=
"    The most recent un-matched $bname is on line $ml\n";
                write_error_indicator_pair( @$rml, '^' );
            }
            write_error_indicator_pair( @$rel, '^' );
            warning($msg);
            resume_logfile();
        }
        increment_brace_error();
    }

    $current_depth[$aa]--;
    return ( $seqno, $outdent );
}
25591
sub check_final_nesting_depths {

    # Called to report any nesting type which is still open: for each type
    # with a nonzero depth, an error is issued which shows the line where
    # the most recent unmatched opening token was seen.
    #
    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth,
    # @opening_brace_names, @closing_brace_names

    foreach my $aa ( 0 .. $#closing_brace_names ) {

        # this type is balanced
        next unless ( $current_depth[$aa] );

        my $rsl = $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
        my $sl  = $rsl->[0];
        my $msg = <<"EOM";
Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
        indicate_error( $msg, @$rsl, '^' );
        increment_brace_error();
    }
}
25612
25613 #########i#############################################################
25614 # Tokenizer routines for looking ahead in input stream
25615 #######################################################################
25616
sub peek_ahead_for_n_nonblank_pre_tokens {

    # Look ahead in the input for the first line which is neither blank
    # nor a comment, and pre-tokenize it.
    #
    # Given:   the maximum number of pretokens wanted
    # Returns: ( $rpre_tokens, $rpre_types ) as returned by pre_tokenize,
    #          or a pair of undef's if eof is hit without seeing any
    #          pretokens
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $max_pretokens = shift;
    my ( $rpre_tokens, $rpre_types );

    my $line_buffer = $tokenizer_self->{_line_buffer_object};
    my $k           = 0;
    while ( my $line = $line_buffer->peek_ahead( $k++ ) ) {
        $line =~ s/^\s*//;    # trim leading blanks

        # keep looking if this is a blank line or a comment
        next if ( $line eq '' || $line =~ /^#/ );

        # first line of code: the middle return value is not needed here
        ( $rpre_tokens, undef, $rpre_types ) =
          pre_tokenize( $line, $max_pretokens );
        last;
    }
    return ( $rpre_tokens, $rpre_types );
}
25638
# Look ahead for the next non-blank, non-comment line of code and append
# its leading pretokens to the caller's token list.
sub peek_ahead_for_nonblank_token {

    # Given:
    #   $rtokens         - reference to the token list of the current line
    #   $max_token_index - index of the last token now in that list
    # Returns: $rtokens, possibly extended with up to 2 pretokens from the
    #   next line of code.  The new tokens are stored beginning at index
    #   $max_token_index + 2.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $rtokens, $max_token_index ) = @_;

    my $line_buffer = $tokenizer_self->{_line_buffer_object};
    my $k           = 0;
    while ( my $line = $line_buffer->peek_ahead( $k++ ) ) {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( $line eq '' );       # skip blank
        next if ( $line =~ /^#/ );     # skip comment

        # only need 2 pre-tokens
        my ($rtok) = pre_tokenize( $line, 2 );

        # copy them over, stopping at any token holding a newline
        my $j = $max_token_index + 1;
        for my $tok ( @{$rtok} ) {
            last if ( $tok =~ "\n" );
            $rtokens->[ ++$j ] = $tok;
        }
        last;
    }
    return $rtokens;
}
25665
25666 #########i#############################################################
25667 # Tokenizer guessing routines for ambiguous situations
25668 #######################################################################
25669
sub guess_if_pattern_or_conditional {

    # This routine is called when we have encountered a ? following an
    # unknown bareword, and we must decide if it starts a pattern or not.
    # input parameters:
    #   $i               - token index of the ? starting possible pattern
    #   $rtokens         - reference to the list of tokens on this line
    #   $rtoken_map      - (not used here)
    #   $max_token_index - index of the last token on this line
    # output parameters:
    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
    #   $msg        = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $msg = "guessing that ? after $last_nonblank_token starts a ";

    # a ? which ends the line cannot have an ending ? on this line
    if ( $i >= $max_token_index ) {
        $msg .= "conditional (no end to pattern found on the line)\n";
        return ( 0, $msg );
    }

    # look for a possible ending ? on this line by scanning as if we were
    # already inside of a quote which began at the ?
    my $in_quote        = 1;
    my $quote_character = '';
    my $quote_pos       = 0;
    my $quote_depth     = 0;
    ( $i, $in_quote ) =
      follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
        $quote_pos, $quote_depth, $max_token_index );

    # we didn't find an ending ? on this line,
    # so we bias towards conditional
    if ($in_quote) {
        $msg .= "conditional (no ending ? on this line)\n";
        return ( 0, $msg );
    }

    # we found an ending ?, so we bias towards a pattern
    my $is_pattern = 0;
    if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
        $is_pattern = 1;
        $msg .= "pattern (found ending ? and pattern expected)\n";
    }
    else {

        # NOTE(review): the message says 'pattern' but the flag returned
        # in this case is 0 -- confirm which of the two is intended
        $msg .= "pattern (uncertain, but found ending ?)\n";
    }
    return ( $is_pattern, $msg );
}
25727
sub guess_if_pattern_or_division {

    # This routine is called when we have encountered a / following an
    # unknown bareword, and we must decide if it starts a pattern or is a
    # division.
    # input parameters:
    #   $i               - token index of the / starting possible pattern
    #   $rtokens         - reference to the list of tokens on this line
    #   $rtoken_map      - (not used here)
    #   $max_token_index - index of the last token on this line
    # output parameters:
    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
    #   $msg        = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that / after $last_nonblank_token starts a ";

    # a / which ends the line cannot have an ending / on this line
    if ( $i >= $max_token_index ) {

        # (this string used to be evaluated in void context, so that the
        # explanation never made it into the message)
        $msg .= "division (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;
        my $divide_expected =
          numerator_expected( $i, $rtokens, $max_token_index );

        # look for a possible ending / on this line by scanning as if we
        # were already inside of a quote which began at the /
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        ( $i, $in_quote ) =
          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {

            # we didn't find an ending / on this line,
            # so we bias towards division
            if ( $divide_expected >= 0 ) {
                $is_pattern = 0;
                $msg .= "division (no ending / on this line)\n";
            }
            else {

                # NOTE(review): unlike the other branches this replaces
                # the message rather than appending to it, which drops the
                # 'guessing that ...' prefix; left as found -- confirm
                $msg        = "multi-line pattern (division not possible)\n";
                $is_pattern = 1;
            }

        }

        # we found an ending /, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {

                if ( $divide_expected >= 0 ) {

                    # both are possible; use the distance to the ending /
                    # (in pretokens) to decide
                    if ( $i - $ibeg > 60 ) {
                        $msg .= "division (matching / too distant)\n";
                        $is_pattern = 0;
                    }
                    else {
                        $msg .= "pattern (but division possible too)\n";
                        $is_pattern = 1;
                    }
                }
                else {
                    $is_pattern = 1;
                    $msg .= "pattern (division not possible)\n";
                }
            }
            else {

                if ( $divide_expected >= 0 ) {
                    $is_pattern = 0;
                    $msg .= "division (pattern not possible)\n";
                }
                else {
                    $is_pattern = 1;
                    $msg .=
                      "pattern (uncertain, but division would not work here)\n";
                }
            }
        }
    }
    return ( $is_pattern, $msg );
}
25818
# try to resolve here-doc vs. shift by looking ahead for
# non-code or the end token (currently only looks for end token)
# returns  1 if it is probably a here doc,
#          0 if it is probably a shift (the target is a known constant),
#         -1 if the end of file was reached without seeing the target
sub guess_if_here_doc {

    # Given:   $next_token = the possible here-doc target
    # Returns: $here_doc_expected = 1, 0, or -1, as described above.
    # A note describing the decision is written to the logfile.

    # This is how many lines we will search for a target as part of the
    # guessing strategy.  It is a constant because there is probably
    # little reason to change it.
    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
    # %is_constant,
    use constant HERE_DOC_WINDOW => 40;

    my $next_token        = shift;
    my $here_doc_expected = 0;
    my $line;
    my $k   = 0;
    my $msg = "checking <<";

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
    {
        chomp $line;

        # The target must be matched literally, so any characters which
        # are special in a regex are quoted.
        if ( $line =~ /^\Q$next_token\E$/ ) {
            $msg .= " -- found target $next_token ahead $k lines\n";
            $here_doc_expected = 1;    # got it
            last;
        }
        last if ( $k >= HERE_DOC_WINDOW );
    }

    unless ($here_doc_expected) {

        if ( !defined($line) ) {
            $here_doc_expected = -1;    # hit eof without seeing target
            $msg .= " -- must be shift; target $next_token not in file\n";

        }
        else {                          # still unsure..taking a wild guess

            if ( !$is_constant{$current_package}{$next_token} ) {
                $here_doc_expected = 1;
                $msg .=
                  " -- guessing it's a here-doc ($next_token not a constant)\n";
            }
            else {
                $msg .=
                  " -- guessing it's a shift ($next_token is a constant)\n";
            }
        }
    }
    write_logfile_entry($msg);
    return $here_doc_expected;
}
25872
25873 #########i#############################################################
25874 # Tokenizer Routines for scanning identifiers and related items
25875 #######################################################################
25876
sub scan_bare_identifier_do {

    # This routine is called to scan a token starting with an alphanumeric
    # variable or package separator, :: or '.
    #
    # Returns the list ( $i, $tok, $type, $prototype ), where $i is the
    # updated pretoken index, $tok is the text of the identifier, and
    # $type is one of the token types assigned below:
    #   'w' bareword         'k' keyword          'v' v-string
    #   'C' known constant   'G' block function   'U' user function
    #   'Y' indirect object  'Z' bareword after 'sort', or a word in the
    #                            indirect object slot not followed by space
    # $prototype is changed only for type 'U'.
    #
    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
    # $last_nonblank_type, @paren_type, $paren_depth, %is_keyword,
    # %is_constant, %is_block_function, %is_block_list_function,
    # %is_user_function, %user_function_prototype,
    # %is_indirect_object_taker

    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
        $max_token_index )
      = @_;
    my $package = undef;

    # we have to back up one pretoken at a :: since each : is one pretoken
    # (the same adjustment is made for a leading ->)
    my $i_beg = $i;
    if ( $tok eq '::' || $tok eq '->' ) { $i_beg-- }
    my $pos_beg = $$rtoken_map[$i_beg];
    pos($input_line) = $pos_beg;

    #  Examples:
    #   A::B::C
    #   A::
    #   ::A
    #   A'B
    unless ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {

        # no match but line not blank - could be syntax error
        # perl will take '::' alone without complaint

        # change this warning to log message if it becomes annoying
        warning("didn't find identifier after leading ::\n");
        return ( $i, $tok, 'w', $prototype );
    }

    my $pos = pos($input_line);
    $tok = substr( $input_line, $pos_beg, $pos - $pos_beg );

    # type 'w' includes anything without leading type info
    # ($,%,@,*) including something like abc::def::ghi
    $type = 'w';

    my $sub_name = defined($2) ? $2 : "";
    if ( defined($1) ) {
        $package = $1;

        # patch: don't allow isolated package name which just ends
        # in the old style package separator (single quote).  Example:
        #   use CGI':all';
        if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
            $pos--;
        }

        # convert to the modern separator, with an explicit 'main' in
        # front of a leading separator and without any trailing separator
        $package =~ s/\'/::/g;
        $package = 'main' . $package if ( $package =~ /^\:/ );
        $package =~ s/::$//;
    }
    else {
        $package = $current_package;
        $type = 'k' if ( $is_keyword{$tok} );
    }

    # if it is a bareword..
    if ( $type eq 'w' ) {

        # check for v-string with leading 'v' type character
        # (This seems to have precedence over filehandle, type 'Y')
        if ( $tok =~ /^v\d[_\d]*$/ ) {

            # we only have the first part - something like 'v101' -
            # look for more
            if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
                $pos = pos($input_line);
                $tok = substr( $input_line, $pos_beg, $pos - $pos_beg );
            }
            $type = 'v';

            # warn if this version can't handle v-strings
            report_v_string($tok);
        }

        elsif ( $is_constant{$package}{$sub_name} ) {
            $type = 'C';
        }

        # bareword after sort has implied empty prototype; for example:
        # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
        # This has priority over whatever the user has specified.
        elsif ($last_nonblank_token eq 'sort'
            && $last_nonblank_type eq 'k' )
        {
            $type = 'Z';
        }

        # Note: strangely, perl does not seem to really let you create
        # functions which act like eval and do, in the sense that eval
        # and do may have operators following the final }, but any
        # operators that you create with prototype (&) apparently do not
        # allow trailing operators, only terms.  If this ever changes,
        # here is the update to make perltidy behave accordingly:
        #
        # elsif ( $is_block_function{$package}{$tok} ) {
        #    $tok='eval'; # patch to do braces like eval  - doesn't work
        #    $type = 'k';
        #}
        # FIXME: This could become a separate type to allow for different
        # future behavior:
        elsif ( $is_block_function{$package}{$sub_name} ) {
            $type = 'G';
        }

        elsif ( $is_block_list_function{$package}{$sub_name} ) {
            $type = 'G';
        }
        elsif ( $is_user_function{$package}{$sub_name} ) {
            $type      = 'U';
            $prototype = $user_function_prototype{$package}{$sub_name};
        }

        # check for indirect object
        elsif (

            # added 2001-03-27: must not be followed immediately by '('
            # see fhandle.t
            ( $input_line !~ m/\G\(/gc )

            # and
            && (

                # preceded by keyword like 'print', 'printf' and friends
                $is_indirect_object_taker{$last_nonblank_token}

                # or preceded by something like 'print(' or 'printf('
                || ( ( $last_nonblank_token eq '(' )
                    && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
            )
          )
        {

            # may not be indirect object unless followed by a space
            if ( $input_line =~ m/\G\s+/gc ) {
                $type = 'Y';

                # Abandon Hope ...
                # Perl's indirect object notation is a very bad
                # thing and can cause subtle bugs, especially for
                # beginning programmers.  And I haven't even been
                # able to figure out a sane warning scheme which
                # doesn't get in the way of good scripts.

                # Complain if a filehandle has any lower case
                # letters.  This is suggested good practice.
                # Use 'sub_name' because something like
                # main::MYHANDLE is ok for filehandle.
                # The complaint is only made if a '(' follows, since this
                # could be a bug caused by older perltidy.
                if (   $sub_name =~ /[a-z]/
                    && $input_line =~ m/\G\s*\(/gc )
                {
                    complain(
"Caution: unknown word '$tok' in indirect object slot\n"
                    );
                }
            }

            # bareword not followed by a space -- may not be filehandle
            # (may be function call defined in a 'use' statement)
            else {
                $type = 'Z';
            }
        }
    }

    # Now we must convert back from character position
    # to pre_token index.
    # I don't think an error flag can occur here ..but who knows
    my $error;
    ( $i, $error ) =
      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
    if ($error) {
        warning("scan_bare_identifier: Possibly invalid tokenization\n");
    }
    return ( $i, $tok, $type, $prototype );
}
26076
sub scan_id_do {

    # This is the new scanner and will eventually replace scan_identifier.
    # Only type 'sub' and 'package' are implemented.
    # Token types $ * % @ & -> are not yet implemented.
    #
    # Scan identifier following a type token.
    # The type of call depends on $id_scan_state: $id_scan_state = ''
    # for starting call, in which case $tok must be the token defining
    # the type.
    #
    # If the type token is the last nonblank token on the line, a value
    # of $id_scan_state = $tok is returned, indicating that further
    # calls must be made to get the identifier.  If the type token is
    # not the last nonblank token on the line, the identifier is
    # scanned and handled and a value of '' is returned.
    #
    # Returns the list ( $i, $tok, $type, $id_scan_state ).
    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
    # $in_attribute_list, $statement_type, $tokenizer_self

    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
        $max_token_index )
      = @_;

    my ( $type, $i_beg );

    # on re-entry, start scanning at first token on the line
    if ($id_scan_state) {
        $type  = '';
        $i_beg = $i;
    }

    # on initial entry, start scanning just after type token
    else {
        $type          = 't';
        $i_beg         = $i + 1;
        $id_scan_state = $tok;
    }

    # find $i_beg = index of next nonblank token,
    # and handle empty lines
    my $next_nonblank_token = $$rtokens[$i_beg];
    my $blank_line          = ( $i_beg > $max_token_index ) ? 1 : 0;
    if ( !$blank_line ) {

        # only a '#' immediately after a '$' is not a comment
        if ( $next_nonblank_token eq '#' && $tok ne '$' ) {
            $blank_line = 1;
        }

        # step over leading whitespace; the rest of the line may still
        # turn out to be empty or a comment
        if ( $next_nonblank_token =~ /^\s/ ) {
            ( $next_nonblank_token, $i_beg ) =
              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
                $max_token_index );
            $blank_line = 1 if ( $next_nonblank_token =~ /(^#|^\s*$)/ );
        }
    }

    # handle non-blank line; identifier, if any, must follow
    if ( !$blank_line ) {

        if ( $id_scan_state eq 'sub' ) {
            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
                $input_line, $i,             $i_beg,
                $tok,        $type,          $rtokens,
                $rtoken_map, $id_scan_state, $max_token_index
            );
        }
        elsif ( $id_scan_state eq 'package' ) {
            ( $i, $tok, $type ) =
              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
                $rtoken_map, $max_token_index );
            $id_scan_state = '';
        }
        else {
            warning("invalid token in scan_id: $tok\n");
            $id_scan_state = '';
        }
    }

    # shouldn't happen: still scanning, but no type has been set
    if ( $id_scan_state && !$type ) {
        warning(
"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
        );
        report_definite_bug();
    }

    if (TOKENIZER_DEBUG_FLAG_NSCAN) {
        print
          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
    }
    return ( $i, $tok, $type, $id_scan_state );
}
26184
sub check_prototype {

    # Record what is known about sub $subname of package $package, based
    # on its prototype string $proto (which may be undefined):
    #   no prototype        - marked as a user function
    #   empty prototype ()  - marked as a constant
    #   any other prototype - marked as a user function and the prototype
    #                         is saved; if it contains an '&' the sub is
    #                         also marked as a block function
    # Nothing is done unless both $package and $subname are defined.
    # USES GLOBAL VARIABLES: %is_user_function, %user_function_prototype,
    # %is_block_function, %is_block_list_function, %is_constant
    my ( $proto, $package, $subname ) = @_;
    return unless ( defined($package) && defined($subname) );

    if ( !defined($proto) ) {
        $is_user_function{$package}{$subname} = 1;
    }
    else {

        # strip the enclosing parens and any space next to them
        $proto =~ s/^\s*\(\s*//;
        $proto =~ s/\s*\)$//;

        if ( !$proto ) {
            $is_constant{$package}{$subname} = 1;
        }
        else {
            $is_user_function{$package}{$subname}        = 1;
            $user_function_prototype{$package}{$subname} = "($proto)";

            # prototypes containing '&' must be treated specially..
            if ( $proto =~ /\&/ ) {

                # right curly braces of prototypes ending in
                # '&' may be followed by an operator
                if ( $proto =~ /\&$/ ) {
                    $is_block_function{$package}{$subname} = 1;
                }

                # right curly braces of prototypes NOT ending in
                # '&' may NOT be followed by an operator
                else {
                    $is_block_list_function{$package}{$subname} = 1;
                }
            }
        }
    }
}
26219
sub do_scan_package {

    # do_scan_package parses a package name
    # it is called with $i_beg equal to the index of the first nonblank
    # token following a 'package' token.
    #
    # Returns the list ( $i, $tok, $type ).  If a package name is found
    # then $tok is the text 'package ' followed by the name as written,
    # $type is 'i', $i is the updated pretoken index, and the name becomes
    # the current package.  Otherwise $i and $tok are returned unchanged
    # with $type = 'k'.
    # USES GLOBAL VARIABLES: $current_package,

    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
        $max_token_index )
      = @_;
    my $pos_beg = $$rtoken_map[$i_beg];
    pos($input_line) = $pos_beg;

    # no match but line not blank --
    # could be a label with name package, like package:  , for example.
    unless ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
        return ( $i, $tok, 'k' );
    }

    # the package name follows; put it into standard form, with '::'
    # separators and an explicit 'main' in front of a leading separator
    my $package = ( defined($1) && $1 ) ? $1 : 'main';
    $package =~ s/\'/::/g;
    $package = 'main' . $package if ( $package =~ /^\:/ );
    $package =~ s/::$//;

    my $pos = pos($input_line);
    $tok  = 'package ' . substr( $input_line, $pos_beg, $pos - $pos_beg );
    $type = 'i';

    # Now we must convert back from character position
    # to pre_token index.
    # I don't think an error flag can occur here ..but ?
    my $error;
    ( $i, $error ) =
      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
    if ($error) { warning("Possibly invalid package\n") }
    $current_package = $package;

    # check for error: only a ';' or '}' may follow the name
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );
    if ( $next_nonblank_token !~ /^[;\}]$/ ) {
        warning(
            "Unexpected '$next_nonblank_token' after package name '$tok'\n"
        );
    }

    return ( $i, $tok, $type );
}
26273
26274 sub scan_identifier_do {
26275
26276     # This routine assembles tokens into identifiers.  It maintains a
26277     # scan state, id_scan_state.  It updates id_scan_state based upon
26278     # current id_scan_state and token, and returns an updated
26279     # id_scan_state and the next index after the identifier.
26280     # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
26281     # $last_nonblank_type
          #
          # Arguments:
          #   $i               - index in @$rtokens of the token to start from
          #   $id_scan_state   - state carried over from an earlier call, or
          #                      empty to start a new identifier
          #   $identifier      - identifier text accumulated so far
          #   $rtokens         - reference to the array of tokens being scanned
          #   $max_token_index - upper bound on the index used by the scan loop
          #   $expecting       - consulted only in state '&', to decide whether
          #                      '&' plus a following punctuation token is kept
          #                      together as one identifier
          #
          # Returns the list ( $i, $tok, $type, $id_scan_state, $identifier ).
          # If no identifier text was assembled, $tok and $i are the starting
          # token and the starting index.
          #
          # Values of id_scan_state used below:
          #   '$' - starting a variable name (also entered after 'sub',
          #         'package' and '->')
          #   '&' - starting a possible sub call
          #   'A' - an alphanumeric token is wanted next (after '::' or '^')
          #   ':' - an alphanumeric token was seen; '::' may follow
          #   '(' - looking for the '(' which opens a sub prototype
          #   ')' - looking for the ')' which closes a sub prototype
          #   ''  - scan finished
          #
          # Values of $type set here: 'i', 't', 'w', '->', '&' and ''.
26282
26283     my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
26284         $expecting )
26285       = @_;
26286     my $i_begin   = $i;
26287     my $type      = '';
26288     my $tok_begin = $$rtokens[$i_begin];

          # a starting ':' is taken to be '::'
          # NOTE(review): presumably the caller only starts on ':' when a
          # second ':' follows -- confirm against the caller
26289     if ( $tok_begin eq ':' ) { $tok_begin = '::' }

          # the *_begin copies are used only for the debug output at the end
26290     my $id_scan_state_begin = $id_scan_state;
26291     my $identifier_begin    = $identifier;
26292     my $tok                 = $tok_begin;
26293     my $message             = "";
26294
26295     # these flags will be used to help figure out the type:
26296     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
26297     my $saw_type;
26298
26299     # allow old package separator (') except in 'use' statement
26300     my $allow_tick = ( $last_nonblank_token ne 'use' );
26301
26302     # get started by defining a type and a state if necessary
26303     unless ($id_scan_state) {
26304         $context = UNKNOWN_CONTEXT;
26305
26306         # fixup for digraph
26307         if ( $tok eq '>' ) {
26308             $tok       = '->';
26309             $tok_begin = $tok;
26310         }
26311         $identifier = $tok;
26312
26313         if ( $tok eq '$' || $tok eq '*' ) {
26314             $id_scan_state = '$';
26315             $context       = SCALAR_CONTEXT;
26316         }
26317         elsif ( $tok eq '%' || $tok eq '@' ) {
26318             $id_scan_state = '$';
26319             $context       = LIST_CONTEXT;
26320         }
26321         elsif ( $tok eq '&' ) {
26322             $id_scan_state = '&';
26323         }
26324         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
26325             $saw_alpha     = 0;     # 'sub' is considered type info here
26326             $id_scan_state = '$';
26327             $identifier .= ' ';     # need a space to separate sub from sub name
26328         }
26329         elsif ( $tok eq '::' ) {
26330             $id_scan_state = 'A';
26331         }
26332         elsif ( $tok =~ /^[A-Za-z_]/ ) {
26333             $id_scan_state = ':';
26334         }
26335         elsif ( $tok eq '->' ) {
26336             $id_scan_state = '$';
26337         }
26338         else {
26339
26340             # shouldn't happen
26341             my ( $a, $b, $c ) = caller;
26342             warning("Program Bug: scan_identifier given bad token = $tok \n");
26343             warning("   called from sub $a  line: $c\n");
26344             report_definite_bug();
26345         }
26346         $saw_type = !$saw_alpha;
26347     }
26348     else {

              # continuing an earlier scan: back up one token so that the
              # pre-increment in the loop below reads token $i_begin first
26349         $i--;
26350         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
26351     }
26352
26353     # now loop to gather the identifier
26354     my $i_save = $i;
26355
26356     while ( $i < $max_token_index ) {

              # $i_save tracks the index of the last token taken which was not
              # blank; branches which give up restore $i from it
26357         $i_save = $i unless ( $tok =~ /^\s*$/ );
26358         $tok = $$rtokens[ ++$i ];
26359
              # two consecutive ':' tokens are handled as a single '::'
26360         if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
26361             $tok = '::';
26362             $i++;
26363         }
26364
26365         if ( $id_scan_state eq '$' ) {    # starting variable name
26366
26367             if ( $tok eq '$' ) {
26368
26369                 $identifier .= $tok;
26370
26371                 # we've got a punctuation variable if end of line (punct.t)
26372                 if ( $i == $max_token_index ) {
26373                     $type          = 'i';
26374                     $id_scan_state = '';
26375                     last;
26376                 }
26377             }
26378             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
26379                 $saw_alpha     = 1;
26380                 $id_scan_state = ':';           # now need ::
26381                 $identifier .= $tok;
26382             }
26383             elsif ( $tok eq "'" && $allow_tick ) {    # old package separator
26384                 $saw_alpha     = 1;
26385                 $id_scan_state = ':';                 # now need ::
26386                 $identifier .= $tok;
26387
26388                 # Perl will accept leading digits in identifiers,
26389                 # although they may not always produce useful results.
26390                 # Something like $main::0 is ok.  But this also works:
26391                 #
26392                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
26393                 #  howdy::123::bubba();
26394                 #
26395             }
26396             elsif ( $tok =~ /^[0-9]/ ) {              # numeric
26397                 $saw_alpha     = 1;
26398                 $id_scan_state = ':';                 # now need ::
26399                 $identifier .= $tok;
26400             }
26401             elsif ( $tok eq '::' ) {
26402                 $id_scan_state = 'A';
26403                 $identifier .= $tok;
26404             }
26405             elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
26406                 $identifier .= $tok;    # keep same state, a $ could follow
26407             }
26408             elsif ( $tok eq '{' ) {
26409
26410                 # check for something like ${#} or ${©}
26411                 if (   $identifier eq '$'
26412                     && $i + 2 <= $max_token_index
26413                     && $$rtokens[ $i + 2 ] eq '}'
26414                     && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
26415                 {
26416                     my $next2 = $$rtokens[ $i + 2 ];
26417                     my $next1 = $$rtokens[ $i + 1 ];
26418                     $identifier .= $tok . $next1 . $next2;
26419                     $i += 2;
26420                     $id_scan_state = '';
26421                     last;
26422                 }
26423
26424                 # skip something like ${xxx} or ->{
26425                 $id_scan_state = '';
26426
26427                 # if this is the first token of a line, any tokens for this
26428                 # identifier have already been accumulated
26429                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
26430                 $i = $i_save;
26431                 last;
26432             }
26433
26434             # space ok after leading $ % * & @
26435             elsif ( $tok =~ /^\s*$/ ) {
26436
26437                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
26438
26439                     if ( length($identifier) > 1 ) {
26440                         $id_scan_state = '';
26441                         $i             = $i_save;
26442                         $type          = 'i';    # probably punctuation variable
26443                         last;
26444                     }
26445                     else {
26446
26447                         # spaces after $'s are common, and space after @
26448                         # is harmless, so only complain about space
26449                         # after other type characters. Space after $ and
26450                         # @ will be removed in formatting.  Report space
26451                         # after % and * because they might indicate a
26452                         # parsing error.  In other words '% ' might be a
26453                         # modulo operator.  Delete this warning if it
26454                         # gets annoying.
26455                         if ( $identifier !~ /^[\@\$]$/ ) {
26456                             $message =
26457                               "Space in identifier, following $identifier\n";
26458                         }
26459                     }
26460                 }
26461
26462                 # else:
26463                 # space after '->' is ok
26464             }
26465             elsif ( $tok eq '^' ) {
26466
26467                 # check for some special variables like $^W
26468                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26469                     $identifier .= $tok;
26470                     $id_scan_state = 'A';
26471
26472                     # Perl accepts '$^]' or '@^]', but
26473                     # there must not be a space before the ']'.
26474                     my $next1 = $$rtokens[ $i + 1 ];
26475                     if ( $next1 eq ']' ) {
26476                         $i++;
26477                         $identifier .= $next1;
26478                         $id_scan_state = "";
26479                         last;
26480                     }
26481                 }
26482                 else {
26483                     $id_scan_state = '';
26484                 }
26485             }
26486             else {    # something else
26487
26488                 # check for various punctuation variables
26489                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26490                     $identifier .= $tok;
26491                 }
26492
26493                 elsif ( $identifier eq '$#' ) {
26494
26495                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
26496
26497                     # perl seems to allow just these: $#: $#- $#+
26498                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
26499                         $type = 'i';
26500                         $identifier .= $tok;
26501                     }
26502                     else {
26503                         $i = $i_save;
26504                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
26505                     }
26506                 }
26507                 elsif ( $identifier eq '$$' ) {
26508
26509                     # perl does not allow references to punctuation
26510                     # variables without braces.  For example, this
26511                     # won't work:
26512                     #  $:=\4;
26513                     #  $a = $$:;
26514                     # You would have to use
26515                     #  $a = ${$:};
26516
26517                     $i = $i_save;
26518                     if   ( $tok eq '{' ) { $type = 't' }
26519                     else                 { $type = 'i' }
26520                 }
26521                 elsif ( $identifier eq '->' ) {
26522                     $i = $i_save;
26523                 }
26524                 else {
26525                     $i = $i_save;
26526                     if ( length($identifier) == 1 ) { $identifier = ''; }
26527                 }
26528                 $id_scan_state = '';
26529                 last;
26530             }
26531         }
26532         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
26533
26534             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
26535                 $id_scan_state = ':';          # now need ::
26536                 $saw_alpha     = 1;
26537                 $identifier .= $tok;
26538             }
26539             elsif ( $tok eq "'" && $allow_tick ) {    # old package separator
26540                 $id_scan_state = ':';                 # now need ::
26541                 $saw_alpha     = 1;
26542                 $identifier .= $tok;
26543             }
26544             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
26545                 $id_scan_state = ':';       # now need ::
26546                 $saw_alpha     = 1;
26547                 $identifier .= $tok;
26548             }
26549             elsif ( $tok =~ /^\s*$/ ) {     # allow space
26550             }
26551             elsif ( $tok eq '::' ) {        # leading ::
26552                 $id_scan_state = 'A';       # accept alpha next
26553                 $identifier .= $tok;
26554             }
26555             elsif ( $tok eq '{' ) {
26556                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
26557                 $i             = $i_save;
26558                 $id_scan_state = '';
26559                 last;
26560             }
26561             else {
26562
26563                 # punctuation variable?
26564                 # testfile: cunningham4.pl
26565                 #
26566                 # We have to be careful here.  If we are in an unknown state,
26567                 # we will reject the punctuation variable.  In the following
26568                 # example the '&' is a binary operator but we are in an unknown
26569                 # state because there is no sigil on 'Prima', so we don't
26570                 # know what it is.  But it is a bad guess that
26571                 # '&~' is a punctuation variable.
26572                 # $self->{text}->{colorMap}->[
26573                 #   Prima::PodView::COLOR_CODE_FOREGROUND
26574                 #   & ~tb::COLOR_INDEX ] =
26575                 #   $sec->{ColorCode}
26576                 if ( $identifier eq '&' && $expecting ) {
26577                     $identifier .= $tok;
26578                 }
26579                 else {
26580                     $identifier = '';
26581                     $i          = $i_save;
26582                     $type       = '&';
26583                 }
26584                 $id_scan_state = '';
26585                 last;
26586             }
26587         }
26588         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
26589
26590             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
26591                 $identifier .= $tok;
26592                 $id_scan_state = ':';        # now need ::
26593                 $saw_alpha     = 1;
26594             }
26595             elsif ( $tok eq "'" && $allow_tick ) {
26596                 $identifier .= $tok;
26597                 $id_scan_state = ':';        # now need ::
26598                 $saw_alpha     = 1;
26599             }
26600             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
26601                 $identifier .= $tok;
26602                 $id_scan_state = ':';        # now need ::
26603                 $saw_alpha     = 1;
26604             }
26605             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26606                 $id_scan_state = '(';
26607                 $identifier .= $tok;
26608             }
26609             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26610                 $id_scan_state = ')';
26611                 $identifier .= $tok;
26612             }
26613             else {
26614                 $id_scan_state = '';
26615                 $i             = $i_save;
26616                 last;
26617             }
26618         }
26619         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
26620
26621             if ( $tok eq '::' ) {            # got it
26622                 $identifier .= $tok;
26623                 $id_scan_state = 'A';        # now require alpha
26624             }
26625             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
26626                 $identifier .= $tok;
26627                 $id_scan_state = ':';           # now need ::
26628                 $saw_alpha     = 1;
26629             }
26630             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
26631                 $identifier .= $tok;
26632                 $id_scan_state = ':';           # now need ::
26633                 $saw_alpha     = 1;
26634             }
26635             elsif ( $tok eq "'" && $allow_tick ) {    # tick
26636
                      # A tick is not appended to a keyword.  There is no
                      # 'last' here: with the state now empty, the next pass
                      # falls into the final 'else' of this chain, which
                      # restores $i from $i_save and leaves the loop.
26637                 if ( $is_keyword{$identifier} ) {
26638                     $id_scan_state = '';              # that's all
26639                     $i             = $i_save;
26640                 }
26641                 else {
26642                     $identifier .= $tok;
26643                 }
26644             }
26645             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26646                 $id_scan_state = '(';
26647                 $identifier .= $tok;
26648             }
26649             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26650                 $id_scan_state = ')';
26651                 $identifier .= $tok;
26652             }
26653             else {
26654                 $id_scan_state = '';        # that's all
26655                 $i             = $i_save;
26656                 last;
26657             }
26658         }
26659         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
26660
26661             if ( $tok eq '(' ) {             # got it
26662                 $identifier .= $tok;
26663                 $id_scan_state = ')';        # now find the end of it
26664             }
26665             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
26666                 $identifier .= $tok;
26667             }
26668             else {
26669                 $id_scan_state = '';         # that's all - no prototype
26670                 $i             = $i_save;
26671                 last;
26672             }
26673         }
26674         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
26675
26676             if ( $tok eq ')' ) {             # got it
26677                 $identifier .= $tok;
26678                 $id_scan_state = '';         # all done
26679                 last;
26680             }
26681             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
26682                 $identifier .= $tok;
26683             }
26684             else {    # probable error in script, but keep going
26685                 warning("Unexpected '$tok' while seeking end of prototype\n");
26686                 $identifier .= $tok;
26687             }
26688         }
26689         else {        # can get here due to error in initialization
26690             $id_scan_state = '';
26691             $i             = $i_save;
26692             last;
26693         }
26694     }
26695
26696     if ( $id_scan_state eq ')' ) {
26697         warning("Hit end of line while seeking ) to end prototype\n");
26698     }
26699
26700     # once we enter the actual identifier, it may not extend beyond
26701     # the end of the current line
26702     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
26703         $id_scan_state = '';
26704     }

          # a continued scan decrements $i before the loop, so restoring $i
          # from $i_save can leave it at -1
26705     if ( $i < 0 ) { $i = 0 }
26706
          # decide the token type, unless a branch above already set one
26707     unless ($type) {
26708
26709         if ($saw_type) {
26710
26711             if ($saw_alpha) {
26712                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
26713                     $type = 'w';
26714                 }
26715                 else { $type = 'i' }
26716             }
26717             elsif ( $identifier eq '->' ) {
26718                 $type = '->';
26719             }
26720             elsif (
26721                 ( length($identifier) > 1 )
26722
26723                 # In something like '@$=' we have an identifier '@$'
26724                 # In something like '$${' we have type '$$' (and only
26725                 # part of an identifier)
26726                 && !( $identifier =~ /\$$/ && $tok eq '{' )
26727                 && ( $identifier !~ /^(sub |package )$/ )
26728               )
26729             {
26730                 $type = 'i';
26731             }
26732             else { $type = 't' }
26733         }
26734         elsif ($saw_alpha) {
26735
26736             # type 'w' includes anything without leading type info
26737             # ($,%,@,*) including something like abc::def::ghi
26738             $type = 'w';
26739         }
26740         else {
26741             $type = '';
26742         }    # this can happen on a restart
26743     }
26744
          # hand back the assembled identifier as the token; if nothing was
          # assembled, hand back the starting token and starting index
26745     if ($identifier) {
26746         $tok = $identifier;
26747         if ($message) { write_logfile_entry($message) }
26748     }
26749     else {
26750         $tok = $tok_begin;
26751         $i   = $i_begin;
26752     }
26753
26754     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
26755         my ( $a, $b, $c ) = caller;
26756         print
26757 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
26758         print
26759 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
26760     };
26761     return ( $i, $tok, $type, $id_scan_state, $identifier );
26762 }
26763
{

    # Package and sub name kept from one call to the next, for the case
    # in which a prototype is on a separate line from its sub name.
    my ( $package_saved, $subname_saved );

    sub do_scan_sub {

        # Parse what follows a 'sub' keyword: an optional package
        # qualifier, the sub name, an optional prototype, and the leading
        # ':' of an attribute list.  It is called with $i_beg equal to the
        # index of the first nonblank token following a 'sub' token.
        #
        # Returns the list ( $i, $tok, $type, $id_scan_state ).  The
        # returned state is 'sub' when a '(' follows on a later line and
        # this routine must be called again to pick up the prototype;
        # otherwise it is empty.
        #
        # TODO: add future error checks to be sure we have a valid
        # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
        # a name is given if and only if a non-anonymous sub is
        # appropriate.
        #
        # USES GLOBAL VARS: $current_package, $last_nonblank_token,
        # $in_attribute_list, %saw_function_definition,
        # $statement_type, $tokenizer_self

        my (
            $input_line, $i,             $i_beg,
            $tok,        $type,          $rtokens,
            $rtoken_map, $id_scan_state, $max_token_index
        ) = @_;

        # normally we get everything in one call
        $id_scan_state = "";

        my ( $subname, $package, $proto, $attrs );
        my $match;

        # begin matching at the character position of pre-token $i_beg
        my $pos_beg = $rtoken_map->[$i_beg];
        pos($input_line) = $pos_beg;

        # sub NAME PROTO ATTRS
        if (
            $input_line =~ m/\G\s*
        ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
        (\w+)               # NAME    - required
        (\s*\([^){]*\))?    # PROTO   - something in parens
        (\s*:)?             # ATTRS   - leading : of attribute list
        /gcx
          )
        {

            # copy the captures before any other pattern match runs
            my $qualifier = $1;
            ( $subname, $proto, $attrs ) = ( $2, $3, $4 );
            $match = 1;

            # an unqualified name belongs to the current package
            $package =
              ( defined($qualifier) && $qualifier )
              ? $qualifier
              : $current_package;
            $package =~ s/\'/::/g;
            $package = 'main' . $package if ( $package =~ /^\:/ );
            $package =~ s/::$//;

            # the token is the keyword plus everything just matched
            $tok = 'sub '
              . substr( $input_line, $pos_beg, pos($input_line) - $pos_beg );
            $type = 'i';
        }

        # Look for prototype/attributes not preceded on this line by subname;
        # This might be an anonymous sub with attributes,
        # or a prototype on a separate line from its sub name
        elsif (
            $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
            (\s*:)?                              # ATTRS leading ':'
            /gcx
            && ( $1 || $2 )
          )
        {
            ( $proto, $attrs ) = ( $1, $2 );
            $match = 1;

            # Handle prototype on separate line from subname
            if ($subname_saved) {
                ( $package, $subname, $tok ) =
                  ( $package_saved, $subname_saved, $last_nonblank_token );
            }
            $type = 'i';
        }

        # no match but line not blank: hand everything back as it is
        return ( $i, $tok, $type, $id_scan_state ) unless ($match);

        # ATTRS: if there are attributes, back up and let the ':' be
        # found later by the scanner.
        my $pos = pos($input_line);
        $pos -= length($attrs) if ($attrs);

        my $next_nonblank_token = $tok;

        # catch case of line with leading ATTR ':' after anonymous sub
        if ( $pos == $pos_beg && $tok eq ':' ) {
            $type              = 'A';
            $in_attribute_list = 1;
        }

        # otherwise convert back from character position to pre_token
        # index, and look at what follows on this line
        else {

            # an error flag is not expected here, but report it if set
            ( $i, my $error ) =
              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
            warning("Possibly invalid sub\n") if ($error);

            ($next_nonblank_token) =
              find_next_nonblank_token_on_this_line( $i, $rtokens,
                $max_token_index );
        }

        # Only a blank or a side comment follows: use the first pre-token
        # returned by the peek-ahead instead, or '}' if there is none.
        if ( $next_nonblank_token =~ /^(\s*|#)$/ ) {
            my ($rpre_tokens) = peek_ahead_for_n_nonblank_pre_tokens(1);
            $next_nonblank_token =
              ( defined($rpre_tokens) && @$rpre_tokens )
              ? $rpre_tokens->[0]
              : '}';
        }

        # forget any name saved by an earlier call
        $package_saved = "";
        $subname_saved = "";

        if ( $next_nonblank_token eq '{' ) {
            if ($subname) {

                # Check for multiple definitions of a sub, but
                # it is ok to have multiple sub BEGIN, etc,
                # so we do not complain if name is all caps
                my $lno = $saw_function_definition{$package}{$subname};
                if ( $lno && $subname !~ /^[A-Z]+$/ ) {
                    warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
                    );
                }
                $saw_function_definition{$package}{$subname} =
                  $tokenizer_self->{_last_line_number};
            }
        }
        elsif ( $next_nonblank_token eq ';' || $next_nonblank_token eq '}' ) {

            # nothing to do for these
        }

        # ATTRS - if an attribute list follows, remember the name
        # of the sub so the next opening brace can be labeled.
        # Setting 'statement_type' causes any ':'s to introduce
        # attributes.
        elsif ( $next_nonblank_token eq ':' ) {
            $statement_type = $tok;
        }

        # see if PROTO follows on another line:
        elsif ( $next_nonblank_token eq '(' ) {
            if ( $attrs || $proto ) {
                warning(
"unexpected '(' after definition or declaration of sub '$subname'\n"
                );
            }
            else {

                # we must come back to get proto
                $id_scan_state  = 'sub';
                $statement_type = $tok;
                $package_saved  = $package;
                $subname_saved  = $subname;
            }
        }
        elsif ($next_nonblank_token) {    # EOF technically ok
            warning(
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
            );
        }

        check_prototype( $proto, $package, $subname );
        return ( $i, $tok, $type, $id_scan_state );
    }
}
26951
26952 #########i###############################################################
26953 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
26954 #########################################################################
26955
sub find_next_nonblank_token {

    # Return the list ( $token, $i ) where $token is the token after
    # index $i, or the one after that if the first one is blank, and $i
    # is the index of the token returned.
    #
    # When $i is already at or past $max_token_index and no peek has been
    # done yet (as reported by peeked_ahead), the token array is first
    # replaced by the result of peek_ahead_for_nonblank_token.
    # NOTE(review): presumably that call appends tokens from following
    # text -- confirm against its definition.
    my ( $i, $rtokens, $max_token_index ) = @_;

    if ( $i >= $max_token_index && !peeked_ahead() ) {
        peeked_ahead(1);
        $rtokens =
          peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
    }

    my $token = $rtokens->[ ++$i ];

    # step over at most one blank token
    $token = $rtokens->[ ++$i ] if ( $token =~ /^\s*$/ );

    return ( $token, $i );
}
26973
sub numerator_expected {

    # A filter for a possible numerator, used when guessing whether a '/'
    # is a division operator or a pattern delimiter.  It looks at the
    # next nonblank token after index $i (after stepping over a '=' in
    # case of '/=') and returns -
    #   1 - yes
    #   0 - can't tell
    #  -1 - no
    # Note: variables ending in _expected follow this convention of
    # three possible values.
    my ( $i, $rtokens, $max_token_index ) = @_;

    # handle /=
    $i++ if ( $$rtokens[ $i + 1 ] eq '=' );

    my ($token_after) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    return 1 if ( $token_after =~ /(\(|\$|\w|\.|\@)/ );
    return ( $token_after =~ /^\s*$/ ) ? 0 : -1;
}
27003
sub pattern_expected {

    # This is the start of a filter for a possible pattern.
    # It looks at the token after a possible pattern (after stepping over
    # one token which could be a modifier) and tries to determine if that
    # token could follow a pattern.
    # returns -
    #   1 - yes
    #   0 - can't tell
    #  -1 - no
    my ( $i, $rtokens, $max_token_index ) = @_;

    # skip possible modifier
    $i++ if ( $$rtokens[ $i + 1 ] =~ /^[cgimosxp]/ );

    my ($token_after) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # list of tokens which may follow a pattern
    # (can probably be expanded)
    return 1
      if (
        $token_after =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ );

    return ( $token_after =~ /^\s*$/ ) ? 0 : -1;
}
27035
sub find_next_nonblank_token_on_this_line {

    # Return the list ( $token, $i ) for the next token after index $i,
    # stepping over one blank token if another token follows it, without
    # ever reading past $max_token_index.
    #  - if $i is not below $max_token_index, the token is the empty
    #    string and $i is returned unchanged
    #  - if the only remaining token is blank, that blank is returned
    my ( $i, $rtokens, $max_token_index ) = @_;

    # nothing left on this line
    return ( "", $i ) if ( $i >= $max_token_index );

    my $token = $rtokens->[ ++$i ];

    # step over a blank, but only if there is a token after it
    if ( $token =~ /^\s*$/ && $i < $max_token_index ) {
        $token = $rtokens->[ ++$i ];
    }

    return ( $token, $i );
}
27055
sub find_angle_operator_termination {

    # We are looking at a '<' and want to know if it is an angle operator.
    #
    # input parameters:
    #   $input_line      - the text of the line being scanned
    #   $i_beg           - pre_token index of the '<'
    #   $rtoken_map      - reference to the pre_token map (character
    #                      position in $input_line of each pre_token)
    #   $expecting       - TERM or UNKNOWN (we should not be called if an
    #                      operator is expected)
    #   $max_token_index - index of the last pre_token
    # We are to return the list ( $i, $type ):
    #   $i = pretoken index of ending '>' if found, $i_beg otherwise
    #   $type = 'Q' if found, '<' otherwise
    my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
    my $i    = $i_beg;
    my $type = '<';

    # start the search just after the '<'
    pos($input_line) = 1 + $$rtoken_map[$i];

    my $filter;

    # we just have to find the next '>' if a term is expected
    if ( $expecting == TERM ) { $filter = '[\>]' }

    # we have to guess if we don't know what is expected
    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }

    # shouldn't happen - we shouldn't be here if operator is expected.
    # There is no filter to search with, so report that the '<' is not
    # an angle operator rather than searching with an undefined pattern.
    else {
        warning("Program Bug in find_angle_operator_termination\n");
        return ( $i, $type );
    }

    # To illustrate what we might be looking at, in case we are
    # guessing, here are some examples of valid angle operators
    # (or file globs):
    #  <tmp_imp/*>
    #  <FH>
    #  <$fh>
    #  <*.c *.h>
    #  <_>
    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
    #  <${PREFIX}*img*.$IMAGE_TYPE>
    #  <img*.$IMAGE_TYPE>
    #  <Timg*.$IMAGE_TYPE>
    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
    #
    # Here are some examples of lines which do not have angle operators:
    #  return undef unless $self->[2]++ < $#{$self->[1]};
    #  < 2  || @$t >
    #
    # the following line from dlister.pl caused trouble:
    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
    #
    # If the '<' starts an angle operator, it must end on this line and
    # it must not have certain characters like ';' and '=' in it.  I use
    # this to limit the testing.  This filter should be improved if
    # possible.

    if ( $input_line =~ /($filter)/g ) {

        if ( $1 eq '>' ) {

            # We MAY have found an angle operator termination if we get
            # here, but we need to do more to be sure we haven't been
            # fooled.
            my $pos = pos($input_line);

            # $str is the text from the '<' through the '>' inclusive
            my $pos_beg = $$rtoken_map[$i];
            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );

            # Reject if the closing '>' follows a '-' as in:
            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
            if ( $expecting == UNKNOWN ) {
                my $check = substr( $input_line, $pos - 2, 1 );
                if ( $check eq '-' ) {
                    return ( $i, $type );
                }
            }

            ######################################debug#####
            #write_diagnostics( "ANGLE? :$str\n");
            #print "ANGLE: found $1 at pos=$pos str=$str\n";
            ######################################debug#####
            $type = 'Q';
            my $error;
            ( $i, $error ) =
              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );

            # It may be possible that a quote ends midway in a pretoken.
            # If this happens, it may be necessary to split the pretoken.
            if ($error) {
                warning(
                    "Possible tokinization error..please check this line\n");
                report_possible_bug();
            }

            # Now let's see where we stand....
            # OK if math op not possible
            if ( $expecting == TERM ) {
            }

            # OK if there are no more than 2 pre-tokens inside
            # (not possible to write 2 token math between < and >)
            # This catches most common cases
            elsif ( $i <= $i_beg + 3 ) {
                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
            }

            # Not sure..
            else {

                # Let's try a Brace Test: any braces inside must balance.
                # Each counter is (number of openers) - (number of closers).
                my $br = ( $str =~ tr/{// ) - ( $str =~ tr/}// );
                my $sb = ( $str =~ tr/[// ) - ( $str =~ tr/]// );
                my $pr = ( $str =~ tr/(// ) - ( $str =~ tr/)// );

                # if braces do not balance - not angle operator
                if ( $br || $sb || $pr ) {
                    $i    = $i_beg;
                    $type = '<';
                    write_diagnostics(
                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
                }

                # we should keep doing more checks here...to be continued
                # Tentatively accepting this as a valid angle operator.
                # There are lots more things that can be checked.
                else {
                    write_diagnostics(
                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
                    write_logfile_entry("Guessing angle operator here: $str\n");
                }
            }
        }

        # didn't find ending >
        else {
            if ( $expecting == TERM ) {
                warning("No ending > for angle operator\n");
            }
        }
    }
    return ( $i, $type );
}
27196
sub scan_number_do {

    #  scan a number in any of the formats that Perl accepts
    #  Underbars (_) are allowed in decimal numbers.
    #  input parameters -
    #    $input_line      - the string to scan
    #    $i               - pre_token index to start scanning
    #    $rtoken_map      - reference to the pre_token map giving starting
    #                       character position in $input_line of token $i
    #    $input_type      - token type handed back if no number is found
    #    $max_token_index - index of the last pre_token
    #  returns the list ( $i, $type, $number ) -
    #    $i      - last pre_token index of the number just scanned
    #              (unchanged if no number was found)
    #    $type   - 'n' for a number, 'v' for a v-string, otherwise
    #              the unchanged $input_type
    #    $number - the number (characters); or undef if not a number

    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
    my $pos_beg = $$rtoken_map[$i];
    my $pos;
    my $number = undef;
    my $type   = $input_type;

    my $first_char = substr( $input_line, $pos_beg, 1 );

    # Look for bad starting characters; Shouldn't happen..
    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
        warning("Program bug - scan_number given character $first_char\n");
        report_definite_bug();
        return ( $i, $type, $number );
    }

    # handle v-string without leading 'v' character ('Two Dot' rule)
    # (vstring.t)
    # TODO: v-strings may contain underscores
    pos($input_line) = $pos_beg;
    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
        $pos = pos($input_line);
        my $numc = $pos - $pos_beg;
        $number = substr( $input_line, $pos_beg, $numc );
        $type = 'v';
        report_v_string($number);
    }

    # handle octal, hex, binary
    # (perl accepts the radix letter in either case: 0xFF, 0XFF, 0b1, 0B1)
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;
        if ( $input_line =~
            /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
        {
            $pos = pos($input_line);
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # handle decimal
    # (every part of this pattern is optional, so it always matches and
    # $number is always defined, possibly empty, after this step)
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;

        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
            $pos = pos($input_line);

            # watch out for things like 0..40 which would give 0. by this;
            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
            {
                $pos--;
            }
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # filter out non-numbers like e + - . e2  .e3 +e6
    # the rule: at least one digit, and any 'e' must be preceded by a digit
    if (
        $number !~ /\d/    # no digits
        || (   $number =~ /^(.*)[eE]/
            && $1 !~ /\d/ )    # or no digits before the 'e'
      )
    {
        $number = undef;
        $type   = $input_type;
        return ( $i, $type, $number );
    }

    # Found a number; now we must convert back from character position
    # to pre_token index. An error here implies user syntax error.
    # An example would be an invalid octal number like '009'.
    my $error;
    ( $i, $error ) =
      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
    if ($error) { warning("Possibly invalid number\n") }

    return ( $i, $type, $number );
}
27292
sub inverse_pretoken_map {

    # Convert a character position back into a pre_token index.
    #
    # Beginning with the pre_token after index $i, step forward to the
    # first pre_token which starts at or beyond character position $pos,
    # and return the index of the pre_token just before it.
    #
    # Returns the list ( $i, $error ):
    #   $i     - index of the pre_token preceding the one found; if no
    #            pre_token starts at or beyond $pos this is one more than
    #            $max_token_index
    #   $error - 1 if $pos falls inside a pre_token instead of on a
    #            pre_token boundary, 0 otherwise
    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
    my $error = 0;

    for ( $i++ ; $i <= $max_token_index ; $i++ ) {

        # this pre_token still starts before $pos; keep going
        next if ( $rtoken_map->[$i] < $pos );

        # Let the calling routine handle errors in which we do not
        # land on a pre-token boundary.  It can happen by running
        # perltidy on some non-perl scripts, for example.
        $error = 1 if ( $rtoken_map->[$i] > $pos );

        $i--;
        last;
    }
    return ( $i, $error );
}
27314
sub find_here_doc {

    # Look for the target string of a here document following '<<'.
    #
    # input parameters:
    #   $expecting       - TERM or UNKNOWN, as seen at the '<<'
    #   $i               - token index of the second < of <<
    #                      ($i must be less than the last token index if
    #                      this is called)
    #   $rtokens         - reference to the array of pre_tokens
    #   $rtoken_map      - reference to the pre_token map (not used here)
    #   $max_token_index - index of the last pre_token
    # returns the list:
    #   $found_target         - 0 didn't find target; 1 found target
    #   $here_doc_target      - the target string (may be empty string)
    #   $here_quote_character - quote character around the target, or ''
    #   $i                    - unchanged if not here doc,
    #                           or index of the last token of the here target
    #   $saw_error            - flag noting unbalanced quote on here target
    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $i_start              = $i;
    my $found_target         = 0;
    my $here_doc_target      = '';
    my $here_quote_character = '';
    my $saw_error            = 0;

    # perl allows a backslash before the target string (heredoc.t);
    # if there is one, the candidate target is the token after it
    my $next_token = $rtokens->[ $i + 1 ];
    my $backslash = ( $next_token eq '\\' ) ? 1 : 0;
    $next_token = $rtokens->[ $i + 2 ] if ($backslash);

    my ( $next_nonblank_token, $i_next_nonblank ) =
      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );

    # Case 1: the target is a quoted string
    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {

        my $in_quote    = 1;
        my $quote_pos   = 0;
        my $quote_depth = 0;
        my $quoted_string;

        (
            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );

        # found ending quote: collect the tokens strictly between the
        # two quote characters
        if ( !$in_quote ) {
            $found_target = 1;

            foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
                my $tokj = $rtokens->[$j];

                # we have to remove any backslash before the quote character
                # so that the here-doc-target exactly matches this string
                next
                  if ( $tokj eq "\\"
                    && $j < $i - 1
                    && $rtokens->[ $j + 1 ] eq $here_quote_character );
                $here_doc_target .= $tokj;
            }
        }

        # didn't find end of quote, so no target found
        else {
            $i = $i_start;
            if ( $expecting == TERM ) {
                warning(
"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
                );
                $saw_error = 1;
            }
        }
    }

    # Case 2: nothing but blank after '<<' where a term is expected
    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
        $found_target = 1;
        write_logfile_entry(
            "found blank here-target after <<; suggest using \"\"\n");
        $i = $i_start;
    }

    # Case 3: simple bareword or integer after <<
    elsif ( $next_token =~ /^\w/ ) {

        # we have to guess unless we know that a term is expected
        my $here_doc_expected =
          ( $expecting == UNKNOWN ) ? guess_if_here_doc($next_token) : 1;

        if ($here_doc_expected) {
            $found_target    = 1;
            $here_doc_target = $next_token;
            $i               = $i_start + 1;
        }
    }

    # Case 4: anything else
    else {

        if ( $expecting == TERM ) {
            $found_target = 1;
            write_logfile_entry("Note: bare here-doc operator <<\n");
        }
        else {
            $i = $i_start;
        }
    }

    # patch to neglect any prepended backslash
    $i++ if ( $found_target && $backslash );

    return ( $found_target, $here_doc_target, $here_quote_character, $i,
        $saw_error );
}
27428
sub do_quote {

    # Follow (or continue following) one or two quoted strings, starting
    # at pre_token index $i.
    #
    # $in_quote, on input and on return:
    #   0 - ok, found end
    #   1 - still must find end of quote whose target is $quote_character
    #   2 - still looking for end of first of two quotes
    #
    # The text found is appended to the strings passed in:
    #  $quoted_string_1 = quoted string seen while in_quote=1
    #  $quoted_string_2 = quoted string seen while in_quote=2
    # A newline is appended to a string whose quote does not end here.
    #
    # Returns the updated values of
    #   ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
    #     $quoted_string_1, $quoted_string_2 )
    my (
        $i,               $in_quote,    $quote_character,
        $quote_pos,       $quote_depth, $quoted_string_1,
        $quoted_string_2, $rtokens,     $rtoken_map,
        $max_token_index
    ) = @_;

    my $text;

    # two quotes to follow: work on the first of the two
    if ( $in_quote == 2 ) {
        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $text ) =
          follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_2 .= $text;

        # the first quote did not end here
        if ( $in_quote != 1 ) {
            $quoted_string_2 .= "\n";
        }

        # the first quote ended; get ready to follow the second one, whose
        # delimiter is still to be found.  After a bracketing delimiter we
        # step over the closing character.
        else {
            $i++ if ( $quote_character =~ /[\{\[\<\(]/ );
            $quote_character = '';
        }
    }

    # one (more) quote to follow
    if ( $in_quote == 1 ) {
        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $text ) =
          follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_1 .= $text;

        # this quote did not end here
        $quoted_string_1 .= "\n" if ( $in_quote == 1 );
    }

    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2 );
}
27484
sub follow_quoted_string {

    # Scan forward through the pre_tokens of one line looking for the
    # token which ends a quoted string, skipping escaped characters.
    # If the quote character is blank, the first non-blank character found
    # is used as the quote character.
    #
    # input parameters:
    #   $i_beg = the token index of the first token to search
    #   $in_quote = number of quoted strings being followed
    #   $rtokens = reference to the array of tokens
    #   $beginning_tok = the starting quote character (blank if not yet known)
    #   $quote_pos = index to check next for alphanumeric delimiter
    #   $quote_depth = current nesting depth of the delimiters
    #   $max_token_index = index of the last token
    # output parameters (returned as a list, in this order):
    #   $i = the token index of the ending quote character, or
    #        $max_token_index if the end was not found
    #   $in_quote = decremented if found end, unchanged if not
    #   $beginning_tok = the starting quote character
    #   $quote_pos = index to check next for alphanumeric delimiter
    #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
    #   $quoted_string = the text of the quote (without quotation tokens)
    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
        $max_token_index )
      = @_;
    my ( $tok, $end_tok );

    # start one token back because the loops below pre-increment $i
    my $i             = $i_beg - 1;
    my $quoted_string = "";

    TOKENIZER_DEBUG_FLAG_QUOTE && do {
        print
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
    };

    # get the corresponding end token
    if ( $beginning_tok !~ /^\s*$/ ) {
        $end_tok = matching_end_token($beginning_tok);
    }

    # a blank token means we must find and use the first non-blank one
    else {
        my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>

        while ( $i < $max_token_index ) {
            $tok = $$rtokens[ ++$i ];

            if ( $tok !~ /^\s*$/ ) {

                # a '#' is taken as the start of a comment when comments
                # are allowed here; jump to the last token so that the
                # search loops below have nothing left to examine
                if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
                    $i = $max_token_index;
                }
                else {

                    # a multi-character token: the delimiter is the single
                    # character at (1-based) position $quote_pos within it
                    if ( length($tok) > 1 ) {
                        if ( $quote_pos <= 0 ) { $quote_pos = 1 }
                        $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
                    }

                    # a single-character token is the delimiter itself
                    else {
                        $beginning_tok = $tok;
                        $quote_pos     = 0;
                    }
                    $end_tok     = matching_end_token($beginning_tok);
                    $quote_depth = 1;
                    last;
                }
            }

            # once a blank token has been seen a following '#' is
            # treated as a comment
            else {
                $allow_quote_comments = 1;
            }
        }
    }

    # There are two different loops which search for the ending quote
    # character.  In the rare case of an alphanumeric quote delimiter, we
    # have to look through alphanumeric tokens character-by-character, since
    # the pre-tokenization process combines multiple alphanumeric
    # characters, whereas for a non-alphanumeric delimiter, only tokens of
    # length 1 can match.

    ###################################################################
    # Case 1 (rare): loop for case of alphanumeric quote delimiter..
    # "quote_pos" is the position in the current word to begin searching
    ###################################################################
    if ( $beginning_tok =~ /\w/ ) {

        # Note this because it is not recommended practice except
        # for obfuscated perl contests
        if ( $in_quote == 1 ) {
            write_logfile_entry(
                "Note: alphanumeric quote delimiter ($beginning_tok) \n");
        }

        while ( $i < $max_token_index ) {

            # move on to the next token unless we are still part way
            # through the current one ($quote_pos > 0)
            if ( $quote_pos == 0 || ( $i < 0 ) ) {
                $tok = $$rtokens[ ++$i ];

                if ( $tok eq '\\' ) {

                    # retain backslash unless it hides the end token
                    $quoted_string .= $tok
                      unless $$rtokens[ $i + 1 ] eq $end_tok;
                    $quote_pos++;
                    last if ( $i >= $max_token_index );
                    $tok = $$rtokens[ ++$i ];
                }
            }
            my $old_pos = $quote_pos;

            # NOTE(review): this block is empty, so the check does nothing;
            # it looks like a leftover debugging hook
            unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
            {

            }

            # look for the end character within this token, starting at
            # $quote_pos; the result is 1 + its index, or 0 if not found
            $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );

            if ( $quote_pos > 0 ) {

                # keep the text in front of the end character
                $quoted_string .=
                  substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );

                $quote_depth--;

                if ( $quote_depth == 0 ) {
                    $in_quote--;
                    last;
                }
            }

            # not found: the rest of this token is part of the quote
            else {
                $quoted_string .= substr( $tok, $old_pos );
            }
        }
    }

    ########################################################################
    # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
    ########################################################################
    else {

        while ( $i < $max_token_index ) {
            $tok = $$rtokens[ ++$i ];

            # The end token is tested first, so when the beginning and end
            # tokens are the same character the quote simply ends here.
            if ( $tok eq $end_tok ) {
                $quote_depth--;

                if ( $quote_depth == 0 ) {
                    $in_quote--;
                    last;
                }
            }

            # a nested opening delimiter
            elsif ( $tok eq $beginning_tok ) {
                $quote_depth++;
            }
            elsif ( $tok eq '\\' ) {

                # retain backslash unless it hides the beginning or end token
                # (this steps over the escaped token and may move $i one
                # past $max_token_index; see the clamp below)
                $tok = $$rtokens[ ++$i ];
                $quoted_string .= '\\'
                  unless ( $tok eq $end_tok || $tok eq $beginning_tok );
            }
            $quoted_string .= $tok;
        }
    }

    # the escape handling above may have stepped beyond the last token
    if ( $i > $max_token_index ) { $i = $max_token_index }
    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
        $quoted_string );
}
27646
sub indicate_error {

    # Issue the warning message $msg, followed by a copy of the input
    # line with a marker ($carrat) written below character position $pos.
    # The calls are bracketed by interrupt_logfile / resume_logfile
    # (presumably so that the group stays together in the log file).
    my $msg = shift;
    my ( $line_number, $input_line, $pos, $carrat ) = @_;

    interrupt_logfile();
    warning($msg);
    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
    resume_logfile();
}
27654
sub write_error_indicator_pair {

    # Send two lines to the warning stream: the numbered (and possibly
    # shortened) input line, and below it an underline carrying the marker
    # string $carrat at the column of character position $pos.
    my ( $line_number, $input_line, $pos, $carrat ) = @_;

    my ( $offset, $numbered_line, $blank_underline ) =
      make_numbered_line( $line_number, $input_line, $pos );
    warning("$numbered_line\n");

    # $pos - $offset is the marker position within the displayed line
    my $marked_underline =
      write_on_underline( $blank_underline, $pos - $offset, $carrat );

    # no need to write the blanks to the right of the marker
    $marked_underline =~ s/\s*$//;
    warning("$marked_underline\n");
}
27664
sub make_numbered_line {

    #  Build a display string of the form
    #     $lineno: sub_string
    #  from an input line, its line number, and a character position of
    #  interest, where sub_string is a limited-length piece of the line
    #  which contains the position of interest.  Text which is cut off is
    #  indicated with '...'.
    #
    #  An example in which the end of a long line was cut off:
    #
    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #
    #  An example in which the start of a long line was cut off:
    #
    # 2: ... er of the World Wide Web Consortium's
    #
    #  input parameters are:
    #   $lineno = line number
    #   $str = the text of the line
    #   $pos = position of interest (the error) : 0 = first character
    #
    #   We return the list:
    #     - $offset = an offset which corrects the position in case we only
    #       display part of a line, such that $pos-$offset is the effective
    #       position from the start of the displayed line.
    #     - $numbered_line = the numbered line as above,
    #     - $underline = a blank 'underline' which is all spaces with the same
    #       number of characters as the numbered line.

    my ( $lineno, $str, $pos ) = @_;

    # number of characters of the line which are kept next to a ' ...'
    my $kept_width = 64;

    # display from the start of the line if the position of interest is
    # near the start; otherwise begin 40 characters to its left
    my $offset = 0;
    $offset = $pos - 40 if ( $pos >= 60 );

    # is there more text to the right than we are willing to display?
    my $is_too_long = ( length($str) - $offset - 68 ) > 0;

    # the start of the line is cut off; the first 4 characters of the
    # window are given up to make room for the leading '... '
    if ( $offset != 0 ) {
        my $visible =
          $is_too_long
          ? substr( $str, $offset + 4, $kept_width ) . " ..."
          : substr( $str, $offset + 4 );
        $str = "... " . $visible;
    }

    # only the end of the line is cut off
    elsif ($is_too_long) {
        $str = substr( $str, 0, $kept_width ) . " ...";
    }

    # the line number prefix shifts everything to the right
    my $prefix        = sprintf( "%d: ", $lineno );
    my $numbered_line = $prefix . $str;
    my $underline     = " " x length($numbered_line);
    return ( $offset - length($prefix), $numbered_line, $underline );
}
27723
sub write_on_underline {

    # The "underline" is a string that shows where an error is; it starts
    # out as a string of blanks with the same length as the numbered line of
    # code above it.  This routine overwrites part of it with a marker
    # string.  In the example below, the string '---^' has been written
    # just below the line of bad code:
    #
    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #                 ---^
    #
    # input parameters:
    #   $underline = the current underline string
    #   $pos       = position in the underline at which to begin writing
    #   $pos_chr   = the string to write there
    # returns the updated underline; its length is never changed because
    # a marker which would run off the end is shortened to fit.

    my ( $underline, $pos, $pos_chr ) = @_;
    my $width = length($underline);

    # check for error..shouldn't happen
    return $underline if ( $pos < 0 || $pos > $width );

    # shorten the marker if it would extend beyond the underline
    my $room = $width - $pos;
    if ( length($pos_chr) > $room ) {
        $pos_chr = substr( $pos_chr, 0, $room );
    }

    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
    return $underline;
}
27757
sub pre_tokenize {

    # Split the string $str into a sequence of preliminary tokens of
    # these kinds:
    #   whitespace  (type = 'b'),          example: '   '
    #   digits      (type = 'd'),          example: '0755'
    #   words       (type = 'w'),          example: 'max_tokens_wanted'
    #   any other single character (i.e. punct; type = the character itself).
    # Nothing finer is possible at this stage because we might be inside
    # of a quoted string or pattern.
    #
    # At most $max_tokens_wanted tokens are produced; the caller sets
    # $max_tokens_wanted to 0 to get all tokens.
    #
    # Returns references to three arrays:
    #   \@tokens    - the tokens themselves
    #   \@token_map - string positions; entry 0 is 0 and each token adds
    #                 the position just past its end, so entry $n is the
    #                 start of token $n and there is one extra final entry
    #   \@type      - 'b', 'd', 'w', or the punctuation character
    my ( $str, $max_tokens_wanted ) = @_;

    my @tokens    = ();
    my @token_map = (0);
    my @type      = ();

    # Each successful match below resumes where the previous one ended
    # (\G) and leaves the token text in $1.
    while (1) {
        my $token_type;

        # whitespace
        if ( $str =~ /\G(\s+)/gc ) { $token_type = 'b' }

        # numbers
        # note that this must come before words!
        elsif ( $str =~ /\G(\d+)/gc ) { $token_type = 'd' }

        # words
        elsif ( $str =~ /\G(\w+)/gc ) { $token_type = 'w' }

        # single-character punctuation
        elsif ( $str =~ /\G(\W)/gc ) { $token_type = $1 }

        # that's all..
        else { last }

        push @type,      $token_type;
        push @tokens,    $1;
        push @token_map, pos($str);

        # stop when the requested number of tokens has been found; a
        # request of 0 never counts down to zero, so there is no limit
        last if ( --$max_tokens_wanted == 0 );
    }

    return ( \@tokens, \@token_map, \@type );
}
27803
sub show_tokens {

    # this is an old debug routine
    # It prints one line for each pre_token, of the form
    #   index:length of token:map entry for the token:token text:
    my ( $rtokens, $rtoken_map ) = @_;
    my $count = scalar(@$rtokens);

    foreach my $index ( 0 .. $count - 1 ) {
        my $token = $rtokens->[$index];
        print join( ':',
            $index, length($token), $rtoken_map->[$index], $token )
          . ":\n";
    }
}
27816
sub matching_end_token {

    # Return the character which closes a quote or pattern that was
    # opened with the character $beginning_token.  For one of the four
    # opening brackets this is the matching closing bracket; any other
    # delimiter closes itself.
    my $beginning_token = shift;

    return '}' if ( $beginning_token eq '{' );
    return ']' if ( $beginning_token eq '[' );
    return '>' if ( $beginning_token eq '<' );
    return ')' if ( $beginning_token eq '(' );
    return $beginning_token;
}
27838
sub dump_token_types {

    # Write a human-readable reference of the token type codes and the
    # '_line_type' codes to a file handle.
    #
    # Parameters:
    #   $class - invocant (not used in the body)
    #   $fh    - output handle which receives the text
    #
    # Returns the result of the print call.
    my $class = shift;
    my $fh    = shift;

    # This should be the latest list of token types in use
    # adding NEW_TOKENS: add a comment here
    #
    # The operator lists include the smartmatch tokens '~~' and '!~~',
    # which the tokenizer's digraph and trigraph tables also define.
    print {$fh} <<'END_OF_LIST';

Here is a list of the token types currently used for lines of type 'CODE'.
For the following tokens, the "type" of a token is just the token itself.

.. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
( ) <= >= == =~ !~ != ++ -- /= x= ~~
... **= <<= >>= &&= ||= //= <=> !~~
, + - / * | % ! x ~ = \ ? : . < > ^ &

The following additional token types are defined:

 type    meaning
    b    blank (white space)
    {    indent: opening structural curly brace or square bracket or paren
         (code block, anonymous hash reference, or anonymous array reference)
    }    outdent: right structural curly brace or square bracket or paren
    [    left non-structural square bracket (enclosing an array index)
    ]    right non-structural square bracket
    (    left non-structural paren (all but a list right of an =)
    )    right non-structural paren
    L    left non-structural curly brace (enclosing a key)
    R    right non-structural curly brace
    ;    terminal semicolon
    f    indicates a semicolon in a "for" statement
    h    here_doc operator <<
    #    a comment
    Q    indicates a quote or pattern
    q    indicates a qw quote block
    k    a perl keyword
    C    user-defined constant or constant function (with void prototype = ())
    U    user-defined function taking parameters
    G    user-defined function taking block parameter (like grep/map/eval)
    M    (unused, but reserved for subroutine definition name)
    P    (unused, but -html uses it to label pod text)
    t    type indicator such as %,$,@,*,&,sub
    w    bare word (perhaps a subroutine call)
    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
    n    a number
    v    a v-string
    F    a file test operator (like -e)
    Y    File handle
    Z    identifier in indirect object slot: may be file handle, object
    J    LABEL:  code block label
    j    LABEL after next, last, redo, goto
    p    unary +
    m    unary -
    pp   pre-increment operator ++
    mm   pre-decrement operator --
    A    : used as attribute separator

    Here are the '_line_type' codes used internally:
    SYSTEM         - system-specific code before hash-bang line
    CODE           - line of perl code (including comments)
    POD_START      - line starting pod, such as '=head'
    POD            - pod documentation text
    POD_END        - last line of pod section, '=cut'
    HERE           - text of here-document
    HERE_END       - last line of here-doc (target word)
    FORMAT         - format section
    FORMAT_END     - last line of format section, '.'
    DATA_START     - __DATA__ line
    DATA           - unidentified text following __DATA__
    END_START      - __END__ line
    END            - unidentified text following __END__
    ERROR          - we are in big trouble, probably not a perl script
END_OF_LIST
}
27913
BEGIN {

    # Populate, at compile time, the static lookup tables used by the
    # tokenizer.  The arrays and hashes assigned here are not declared in
    # this block, so they must already be visible from the enclosing scope.
    #
    # Each hash is filled with a slice assignment which maps every word of
    # a list to the value 1.  The short lists are held in named lexical
    # arrays; @_ is deliberately not used as a scratch variable.

    # These names are used in error messages
    @opening_brace_names = qw# '{' '[' '(' '?' #;
    @closing_brace_names = qw# '}' ']' ')' ':' #;

    my @digraphs = qw(
      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x= ~~
    );
    @is_digraph{@digraphs} = (1) x scalar(@digraphs);

    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);

    # make a hash of all valid token types for self-checking the tokenizer
    # (adding NEW_TOKENS : select a new character and add to this list)
    my @valid_token_types = qw#
      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
      #;
    push( @valid_token_types, @digraphs );
    push( @valid_token_types, @trigraphs );

    # '#' is the delimiter of the qw list above, so it is added separately,
    # as is the comma
    push( @valid_token_types, '#' );
    push( @valid_token_types, ',' );
    @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);

    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
    my @file_test_operators =
      qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
    @is_file_test_operator{@file_test_operators} =
      (1) x scalar(@file_test_operators);

    # these functions have prototypes of the form (&), so when they are
    # followed by a block, that block MAY BE followed by an operator.
    my @block_operators = qw( do eval );
    @is_block_operator{@block_operators} = (1) x scalar(@block_operators);

    # these functions allow an identifier in the indirect object slot
    my @indirect_object_takers = qw( print printf sort exec system say);
    @is_indirect_object_taker{@indirect_object_takers} =
      (1) x scalar(@indirect_object_takers);

    # These tokens may precede a code block
    # patched for SWITCH/CASE
    my @code_block_tokens =
      qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
      unless do while until eval for foreach map grep sort
      switch case given when);
    @is_code_block_token{@code_block_tokens} =
      (1) x scalar(@code_block_tokens);

    # I'll build the list of keywords incrementally
    my @Keywords = ();

    # keywords and tokens after which a value or pattern is expected,
    # but not an operator.  In other words, these should consume terms
    # to their right, or at least they are not expected to be followed
    # immediately by operators.
    my @value_requestor = qw(
      AUTOLOAD
      BEGIN
      CHECK
      DESTROY
      END
      EQ
      GE
      GT
      INIT
      LE
      LT
      NE
      UNITCHECK
      abs
      accept
      alarm
      and
      atan2
      bind
      binmode
      bless
      break
      caller
      chdir
      chmod
      chomp
      chop
      chown
      chr
      chroot
      close
      closedir
      cmp
      connect
      continue
      cos
      crypt
      dbmclose
      dbmopen
      defined
      delete
      die
      dump
      each
      else
      elsif
      eof
      eq
      exec
      exists
      exit
      exp
      fcntl
      fileno
      flock
      for
      foreach
      formline
      ge
      getc
      getgrgid
      getgrnam
      gethostbyaddr
      gethostbyname
      getnetbyaddr
      getnetbyname
      getpeername
      getpgrp
      getpriority
      getprotobyname
      getprotobynumber
      getpwnam
      getpwuid
      getservbyname
      getservbyport
      getsockname
      getsockopt
      glob
      gmtime
      goto
      grep
      gt
      hex
      if
      index
      int
      ioctl
      join
      keys
      kill
      last
      lc
      lcfirst
      le
      length
      link
      listen
      local
      localtime
      lock
      log
      lstat
      lt
      map
      mkdir
      msgctl
      msgget
      msgrcv
      msgsnd
      my
      ne
      next
      no
      not
      oct
      open
      opendir
      or
      ord
      our
      pack
      pipe
      pop
      pos
      print
      printf
      prototype
      push
      quotemeta
      rand
      read
      readdir
      readlink
      readline
      readpipe
      recv
      redo
      ref
      rename
      require
      reset
      return
      reverse
      rewinddir
      rindex
      rmdir
      scalar
      seek
      seekdir
      select
      semctl
      semget
      semop
      send
      sethostent
      setnetent
      setpgrp
      setpriority
      setprotoent
      setservent
      setsockopt
      shift
      shmctl
      shmget
      shmread
      shmwrite
      shutdown
      sin
      sleep
      socket
      socketpair
      sort
      splice
      split
      sprintf
      sqrt
      srand
      stat
      study
      substr
      symlink
      syscall
      sysopen
      sysread
      sysseek
      system
      syswrite
      tell
      telldir
      tie
      tied
      truncate
      uc
      ucfirst
      umask
      undef
      unless
      unlink
      unpack
      unshift
      untie
      until
      use
      utime
      values
      vec
      waitpid
      warn
      while
      write
      xor

      switch
      case
      given
      when
      err
      say
    );

    # patched above for SWITCH/CASE given/when err say
    # 'err' is a fairly safe addition.
    # TODO: 'default' still needed if appropriate
    # 'use feature' seen, but perltidy works ok without it.
    # Concerned that 'default' could break code.
    push( @Keywords, @value_requestor );

    # These are treated the same but are not keywords:
    # (they are appended only after the keyword list has been extended,
    # so they reach %expecting_term_token but not %is_keyword)
    my @extra_vr = qw(
      constant
      vars
    );
    push( @value_requestor, @extra_vr );

    @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);

    # this list contains keywords which do not look for arguments,
    # so that they might be followed by an operator, or at least
    # not a term.
    my @operator_requestor = qw(
      endgrent
      endhostent
      endnetent
      endprotoent
      endpwent
      endservent
      fork
      getgrent
      gethostent
      getlogin
      getnetent
      getppid
      getprotoent
      getpwent
      getservent
      setgrent
      setpwent
      time
      times
      wait
      wantarray
    );

    push( @Keywords, @operator_requestor );

    # These are treated the same but are not considered keywords:
    # (again appended after the keyword list has been extended)
    my @extra_or = qw(
      STDERR
      STDIN
      STDOUT
    );

    push( @operator_requestor, @extra_or );

    @expecting_operator_token{@operator_requestor} =
      (1) x scalar(@operator_requestor);

    # these token TYPES expect trailing operator but not a term
    # note: ++ and -- are post-increment and decrement, 'C' = constant
    my @operator_requestor_types = qw( ++ -- C <> q );
    @expecting_operator_types{@operator_requestor_types} =
      (1) x scalar(@operator_requestor_types);

    # these token TYPES consume values (terms)
    # note: pp and mm are pre-increment and decrement
    # f=semicolon in for,  F=file test operator
    my @value_requestor_type = qw#
      L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
      <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
      f F pp mm Y p m U J G j >> << ^ t
      #;
    push( @value_requestor_type, ',' )
      ;    # (perl doesn't like a ',' in a qw block)
    @expecting_term_types{@value_requestor_type} =
      (1) x scalar(@value_requestor_type);

    # Note: the following valid token types are not assigned here to
    # hashes requesting to be followed by values or terms, but are
    # instead currently hard-coded into sub operator_expected:
    # ) -> :: Q R Z ] b h i k n v w } #

    # For simple syntax checking, it is nice to have a list of operators which
    # will really be unhappy if not followed by a term.  This includes most
    # of the above...
    %really_want_term = %expecting_term_types;

    # with these exceptions...
    delete $really_want_term{'U'}; # user sub, depends on prototype
    delete $really_want_term{'F'}; # file test works on $_ if no following term
    delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
                                   # let perl do it

    # the quote-like operators
    my @quote_operators = qw(q qq qw qx qr s y tr m);
    @is_q_qq_qw_qx_qr_s_y_tr_m{@quote_operators} =
      (1) x scalar(@quote_operators);

    # These keywords are handled specially in the tokenizer code:
    my @special_keywords = qw(
      do
      eval
      format
      m
      package
      q
      qq
      qr
      qw
      qx
      s
      sub
      tr
      y
    );
    push( @Keywords, @special_keywords );

    # Keywords after which list formatting may be used
    # WARNING: do not include |map|grep|eval or perl may die on
    # syntax errors (map1.t).
    my @keyword_taking_list = qw(
      and
      chmod
      chomp
      chop
      chown
      dbmopen
      die
      elsif
      exec
      fcntl
      for
      foreach
      formline
      getsockopt
      if
      index
      ioctl
      join
      kill
      local
      msgctl
      msgrcv
      msgsnd
      my
      open
      or
      our
      pack
      print
      printf
      push
      read
      readpipe
      recv
      return
      reverse
      rindex
      seek
      select
      semctl
      semget
      send
      setpriority
      setsockopt
      shmctl
      shmget
      shmread
      shmwrite
      socket
      socketpair
      sort
      splice
      split
      sprintf
      substr
      syscall
      sysopen
      sysread
      sysseek
      system
      syswrite
      tie
      unless
      unlink
      unpack
      unshift
      until
      vec
      warn
      while
    );
    @is_keyword_taking_list{@keyword_taking_list} =
      (1) x scalar(@keyword_taking_list);

    # These are not used in any way yet
    #    my @unused_keywords = qw(
    #      CORE
    #     __FILE__
    #     __LINE__
    #     __PACKAGE__
    #     );

    #  The list of keywords was extracted from function 'keyword' in
    #  perl file toke.c version 5.005.03, using this utility, plus a
    #  little editing: (file getkwd.pl):
    #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
    #  Add 'get' prefix where necessary, then split into the above lists.
    #  This list should be updated as necessary.
    #  The list should not contain these special variables:
    #  ARGV DATA ENV SIG STDERR STDIN STDOUT
    #  __DATA__ __END__

    # The keyword table is the union of the value requestors, the operator
    # requestors and the special keywords, excluding the "extra" words
    # which were appended to those lists after being copied.
    @is_keyword{@Keywords} = (1) x scalar(@Keywords);
}
28405 1;
28406 __END__
28407
28408 =head1 NAME
28409
28410 Perl::Tidy - Parses and beautifies perl source
28411
28412 =head1 SYNOPSIS
28413
28414     use Perl::Tidy;
28415
28416     Perl::Tidy::perltidy(
28417         source            => $source,
28418         destination       => $destination,
28419         stderr            => $stderr,
28420         argv              => $argv,
28421         perltidyrc        => $perltidyrc,
28422         logfile           => $logfile,
28423         errorfile         => $errorfile,
28424         formatter         => $formatter,           # callback object (see below)
28425         dump_options      => $dump_options,
28426         dump_options_type => $dump_options_type,
28427         prefilter         => $prefilter_coderef,
28428         postfilter        => $postfilter_coderef,
28429     );
28430
28431 =head1 DESCRIPTION
28432
28433 This module makes the functionality of the perltidy utility available to perl
28434 scripts.  Any or all of the input parameters may be omitted, in which case the
28435 @ARGV array will be used to provide input parameters as described
28436 in the perltidy(1) man page.
28437
28438 For example, the perltidy script is basically just this:
28439
28440     use Perl::Tidy;
28441     Perl::Tidy::perltidy();
28442
28443 The module accepts input and output streams by a variety of methods.
28444 The following list of parameters may be any of the following: a
28445 filename, an ARRAY reference, a SCALAR reference, or an object with
28446 either a B<getline> or B<print> method, as appropriate.
28447
28448         source            - the source of the script to be formatted
28449         destination       - the destination of the formatted output
28450         stderr            - standard error output
28451         perltidyrc        - the .perltidyrc file
28452         logfile           - the .LOG file stream, if any
28453         errorfile         - the .ERR file stream, if any
28454         dump_options      - ref to a hash to receive parameters (see below)
28455         dump_options_type - controls contents of dump_options
28456         dump_getopt_flags - ref to a hash to receive Getopt flags
28457         dump_options_category - ref to a hash giving category of options
28458         dump_abbreviations    - ref to a hash giving all abbreviations
28459
28460 The following chart illustrates the logic used to decide how to
28461 treat a parameter.
28462
28463    ref($param)  $param is assumed to be:
28464    -----------  ---------------------
28465    undef        a filename
28466    SCALAR       ref to string
28467    ARRAY        ref to array
28468    (other)      object with getline (if source) or print method
28469
28470 If the parameter is an object, and the object has a B<close> method, that
28471 close method will be called at the end of the stream.
28472
28473 =over 4
28474
28475 =item source
28476
28477 If the B<source> parameter is given, it defines the source of the
28478 input stream.
28479
28480 =item destination
28481
28482 If the B<destination> parameter is given, it will be used to define the
28483 file or memory location to receive output of perltidy.
28484
28485 =item stderr
28486
28487 The B<stderr> parameter allows the calling program to capture the output
28488 to what would otherwise go to the standard error output device.
28489
28490 =item perltidyrc
28491
28492 If the B<perltidyrc> file is given, it will be used instead of any
28493 F<.perltidyrc> configuration file that would otherwise be used.
28494
28495 =item argv
28496
28497 If the B<argv> parameter is given, it will be used instead of the
28498 B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
28499 string, or a reference to an array.  If it is a string or reference to a
28500 string, it will be parsed into an array of items just as if it were a
28501 command line string.
28502
28503 =item dump_options
28504
28505 If the B<dump_options> parameter is given, it must be the reference to a hash.
28506 In this case, the parameters contained in any perltidyrc configuration file
28507 will be placed in this hash and perltidy will return immediately.  This is
28508 equivalent to running perltidy with --dump-options, except that the parameters
28509 are returned in a hash rather than dumped to standard output.  Also, by default
28510 only the parameters in the perltidyrc file are returned, but this can be
28511 changed (see the next parameter).  This parameter provides a convenient method
28512 for external programs to read a perltidyrc file.  An example program using
28513 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
28514
28515 Any combination of the B<dump_> parameters may be used together.
28516
28517 =item dump_options_type
28518
28519 This parameter is a string which can be used to control the parameters placed
28520 in the hash reference supplied by B<dump_options>.  The possible values are
28521 'perltidyrc' (default) and 'full'.  The 'full' parameter causes both the
28522 default options plus any options found in a perltidyrc file to be returned.
28523
28524 =item dump_getopt_flags
28525
28526 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
28527 hash.  This hash will receive all of the parameters that perltidy understands
28528 and flags that are passed to Getopt::Long.  This parameter may be
28529 used alone or with the B<dump_options> flag.  Perltidy will
28530 exit immediately after filling this hash.  See the demo program
28531 F<perltidyrc_dump.pl> for example usage.
28532
28533 =item dump_options_category
28534
28535 If the B<dump_options_category> parameter is given, it must be the reference to a
28536 hash.  This hash will receive a hash with keys equal to all long parameter names
28537 and values equal to the title of the corresponding section of the perltidy manual.
28538 See the demo program F<perltidyrc_dump.pl> for example usage.
28539
28540 =item dump_abbreviations
28541
28542 If the B<dump_abbreviations> parameter is given, it must be the reference to a
28543 hash.  This hash will receive all abbreviations used by Perl::Tidy.  See the
28544 demo program F<perltidyrc_dump.pl> for example usage.
28545
28546 =item prefilter
28547
28548 A code reference that will be applied to the source before tidying. It is
28549 expected to take the full content as a string in its input, and output the
28550 transformed content.
28551
28552 =item postfilter
28553
28554 A code reference that will be applied to the tidied result before outputting.
28555 It is expected to take the full content as a string in its input, and output
28556 the transformed content.
28557
28558 Note: A convenient way to check the function of your custom prefilter and
28559 postfilter code is to use the --notidy option, first with just the prefilter
28560 and then with both the prefilter and postfilter.  See also the file
28561 B<filter_example.pl> in the perltidy distribution.
28562
28563 =back
28564
28565 =head1 EXAMPLE
28566
28567 The following example passes perltidy a snippet as a reference
28568 to a string and receives the result back in a reference to
28569 an array.
28570
28571  use Perl::Tidy;
28572
28573  # some messy source code to format
28574  my $source = <<'EOM';
28575  use strict;
28576  my @editors=('Emacs', 'Vi   '); my $rand = rand();
28577  print "A poll of 10 random programmers gave these results:\n";
28578  foreach(0..10) {
28579  my $i=int ($rand+rand());
28580  print " $editors[$i] users are from Venus" . ", " .
28581  "$editors[1-$i] users are from Mars" .
28582  "\n";
28583  }
28584  EOM
28585
28586  # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
28587  my @dest;
28588  perltidy( source => \$source, destination => \@dest );
28589  foreach (@dest) {print}
28590
28591 =head1 Using the B<formatter> Callback Object
28592
28593 The B<formatter> parameter is an optional callback object which allows
28594 the calling program to receive tokenized lines directly from perltidy for
28595 further specialized processing.  When this parameter is used, the two
28596 formatting options which are built into perltidy (beautification or
28597 html) are ignored.  The following diagram illustrates the logical flow:
28598
28599                     |-- (normal route)   -> code beautification
28600   caller->perltidy->|-- (-html flag )    -> create html
28601                     |-- (formatter given)-> callback to write_line
28602
28603 This can be useful for processing perl scripts in some way.  The
28604 parameter C<$formatter> in the perltidy call,
28605
28606         formatter   => $formatter,
28607
28608 is an object created by the caller with a C<write_line> method which
28609 will accept and process tokenized lines, one line per call.  Here is
28610 a simple example of a C<write_line> which merely prints the line number,
28611 the line type (as determined by perltidy), and the text of the line:
28612
28613  sub write_line {
28614
28615      # This is called from perltidy line-by-line
28616      my $self              = shift;
28617      my $line_of_tokens    = shift;
28618      my $line_type         = $line_of_tokens->{_line_type};
28619      my $input_line_number = $line_of_tokens->{_line_number};
28620      my $input_line        = $line_of_tokens->{_line_text};
28621      print "$input_line_number:$line_type:$input_line";
28622  }
28623
28624 The complete program, B<perllinetype>, is contained in the examples section of
28625 the source distribution.  As this example shows, the callback method
28626 receives a parameter B<$line_of_tokens>, which is a reference to a hash
28627 of other useful information.  This example uses these hash entries:
28628
28629  $line_of_tokens->{_line_number} - the line number (1,2,...)
28630  $line_of_tokens->{_line_text}   - the text of the line
28631  $line_of_tokens->{_line_type}   - the type of the line, one of:
28632
28633     SYSTEM         - system-specific code before hash-bang line
28634     CODE           - line of perl code (including comments)
28635     POD_START      - line starting pod, such as '=head'
28636     POD            - pod documentation text
28637     POD_END        - last line of pod section, '=cut'
28638     HERE           - text of here-document
28639     HERE_END       - last line of here-doc (target word)
28640     FORMAT         - format section
28641     FORMAT_END     - last line of format section, '.'
28642     DATA_START     - __DATA__ line
28643     DATA           - unidentified text following __DATA__
28644     END_START      - __END__ line
28645     END            - unidentified text following __END__
28646     ERROR          - we are in big trouble, probably not a perl script
28647
28648 Most applications will be only interested in lines of type B<CODE>.  For
28649 another example, let's write a program which checks for one of the
28650 so-called I<naughty matching variables> C<$`>, C<$&>, and C<$'>, which
28651 can slow down processing.  Here is a B<write_line>, from the example
28652 program B<find_naughty.pl>, which does that:
28653
28654  sub write_line {
28655
28656      # This is called back from perltidy line-by-line
28657      # We're looking for $`, $&, and $'
28658      my ( $self, $line_of_tokens ) = @_;
28659
28660      # pull out some stuff we might need
28661      my $line_type         = $line_of_tokens->{_line_type};
28662      my $input_line_number = $line_of_tokens->{_line_number};
28663      my $input_line        = $line_of_tokens->{_line_text};
28664      my $rtoken_type       = $line_of_tokens->{_rtoken_type};
28665      my $rtokens           = $line_of_tokens->{_rtokens};
28666      chomp $input_line;
28667
28668      # skip comments, pod, etc
28669      return if ( $line_type ne 'CODE' );
28670
28671      # loop over tokens looking for $`, $&, and $'
28672      for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
28673
28674          # we only want to examine token types 'i' (identifier)
28675          next unless $$rtoken_type[$j] eq 'i';
28676
28677          # pull out the actual token text
28678          my $token = $$rtokens[$j];
28679
28680          # and check it
28681          if ( $token =~ /^\$[\`\&\']$/ ) {
28682              print STDERR
28683                "$input_line_number: $token\n";
28684          }
28685      }
28686  }
28687
28688 This example pulls out these tokenization variables from the $line_of_tokens
28689 hash reference:
28690
28691      $rtoken_type = $line_of_tokens->{_rtoken_type};
28692      $rtokens     = $line_of_tokens->{_rtokens};
28693
28694 The variable C<$rtoken_type> is a reference to an array of token type codes,
28695 and C<$rtokens> is a reference to a corresponding array of token text.
28696 These are obviously only defined for lines of type B<CODE>.
28697 Perltidy classifies tokens into types, and has a brief code for each type.
28698 You can get a complete list at any time by running perltidy from the
28699 command line with
28700
28701      perltidy --dump-token-types
28702
28703 In the present example, we are only looking for tokens of type B<i>
28704 (identifiers), so the for loop skips past all other types.  When an
28705 identifier is found, its actual text is checked to see if it is one
28706 being sought.  If so, the above write_line prints the token and its
28707 line number.
28708
28709 The B<formatter> feature is relatively new in perltidy, and further
28710 documentation needs to be written to complete its description.  However,
28711 several example programs have been written and can be found in the
28712 B<examples> section of the source distribution.  Probably the best way
28713 to get started is to find one of the examples which most closely matches
28714 your application and start modifying it.
28715
28716 For help with perltidy's peculiar way of breaking lines into tokens, you
28717 might run, from the command line,
28718
28719  perltidy -D filename
28720
28721 where F<filename> is a short script of interest.  This will produce
28722 F<filename.DEBUG> with interleaved lines of text and their token types.
28723 The B<-D> flag has been in perltidy from the beginning for this purpose.
28724 If you want to see the code which creates this file, it is
28725 C<write_debug_entry> in Tidy.pm.
28726
28727 =head1 EXPORT
28728
28729   &perltidy
28730
28731 =head1 CREDITS
28732
28733 Thanks to Hugh Myers who developed the initial modular interface
28734 to perltidy.
28735
28736 =head1 VERSION
28737
28738 This man page documents Perl::Tidy version 20101217.
28739
28740 =head1 AUTHOR
28741
28742  Steve Hancock
28743  perltidy at users.sourceforge.net
28744
28745 =head1 SEE ALSO
28746
28747 The perltidy(1) man page describes all of the features of perltidy.  It
28748 can be found at http://perltidy.sourceforge.net.
28749
28750 =cut