lib/Perl/Tidy.pm

   1 #
   2 ###########################################################-
   3 #
   4 #    perltidy - a perl script indenter and formatter
   5 #
   6 #    Copyright (c) 2000-2018 by Steve Hancock
   7 #    Distributed under the GPL license agreement; see file COPYING
   8 #
   9 #    This program is free software; you can redistribute it and/or modify
  10 #    it under the terms of the GNU General Public License as published by
  11 #    the Free Software Foundation; either version 2 of the License, or
  12 #    (at your option) any later version.
  13 #
  14 #    This program is distributed in the hope that it will be useful,
  15 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 #    GNU General Public License for more details.
  18 #
  19 #    You should have received a copy of the GNU General Public License along
  20 #    with this program; if not, write to the Free Software Foundation, Inc.,
  21 #    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  22 #
  23 #    For brief instructions, try 'perltidy -h'.
  24 #    For more complete documentation, try 'man perltidy'
  25 #    or visit http://perltidy.sourceforge.net
  26 #
  27 #    This script is an example of the default style.  It was formatted with:
  28 #
  29 #      perltidy Tidy.pm
  30 #
  31 #    Code Contributions: See ChangeLog.html for a complete history.
  32 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  33 #        v-strings.
  34 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  35 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  36 #      Yves Orton supplied coding to help detect Windows versions.
  37 #      Axel Rose supplied a patch for MacPerl.
  38 #      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
  39 #      Dan Tyrell contributed a patch for binary I/O.
  40 #      Ueli Hugenschmidt contributed a patch for -fpsc
  41 #      Sam Kington supplied a patch to identify the initial indentation of
  42 #      entabbed code.
  43 #      jonathan swartz supplied patches for:
  44 #      * .../ pattern, which looks upwards from directory
  45 #      * --notidy, to be used in directories where we want to avoid
  46 #        accidentally tidying
  47 #      * prefilter and postfilter
  48 #      * iterations option
  49 #
  50 #      Many others have supplied key ideas, suggestions, and bug reports;
  51 #        see the CHANGES file.
  52 #
  53 ############################################################
  54
  55 package Perl::Tidy;
  56
  57 # perlver reports minimum version needed is 5.8.0
  58 # 5.004 needed for IO::File
  59 # 5.008 needed for wide characters
  60 use 5.008;
  61 use warnings;
  62 use strict;
  63 use Exporter;
  64 use Carp;
  65 $|++;
  66
  67 use vars qw{
  68   $VERSION
  69   @ISA
  70   @EXPORT
  71   $missing_file_spec
  72   $fh_stderr
  73   $rOpts_character_encoding
  74 };
  75
  76 @ISA    = qw( Exporter );
  77 @EXPORT = qw( &perltidy );
  78
  79 use Cwd;
  80 use Encode ();
  81 use IO::File;
  82 use File::Basename;
  83 use File::Copy;
  84 use File::Temp qw(tempfile);
  85
  86 BEGIN {
  87     ( $VERSION = q($Id: Tidy.pm,v 1.74 2018/02/20 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  88 }
  89
  90 sub streamhandle {
  91
  92     # given filename and mode (r or w), create an object which:
  93     #   has a 'getline' method if mode='r', and
  94     #   has a 'print' method if mode='w'.
  95     # The objects also need a 'close' method.
  96     #
  97     # How the object is made:
  98     #
  99     # if $filename is:     Make object using:
 100     # ----------------     -----------------
 101     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
 102     # string               IO::File
 103     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
 104     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
 105     # object               object
 106     #                      (check for 'print' method for 'w' mode)
 107     #                      (check for 'getline' method for 'r' mode)
 108     my ( $filename, $mode ) = @_;
 109
 110     my $ref = ref($filename);
 111     my $New;
 112     my $fh;
 113
 114     # handle a reference
 115     if ($ref) {
 116         if ( $ref eq 'ARRAY' ) {
 117             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
 118         }
 119         elsif ( $ref eq 'SCALAR' ) {
 120             $New = sub { Perl::Tidy::IOScalar->new(@_) };
 121         }
 122         else {
 123
 124             # Accept an object with a getline method for reading. Note:
 125             # IO::File is built-in and does not respond to the defined
 126             # operator.  If this causes trouble, the check can be
 127             # skipped and we can just let it crash if there is no
 128             # getline.
 129             if ( $mode =~ /[rR]/ ) {
 130
 131                 # RT#97159; part 1 of 2: updated to use 'can'
 132                 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 133                 if ( $ref->can('getline') ) {
 134                     $New = sub { $filename };
 135                 }
 136                 else {
 137                     $New = sub { undef };
 138                     confess <<EOM;
 139 ------------------------------------------------------------------------
 140 No 'getline' method is defined for object of class $ref
 141 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 142 ------------------------------------------------------------------------
 143 EOM
 144                 }
 145             }
 146
 147             # Accept an object with a print method for writing.
 148             # See note above about IO::File
 149             if ( $mode =~ /[wW]/ ) {
 150
 151                 # RT#97159; part 2 of 2: updated to use 'can'
 152                 ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 153                 if ( $ref->can('print') ) {
 154                     $New = sub { $filename };
 155                 }
 156                 else {
 157                     $New = sub { undef };
 158                     confess <<EOM;
 159 ------------------------------------------------------------------------
 160 No 'print' method is defined for object of class $ref
 161 Please check your call to Perl::Tidy::perltidy. Trace follows.
 162 ------------------------------------------------------------------------
 163 EOM
 164                 }
 165             }
 166         }
 167     }
 168
 169     # handle a string
 170     else {
 171         if ( $filename eq '-' ) {
 172             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 173         }
 174         else {
 175             $New = sub { IO::File->new(@_) };
 176         }
 177     }
 178     $fh = $New->( $filename, $mode )
 179       or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
 180
 181     return $fh, ( $ref or $filename );
 182 }
 183
 184 sub find_input_line_ending {
 185
 186     # Peek at a file and return first line ending character.
 187     # Quietly return undef in case of any trouble.
 188     my ($input_file) = @_;
 189     my $ending;
 190
 191     # silently ignore input from object or stdin
 192     if ( ref($input_file) || $input_file eq '-' ) {
 193         return $ending;
 194     }
 195
 196     my $fh;
 197     open( $fh, '<', $input_file ) || return $ending;
 198
 199     binmode $fh;
 200     my $buf;
 201     read( $fh, $buf, 1024 );
 202     close $fh;
 203     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 204         my $test = $1;
 205
 206         # dos
 207         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 208
 209         # mac
 210         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 211
 212         # unix
 213         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 214
 215         # unknown
 216         else { }
 217     }
 218
 219     # no ending seen
 220     else { }
 221
 222     return $ending;
 223 }
 224
 225 sub catfile {
 226
 227     # concatenate a path and file basename
 228     # returns undef in case of error
 229
 230     my @parts = @_;
 231
 232     #BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 233     BEGIN {
 234         eval { require File::Spec };
 235         $missing_file_spec = $@;
 236     }
 237
 238     # use File::Spec if we can
 239     unless ($missing_file_spec) {
 240         return File::Spec->catfile(@parts);
 241     }
 242
 243     # Perl 5.004 systems may not have File::Spec so we'll make
 244     # a simple try.  We assume File::Basename is available.
 245     # return undef if not successful.
 246     my $name      = pop @parts;
 247     my $path      = join '/', @parts;
 248     my $test_file = $path . $name;
 249     my ( $test_name, $test_path ) = fileparse($test_file);
 250     return $test_file if ( $test_name eq $name );
 251     return if ( $^O eq 'VMS' );
 252
 253     # this should work at least for Windows and Unix:
 254     $test_file = $path . '/' . $name;
 255     ( $test_name, $test_path ) = fileparse($test_file);
 256     return $test_file if ( $test_name eq $name );
 257     return;
 258 }
 259
 260 # Here is a map of the flow of data from the input source to the output
 261 # line sink:
 262 #
 263 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 264 #       input                         groups                 output
 265 #       lines   tokens      lines       of          lines    lines
 266 #                                      lines
 267 #
 268 # The names correspond to the package names responsible for the unit processes.
 269 #
 270 # The overall process is controlled by the "main" package.
 271 #
 272 # LineSource is the stream of input lines
 273 #
 274 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 275 # if necessary.  A token is any section of the input line which should be
 276 # manipulated as a single entity during formatting.  For example, a single
 277 # ',' character is a token, and so is an entire side comment.  It handles
 278 # the complexities of Perl syntax, such as distinguishing between '<<' as
 279 # a shift operator and as a here-document, or distinguishing between '/'
 280 # as a divide symbol and as a pattern delimiter.
 281 #
 282 # Formatter inserts and deletes whitespace between tokens, and breaks
 283 # sequences of tokens at appropriate points as output lines.  It bases its
 284 # decisions on the default rules as modified by any command-line options.
 285 #
 286 # VerticalAligner collects groups of lines together and tries to line up
 287 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 288 #
 289 # FileWriter simply writes lines to the output stream.
 290 #
 291 # The Logger package, not shown, records significant events and warning
 292 # messages.  It writes a .LOG file, which may be saved with a
 293 # '-log' or a '-g' flag.
 294
 295 sub perltidy {
 296
 297     my %input_hash = @_;
 298
 299     my %defaults = (
 300         argv                  => undef,
 301         destination           => undef,
 302         formatter             => undef,
 303         logfile               => undef,
 304         errorfile             => undef,
 305         perltidyrc            => undef,
 306         source                => undef,
 307         stderr                => undef,
 308         dump_options          => undef,
 309         dump_options_type     => undef,
 310         dump_getopt_flags     => undef,
 311         dump_options_category => undef,
 312         dump_options_range    => undef,
 313         dump_abbreviations    => undef,
 314         prefilter             => undef,
 315         postfilter            => undef,
 316     );
 317
 318     # don't overwrite callers ARGV
 319     local @ARGV   = @ARGV;
 320     local *STDERR = *STDERR;
 321
 322     if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 323         local $" = ')(';
 324         my @good_keys = sort keys %defaults;
 325         @bad_keys = sort @bad_keys;
 326         confess <<EOM;
 327 ------------------------------------------------------------------------
 328 Unknown perltidy parameter : (@bad_keys)
 329 perltidy only understands : (@good_keys)
 330 ------------------------------------------------------------------------
 331
 332 EOM
 333     }
 334
 335     my $get_hash_ref = sub {
 336         my ($key) = @_;
 337         my $hash_ref = $input_hash{$key};
 338         if ( defined($hash_ref) ) {
 339             unless ( ref($hash_ref) eq 'HASH' ) {
 340                 my $what = ref($hash_ref);
 341                 my $but_is =
 342                   $what ? "but is ref to $what" : "but is not a reference";
 343                 croak <<EOM;
 344 ------------------------------------------------------------------------
 345 error in call to perltidy:
 346 -$key must be reference to HASH $but_is
 347 ------------------------------------------------------------------------
 348 EOM
 349             }
 350         }
 351         return $hash_ref;
 352     };
 353
 354     %input_hash = ( %defaults, %input_hash );
 355     my $argv               = $input_hash{'argv'};
 356     my $destination_stream = $input_hash{'destination'};
 357     my $errorfile_stream   = $input_hash{'errorfile'};
 358     my $logfile_stream     = $input_hash{'logfile'};
 359     my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 360     my $source_stream      = $input_hash{'source'};
 361     my $stderr_stream      = $input_hash{'stderr'};
 362     my $user_formatter     = $input_hash{'formatter'};
 363     my $prefilter          = $input_hash{'prefilter'};
 364     my $postfilter         = $input_hash{'postfilter'};
 365
 366     if ($stderr_stream) {
 367         ( $fh_stderr, my $stderr_file ) =
 368           Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 369         if ( !$fh_stderr ) {
 370             croak <<EOM;
 371 ------------------------------------------------------------------------
 372 Unable to redirect STDERR to $stderr_stream
 373 Please check value of -stderr in call to perltidy
 374 ------------------------------------------------------------------------
 375 EOM
 376         }
 377     }
 378     else {
 379         $fh_stderr = *STDERR;
 380     }
 381
 382     sub Warn { my $msg = shift; $fh_stderr->print($msg); return }
 383
 384     sub Exit {
 385         my $flag = shift;
 386         if   ($flag) { goto ERROR_EXIT }
 387         else         { goto NORMAL_EXIT }
 388     }
 389
 390     sub Die { my $msg = shift; Warn($msg); Exit(1); }
 391
 392     # extract various dump parameters
 393     my $dump_options_type     = $input_hash{'dump_options_type'};
 394     my $dump_options          = $get_hash_ref->('dump_options');
 395     my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
 396     my $dump_options_category = $get_hash_ref->('dump_options_category');
 397     my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
 398     my $dump_options_range    = $get_hash_ref->('dump_options_range');
 399
 400     # validate dump_options_type
 401     if ( defined($dump_options) ) {
 402         unless ( defined($dump_options_type) ) {
 403             $dump_options_type = 'perltidyrc';
 404         }
 405         unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
 406             croak <<EOM;
 407 ------------------------------------------------------------------------
 408 Please check value of -dump_options_type in call to perltidy;
 409 saw: '$dump_options_type'
 410 expecting: 'perltidyrc' or 'full'
 411 ------------------------------------------------------------------------
 412 EOM
 413
 414         }
 415     }
 416     else {
 417         $dump_options_type = "";
 418     }
 419
 420     if ($user_formatter) {
 421
 422         # if the user defines a formatter, there is no output stream,
 423         # but we need a null stream to keep coding simple
 424         $destination_stream = Perl::Tidy::DevNull->new();
 425     }
 426
 427     # see if ARGV is overridden
 428     if ( defined($argv) ) {
 429
 430         my $rargv = ref $argv;
 431         if ( $rargv eq 'SCALAR' ) { $argv = ${$argv}; $rargv = undef }
 432
 433         # ref to ARRAY
 434         if ($rargv) {
 435             if ( $rargv eq 'ARRAY' ) {
 436                 @ARGV = @{$argv};
 437             }
 438             else {
 439                 croak <<EOM;
 440 ------------------------------------------------------------------------
 441 Please check value of -argv in call to perltidy;
 442 it must be a string or ref to ARRAY but is: $rargv
 443 ------------------------------------------------------------------------
 444 EOM
 445             }
 446         }
 447
 448         # string
 449         else {
 450             my ( $rargv, $msg ) = parse_args($argv);
 451             if ($msg) {
 452                 Die <<EOM;
 453 Error parsing this string passed to to perltidy with 'argv':
 454 $msg
 455 EOM
 456             }
 457             @ARGV = @{$rargv};
 458         }
 459     }
 460
 461     my $rpending_complaint;
 462     ${$rpending_complaint} = "";
 463     my $rpending_logfile_message;
 464     ${$rpending_logfile_message} = "";
 465
 466     my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
 467
 468     # VMS file names are restricted to a 40.40 format, so we append _tdy
 469     # instead of .tdy, etc. (but see also sub check_vms_filename)
 470     my $dot;
 471     my $dot_pattern;
 472     if ( $^O eq 'VMS' ) {
 473         $dot         = '_';
 474         $dot_pattern = '_';
 475     }
 476     else {
 477         $dot         = '.';
 478         $dot_pattern = '\.';    # must escape for use in regex
 479     }
 480
 481     #---------------------------------------------------------------
 482     # get command line options
 483     #---------------------------------------------------------------
 484     my ( $rOpts, $config_file, $rraw_options, $roption_string,
 485         $rexpansion, $roption_category, $roption_range )
 486       = process_command_line(
 487         $perltidyrc_stream,  $is_Windows, $Windows_type,
 488         $rpending_complaint, $dump_options_type,
 489       );
 490
 491     my $saw_extrude = ( grep m/^-extrude$/, @{$rraw_options} ) ? 1 : 0;
 492     my $saw_pbp =
 493       ( grep m/^-(pbp|perl-best-practices)$/, @{$rraw_options} ) ? 1 : 0;
 494
 495     #---------------------------------------------------------------
 496     # Handle requests to dump information
 497     #---------------------------------------------------------------
 498
 499     # return or exit immediately after all dumps
 500     my $quit_now = 0;
 501
 502     # Getopt parameters and their flags
 503     if ( defined($dump_getopt_flags) ) {
 504         $quit_now = 1;
 505         foreach my $op ( @{$roption_string} ) {
 506             my $opt  = $op;
 507             my $flag = "";
 508
 509             # Examples:
 510             #  some-option=s
 511             #  some-option=i
 512             #  some-option:i
 513             #  some-option!
 514             if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
 515                 $opt  = $1;
 516                 $flag = $2;
 517             }
 518             $dump_getopt_flags->{$opt} = $flag;
 519         }
 520     }
 521
 522     if ( defined($dump_options_category) ) {
 523         $quit_now = 1;
 524         %{$dump_options_category} = %{$roption_category};
 525     }
 526
 527     if ( defined($dump_options_range) ) {
 528         $quit_now = 1;
 529         %{$dump_options_range} = %{$roption_range};
 530     }
 531
 532     if ( defined($dump_abbreviations) ) {
 533         $quit_now = 1;
 534         %{$dump_abbreviations} = %{$rexpansion};
 535     }
 536
 537     if ( defined($dump_options) ) {
 538         $quit_now = 1;
 539         %{$dump_options} = %{$rOpts};
 540     }
 541
 542     Exit 0 if ($quit_now);
 543
 544     # make printable string of options for this run as possible diagnostic
 545     my $readable_options = readable_options( $rOpts, $roption_string );
 546
 547     # dump from command line
 548     if ( $rOpts->{'dump-options'} ) {
 549         print STDOUT $readable_options;
 550         Exit 0;
 551     }
 552
 553     #---------------------------------------------------------------
 554     # check parameters and their interactions
 555     #---------------------------------------------------------------
 556     my $tabsize =
 557       check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
 558
 559     if ($user_formatter) {
 560         $rOpts->{'format'} = 'user';
 561     }
 562
 563     # there must be one entry here for every possible format
 564     my %default_file_extension = (
 565         tidy => 'tdy',
 566         html => 'html',
 567         user => '',
 568     );
 569
 570     $rOpts_character_encoding = $rOpts->{'character-encoding'};
 571
 572     # be sure we have a valid output format
 573     unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 574         my $formats = join ' ',
 575           sort map { "'" . $_ . "'" } keys %default_file_extension;
 576         my $fmt = $rOpts->{'format'};
 577         Die "-format='$fmt' but must be one of: $formats\n";
 578     }
 579
 580     my $output_extension = make_extension( $rOpts->{'output-file-extension'},
 581         $default_file_extension{ $rOpts->{'format'} }, $dot );
 582
 583     # If the backup extension contains a / character then the backup should
 584     # be deleted when the -b option is used.   On older versions of
 585     # perltidy this will generate an error message due to an illegal
 586     # file name.
 587     #
 588     # A backup file will still be generated but will be deleted
 589     # at the end.  If -bext='/' then this extension will be
 590     # the default 'bak'.  Otherwise it will be whatever characters
 591     # remains after all '/' characters are removed.  For example:
 592     # -bext         extension     slashes
 593     #  '/'          bak           1
 594     #  '/delete'    delete        1
 595     #  'delete/'    delete        1
 596     #  '/dev/null'  devnull       2    (Currently not allowed)
 597     my $bext          = $rOpts->{'backup-file-extension'};
 598     my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
 599
 600     # At present only one forward slash is allowed.  In the future multiple
 601     # slashes may be allowed to allow for other options
 602     if ( $delete_backup > 1 ) {
 603         Die "-bext=$bext contains more than one '/'\n";
 604     }
 605
 606     my $backup_extension =
 607       make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 608
 609     my $html_toc_extension =
 610       make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 611
 612     my $html_src_extension =
 613       make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 614
 615     # check for -b option;
 616     # silently ignore unless beautify mode
 617     my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 618       && $rOpts->{'format'} eq 'tidy';
 619
 620     # Turn off -b with warnings in case of conflicts with other options.
 621     # NOTE: Do this silently, without warnings, if there is a source or
 622     # destination stream, or standard output is used.  This is because the -b
 623     # flag may have been in a .perltidyrc file and warnings break
 624     # Test::NoWarnings.  See email discussion with Merijn Brand 26 Feb 2014.
 625     if ($in_place_modify) {
 626         if (   $rOpts->{'standard-output'}
 627             || $destination_stream
 628             || ref $source_stream
 629             || $rOpts->{'outfile'}
 630             || defined( $rOpts->{'output-path'} ) )
 631         {
 632             $in_place_modify = 0;
 633         }
 634     }
 635
 636     Perl::Tidy::Formatter::check_options($rOpts);
 637     if ( $rOpts->{'format'} eq 'html' ) {
 638         Perl::Tidy::HtmlWriter->check_options($rOpts);
 639     }
 640
 641     # make the pattern of file extensions that we shouldn't touch
 642     my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 643     if ($output_extension) {
 644         my $ext = quotemeta($output_extension);
 645         $forbidden_file_extensions .= "|$ext";
 646     }
 647     if ( $in_place_modify && $backup_extension ) {
 648         my $ext = quotemeta($backup_extension);
 649         $forbidden_file_extensions .= "|$ext";
 650     }
 651     $forbidden_file_extensions .= ')$';
 652
 653     # Create a diagnostics object if requested;
 654     # This is only useful for code development
 655     my $diagnostics_object = undef;
 656     if ( $rOpts->{'DIAGNOSTICS'} ) {
 657         $diagnostics_object = Perl::Tidy::Diagnostics->new();
 658     }
 659
 660     # no filenames should be given if input is from an array
 661     if ($source_stream) {
 662         if ( @ARGV > 0 ) {
 663             Die
 664 "You may not specify any filenames when a source array is given\n";
 665         }
 666
 667         # we'll stuff the source array into ARGV
 668         unshift( @ARGV, $source_stream );
 669
 670         # No special treatment for source stream which is a filename.
 671         # This will enable checks for binary files and other bad stuff.
 672         $source_stream = undef unless ref($source_stream);
 673     }
 674
 675     # use stdin by default if no source array and no args
 676     else {
 677         unshift( @ARGV, '-' ) unless @ARGV;
 678     }
 679
 680     #---------------------------------------------------------------
 681     # Ready to go...
 682     # main loop to process all files in argument list
 683     #---------------------------------------------------------------
 684     my $number_of_files = @ARGV;
 685     my $formatter       = undef;
 686     my $tokenizer       = undef;
 687
 688     # If requested, process in order of increasing file size
 689     # This can significantly reduce perl's virtual memory usage during testing.
 690     if ( $number_of_files > 1 && $rOpts->{'file-size-order'} ) {
 691         @ARGV =
 692           map  { $_->[0] }
 693           sort { $a->[1] <=> $b->[1] }
 694           map  { [ $_, -e $_ ? -s $_ : 0 ] } @ARGV;
 695     }
 696
 697     while ( my $input_file = shift @ARGV ) {
 698         my $fileroot;
 699         my $input_file_permissions;
 700
 701         #---------------------------------------------------------------
 702         # prepare this input stream
 703         #---------------------------------------------------------------
 704         if ($source_stream) {
 705             $fileroot = "perltidy";
 706
 707             # If the source is from an array or string, then .LOG output
 708             # is only possible if a logfile stream is specified.  This prevents
 709             # unexpected perltidy.LOG files.
 710             if ( !defined($logfile_stream) ) {
 711                 $logfile_stream = Perl::Tidy::DevNull->new();
 712             }
 713         }
 714         elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 715             $fileroot = "perltidy";       # root name to use for .ERR, .LOG, etc
 716             $in_place_modify = 0;
 717         }
 718         else {
 719             $fileroot = $input_file;
 720             unless ( -e $input_file ) {
 721
 722                 # file doesn't exist - check for a file glob
 723                 if ( $input_file =~ /([\?\*\[\{])/ ) {
 724
 725                     # Windows shell may not remove quotes, so do it
 726                     my $input_file = $input_file;
 727                     if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 728                     if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 729                     my $pattern = fileglob_to_re($input_file);
 730                     ##eval "/$pattern/";
 731                     if ( !$@ && opendir( DIR, './' ) ) {
 732                         my @files =
 733                           grep { /$pattern/ && !-d $_ } readdir(DIR);
 734                         closedir(DIR);
 735                         if (@files) {
 736                             unshift @ARGV, @files;
 737                             next;
 738                         }
 739                     }
 740                 }
 741                 Warn "skipping file: '$input_file': no matches found\n";
 742                 next;
 743             }
 744
 745             unless ( -f $input_file ) {
 746                 Warn "skipping file: $input_file: not a regular file\n";
 747                 next;
 748             }
 749
 750             # As a safety precaution, skip zero length files.
 751             # If for example a source file got clobbered somehow,
 752             # the old .tdy or .bak files might still exist so we
 753             # shouldn't overwrite them with zero length files.
 754             unless ( -s $input_file ) {
 755                 Warn "skipping file: $input_file: Zero size\n";
 756                 next;
 757             }
 758
 759             unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 760                 Warn
 761                   "skipping file: $input_file: Non-text (override with -f)\n";
 762                 next;
 763             }
 764
 765             # we should have a valid filename now
 766             $fileroot               = $input_file;
 767             $input_file_permissions = ( stat $input_file )[2] & oct(7777);
 768
 769             if ( $^O eq 'VMS' ) {
 770                 ( $fileroot, $dot ) = check_vms_filename($fileroot);
 771             }
 772
 773             # add option to change path here
 774             if ( defined( $rOpts->{'output-path'} ) ) {
 775
 776                 my ( $base, $old_path ) = fileparse($fileroot);
 777                 my $new_path = $rOpts->{'output-path'};
 778                 unless ( -d $new_path ) {
 779                     unless ( mkdir $new_path, 0777 ) {
 780                         Die "unable to create directory $new_path: $!\n";
 781                     }
 782                 }
 783                 my $path = $new_path;
 784                 $fileroot = catfile( $path, $base );
 785                 unless ($fileroot) {
 786                     Die <<EOM;
 787 ------------------------------------------------------------------------
 788 Problem combining $new_path and $base to make a filename; check -opath
 789 ------------------------------------------------------------------------
 790 EOM
 791                 }
 792             }
 793         }
 794
 795         # Skip files with same extension as the output files because
 796         # this can lead to a messy situation with files like
 797         # script.tdy.tdy.tdy ... or worse problems ...  when you
 798         # rerun perltidy over and over with wildcard input.
 799         if (
 800             !$source_stream
 801             && (   $input_file =~ /$forbidden_file_extensions/o
 802                 || $input_file eq 'DIAGNOSTICS' )
 803           )
 804         {
 805             Warn "skipping file: $input_file: wrong extension\n";
 806             next;
 807         }
 808
 809         # the 'source_object' supplies a method to read the input file
 810         my $source_object =
 811           Perl::Tidy::LineSource->new( $input_file, $rOpts,
 812             $rpending_logfile_message );
 813         next unless ($source_object);
 814
 815         # Prefilters and postfilters: The prefilter is a code reference
 816         # that will be applied to the source before tidying, and the
 817         # postfilter is a code reference to the result before outputting.
 818         if (
 819             $prefilter
 820             || (   $rOpts_character_encoding
 821                 && $rOpts_character_encoding eq 'utf8' )
 822           )
 823         {
 824             my $buf = '';
 825             while ( my $line = $source_object->get_line() ) {
 826                 $buf .= $line;
 827             }
 828
 829             $buf = $prefilter->($buf) if $prefilter;
 830
 831             if (   $rOpts_character_encoding
 832                 && $rOpts_character_encoding eq 'utf8'
 833                 && !utf8::is_utf8($buf) )
 834             {
 835                 eval {
 836                     $buf = Encode::decode( 'UTF-8', $buf,
 837                         Encode::FB_CROAK | Encode::LEAVE_SRC );
 838                 };
 839                 if ($@) {
 840                     Warn
 841 "skipping file: $input_file: Unable to decode source as UTF-8\n";
 842                     next;
 843                 }
 844             }
 845
 846             $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
 847                 $rpending_logfile_message );
 848         }
 849
 850         # register this file name with the Diagnostics package
 851         $diagnostics_object->set_input_file($input_file)
 852           if $diagnostics_object;
 853
 854         #---------------------------------------------------------------
 855         # prepare the output stream
 856         #---------------------------------------------------------------
 857         my $output_file = undef;
 858         my $actual_output_extension;
 859
 860         if ( $rOpts->{'outfile'} ) {
 861
 862             if ( $number_of_files <= 1 ) {
 863
 864                 if ( $rOpts->{'standard-output'} ) {
 865                     my $msg = "You may not use -o and -st together";
 866                     $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
 867                     Die "$msg\n";
 868                 }
 869                 elsif ($destination_stream) {
 870                     Die
 871 "You may not specify a destination array and -o together\n";
 872                 }
 873                 elsif ( defined( $rOpts->{'output-path'} ) ) {
 874                     Die "You may not specify -o and -opath together\n";
 875                 }
 876                 elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 877                     Die "You may not specify -o and -oext together\n";
 878                 }
 879                 $output_file = $rOpts->{outfile};
 880
 881                 # make sure user gives a file name after -o
 882                 if ( $output_file =~ /^-/ ) {
 883                     Die "You must specify a valid filename after -o\n";
 884                 }
 885
 886                 # do not overwrite input file with -o
 887                 if ( defined($input_file_permissions)
 888                     && ( $output_file eq $input_file ) )
 889                 {
 890                     Die "Use 'perltidy -b $input_file' to modify in-place\n";
 891                 }
 892             }
 893             else {
 894                 Die "You may not use -o with more than one input file\n";
 895             }
 896         }
 897         elsif ( $rOpts->{'standard-output'} ) {
 898             if ($destination_stream) {
 899                 my $msg =
 900                   "You may not specify a destination array and -st together\n";
 901                 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
 902                 Die "$msg\n";
 903             }
 904             $output_file = '-';
 905
 906             if ( $number_of_files <= 1 ) {
 907             }
 908             else {
 909                 Die "You may not use -st with more than one input file\n";
 910             }
 911         }
 912         elsif ($destination_stream) {
 913             $output_file = $destination_stream;
 914         }
 915         elsif ($source_stream) {    # source but no destination goes to stdout
 916             $output_file = '-';
 917         }
 918         elsif ( $input_file eq '-' ) {
 919             $output_file = '-';
 920         }
 921         else {
 922             if ($in_place_modify) {
 923                 $output_file = IO::File->new_tmpfile()
 924                   or Die "cannot open temp file for -b option: $!\n";
 925             }
 926             else {
 927                 $actual_output_extension = $output_extension;
 928                 $output_file             = $fileroot . $output_extension;
 929             }
 930         }
 931
 932         # the 'sink_object' knows how to write the output file
 933         my $tee_file = $fileroot . $dot . "TEE";
 934
 935         my $line_separator = $rOpts->{'output-line-ending'};
 936         if ( $rOpts->{'preserve-line-endings'} ) {
 937             $line_separator = find_input_line_ending($input_file);
 938         }
 939
 940         # Eventually all I/O may be done with binmode, but for now it is
 941         # only done when a user requests a particular line separator
 942         # through the -ple or -ole flags
 943         my $binmode = defined($line_separator)
 944           || defined($rOpts_character_encoding);
 945         $line_separator = "\n" unless defined($line_separator);
 946
 947         my ( $sink_object, $postfilter_buffer );
 948         if ($postfilter) {
 949             $sink_object =
 950               Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
 951                 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
 952         }
 953         else {
 954             $sink_object =
 955               Perl::Tidy::LineSink->new( $output_file, $tee_file,
 956                 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
 957         }
 958
 959         #---------------------------------------------------------------
 960         # initialize the error logger for this file
 961         #---------------------------------------------------------------
 962         my $warning_file = $fileroot . $dot . "ERR";
 963         if ($errorfile_stream) { $warning_file = $errorfile_stream }
 964         my $log_file = $fileroot . $dot . "LOG";
 965         if ($logfile_stream) { $log_file = $logfile_stream }
 966
 967         my $logger_object =
 968           Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 969             $fh_stderr, $saw_extrude );
 970         write_logfile_header(
 971             $rOpts,        $logger_object, $config_file,
 972             $rraw_options, $Windows_type,  $readable_options,
 973         );
 974         if ( ${$rpending_logfile_message} ) {
 975             $logger_object->write_logfile_entry( ${$rpending_logfile_message} );
 976         }
 977         if ( ${$rpending_complaint} ) {
 978             $logger_object->complain( ${$rpending_complaint} );
 979         }
 980
 981         #---------------------------------------------------------------
 982         # initialize the debug object, if any
 983         #---------------------------------------------------------------
 984         my $debugger_object = undef;
 985         if ( $rOpts->{DEBUG} ) {
 986             $debugger_object =
 987               Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 988         }
 989
 990         #---------------------------------------------------------------
 991         # loop over iterations for one source stream
 992         #---------------------------------------------------------------
 993
 994         # We will do a convergence test if 3 or more iterations are allowed.
 995         # It would be pointless for fewer because we have to make at least
 996         # two passes before we can see if we are converged, and the test
 997         # would just slow things down.
 998         my $max_iterations = $rOpts->{'iterations'};
 999         my $convergence_log_message;
1000         my %saw_md5;
1001         my $do_convergence_test = $max_iterations > 2;
1002         if ($do_convergence_test) {
1003             eval "use Digest::MD5 qw(md5_hex)";
1004             $do_convergence_test = !$@;
1005
1006             ### Trying to avoid problems with ancient versions of perl
1007             ##eval { my $string = "perltidy"; utf8::encode($string) };
1008             ##$do_convergence_test = $do_convergence_test && !$@;
1009         }
1010
1011         # save objects to allow redirecting output during iterations
1012         my $sink_object_final     = $sink_object;
1013         my $debugger_object_final = $debugger_object;
1014         my $logger_object_final   = $logger_object;
1015
1016         foreach my $iter ( 1 .. $max_iterations ) {
1017
1018             # send output stream to temp buffers until last iteration
1019             my $sink_buffer;
1020             if ( $iter < $max_iterations ) {
1021                 $sink_object =
1022                   Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
1023                     $line_separator, $rOpts, $rpending_logfile_message,
1024                     $binmode );
1025             }
1026             else {
1027                 $sink_object = $sink_object_final;
1028             }
1029
1030             # Save logger, debugger output only on pass 1 because:
1031             # (1) line number references must be to the starting
1032             # source, not an intermediate result, and
1033             # (2) we need to know if there are errors so we can stop the
1034             # iterations early if necessary.
1035             if ( $iter > 1 ) {
1036                 $debugger_object = undef;
1037                 $logger_object   = undef;
1038             }
1039
1040             #------------------------------------------------------------
1041             # create a formatter for this file : html writer or
1042             # pretty printer
1043             #------------------------------------------------------------
1044
1045             # we have to delete any old formatter because, for safety,
1046             # the formatter will check to see that there is only one.
1047             $formatter = undef;
1048
1049             if ($user_formatter) {
1050                 $formatter = $user_formatter;
1051             }
1052             elsif ( $rOpts->{'format'} eq 'html' ) {
1053                 $formatter =
1054                   Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1055                     $actual_output_extension, $html_toc_extension,
1056                     $html_src_extension );
1057             }
1058             elsif ( $rOpts->{'format'} eq 'tidy' ) {
1059                 $formatter = Perl::Tidy::Formatter->new(
1060                     logger_object      => $logger_object,
1061                     diagnostics_object => $diagnostics_object,
1062                     sink_object        => $sink_object,
1063                 );
1064             }
1065             else {
1066                 Die "I don't know how to do -format=$rOpts->{'format'}\n";
1067             }
1068
1069             unless ($formatter) {
1070                 Die "Unable to continue with $rOpts->{'format'} formatting\n";
1071             }
1072
1073             #---------------------------------------------------------------
1074             # create the tokenizer for this file
1075             #---------------------------------------------------------------
1076             $tokenizer = undef;                     # must destroy old tokenizer
1077             $tokenizer = Perl::Tidy::Tokenizer->new(
1078                 source_object      => $source_object,
1079                 logger_object      => $logger_object,
1080                 debugger_object    => $debugger_object,
1081                 diagnostics_object => $diagnostics_object,
1082                 tabsize            => $tabsize,
1083
1084                 starting_level      => $rOpts->{'starting-indentation-level'},
1085                 indent_columns      => $rOpts->{'indent-columns'},
1086                 look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
1087                 look_for_autoloader => $rOpts->{'look-for-autoloader'},
1088                 look_for_selfloader => $rOpts->{'look-for-selfloader'},
1089                 trim_qw             => $rOpts->{'trim-qw'},
1090                 extended_syntax     => $rOpts->{'extended-syntax'},
1091
1092                 continuation_indentation =>
1093                   $rOpts->{'continuation-indentation'},
1094                 outdent_labels => $rOpts->{'outdent-labels'},
1095             );
1096
1097             #---------------------------------------------------------------
1098             # now we can do it
1099             #---------------------------------------------------------------
1100             process_this_file( $tokenizer, $formatter );
1101
1102             #---------------------------------------------------------------
1103             # close the input source and report errors
1104             #---------------------------------------------------------------
1105             $source_object->close_input_file();
1106
1107             # line source for next iteration (if any) comes from the current
1108             # temporary output buffer
1109             if ( $iter < $max_iterations ) {
1110
1111                 $sink_object->close_output_file();
1112                 $source_object =
1113                   Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1114                     $rpending_logfile_message );
1115
1116                 # stop iterations if errors or converged
1117                 #my $stop_now = $logger_object->{_warning_count};
1118                 my $stop_now = $tokenizer->report_tokenization_errors();
1119                 if ($stop_now) {
1120                     $convergence_log_message = <<EOM;
1121 Stopping iterations because of severe errors.
1122 EOM
1123                 }
1124                 elsif ($do_convergence_test) {
1125
1126                     # Patch for [rt.cpan.org #88020]
1127                     # Use utf8::encode since md5_hex() only operates on bytes.
1128                     # my $digest = md5_hex( utf8::encode($sink_buffer) );
1129
1130                     # Note added 20180114: this patch did not work correctly.
1131                     # I'm not sure why.  But switching to the method
1132                     # recommended in the Perl 5 documentation for Encode
1133                     # worked.  According to this we can either use
1134                     #    $octets = encode_utf8($string)  or equivalently
1135                     #    $octets = encode("utf8",$string)
1136                     # and then calculate the checksum.  So:
1137                     my $octets = Encode::encode( "utf8", $sink_buffer );
1138                     my $digest = md5_hex($octets);
1139                     if ( !$saw_md5{$digest} ) {
1140                         $saw_md5{$digest} = $iter;
1141                     }
1142                     else {
1143
1144                         # Deja vu, stop iterating
1145                         $stop_now = 1;
1146                         my $iterm = $iter - 1;
1147                         if ( $saw_md5{$digest} != $iterm ) {
1148
1149                             # Blinking (oscillating) between two stable
1150                             # end states.  This has happened in the past
1151                             # but at present there are no known instances.
1152                             $convergence_log_message = <<EOM;
1153 Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1154 EOM
1155                             $diagnostics_object->write_diagnostics(
1156                                 $convergence_log_message)
1157                               if $diagnostics_object;
1158                         }
1159                         else {
1160                             $convergence_log_message = <<EOM;
1161 Converged.  Output for iteration $iter same as for iter $iterm.
1162 EOM
1163                             $diagnostics_object->write_diagnostics(
1164                                 $convergence_log_message)
1165                               if $diagnostics_object && $iterm > 2;
1166                         }
1167                     }
1168                 } ## end if ($do_convergence_test)
1169
1170                 if ($stop_now) {
1171
1172                     # we are stopping the iterations early;
1173                     # copy the output stream to its final destination
1174                     $sink_object = $sink_object_final;
1175                     while ( my $line = $source_object->get_line() ) {
1176                         $sink_object->write_line($line);
1177                     }
1178                     $source_object->close_input_file();
1179                     last;
1180                 }
1181             } ## end if ( $iter < $max_iterations)
1182         }    # end loop over iterations for one source file
1183
1184         # restore objects which have been temporarily undefined
1185         # for second and higher iterations
1186         $debugger_object = $debugger_object_final;
1187         $logger_object   = $logger_object_final;
1188
1189         $logger_object->write_logfile_entry($convergence_log_message)
1190           if $convergence_log_message;
1191
1192         #---------------------------------------------------------------
1193         # Perform any postfilter operation
1194         #---------------------------------------------------------------
1195         if ($postfilter) {
1196             $sink_object->close_output_file();
1197             $sink_object =
1198               Perl::Tidy::LineSink->new( $output_file, $tee_file,
1199                 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
1200             my $buf = $postfilter->($postfilter_buffer);
1201             $source_object =
1202               Perl::Tidy::LineSource->new( \$buf, $rOpts,
1203                 $rpending_logfile_message );
1204             while ( my $line = $source_object->get_line() ) {
1205                 $sink_object->write_line($line);
1206             }
1207             $source_object->close_input_file();
1208         }
1209
1210         # Save names of the input and output files for syntax check
1211         my $ifname = $input_file;
1212         my $ofname = $output_file;
1213
1214         #---------------------------------------------------------------
1215         # handle the -b option (backup and modify in-place)
1216         #---------------------------------------------------------------
1217         if ($in_place_modify) {
1218             unless ( -f $input_file ) {
1219
1220                 # oh, oh, no real file to backup ..
1221                 # shouldn't happen because of numerous preliminary checks
1222                 Die
1223 "problem with -b backing up input file '$input_file': not a file\n";
1224             }
1225             my $backup_name = $input_file . $backup_extension;
1226             if ( -f $backup_name ) {
1227                 unlink($backup_name)
1228                   or Die
1229 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1230             }
1231
1232             # backup the input file
1233             # we use copy for symlinks, move for regular files
1234             if ( -l $input_file ) {
1235                 File::Copy::copy( $input_file, $backup_name )
1236                   or Die "File::Copy failed trying to backup source: $!";
1237             }
1238             else {
1239                 rename( $input_file, $backup_name )
1240                   or Die
1241 "problem renaming $input_file to $backup_name for -b option: $!\n";
1242             }
1243             $ifname = $backup_name;
1244
1245             # copy the output to the original input file
1246             # NOTE: it would be nice to just close $output_file and use
1247             # File::Copy::copy here, but in this case $output_file is the
1248             # handle of an open nameless temporary file so we would lose
1249             # everything if we closed it.
1250             seek( $output_file, 0, 0 )
1251               or Die "unable to rewind a temporary file for -b option: $!\n";
1252             my $fout = IO::File->new("> $input_file")
1253               or Die
1254 "problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1255             if ($binmode) {
1256                 if (   $rOpts->{'character-encoding'}
1257                     && $rOpts->{'character-encoding'} eq 'utf8' )
1258                 {
1259                     binmode $fout, ":encoding(UTF-8)";
1260                 }
1261                 else { binmode $fout }
1262             }
1263             my $line;
1264             while ( $line = $output_file->getline() ) {
1265                 $fout->print($line);
1266             }
1267             $fout->close();
1268             $output_file = $input_file;
1269             $ofname      = $input_file;
1270         }
1271
1272         #---------------------------------------------------------------
1273         # clean up and report errors
1274         #---------------------------------------------------------------
1275         $sink_object->close_output_file()    if $sink_object;
1276         $debugger_object->close_debug_file() if $debugger_object;
1277
1278         # set output file permissions
1279         if ( $output_file && -f $output_file && !-l $output_file ) {
1280             if ($input_file_permissions) {
1281
1282                 # give output script same permissions as input script, but
1283                 # make it user-writable or else we can't run perltidy again.
1284                 # Thus we retain whatever executable flags were set.
1285                 if ( $rOpts->{'format'} eq 'tidy' ) {
1286                     chmod( $input_file_permissions | oct(600), $output_file );
1287                 }
1288
1289                 # else use default permissions for html and any other format
1290             }
1291         }
1292
1293         #---------------------------------------------------------------
1294         # Do syntax check if requested and possible
1295         #---------------------------------------------------------------
1296         my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1297         if (   $logger_object
1298             && $rOpts->{'check-syntax'}
1299             && $ifname
1300             && $ofname )
1301         {
1302             $infile_syntax_ok =
1303               check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1304         }
1305
1306         #---------------------------------------------------------------
1307         # remove the original file for in-place modify as follows:
1308         #   $delete_backup=0 never
1309         #   $delete_backup=1 only if no errors
1310         #   $delete_backup>1 always  : NOT ALLOWED, too risky, see above
1311         #---------------------------------------------------------------
1312         if (   $in_place_modify
1313             && $delete_backup
1314             && -f $ifname
1315             && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1316         {
1317
1318             # As an added safety precaution, do not delete the source file
1319             # if its size has dropped from positive to zero, since this
1320             # could indicate a disaster of some kind, including a hardware
1321             # failure.  Actually, this could happen if you had a file of
1322             # all comments (or pod) and deleted everything with -dac (-dap)
1323             # for some reason.
1324             if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1325                 Warn(
1326 "output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1327                 );
1328             }
1329             else {
1330                 unlink($ifname)
1331                   or Die
1332 "unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1333             }
1334         }
1335
1336         $logger_object->finish( $infile_syntax_ok, $formatter )
1337           if $logger_object;
1338     }    # end of main loop to process all files
1339
1340   NORMAL_EXIT:
1341     return 0;
1342
1343   ERROR_EXIT:
1344     return 1;
1345 }    # end of main program perltidy
1346
1347 sub get_stream_as_named_file {
1348
1349     # Return the name of a file containing a stream of data, creating
1350     # a temporary file if necessary.
1351     # Given:
1352     #  $stream - the name of a file or stream
1353     # Returns:
1354     #  $fname = name of file if possible, or undef
1355     #  $if_tmpfile = true if temp file, undef if not temp file
1356     #
1357     # This routine is needed for passing actual files to Perl for
1358     # a syntax check.
1359     my ($stream) = @_;
1360     my $is_tmpfile;
1361     my $fname;
1362     if ($stream) {
1363         if ( ref($stream) ) {
1364             my ( $fh_stream, $fh_name ) =
1365               Perl::Tidy::streamhandle( $stream, 'r' );
1366             if ($fh_stream) {
1367                 my ( $fout, $tmpnam ) = File::Temp::tempfile();
1368                 if ($fout) {
1369                     $fname      = $tmpnam;
1370                     $is_tmpfile = 1;
1371                     binmode $fout;
1372                     while ( my $line = $fh_stream->getline() ) {
1373                         $fout->print($line);
1374                     }
1375                     $fout->close();
1376                 }
1377                 $fh_stream->close();
1378             }
1379         }
1380         elsif ( $stream ne '-' && -f $stream ) {
1381             $fname = $stream;
1382         }
1383     }
1384     return ( $fname, $is_tmpfile );
1385 }
1386
1387 sub fileglob_to_re {
1388
1389     # modified (corrected) from version in find2perl
1390     my $x = shift;
1391     $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
1392     $x =~ s#\*#.*#g;               # '*' -> '.*'
1393     $x =~ s#\?#.#g;                # '?' -> '.'
1394     return "^$x\\z";               # match whole word
1395 }
1396
1397 sub make_extension {
1398
1399     # Make a file extension, including any leading '.' if necessary
1400     # The '.' may actually be an '_' under VMS
1401     my ( $extension, $default, $dot ) = @_;
1402
1403     # Use the default if none specified
1404     $extension = $default unless ($extension);
1405
1406     # Only extensions with these leading characters get a '.'
1407     # This rule gives the user some freedom
1408     if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1409         $extension = $dot . $extension;
1410     }
1411     return $extension;
1412 }
1413
1414 sub write_logfile_header {
1415     my (
1416         $rOpts,        $logger_object, $config_file,
1417         $rraw_options, $Windows_type,  $readable_options
1418     ) = @_;
1419     $logger_object->write_logfile_entry(
1420 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1421     );
1422     if ($Windows_type) {
1423         $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1424     }
1425     my $options_string = join( ' ', @{$rraw_options} );
1426
1427     if ($config_file) {
1428         $logger_object->write_logfile_entry(
1429             "Found Configuration File >>> $config_file \n");
1430     }
1431     $logger_object->write_logfile_entry(
1432         "Configuration and command line parameters for this run:\n");
1433     $logger_object->write_logfile_entry("$options_string\n");
1434
1435     if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1436         $rOpts->{'logfile'} = 1;    # force logfile to be saved
1437         $logger_object->write_logfile_entry(
1438             "Final parameter set for this run\n");
1439         $logger_object->write_logfile_entry(
1440             "------------------------------------\n");
1441
1442         $logger_object->write_logfile_entry($readable_options);
1443
1444         $logger_object->write_logfile_entry(
1445             "------------------------------------\n");
1446     }
1447     $logger_object->write_logfile_entry(
1448         "To find error messages search for 'WARNING' with your editor\n");
1449     return;
1450 }
1451
1452 sub generate_options {
1453
1454     ######################################################################
1455     # Generate and return references to:
1456     #  @option_string - the list of options to be passed to Getopt::Long
1457     #  @defaults - the list of default options
1458     #  %expansion - a hash showing how all abbreviations are expanded
1459     #  %category - a hash giving the general category of each option
1460     #  %option_range - a hash giving the valid ranges of certain options
1461
1462     # Note: a few options are not documented in the man page and usage
1463     # message. This is because these are experimental or debug options and
1464     # may or may not be retained in future versions.
1465     #
1466     # Here are the undocumented flags as far as I know.  Any of them
1467     # may disappear at any time.  They are mainly for fine-tuning
1468     # and debugging.
1469     #
1470     # fll --> fuzzy-line-length           # a trivial parameter which gets
1471     #                                       turned off for the extrude option
1472     #                                       which is mainly for debugging
1473     # scl --> short-concatenation-item-length   # helps break at '.'
1474     # recombine                           # for debugging line breaks
1475     # valign                              # for debugging vertical alignment
1476     # I   --> DIAGNOSTICS                 # for debugging [**DEACTIVATED**]
1477     ######################################################################
1478
1479     # here is a summary of the Getopt codes:
1480     # <none> does not take an argument
1481     # =s takes a mandatory string
1482     # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1483     # =i takes a mandatory integer
1484     # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1485     # ! does not take an argument and may be negated
1486     #  i.e., -foo and -nofoo are allowed
1487     # a double dash signals the end of the options list
1488     #
1489     #---------------------------------------------------------------
1490     # Define the option string passed to GetOptions.
1491     #---------------------------------------------------------------
1492
1493     my @option_string   = ();
1494     my %expansion       = ();
1495     my %option_category = ();
1496     my %option_range    = ();
1497     my $rexpansion      = \%expansion;
1498
1499     # names of categories in manual
1500     # leading integers will allow sorting
1501     my @category_name = (
1502         '0. I/O control',
1503         '1. Basic formatting options',
1504         '2. Code indentation control',
1505         '3. Whitespace control',
1506         '4. Comment controls',
1507         '5. Linebreak controls',
1508         '6. Controlling list formatting',
1509         '7. Retaining or ignoring existing line breaks',
1510         '8. Blank line control',
1511         '9. Other controls',
1512         '10. HTML options',
1513         '11. pod2html options',
1514         '12. Controlling HTML properties',
1515         '13. Debugging',
1516     );
1517
1518     #  These options are parsed directly by perltidy:
1519     #    help h
1520     #    version v
1521     #  However, they are included in the option set so that they will
1522     #  be seen in the options dump.
1523
1524     # These long option names have no abbreviations or are treated specially
1525     @option_string = qw(
1526       html!
1527       noprofile
1528       no-profile
1529       npro
1530       recombine!
1531       valign!
1532       notidy
1533     );
1534
1535     my $category = 13;    # Debugging
1536     foreach (@option_string) {
1537         my $opt = $_;     # must avoid changing the actual flag
1538         $opt =~ s/!$//;
1539         $option_category{$opt} = $category_name[$category];
1540     }
1541
1542     $category = 11;                                       # HTML
1543     $option_category{html} = $category_name[$category];
1544
1545     # routine to install and check options
1546     my $add_option = sub {
1547         my ( $long_name, $short_name, $flag ) = @_;
1548         push @option_string, $long_name . $flag;
1549         $option_category{$long_name} = $category_name[$category];
1550         if ($short_name) {
1551             if ( $expansion{$short_name} ) {
1552                 my $existing_name = $expansion{$short_name}[0];
1553                 Die
1554 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1555             }
1556             $expansion{$short_name} = [$long_name];
1557             if ( $flag eq '!' ) {
1558                 my $nshort_name = 'n' . $short_name;
1559                 my $nolong_name = 'no' . $long_name;
1560                 if ( $expansion{$nshort_name} ) {
1561                     my $existing_name = $expansion{$nshort_name}[0];
1562                     Die
1563 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1564                 }
1565                 $expansion{$nshort_name} = [$nolong_name];
1566             }
1567         }
1568     };
1569
1570     # Install long option names which have a simple abbreviation.
1571     # Options with code '!' get standard negation ('no' for long names,
1572     # 'n' for abbreviations).  Categories follow the manual.
1573
1574     ###########################
1575     $category = 0;    # I/O_Control
1576     ###########################
1577     $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1578     $add_option->( 'backup-file-extension',      'bext',  '=s' );
1579     $add_option->( 'force-read-binary',          'f',     '!' );
1580     $add_option->( 'format',                     'fmt',   '=s' );
1581     $add_option->( 'iterations',                 'it',    '=i' );
1582     $add_option->( 'logfile',                    'log',   '!' );
1583     $add_option->( 'logfile-gap',                'g',     ':i' );
1584     $add_option->( 'outfile',                    'o',     '=s' );
1585     $add_option->( 'output-file-extension',      'oext',  '=s' );
1586     $add_option->( 'output-path',                'opath', '=s' );
1587     $add_option->( 'profile',                    'pro',   '=s' );
1588     $add_option->( 'quiet',                      'q',     '!' );
1589     $add_option->( 'standard-error-output',      'se',    '!' );
1590     $add_option->( 'standard-output',            'st',    '!' );
1591     $add_option->( 'warning-output',             'w',     '!' );
1592     $add_option->( 'character-encoding',         'enc',   '=s' );
1593
1594     # options which are both toggle switches and values moved here
1595     # to hide from tidyview (which does not show category 0 flags):
1596     # -ole moved here from category 1
1597     # -sil moved here from category 2
1598     $add_option->( 'output-line-ending',         'ole', '=s' );
1599     $add_option->( 'starting-indentation-level', 'sil', '=i' );
1600
1601     ########################################
1602     $category = 1;    # Basic formatting options
1603     ########################################
1604     $add_option->( 'check-syntax',                 'syn',  '!' );
1605     $add_option->( 'entab-leading-whitespace',     'et',   '=i' );
1606     $add_option->( 'indent-columns',               'i',    '=i' );
1607     $add_option->( 'maximum-line-length',          'l',    '=i' );
1608     $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
1609     $add_option->( 'whitespace-cycle',             'wc',   '=i' );
1610     $add_option->( 'perl-syntax-check-flags',      'pscf', '=s' );
1611     $add_option->( 'preserve-line-endings',        'ple',  '!' );
1612     $add_option->( 'tabs',                         't',    '!' );
1613     $add_option->( 'default-tabsize',              'dt',   '=i' );
1614     $add_option->( 'extended-syntax',              'xs',   '!' );
1615
1616     ########################################
1617     $category = 2;    # Code indentation control
1618     ########################################
1619     $add_option->( 'continuation-indentation',           'ci',   '=i' );
1620     $add_option->( 'line-up-parentheses',                'lp',   '!' );
1621     $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1622     $add_option->( 'outdent-keywords',                   'okw',  '!' );
1623     $add_option->( 'outdent-labels',                     'ola',  '!' );
1624     $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1625     $add_option->( 'indent-closing-brace',               'icb',  '!' );
1626     $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1627     $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1628     $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1629     $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1630     $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1631     $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1632
1633     ########################################
1634     $category = 3;    # Whitespace control
1635     ########################################
1636     $add_option->( 'add-semicolons',                            'asc',   '!' );
1637     $add_option->( 'add-whitespace',                            'aws',   '!' );
1638     $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1639     $add_option->( 'brace-tightness',                           'bt',    '=i' );
1640     $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1641     $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1642     $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1643     $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1644     $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1645     $add_option->( 'paren-tightness',                           'pt',    '=i' );
1646     $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1647     $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1648     $add_option->( 'space-function-paren',                      'sfp',   '!' );
1649     $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1650     $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1651     $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1652     $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1653     $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1654     $add_option->( 'tight-secret-operators',                    'tso',   '!' );
1655     $add_option->( 'trim-qw',                                   'tqw',   '!' );
1656     $add_option->( 'trim-pod',                                  'trp',   '!' );
1657     $add_option->( 'want-left-space',                           'wls',   '=s' );
1658     $add_option->( 'want-right-space',                          'wrs',   '=s' );
1659
1660     ########################################
1661     $category = 4;    # Comment controls
1662     ########################################
1663     $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1664     $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1665     $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1666     $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1667     $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1668     $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1669     $add_option->( 'closing-side-comments',             'csc',  '!' );
1670     $add_option->( 'closing-side-comments-balanced',    'cscb', '!' );
1671     $add_option->( 'format-skipping',                   'fs',   '!' );
1672     $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1673     $add_option->( 'format-skipping-end',               'fse',  '=s' );
1674     $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1675     $add_option->( 'indent-block-comments',             'ibc',  '!' );
1676     $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1677     $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
1678     $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1679     $add_option->( 'outdent-long-comments',             'olc',  '!' );
1680     $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1681     $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1682     $add_option->( 'static-block-comments',             'sbc',  '!' );
1683     $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1684     $add_option->( 'static-side-comments',              'ssc',  '!' );
1685     $add_option->( 'ignore-side-comment-lengths',       'iscl', '!' );
1686
1687     ########################################
1688     $category = 5;    # Linebreak controls
1689     ########################################
1690     $add_option->( 'add-newlines',                            'anl',   '!' );
1691     $add_option->( 'block-brace-vertical-tightness',          'bbvt',  '=i' );
1692     $add_option->( 'block-brace-vertical-tightness-list',     'bbvtl', '=s' );
1693     $add_option->( 'brace-vertical-tightness',                'bvt',   '=i' );
1694     $add_option->( 'brace-vertical-tightness-closing',        'bvtc',  '=i' );
1695     $add_option->( 'cuddled-else',                            'ce',    '!' );
1696     $add_option->( 'cuddled-blocks',                          'cb',    '!' );
1697     $add_option->( 'cuddled-block-list',                      'cbl',   '=s' );
1698     $add_option->( 'cuddled-break-option',                    'cbo',   '=i' );
1699     $add_option->( 'delete-old-newlines',                     'dnl',   '!' );
1700     $add_option->( 'opening-brace-always-on-right',           'bar',   '!' );
1701     $add_option->( 'opening-brace-on-new-line',               'bl',    '!' );
1702     $add_option->( 'opening-hash-brace-right',                'ohbr',  '!' );
1703     $add_option->( 'opening-paren-right',                     'opr',   '!' );
1704     $add_option->( 'opening-square-bracket-right',            'osbr',  '!' );
1705     $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl',  '!' );
1706     $add_option->( 'opening-sub-brace-on-new-line',           'sbl',   '!' );
1707     $add_option->( 'paren-vertical-tightness',                'pvt',   '=i' );
1708     $add_option->( 'paren-vertical-tightness-closing',        'pvtc',  '=i' );
1709     $add_option->( 'weld-nested-containers',                  'wn',    '!' );
1710     $add_option->( 'space-backslash-quote',                   'sbq',   '=i' );
1711     $add_option->( 'stack-closing-block-brace',               'scbb',  '!' );
1712     $add_option->( 'stack-closing-hash-brace',                'schb',  '!' );
1713     $add_option->( 'stack-closing-paren',                     'scp',   '!' );
1714     $add_option->( 'stack-closing-square-bracket',            'scsb',  '!' );
1715     $add_option->( 'stack-opening-block-brace',               'sobb',  '!' );
1716     $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
1717     $add_option->( 'stack-opening-paren',                     'sop',   '!' );
1718     $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
1719     $add_option->( 'vertical-tightness',                      'vt',    '=i' );
1720     $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
1721     $add_option->( 'want-break-after',                        'wba',   '=s' );
1722     $add_option->( 'want-break-before',                       'wbb',   '=s' );
1723     $add_option->( 'break-after-all-operators',               'baao',  '!' );
1724     $add_option->( 'break-before-all-operators',              'bbao',  '!' );
1725     $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
1726
1727     ########################################
1728     $category = 6;    # Controlling list formatting
1729     ########################################
1730     $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1731     $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1732     $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1733
1734     ########################################
1735     $category = 7;    # Retaining or ignoring existing line breaks
1736     ########################################
1737     $add_option->( 'break-at-old-keyword-breakpoints',   'bok', '!' );
1738     $add_option->( 'break-at-old-logical-breakpoints',   'bol', '!' );
1739     $add_option->( 'break-at-old-ternary-breakpoints',   'bot', '!' );
1740     $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1741     $add_option->( 'ignore-old-breakpoints',             'iob', '!' );
1742
1743     ########################################
1744     $category = 8;    # Blank line control
1745     ########################################
1746     $add_option->( 'blanks-before-blocks',            'bbb',  '!' );
1747     $add_option->( 'blanks-before-comments',          'bbc',  '!' );
1748     $add_option->( 'blank-lines-before-subs',         'blbs', '=i' );
1749     $add_option->( 'blank-lines-before-packages',     'blbp', '=i' );
1750     $add_option->( 'long-block-line-count',           'lbl',  '=i' );
1751     $add_option->( 'maximum-consecutive-blank-lines', 'mbl',  '=i' );
1752     $add_option->( 'keep-old-blank-lines',            'kbl',  '=i' );
1753
1754     $add_option->( 'blank-lines-after-opening-block',       'blao',  '=i' );
1755     $add_option->( 'blank-lines-before-closing-block',      'blbc',  '=i' );
1756     $add_option->( 'blank-lines-after-opening-block-list',  'blaol', '=s' );
1757     $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' );
1758
1759     ########################################
1760     $category = 9;    # Other controls
1761     ########################################
1762     $add_option->( 'delete-block-comments',        'dbc',  '!' );
1763     $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1764     $add_option->( 'delete-pod',                   'dp',   '!' );
1765     $add_option->( 'delete-side-comments',         'dsc',  '!' );
1766     $add_option->( 'tee-block-comments',           'tbc',  '!' );
1767     $add_option->( 'tee-pod',                      'tp',   '!' );
1768     $add_option->( 'tee-side-comments',            'tsc',  '!' );
1769     $add_option->( 'look-for-autoloader',          'lal',  '!' );
1770     $add_option->( 'look-for-hash-bang',           'x',    '!' );
1771     $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1772     $add_option->( 'pass-version-line',            'pvl',  '!' );
1773
1774     ########################################
1775     $category = 13;    # Debugging
1776     ########################################
1777 ##  $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1778     $add_option->( 'DEBUG',                           'D',    '!' );
1779     $add_option->( 'dump-cuddled-block-list',         'dcbl', '!' );
1780     $add_option->( 'dump-defaults',                   'ddf',  '!' );
1781     $add_option->( 'dump-long-names',                 'dln',  '!' );
1782     $add_option->( 'dump-options',                    'dop',  '!' );
1783     $add_option->( 'dump-profile',                    'dpro', '!' );
1784     $add_option->( 'dump-short-names',                'dsn',  '!' );
1785     $add_option->( 'dump-token-types',                'dtt',  '!' );
1786     $add_option->( 'dump-want-left-space',            'dwls', '!' );
1787     $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1788     $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1789     $add_option->( 'help',                            'h',    '' );
1790     $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1791     $add_option->( 'show-options',                    'opt',  '!' );
1792     $add_option->( 'version',                         'v',    '' );
1793     $add_option->( 'memoize',                         'mem',  '!' );
1794     $add_option->( 'file-size-order',                 'fso',  '!' );
1795
1796     #---------------------------------------------------------------------
1797
1798     # The Perl::Tidy::HtmlWriter will add its own options to the string
1799     Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1800
1801     ########################################
1802     # Set categories 10, 11, 12
1803     ########################################
1804     # Based on their known order
1805     $category = 12;    # HTML properties
1806     foreach my $opt (@option_string) {
1807         my $long_name = $opt;
1808         $long_name =~ s/(!|=.*|:.*)$//;
1809         unless ( defined( $option_category{$long_name} ) ) {
1810             if ( $long_name =~ /^html-linked/ ) {
1811                 $category = 10;    # HTML options
1812             }
1813             elsif ( $long_name =~ /^pod2html/ ) {
1814                 $category = 11;    # Pod2html
1815             }
1816             $option_category{$long_name} = $category_name[$category];
1817         }
1818     }
1819
1820     #---------------------------------------------------------------
1821     # Assign valid ranges to certain options
1822     #---------------------------------------------------------------
1823     # In the future, these may be used to make preliminary checks
1824     # hash keys are long names
1825     # If key or value is undefined:
1826     #   strings may have any value
1827     #   integer ranges are >=0
1828     # If value is defined:
1829     #   value is [qw(any valid words)] for strings
1830     #   value is [min, max] for integers
1831     #   if min is undefined, there is no lower limit
1832     #   if max is undefined, there is no upper limit
1833     # Parameters not listed here have defaults
1834     %option_range = (
1835         'format'             => [ 'tidy', 'html', 'user' ],
1836         'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
1837         'character-encoding' => [ 'none', 'utf8' ],
1838
1839         'space-backslash-quote' => [ 0, 2 ],
1840
1841         'block-brace-tightness'    => [ 0, 2 ],
1842         'brace-tightness'          => [ 0, 2 ],
1843         'paren-tightness'          => [ 0, 2 ],
1844         'square-bracket-tightness' => [ 0, 2 ],
1845
1846         'block-brace-vertical-tightness'            => [ 0, 2 ],
1847         'brace-vertical-tightness'                  => [ 0, 2 ],
1848         'brace-vertical-tightness-closing'          => [ 0, 2 ],
1849         'paren-vertical-tightness'                  => [ 0, 2 ],
1850         'paren-vertical-tightness-closing'          => [ 0, 2 ],
1851         'square-bracket-vertical-tightness'         => [ 0, 2 ],
1852         'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1853         'vertical-tightness'                        => [ 0, 2 ],
1854         'vertical-tightness-closing'                => [ 0, 2 ],
1855
1856         'closing-brace-indentation'          => [ 0, 3 ],
1857         'closing-paren-indentation'          => [ 0, 3 ],
1858         'closing-square-bracket-indentation' => [ 0, 3 ],
1859         'closing-token-indentation'          => [ 0, 3 ],
1860
1861         'closing-side-comment-else-flag' => [ 0, 2 ],
1862         'comma-arrow-breakpoints'        => [ 0, 5 ],
1863     );
1864
1865     # Note: we could actually allow negative ci if someone really wants it:
1866     # $option_range{'continuation-indentation'} = [ undef, undef ];
1867
1868     #---------------------------------------------------------------
1869     # Assign default values to the above options here, except
1870     # for 'outfile' and 'help'.
1871     # These settings should approximate the perlstyle(1) suggestions.
1872     #---------------------------------------------------------------
1873     my @defaults = qw(
1874       add-newlines
1875       add-semicolons
1876       add-whitespace
1877       blanks-before-blocks
1878       blanks-before-comments
1879       blank-lines-before-subs=1
1880       blank-lines-before-packages=1
1881       block-brace-tightness=0
1882       block-brace-vertical-tightness=0
1883       brace-tightness=1
1884       brace-vertical-tightness-closing=0
1885       brace-vertical-tightness=0
1886       break-at-old-logical-breakpoints
1887       break-at-old-ternary-breakpoints
1888       break-at-old-attribute-breakpoints
1889       break-at-old-keyword-breakpoints
1890       comma-arrow-breakpoints=5
1891       nocheck-syntax
1892       closing-side-comment-interval=6
1893       closing-side-comment-maximum-text=20
1894       closing-side-comment-else-flag=0
1895       closing-side-comments-balanced
1896       closing-paren-indentation=0
1897       closing-brace-indentation=0
1898       closing-square-bracket-indentation=0
1899       continuation-indentation=2
1900       cuddled-break-option=1
1901       delete-old-newlines
1902       delete-semicolons
1903       extended-syntax
1904       fuzzy-line-length
1905       hanging-side-comments
1906       indent-block-comments
1907       indent-columns=4
1908       iterations=1
1909       keep-old-blank-lines=1
1910       long-block-line-count=8
1911       look-for-autoloader
1912       look-for-selfloader
1913       maximum-consecutive-blank-lines=1
1914       maximum-fields-per-table=0
1915       maximum-line-length=80
1916       memoize
1917       minimum-space-to-comment=4
1918       nobrace-left-and-indent
1919       nocuddled-else
1920       nocuddled-blocks
1921       nodelete-old-whitespace
1922       nohtml
1923       nologfile
1924       noquiet
1925       noshow-options
1926       nostatic-side-comments
1927       notabs
1928       nowarning-output
1929       character-encoding=none
1930       outdent-labels
1931       outdent-long-quotes
1932       outdent-long-comments
1933       paren-tightness=1
1934       paren-vertical-tightness-closing=0
1935       paren-vertical-tightness=0
1936       pass-version-line
1937       noweld-nested-containers
1938       recombine
1939       valign
1940       short-concatenation-item-length=8
1941       space-for-semicolon
1942       space-backslash-quote=1
1943       square-bracket-tightness=1
1944       square-bracket-vertical-tightness-closing=0
1945       square-bracket-vertical-tightness=0
1946       static-block-comments
1947       trim-qw
1948       format=tidy
1949       backup-file-extension=bak
1950       format-skipping
1951       default-tabsize=8
1952
1953       pod2html
1954       html-table-of-contents
1955       html-entities
1956     );
1957
1958     push @defaults, "perl-syntax-check-flags=-c -T";
1959
1960     #---------------------------------------------------------------
1961     # Define abbreviations which will be expanded into the above primitives.
1962     # These may be defined recursively.
1963     #---------------------------------------------------------------
1964     %expansion = (
1965         %expansion,
1966         'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
1967         'fnl'               => [qw(freeze-newlines)],
1968         'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1969         'fws'               => [qw(freeze-whitespace)],
1970         'freeze-blank-lines' =>
1971           [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1972         'fbl'                => [qw(freeze-blank-lines)],
1973         'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1974         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1975         'nooutdent-long-lines' =>
1976           [qw(nooutdent-long-quotes nooutdent-long-comments)],
1977         'noll' => [qw(nooutdent-long-lines)],
1978         'io'   => [qw(indent-only)],
1979         'delete-all-comments' =>
1980           [qw(delete-block-comments delete-side-comments delete-pod)],
1981         'nodelete-all-comments' =>
1982           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1983         'dac'  => [qw(delete-all-comments)],
1984         'ndac' => [qw(nodelete-all-comments)],
1985         'gnu'  => [qw(gnu-style)],
1986         'pbp'  => [qw(perl-best-practices)],
1987         'tee-all-comments' =>
1988           [qw(tee-block-comments tee-side-comments tee-pod)],
1989         'notee-all-comments' =>
1990           [qw(notee-block-comments notee-side-comments notee-pod)],
1991         'tac'   => [qw(tee-all-comments)],
1992         'ntac'  => [qw(notee-all-comments)],
1993         'html'  => [qw(format=html)],
1994         'nhtml' => [qw(format=tidy)],
1995         'tidy'  => [qw(format=tidy)],
1996
1997         'utf8' => [qw(character-encoding=utf8)],
1998         'UTF8' => [qw(character-encoding=utf8)],
1999
2000         'swallow-optional-blank-lines'   => [qw(kbl=0)],
2001         'noswallow-optional-blank-lines' => [qw(kbl=1)],
2002         'sob'                            => [qw(kbl=0)],
2003         'nsob'                           => [qw(kbl=1)],
2004
2005         'break-after-comma-arrows'   => [qw(cab=0)],
2006         'nobreak-after-comma-arrows' => [qw(cab=1)],
2007         'baa'                        => [qw(cab=0)],
2008         'nbaa'                       => [qw(cab=1)],
2009
2010         'blanks-before-subs'   => [qw(blbs=1 blbp=1)],
2011         'bbs'                  => [qw(blbs=1 blbp=1)],
2012         'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
2013         'nbbs'                 => [qw(blbs=0 blbp=0)],
2014
2015         'break-at-old-trinary-breakpoints' => [qw(bot)],
2016
2017         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
2018         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
2019         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
2020         'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
2021         'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
2022
2023         'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
2024         'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
2025         'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
2026         'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
2027         'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
2028
2029         'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
2030         'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
2031         'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
2032
2033         'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
2034         'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
2035         'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
2036
2037         'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
2038         'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
2039         'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
2040
2041         'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
2042         'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
2043         'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
2044
2045         'otr'                   => [qw(opr ohbr osbr)],
2046         'opening-token-right'   => [qw(opr ohbr osbr)],
2047         'notr'                  => [qw(nopr nohbr nosbr)],
2048         'noopening-token-right' => [qw(nopr nohbr nosbr)],
2049
2050         'sot'                    => [qw(sop sohb sosb)],
2051         'nsot'                   => [qw(nsop nsohb nsosb)],
2052         'stack-opening-tokens'   => [qw(sop sohb sosb)],
2053         'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
2054
2055         'sct'                    => [qw(scp schb scsb)],
2056         'stack-closing-tokens'   => => [qw(scp schb scsb)],
2057         'nsct'                   => [qw(nscp nschb nscsb)],
2058         'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
2059
2060         'sac'                    => [qw(sot sct)],
2061         'nsac'                   => [qw(nsot nsct)],
2062         'stack-all-containers'   => [qw(sot sct)],
2063         'nostack-all-containers' => [qw(nsot nsct)],
2064
2065         'act=0'                      => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2066         'act=1'                      => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2067         'act=2'                      => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2068         'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
2069         'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
2070         'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2071
2072         'stack-opening-block-brace'   => [qw(bbvt=2 bbvtl=*)],
2073         'sobb'                        => [qw(bbvt=2 bbvtl=*)],
2074         'nostack-opening-block-brace' => [qw(bbvt=0)],
2075         'nsobb'                       => [qw(bbvt=0)],
2076
2077         'converge'   => [qw(it=4)],
2078         'noconverge' => [qw(it=1)],
2079         'conv'       => [qw(it=4)],
2080         'nconv'      => [qw(it=1)],
2081
2082         # 'mangle' originally deleted pod and comments, but to keep it
2083         # reversible, it no longer does.  But if you really want to
2084         # delete them, just use:
2085         #   -mangle -dac
2086
2087         # An interesting use for 'mangle' is to do this:
2088         #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2089         # which will form as many one-line blocks as possible
2090
2091         'mangle' => [
2092             qw(
2093               check-syntax
2094               keep-old-blank-lines=0
2095               delete-old-newlines
2096               delete-old-whitespace
2097               delete-semicolons
2098               indent-columns=0
2099               maximum-consecutive-blank-lines=0
2100               maximum-line-length=100000
2101               noadd-newlines
2102               noadd-semicolons
2103               noadd-whitespace
2104               noblanks-before-blocks
2105               blank-lines-before-subs=0
2106               blank-lines-before-packages=0
2107               notabs
2108               )
2109         ],
2110
2111         # 'extrude' originally deleted pod and comments, but to keep it
2112         # reversible, it no longer does.  But if you really want to
2113         # delete them, just use
2114         #   extrude -dac
2115         #
2116         # An interesting use for 'extrude' is to do this:
2117         #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2118         # which will break up all one-line blocks.
2119         #
2120         # Removed 'check-syntax' option, which is unsafe because it may execute
2121         # code in BEGIN blocks.  Example 'Moose/debugger-duck_type.t'.
2122
2123         'extrude' => [
2124             qw(
2125               ci=0
2126               delete-old-newlines
2127               delete-old-whitespace
2128               delete-semicolons
2129               indent-columns=0
2130               maximum-consecutive-blank-lines=0
2131               maximum-line-length=1
2132               noadd-semicolons
2133               noadd-whitespace
2134               noblanks-before-blocks
2135               blank-lines-before-subs=0
2136               blank-lines-before-packages=0
2137               nofuzzy-line-length
2138               notabs
2139               norecombine
2140               )
2141         ],
2142
2143         # this style tries to follow the GNU Coding Standards (which do
2144         # not really apply to perl but which are followed by some perl
2145         # programmers).
2146         'gnu-style' => [
2147             qw(
2148               lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2149               )
2150         ],
2151
2152         # Style suggested in Damian Conway's Perl Best Practices
2153         'perl-best-practices' => [
2154             qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2155 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2156         ],
2157
2158         # Additional styles can be added here
2159     );
2160
2161     Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2162
2163     # Uncomment next line to dump all expansions for debugging:
2164     # dump_short_names(\%expansion);
2165     return (
2166         \@option_string,   \@defaults, \%expansion,
2167         \%option_category, \%option_range
2168     );
2169
2170 }    # end of generate_options
2171
2172 # Memoize process_command_line. Given same @ARGV passed in, return same
2173 # values and same @ARGV back.
2174 # This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
2175 # up masontidy (https://metacpan.org/module/masontidy)
2176
2177 my %process_command_line_cache;
2178
2179 sub process_command_line {
2180
2181     my (
2182         $perltidyrc_stream,  $is_Windows, $Windows_type,
2183         $rpending_complaint, $dump_options_type
2184     ) = @_;
2185
2186     my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
2187     if ($use_cache) {
2188         my $cache_key = join( chr(28), @ARGV );
2189         if ( my $result = $process_command_line_cache{$cache_key} ) {
2190             my ( $argv, @retvals ) = @{$result};
2191             @ARGV = @{$argv};
2192             return @retvals;
2193         }
2194         else {
2195             my @retvals = _process_command_line(@_);
2196             $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
2197               if $retvals[0]->{'memoize'};
2198             return @retvals;
2199         }
2200     }
2201     else {
2202         return _process_command_line(@_);
2203     }
2204 }
2205
2206 # (note the underscore here)
2207 sub _process_command_line {
2208
2209     my (
2210         $perltidyrc_stream,  $is_Windows, $Windows_type,
2211         $rpending_complaint, $dump_options_type
2212     ) = @_;
2213
2214     use Getopt::Long;
2215
2216     # Save any current Getopt::Long configuration
2217     # and set to Getopt::Long defaults.  Use eval to avoid
2218     # breaking old versions of Perl without these routines.
2219     # Previous configuration is reset at the exit of this routine.
2220     my $glc;
2221     eval { $glc = Getopt::Long::Configure() };
2222     unless ($@) {
2223         eval { Getopt::Long::ConfigDefaults() };
2224     }
2225     else { $glc = undef }
2226
2227     my (
2228         $roption_string,   $rdefaults, $rexpansion,
2229         $roption_category, $roption_range
2230     ) = generate_options();
2231
2232     #---------------------------------------------------------------
2233     # set the defaults by passing the above list through GetOptions
2234     #---------------------------------------------------------------
2235     my %Opts = ();
2236     {
2237         local @ARGV;
2238
2239         # do not load the defaults if we are just dumping perltidyrc
2240         unless ( $dump_options_type eq 'perltidyrc' ) {
2241             for my $i ( @{$rdefaults} ) { push @ARGV, "--" . $i }
2242         }
2243         if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
2244             Die
2245 "Programming Bug reported by 'GetOptions': error in setting default options";
2246         }
2247     }
2248
2249     my $word;
2250     my @raw_options        = ();
2251     my $config_file        = "";
2252     my $saw_ignore_profile = 0;
2253     my $saw_dump_profile   = 0;
2254
2255     #---------------------------------------------------------------
2256     # Take a first look at the command-line parameters.  Do as many
2257     # immediate dumps as possible, which can avoid confusion if the
2258     # perltidyrc file has an error.
2259     #---------------------------------------------------------------
2260     foreach my $i (@ARGV) {
2261
2262         $i =~ s/^--/-/;
2263         if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2264             $saw_ignore_profile = 1;
2265         }
2266
2267         # note: this must come before -pro and -profile, below:
2268         elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2269             $saw_dump_profile = 1;
2270         }
2271         elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2272             if ($config_file) {
2273                 Warn
2274 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2275             }
2276             $config_file = $2;
2277
2278             # resolve <dir>/.../<file>, meaning look upwards from directory
2279             if ( defined($config_file) ) {
2280                 if ( my ( $start_dir, $search_file ) =
2281                     ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2282                 {
2283                     $start_dir = '.' if !$start_dir;
2284                     $start_dir = Cwd::realpath($start_dir);
2285                     if ( my $found_file =
2286                         find_file_upwards( $start_dir, $search_file ) )
2287                     {
2288                         $config_file = $found_file;
2289                     }
2290                 }
2291             }
2292             unless ( -e $config_file ) {
2293                 Warn "cannot find file given with -pro=$config_file: $!\n";
2294                 $config_file = "";
2295             }
2296         }
2297         elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2298             Die "usage: -pro=filename or --profile=filename, no spaces\n";
2299         }
2300         elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2301             usage();
2302             Exit 0;
2303         }
2304         elsif ( $i =~ /^-(version|v)$/ ) {
2305             show_version();
2306             Exit 0;
2307         }
2308         elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2309             dump_defaults( @{$rdefaults} );
2310             Exit 0;
2311         }
2312         elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2313             dump_long_names( @{$roption_string} );
2314             Exit 0;
2315         }
2316         elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2317             dump_short_names($rexpansion);
2318             Exit 0;
2319         }
2320         elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2321             Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2322             Exit 0;
2323         }
2324     }
2325
2326     if ( $saw_dump_profile && $saw_ignore_profile ) {
2327         Warn "No profile to dump because of -npro\n";
2328         Exit 1;
2329     }
2330
2331     #---------------------------------------------------------------
2332     # read any .perltidyrc configuration file
2333     #---------------------------------------------------------------
2334     unless ($saw_ignore_profile) {
2335
2336         # resolve possible conflict between $perltidyrc_stream passed
2337         # as call parameter to perltidy and -pro=filename on command
2338         # line.
2339         if ($perltidyrc_stream) {
2340             if ($config_file) {
2341                 Warn <<EOM;
2342  Conflict: a perltidyrc configuration file was specified both as this
2343  perltidy call parameter: $perltidyrc_stream
2344  and with this -profile=$config_file.
2345  Using -profile=$config_file.
2346 EOM
2347             }
2348             else {
2349                 $config_file = $perltidyrc_stream;
2350             }
2351         }
2352
2353         # look for a config file if we don't have one yet
2354         my $rconfig_file_chatter;
2355         ${$rconfig_file_chatter} = "";
2356         $config_file =
2357           find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2358             $rpending_complaint )
2359           unless $config_file;
2360
2361         # open any config file
2362         my $fh_config;
2363         if ($config_file) {
2364             ( $fh_config, $config_file ) =
2365               Perl::Tidy::streamhandle( $config_file, 'r' );
2366             unless ($fh_config) {
2367                 ${$rconfig_file_chatter} .=
2368                   "# $config_file exists but cannot be opened\n";
2369             }
2370         }
2371
2372         if ($saw_dump_profile) {
2373             dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2374             Exit 0;
2375         }
2376
2377         if ($fh_config) {
2378
2379             my ( $rconfig_list, $death_message ) =
2380               read_config_file( $fh_config, $config_file, $rexpansion );
2381             Die $death_message if ($death_message);
2382
2383             # process any .perltidyrc parameters right now so we can
2384             # localize errors
2385             if ( @{$rconfig_list} ) {
2386                 local @ARGV = @{$rconfig_list};
2387
2388                 expand_command_abbreviations( $rexpansion, \@raw_options,
2389                     $config_file );
2390
2391                 if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
2392                     Die
2393 "Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
2394                 }
2395
2396                 # Anything left in this local @ARGV is an error and must be
2397                 # invalid bare words from the configuration file.  We cannot
2398                 # check this earlier because bare words may have been valid
2399                 # values for parameters.  We had to wait for GetOptions to have
2400                 # a look at @ARGV.
2401                 if (@ARGV) {
2402                     my $count = @ARGV;
2403                     my $str   = "\'" . pop(@ARGV) . "\'";
2404                     while ( my $param = pop(@ARGV) ) {
2405                         if ( length($str) < 70 ) {
2406                             $str .= ", '$param'";
2407                         }
2408                         else {
2409                             $str .= ", ...";
2410                             last;
2411                         }
2412                     }
2413                     Die <<EOM;
2414 There are $count unrecognized values in the configuration file '$config_file':
2415 $str
2416 Use leading dashes for parameters.  Use -npro to ignore this file.
2417 EOM
2418                 }
2419
2420                 # Undo any options which cause premature exit.  They are not
2421                 # appropriate for a config file, and it could be hard to
2422                 # diagnose the cause of the premature exit.
2423                 foreach (
2424                     qw{
2425                     dump-cuddled-block-list
2426                     dump-defaults
2427                     dump-long-names
2428                     dump-options
2429                     dump-profile
2430                     dump-short-names
2431                     dump-token-types
2432                     dump-want-left-space
2433                     dump-want-right-space
2434                     help
2435                     stylesheet
2436                     version
2437                     }
2438                   )
2439                 {
2440
2441                     if ( defined( $Opts{$_} ) ) {
2442                         delete $Opts{$_};
2443                         Warn "ignoring --$_ in config file: $config_file\n";
2444                     }
2445                 }
2446             }
2447         }
2448     }
2449
2450     #---------------------------------------------------------------
2451     # now process the command line parameters
2452     #---------------------------------------------------------------
2453     expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2454
2455     local $SIG{'__WARN__'} = sub { Warn $_[0] };
2456     if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
2457         Die "Error on command line; for help try 'perltidy -h'\n";
2458     }
2459
2460     # reset Getopt::Long configuration back to its previous value
2461     eval { Getopt::Long::Configure($glc) } if defined $glc;
2462
2463     return ( \%Opts, $config_file, \@raw_options, $roption_string,
2464         $rexpansion, $roption_category, $roption_range );
2465 }    # end of _process_command_line
2466
2467 sub check_options {
2468
2469     my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2470
2471     #---------------------------------------------------------------
2472     # check and handle any interactions among the basic options..
2473     #---------------------------------------------------------------
2474
2475     # Since -vt, -vtc, and -cti are abbreviations, but under
2476     # msdos, an unquoted input parameter like vtc=1 will be
2477     # seen as 2 parameters, vtc and 1, so the abbreviations
2478     # won't be seen.  Therefore, we will catch them here if
2479     # they get through.
2480
2481     if ( defined $rOpts->{'vertical-tightness'} ) {
2482         my $vt = $rOpts->{'vertical-tightness'};
2483         $rOpts->{'paren-vertical-tightness'}          = $vt;
2484         $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2485         $rOpts->{'brace-vertical-tightness'}          = $vt;
2486     }
2487
2488     if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2489         my $vtc = $rOpts->{'vertical-tightness-closing'};
2490         $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
2491         $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2492         $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
2493     }
2494
2495     if ( defined $rOpts->{'closing-token-indentation'} ) {
2496         my $cti = $rOpts->{'closing-token-indentation'};
2497         $rOpts->{'closing-square-bracket-indentation'} = $cti;
2498         $rOpts->{'closing-brace-indentation'}          = $cti;
2499         $rOpts->{'closing-paren-indentation'}          = $cti;
2500     }
2501
2502     # In quiet mode, there is no log file and hence no way to report
2503     # results of syntax check, so don't do it.
2504     if ( $rOpts->{'quiet'} ) {
2505         $rOpts->{'check-syntax'} = 0;
2506     }
2507
2508     # can't check syntax if no output
2509     if ( $rOpts->{'format'} ne 'tidy' ) {
2510         $rOpts->{'check-syntax'} = 0;
2511     }
2512
2513     # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2514     # wide variety of nasty problems on these systems, because they cannot
2515     # reliably run backticks.  Don't even think about changing this!
2516     if (   $rOpts->{'check-syntax'}
2517         && $is_Windows
2518         && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2519     {
2520         $rOpts->{'check-syntax'} = 0;
2521     }
2522
2523     # Added Dec 2017: Deactivating check-syntax for all systems for safety
2524     # because unexpected results can occur when code in BEGIN blocks is
2525     # executed.  This flag was included to help check for perltidy mistakes,
2526     # and may still be useful for debugging.  To activate for testing comment
2527     # out the next three lines.
2528     else {
2529         $rOpts->{'check-syntax'} = 0;
2530     }
2531
2532     # It's really a bad idea to check syntax as root unless you wrote
2533     # the script yourself.  FIXME: not sure if this works with VMS
2534     unless ($is_Windows) {
2535
2536         if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2537             $rOpts->{'check-syntax'} = 0;
2538             ${$rpending_complaint} .=
2539 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2540         }
2541     }
2542
2543     # check iteration count and quietly fix if necessary:
2544     # - iterations option only applies to code beautification mode
2545     # - the convergence check should stop most runs on iteration 2, and
2546     #   virtually all on iteration 3.  But we'll allow up to 6.
2547     if ( $rOpts->{'format'} ne 'tidy' ) {
2548         $rOpts->{'iterations'} = 1;
2549     }
2550     elsif ( defined( $rOpts->{'iterations'} ) ) {
2551         if    ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2552         elsif ( $rOpts->{'iterations'} > 6 )  { $rOpts->{'iterations'} = 6 }
2553     }
2554     else {
2555         $rOpts->{'iterations'} = 1;
2556     }
2557
2558     my $check_blank_count = sub {
2559         my ( $key, $abbrev ) = @_;
2560         if ( $rOpts->{$key} ) {
2561             if ( $rOpts->{$key} < 0 ) {
2562                 $rOpts->{$key} = 0;
2563                 Warn "negative value of $abbrev, setting 0\n";
2564             }
2565             if ( $rOpts->{$key} > 100 ) {
2566                 Warn "unreasonably large value of $abbrev, reducing\n";
2567                 $rOpts->{$key} = 100;
2568             }
2569         }
2570     };
2571
2572     # check for reasonable number of blank lines and fix to avoid problems
2573     $check_blank_count->( 'blank-lines-before-subs',          '-blbs' );
2574     $check_blank_count->( 'blank-lines-before-packages',      '-blbp' );
2575     $check_blank_count->( 'blank-lines-after-block-opening',  '-blao' );
2576     $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' );
2577
2578     # setting a non-negative logfile gap causes logfile to be saved
2579     if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2580         $rOpts->{'logfile'} = 1;
2581     }
2582
2583     # set short-cut flag when only indentation is to be done.
2584     # Note that the user may or may not have already set the
2585     # indent-only flag.
2586     if (   !$rOpts->{'add-whitespace'}
2587         && !$rOpts->{'delete-old-whitespace'}
2588         && !$rOpts->{'add-newlines'}
2589         && !$rOpts->{'delete-old-newlines'} )
2590     {
2591         $rOpts->{'indent-only'} = 1;
2592     }
2593
2594     # -isbc implies -ibc
2595     if ( $rOpts->{'indent-spaced-block-comments'} ) {
2596         $rOpts->{'indent-block-comments'} = 1;
2597     }
2598
2599     # -bli flag implies -bl
2600     if ( $rOpts->{'brace-left-and-indent'} ) {
2601         $rOpts->{'opening-brace-on-new-line'} = 1;
2602     }
2603
2604     if (   $rOpts->{'opening-brace-always-on-right'}
2605         && $rOpts->{'opening-brace-on-new-line'} )
2606     {
2607         Warn <<EOM;
2608  Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2609   'opening-brace-on-new-line' (-bl).  Ignoring -bl.
2610 EOM
2611         $rOpts->{'opening-brace-on-new-line'} = 0;
2612     }
2613
2614     # it simplifies things if -bl is 0 rather than undefined
2615     if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2616         $rOpts->{'opening-brace-on-new-line'} = 0;
2617     }
2618
2619     # -sbl defaults to -bl if not defined
2620     if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2621         $rOpts->{'opening-sub-brace-on-new-line'} =
2622           $rOpts->{'opening-brace-on-new-line'};
2623     }
2624
2625     if ( $rOpts->{'entab-leading-whitespace'} ) {
2626         if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2627             Warn "-et=n must use a positive integer; ignoring -et\n";
2628             $rOpts->{'entab-leading-whitespace'} = undef;
2629         }
2630
2631         # entab leading whitespace has priority over the older 'tabs' option
2632         if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2633     }
2634
2635     # set a default tabsize to be used in guessing the starting indentation
2636     # level if and only if this run does not use tabs and the old code does
2637     # use tabs
2638     if ( $rOpts->{'default-tabsize'} ) {
2639         if ( $rOpts->{'default-tabsize'} < 0 ) {
2640             Warn "negative value of -dt, setting 0\n";
2641             $rOpts->{'default-tabsize'} = 0;
2642         }
2643         if ( $rOpts->{'default-tabsize'} > 20 ) {
2644             Warn "unreasonably large value of -dt, reducing\n";
2645             $rOpts->{'default-tabsize'} = 20;
2646         }
2647     }
2648     else {
2649         $rOpts->{'default-tabsize'} = 8;
2650     }
2651
2652     # Define $tabsize, the number of spaces per tab for use in
2653     # guessing the indentation of source lines with leading tabs.
2654     # Assume same as for this run if tabs are used , otherwise assume
2655     # a default value, typically 8
2656     my $tabsize =
2657         $rOpts->{'entab-leading-whitespace'}
2658       ? $rOpts->{'entab-leading-whitespace'}
2659       : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
2660       :                    $rOpts->{'default-tabsize'};
2661     return $tabsize;
2662 }
2663
2664 sub find_file_upwards {
2665     my ( $search_dir, $search_file ) = @_;
2666
2667     $search_dir =~ s{/+$}{};
2668     $search_file =~ s{^/+}{};
2669
2670     while (1) {
2671         my $try_path = "$search_dir/$search_file";
2672         if ( -f $try_path ) {
2673             return $try_path;
2674         }
2675         elsif ( $search_dir eq '/' ) {
2676             return;
2677         }
2678         else {
2679             $search_dir = dirname($search_dir);
2680         }
2681     }
2682 }
2683
2684 sub expand_command_abbreviations {
2685
2686     # go through @ARGV and expand any abbreviations
2687
2688     my ( $rexpansion, $rraw_options, $config_file ) = @_;
2689
2690     # set a pass limit to prevent an infinite loop;
2691     # 10 should be plenty, but it may be increased to allow deeply
2692     # nested expansions.
2693     my $max_passes = 10;
2694     my @new_argv   = ();
2695
2696     # keep looping until all expansions have been converted into actual
2697     # dash parameters..
2698     foreach my $pass_count ( 0 .. $max_passes ) {
2699         my @new_argv     = ();
2700         my $abbrev_count = 0;
2701
2702         # loop over each item in @ARGV..
2703         foreach my $word (@ARGV) {
2704
2705             # convert any leading 'no-' to just 'no'
2706             if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2707
2708             # if it is a dash flag (instead of a file name)..
2709             if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2710
2711                 my $abr   = $1;
2712                 my $flags = $2;
2713
2714                 # save the raw input for debug output in case of circular refs
2715                 if ( $pass_count == 0 ) {
2716                     push( @{$rraw_options}, $word );
2717                 }
2718
2719                 # recombine abbreviation and flag, if necessary,
2720                 # to allow abbreviations with arguments such as '-vt=1'
2721                 if ( $rexpansion->{ $abr . $flags } ) {
2722                     $abr   = $abr . $flags;
2723                     $flags = "";
2724                 }
2725
2726                 # if we see this dash item in the expansion hash..
2727                 if ( $rexpansion->{$abr} ) {
2728                     $abbrev_count++;
2729
2730                     # stuff all of the words that it expands to into the
2731                     # new arg list for the next pass
2732                     foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2733                         next unless $abbrev;    # for safety; shouldn't happen
2734                         push( @new_argv, '--' . $abbrev . $flags );
2735                     }
2736                 }
2737
2738                 # not in expansion hash, must be actual long name
2739                 else {
2740                     push( @new_argv, $word );
2741                 }
2742             }
2743
2744             # not a dash item, so just save it for the next pass
2745             else {
2746                 push( @new_argv, $word );
2747             }
2748         }    # end of this pass
2749
2750         # update parameter list @ARGV to the new one
2751         @ARGV = @new_argv;
2752         last unless ( $abbrev_count > 0 );
2753
2754         # make sure we are not in an infinite loop
2755         if ( $pass_count == $max_passes ) {
2756             local $" = ')(';
2757             Warn <<EOM;
2758 I'm tired. We seem to be in an infinite loop trying to expand aliases.
2759 Here are the raw options;
2760 (rraw_options)
2761 EOM
2762             my $num = @new_argv;
2763             if ( $num < 50 ) {
2764                 Warn <<EOM;
2765 After $max_passes passes here is ARGV
2766 (@new_argv)
2767 EOM
2768             }
2769             else {
2770                 Warn <<EOM;
2771 After $max_passes passes ARGV has $num entries
2772 EOM
2773             }
2774
2775             if ($config_file) {
2776                 Die <<"DIE";
2777 Please check your configuration file $config_file for circular-references.
2778 To deactivate it, use -npro.
2779 DIE
2780             }
2781             else {
2782                 Die <<'DIE';
2783 Program bug - circular-references in the %expansion hash, probably due to
2784 a recent program change.
2785 DIE
2786             }
2787         }    # end of check for circular references
2788     }    # end of loop over all passes
2789     return;
2790 }
2791
2792 # Debug routine -- this will dump the expansion hash
2793 sub dump_short_names {
2794     my $rexpansion = shift;
2795     print STDOUT <<EOM;
2796 List of short names.  This list shows how all abbreviations are
2797 translated into other abbreviations and, eventually, into long names.
2798 New abbreviations may be defined in a .perltidyrc file.
2799 For a list of all long names, use perltidy --dump-long-names (-dln).
2800 --------------------------------------------------------------------------
2801 EOM
2802     foreach my $abbrev ( sort keys %$rexpansion ) {
2803         my @list = @{ $rexpansion->{$abbrev} };
2804         print STDOUT "$abbrev --> @list\n";
2805     }
2806     return;
2807 }
2808
2809 sub check_vms_filename {
2810
2811     # given a valid filename (the perltidy input file)
2812     # create a modified filename and separator character
2813     # suitable for VMS.
2814     #
2815     # Contributed by Michael Cartmell
2816     #
2817     my $filename = shift;
2818     my ( $base, $path ) = fileparse($filename);
2819
2820     # remove explicit ; version
2821     $base =~ s/;-?\d*$//
2822
2823       # remove explicit . version ie two dots in filename NB ^ escapes a dot
2824       or $base =~ s/(          # begin capture $1
2825                   (?:^|[^^])\. # match a dot not preceded by a caret
2826                   (?:          # followed by nothing
2827                     |          # or
2828                     .*[^^]     # anything ending in a non caret
2829                   )
2830                 )              # end capture $1
2831                 \.-?\d*$       # match . version number
2832               /$1/x;
2833
2834     # normalise filename, if there are no unescaped dots then append one
2835     $base .= '.' unless $base =~ /(?:^|[^^])\./;
2836
2837     # if we don't already have an extension then we just append the extension
2838     my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2839     return ( $path . $base, $separator );
2840 }
2841
2842 sub Win_OS_Type {
2843
2844     # TODO: are these more standard names?
2845     # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2846
2847     # Returns a string that determines what MS OS we are on.
2848     # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2849     # Returns blank string if not an MS system.
2850     # Original code contributed by: Yves Orton
2851     # We need to know this to decide where to look for config files
2852
2853     my $rpending_complaint = shift;
2854     my $os                 = "";
2855     return $os unless $^O =~ /win32|dos/i;    # is it a MS box?
2856
2857     # Systems built from Perl source may not have Win32.pm
2858     # But probably have Win32::GetOSVersion() anyway so the
2859     # following line is not 'required':
2860     # return $os unless eval('require Win32');
2861
2862     # Use the standard API call to determine the version
2863     my ( $undef, $major, $minor, $build, $id );
2864     eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2865
2866     #
2867     #    NAME                   ID   MAJOR  MINOR
2868     #    Windows NT 4           2      4       0
2869     #    Windows 2000           2      5       0
2870     #    Windows XP             2      5       1
2871     #    Windows Server 2003    2      5       2
2872
2873     return "win32s" unless $id;    # If id==0 then its a win32s box.
2874     $os = {                        # Magic numbers from MSDN
2875                                    # documentation of GetOSVersion
2876         1 => {
2877             0  => "95",
2878             10 => "98",
2879             90 => "Me"
2880         },
2881         2 => {
2882             0  => "2000",          # or NT 4, see below
2883             1  => "XP/.Net",
2884             2  => "Win2003",
2885             51 => "NT3.51"
2886         }
2887     }->{$id}->{$minor};
2888
2889     # If $os is undefined, the above code is out of date.  Suggested updates
2890     # are welcome.
2891     unless ( defined $os ) {
2892         $os = "";
2893         ${$rpending_complaint} .= <<EOS;
2894 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2895 We won't be able to look for a system-wide config file.
2896 EOS
2897     }
2898
2899     # Unfortunately the logic used for the various versions isn't so clever..
2900     # so we have to handle an outside case.
2901     return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2902 }
2903
2904 sub is_unix {
2905     return
2906          ( $^O !~ /win32|dos/i )
2907       && ( $^O ne 'VMS' )
2908       && ( $^O ne 'OS2' )
2909       && ( $^O ne 'MacOS' );
2910 }
2911
2912 sub look_for_Windows {
2913
2914     # determine Windows sub-type and location of
2915     # system-wide configuration files
2916     my $rpending_complaint = shift;
2917     my $is_Windows         = ( $^O =~ /win32|dos/i );
2918     my $Windows_type;
2919     $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
2920     return ( $is_Windows, $Windows_type );
2921 }
2922
2923 sub find_config_file {
2924
2925     # look for a .perltidyrc configuration file
2926     # For Windows also look for a file named perltidy.ini
2927     my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2928         $rpending_complaint ) = @_;
2929
2930     ${$rconfig_file_chatter} .= "# Config file search...system reported as:";
2931     if ($is_Windows) {
2932         ${$rconfig_file_chatter} .= "Windows $Windows_type\n";
2933     }
2934     else {
2935         ${$rconfig_file_chatter} .= " $^O\n";
2936     }
2937
2938     # sub to check file existence and record all tests
2939     my $exists_config_file = sub {
2940         my $config_file = shift;
2941         return 0 unless $config_file;
2942         ${$rconfig_file_chatter} .= "# Testing: $config_file\n";
2943         return -f $config_file;
2944     };
2945
2946     # Sub to search upward for config file
2947     my $resolve_config_file = sub {
2948
2949         # resolve <dir>/.../<file>, meaning look upwards from directory
2950         my $config_file = shift;
2951         if ($config_file) {
2952             if ( my ( $start_dir, $search_file ) =
2953                 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2954             {
2955                 ${$rconfig_file_chatter} .=
2956                   "# Searching Upward: $config_file\n";
2957                 $start_dir = '.' if !$start_dir;
2958                 $start_dir = Cwd::realpath($start_dir);
2959                 if ( my $found_file =
2960                     find_file_upwards( $start_dir, $search_file ) )
2961                 {
2962                     $config_file = $found_file;
2963                     ${$rconfig_file_chatter} .= "# Found: $config_file\n";
2964                 }
2965             }
2966         }
2967         return $config_file;
2968     };
2969
2970     my $config_file;
2971
2972     # look in current directory first
2973     $config_file = ".perltidyrc";
2974     return $config_file if $exists_config_file->($config_file);
2975     if ($is_Windows) {
2976         $config_file = "perltidy.ini";
2977         return $config_file if $exists_config_file->($config_file);
2978     }
2979
2980     # Default environment vars.
2981     my @envs = qw(PERLTIDY HOME);
2982
2983     # Check the NT/2k/XP locations, first a local machine def, then a
2984     # network def
2985     push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2986
2987     # Now go through the environment ...
2988     foreach my $var (@envs) {
2989         ${$rconfig_file_chatter} .= "# Examining: \$ENV{$var}";
2990         if ( defined( $ENV{$var} ) ) {
2991             ${$rconfig_file_chatter} .= " = $ENV{$var}\n";
2992
2993             # test ENV{ PERLTIDY } as file:
2994             if ( $var eq 'PERLTIDY' ) {
2995                 $config_file = "$ENV{$var}";
2996                 $config_file = $resolve_config_file->($config_file);
2997                 return $config_file if $exists_config_file->($config_file);
2998             }
2999
3000             # test ENV as directory:
3001             $config_file = catfile( $ENV{$var}, ".perltidyrc" );
3002             $config_file = $resolve_config_file->($config_file);
3003             return $config_file if $exists_config_file->($config_file);
3004
3005             if ($is_Windows) {
3006                 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
3007                 $config_file = $resolve_config_file->($config_file);
3008                 return $config_file if $exists_config_file->($config_file);
3009             }
3010         }
3011         else {
3012             ${$rconfig_file_chatter} .= "\n";
3013         }
3014     }
3015
3016     # then look for a system-wide definition
3017     # where to look varies with OS
3018     if ($is_Windows) {
3019
3020         if ($Windows_type) {
3021             my ( $os, $system, $allusers ) =
3022               Win_Config_Locs( $rpending_complaint, $Windows_type );
3023
3024             # Check All Users directory, if there is one.
3025             # i.e. C:\Documents and Settings\User\perltidy.ini
3026             if ($allusers) {
3027
3028                 $config_file = catfile( $allusers, ".perltidyrc" );
3029                 return $config_file if $exists_config_file->($config_file);
3030
3031                 $config_file = catfile( $allusers, "perltidy.ini" );
3032                 return $config_file if $exists_config_file->($config_file);
3033             }
3034
3035             # Check system directory.
3036             # retain old code in case someone has been able to create
3037             # a file with a leading period.
3038             $config_file = catfile( $system, ".perltidyrc" );
3039             return $config_file if $exists_config_file->($config_file);
3040
3041             $config_file = catfile( $system, "perltidy.ini" );
3042             return $config_file if $exists_config_file->($config_file);
3043         }
3044     }
3045
3046     # Place to add customization code for other systems
3047     elsif ( $^O eq 'OS2' ) {
3048     }
3049     elsif ( $^O eq 'MacOS' ) {
3050     }
3051     elsif ( $^O eq 'VMS' ) {
3052     }
3053
3054     # Assume some kind of Unix
3055     else {
3056
3057         $config_file = "/usr/local/etc/perltidyrc";
3058         return $config_file if $exists_config_file->($config_file);
3059
3060         $config_file = "/etc/perltidyrc";
3061         return $config_file if $exists_config_file->($config_file);
3062     }
3063
3064     # Couldn't find a config file
3065     return;
3066 }
3067
3068 sub Win_Config_Locs {
3069
3070     # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
3071     # or undef if its not a win32 OS.  In list context returns OS, System
3072     # Directory, and All Users Directory.  All Users will be empty on a
3073     # 9x/Me box.  Contributed by: Yves Orton.
3074
3075     # my ( $rpending_complaint, $os ) = @_;
3076     # if ( !$os ) { $os = Win_OS_Type(); }
3077
3078     my $rpending_complaint = shift;
3079     my $os = (@_) ? shift : Win_OS_Type();
3080     return unless $os;
3081
3082     my $system   = "";
3083     my $allusers = "";
3084
3085     if ( $os =~ /9[58]|Me/ ) {
3086         $system = "C:/Windows";
3087     }
3088     elsif ( $os =~ /NT|XP|200?/ ) {
3089         $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
3090         $allusers =
3091           ( $os =~ /NT/ )
3092           ? "C:/WinNT/profiles/All Users/"
3093           : "C:/Documents and Settings/All Users/";
3094     }
3095     else {
3096
3097         # This currently would only happen on a win32s computer.  I don't have
3098         # one to test, so I am unsure how to proceed.  Suggestions welcome!
3099         ${$rpending_complaint} .=
3100 "I dont know a sensible place to look for config files on an $os system.\n";
3101         return;
3102     }
3103     return wantarray ? ( $os, $system, $allusers ) : $os;
3104 }
3105
3106 sub dump_config_file {
3107     my ( $fh, $config_file, $rconfig_file_chatter ) = @_;
3108     print STDOUT "$$rconfig_file_chatter";
3109     if ($fh) {
3110         print STDOUT "# Dump of file: '$config_file'\n";
3111         while ( my $line = $fh->getline() ) { print STDOUT $line }
3112         eval { $fh->close() };
3113     }
3114     else {
3115         print STDOUT "# ...no config file found\n";
3116     }
3117     return;
3118 }
3119
3120 sub read_config_file {
3121
3122     my ( $fh, $config_file, $rexpansion ) = @_;
3123     my @config_list = ();
3124
3125     # file is bad if non-empty $death_message is returned
3126     my $death_message = "";
3127
3128     my $name = undef;
3129     my $line_no;
3130     my $opening_brace_line;
3131     while ( my $line = $fh->getline() ) {
3132         $line_no++;
3133         chomp $line;
3134         ( $line, $death_message ) =
3135           strip_comment( $line, $config_file, $line_no );
3136         last if ($death_message);
3137         next unless $line;
3138         $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
3139         next unless $line;
3140
3141         my $body = $line;
3142
3143         # Look for complete or partial abbreviation definition of the form
3144         #     name { body }   or  name {   or    name { body
3145         # See rules in perltidy's perldoc page
3146         # Section: Other Controls - Creating a new abbreviation
3147         if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
3148             my $oldname = $name;
3149             ( $name, $body ) = ( $2, $3 );
3150
3151             # Cannot start new abbreviation unless old abbreviation is complete
3152             last if ($opening_brace_line);
3153
3154             $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
3155
3156             # handle a new alias definition
3157             if ( ${$rexpansion}{$name} ) {
3158                 local $" = ')(';
3159                 my @names = sort keys %$rexpansion;
3160                 $death_message =
3161                     "Here is a list of all installed aliases\n(@names)\n"
3162                   . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
3163                 last;
3164             }
3165             ${$rexpansion}{$name} = [];
3166         }
3167
3168         # leading opening braces not allowed
3169         elsif ( $line =~ /^{/ ) {
3170             $opening_brace_line = undef;
3171             $death_message =
3172               "Unexpected '{' at line $line_no in config file '$config_file'\n";
3173             last;
3174         }
3175
3176         # Look for abbreviation closing:    body }   or    }
3177         elsif ( $line =~ /^(.*)?\}$/ ) {
3178             $body = $1;
3179             if ($opening_brace_line) {
3180                 $opening_brace_line = undef;
3181             }
3182             else {
3183                 $death_message =
3184 "Unexpected '}' at line $line_no in config file '$config_file'\n";
3185                 last;
3186             }
3187         }
3188
3189         # Now store any parameters
3190         if ($body) {
3191
3192             my ( $rbody_parts, $msg ) = parse_args($body);
3193             if ($msg) {
3194                 $death_message = <<EOM;
3195 Error reading file '$config_file' at line number $line_no.
3196 $msg
3197 Please fix this line or use -npro to avoid reading this file
3198 EOM
3199                 last;
3200             }
3201
3202             if ($name) {
3203
3204                 # remove leading dashes if this is an alias
3205                 foreach ( @{$rbody_parts} ) { s/^\-+//; }
3206                 push @{ ${$rexpansion}{$name} }, @{$rbody_parts};
3207             }
3208             else {
3209                 push( @config_list, @{$rbody_parts} );
3210             }
3211         }
3212     }
3213
3214     if ($opening_brace_line) {
3215         $death_message =
3216 "Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
3217     }
3218     eval { $fh->close() };
3219     return ( \@config_list, $death_message );
3220 }
3221
3222 sub strip_comment {
3223
3224     # Strip any comment from a command line
3225     my ( $instr, $config_file, $line_no ) = @_;
3226     my $msg = "";
3227
3228     # check for full-line comment
3229     if ( $instr =~ /^\s*#/ ) {
3230         return ( "", $msg );
3231     }
3232
3233     # nothing to do if no comments
3234     if ( $instr !~ /#/ ) {
3235         return ( $instr, $msg );
3236     }
3237
3238     # handle case of no quotes
3239     elsif ( $instr !~ /['"]/ ) {
3240
3241         # We now require a space before the # of a side comment
3242         # this allows something like:
3243         #    -sbcp=#
3244         # Otherwise, it would have to be quoted:
3245         #    -sbcp='#'
3246         $instr =~ s/\s+\#.*$//;
3247         return ( $instr, $msg );
3248     }
3249
3250     # handle comments and quotes
3251     my $outstr     = "";
3252     my $quote_char = "";
3253     while (1) {
3254
3255         # looking for ending quote character
3256         if ($quote_char) {
3257             if ( $instr =~ /\G($quote_char)/gc ) {
3258                 $quote_char = "";
3259                 $outstr .= $1;
3260             }
3261             elsif ( $instr =~ /\G(.)/gc ) {
3262                 $outstr .= $1;
3263             }
3264
3265             # error..we reached the end without seeing the ending quote char
3266             else {
3267                 $msg = <<EOM;
3268 Error reading file $config_file at line number $line_no.
3269 Did not see ending quote character <$quote_char> in this text:
3270 $instr
3271 Please fix this line or use -npro to avoid reading this file
3272 EOM
3273                 last;
3274             }
3275         }
3276
3277         # accumulating characters and looking for start of a quoted string
3278         else {
3279             if ( $instr =~ /\G([\"\'])/gc ) {
3280                 $outstr .= $1;
3281                 $quote_char = $1;
3282             }
3283
3284             # Note: not yet enforcing the space-before-hash rule for side
3285             # comments if the parameter is quoted.
3286             elsif ( $instr =~ /\G#/gc ) {
3287                 last;
3288             }
3289             elsif ( $instr =~ /\G(.)/gc ) {
3290                 $outstr .= $1;
3291             }
3292             else {
3293                 last;
3294             }
3295         }
3296     }
3297     return ( $outstr, $msg );
3298 }
3299
3300 sub parse_args {
3301
3302     # Parse a command string containing multiple string with possible
3303     # quotes, into individual commands.  It might look like this, for example:
3304     #
3305     #    -wba=" + - "  -some-thing -wbb='. && ||'
3306     #
3307     # There is no need, at present, to handle escaped quote characters.
3308     # (They are not perltidy tokens, so needn't be in strings).
3309
3310     my ($body)     = @_;
3311     my @body_parts = ();
3312     my $quote_char = "";
3313     my $part       = "";
3314     my $msg        = "";
3315     while (1) {
3316
3317         # looking for ending quote character
3318         if ($quote_char) {
3319             if ( $body =~ /\G($quote_char)/gc ) {
3320                 $quote_char = "";
3321             }
3322             elsif ( $body =~ /\G(.)/gc ) {
3323                 $part .= $1;
3324             }
3325
3326             # error..we reached the end without seeing the ending quote char
3327             else {
3328                 if ( length($part) ) { push @body_parts, $part; }
3329                 $msg = <<EOM;
3330 Did not see ending quote character <$quote_char> in this text:
3331 $body
3332 EOM
3333                 last;
3334             }
3335         }
3336
3337         # accumulating characters and looking for start of a quoted string
3338         else {
3339             if ( $body =~ /\G([\"\'])/gc ) {
3340                 $quote_char = $1;
3341             }
3342             elsif ( $body =~ /\G(\s+)/gc ) {
3343                 if ( length($part) ) { push @body_parts, $part; }
3344                 $part = "";
3345             }
3346             elsif ( $body =~ /\G(.)/gc ) {
3347                 $part .= $1;
3348             }
3349             else {
3350                 if ( length($part) ) { push @body_parts, $part; }
3351                 last;
3352             }
3353         }
3354     }
3355     return ( \@body_parts, $msg );
3356 }
3357
3358 sub dump_long_names {
3359
3360     my @names = @_;
3361     print STDOUT <<EOM;
3362 # Command line long names (passed to GetOptions)
3363 #---------------------------------------------------------------
3364 # here is a summary of the Getopt codes:
3365 # <none> does not take an argument
3366 # =s takes a mandatory string
3367 # :s takes an optional string
3368 # =i takes a mandatory integer
3369 # :i takes an optional integer
3370 # ! does not take an argument and may be negated
3371 #  i.e., -foo and -nofoo are allowed
3372 # a double dash signals the end of the options list
3373 #
3374 #---------------------------------------------------------------
3375 EOM
3376
3377     foreach my $name ( sort @names ) { print STDOUT "$name\n" }
3378     return;
3379 }
3380
3381 sub dump_defaults {
3382     my @defaults = @_;
3383     print STDOUT "Default command line options:\n";
3384     foreach my $line ( sort @defaults ) { print STDOUT "$line\n" }
3385     return;
3386 }
3387
3388 sub readable_options {
3389
3390     # return options for this run as a string which could be
3391     # put in a perltidyrc file
3392     my ( $rOpts, $roption_string ) = @_;
3393     my %Getopt_flags;
3394     my $rGetopt_flags    = \%Getopt_flags;
3395     my $readable_options = "# Final parameter set for this run.\n";
3396     $readable_options .=
3397       "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
3398     foreach my $opt ( @{$roption_string} ) {
3399         my $flag = "";
3400         if ( $opt =~ /(.*)(!|=.*)$/ ) {
3401             $opt  = $1;
3402             $flag = $2;
3403         }
3404         if ( defined( $rOpts->{$opt} ) ) {
3405             $rGetopt_flags->{$opt} = $flag;
3406         }
3407     }
3408     foreach my $key ( sort keys %{$rOpts} ) {
3409         my $flag   = $rGetopt_flags->{$key};
3410         my $value  = $rOpts->{$key};
3411         my $prefix = '--';
3412         my $suffix = "";
3413         if ($flag) {
3414             if ( $flag =~ /^=/ ) {
3415                 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
3416                 $suffix = "=" . $value;
3417             }
3418             elsif ( $flag =~ /^!/ ) {
3419                 $prefix .= "no" unless ($value);
3420             }
3421             else {
3422
3423                 # shouldn't happen
3424                 $readable_options .=
3425                   "# ERROR in dump_options: unrecognized flag $flag for $key\n";
3426             }
3427         }
3428         $readable_options .= $prefix . $key . $suffix . "\n";
3429     }
3430     return $readable_options;
3431 }
3432
3433 sub show_version {
3434     print STDOUT <<"EOM";
3435 This is perltidy, v$VERSION
3436
3437 Copyright 2000-2018, Steve Hancock
3438
3439 Perltidy is free software and may be copied under the terms of the GNU
3440 General Public License, which is included in the distribution files.
3441
3442 Complete documentation for perltidy can be found using 'man perltidy'
3443 or on the internet at http://perltidy.sourceforge.net.
3444 EOM
3445     return;
3446 }
3447
3448 sub usage {
3449
3450     print STDOUT <<EOF;
3451 This is perltidy version $VERSION, a perl script indenter.  Usage:
3452
3453     perltidy [ options ] file1 file2 file3 ...
3454             (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3455     perltidy [ options ] file1 -o outfile
3456     perltidy [ options ] file1 -st >outfile
3457     perltidy [ options ] <infile >outfile
3458
3459 Options have short and long forms. Short forms are shown; see
3460 man pages for long forms.  Note: '=s' indicates a required string,
3461 and '=n' indicates a required integer.
3462
3463 I/O control
3464  -h      show this help
3465  -o=file name of the output file (only if single input file)
3466  -oext=s change output extension from 'tdy' to s
3467  -opath=path  change path to be 'path' for output files
3468  -b      backup original to .bak and modify file in-place
3469  -bext=s change default backup extension from 'bak' to s
3470  -q      deactivate error messages (for running under editor)
3471  -w      include non-critical warning messages in the .ERR error output
3472  -syn    run perl -c to check syntax (default under unix systems)
3473  -log    save .LOG file, which has useful diagnostics
3474  -f      force perltidy to read a binary file
3475  -g      like -log but writes more detailed .LOG file, for debugging scripts
3476  -opt    write the set of options actually used to a .LOG file
3477  -npro   ignore .perltidyrc configuration command file
3478  -pro=file   read configuration commands from file instead of .perltidyrc
3479  -st     send output to standard output, STDOUT
3480  -se     send all error output to standard error output, STDERR
3481  -v      display version number to standard output and quit
3482
3483 Basic Options:
3484  -i=n    use n columns per indentation level (default n=4)
3485  -t      tabs: use one tab character per indentation level, not recommeded
3486  -nt     no tabs: use n spaces per indentation level (default)
3487  -et=n   entab leading whitespace n spaces per tab; not recommended
3488  -io     "indent only": just do indentation, no other formatting.
3489  -sil=n  set starting indentation level to n;  use if auto detection fails
3490  -ole=s  specify output line ending (s=dos or win, mac, unix)
3491  -ple    keep output line endings same as input (input must be filename)
3492
3493 Whitespace Control
3494  -fws    freeze whitespace; this disables all whitespace changes
3495            and disables the following switches:
3496  -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
3497  -bbt    same as -bt but for code block braces; same as -bt if not given
3498  -bbvt   block braces vertically tight; use with -bl or -bli
3499  -bbvtl=s  make -bbvt to apply to selected list of block types
3500  -pt=n   paren tightness (n=0, 1 or 2)
3501  -sbt=n  square bracket tightness (n=0, 1, or 2)
3502  -bvt=n  brace vertical tightness,
3503          n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3504  -pvt=n  paren vertical tightness (see -bvt for n)
3505  -sbvt=n square bracket vertical tightness (see -bvt for n)
3506  -bvtc=n closing brace vertical tightness:
3507          n=(0=open, 1=sometimes close, 2=always close)
3508  -pvtc=n closing paren vertical tightness, see -bvtc for n.
3509  -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3510  -ci=n   sets continuation indentation=n,  default is n=2 spaces
3511  -lp     line up parentheses, brackets, and non-BLOCK braces
3512  -sfs    add space before semicolon in for( ; ; )
3513  -aws    allow perltidy to add whitespace (default)
3514  -dws    delete all old non-essential whitespace
3515  -icb    indent closing brace of a code block
3516  -cti=n  closing indentation of paren, square bracket, or non-block brace:
3517          n=0 none, =1 align with opening, =2 one full indentation level
3518  -icp    equivalent to -cti=2
3519  -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
3520  -wrs=s  want space right of tokens in string;
3521  -sts    put space before terminal semicolon of a statement
3522  -sak=s  put space between keywords given in s and '(';
3523  -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3524
3525 Line Break Control
3526  -fnl    freeze newlines; this disables all line break changes
3527             and disables the following switches:
3528  -anl    add newlines;  ok to introduce new line breaks
3529  -bbs    add blank line before subs and packages
3530  -bbc    add blank line before block comments
3531  -bbb    add blank line between major blocks
3532  -kbl=n  keep old blank lines? 0=no, 1=some, 2=all
3533  -mbl=n  maximum consecutive blank lines to output (default=1)
3534  -ce     cuddled else; use this style: '} else {'
3535  -cb     cuddled blocks (other than 'if-elsif-else')
3536  -cbl=s  list of blocks to cuddled, default 'try-catch-finally'
3537  -dnl    delete old newlines (default)
3538  -l=n    maximum line length;  default n=80
3539  -bl     opening brace on new line
3540  -sbl    opening sub brace on new line.  value of -bl is used if not given.
3541  -bli    opening brace on new line and indented
3542  -bar    opening brace always on right, even for long clauses
3543  -vt=n   vertical tightness (requires -lp); n controls break after opening
3544          token: 0=never  1=no break if next line balanced   2=no break
3545  -vtc=n  vertical tightness of closing container; n controls if closing
3546          token starts new line: 0=always  1=not unless list  1=never
3547  -wba=s  want break after tokens in string; i.e. wba=': .'
3548  -wbb=s  want break before tokens in string
3549  -wn     weld nested: combines opening and closing tokens when both are adjacent
3550
3551 Following Old Breakpoints
3552  -kis    keep interior semicolons.  Allows multiple statements per line.
3553  -boc    break at old comma breaks: turns off all automatic list formatting
3554  -bol    break at old logical breakpoints: or, and, ||, && (default)
3555  -bok    break at old list keyword breakpoints such as map, sort (default)
3556  -bot    break at old conditional (ternary ?:) operator breakpoints (default)
3557  -boa    break at old attribute breakpoints
3558  -cab=n  break at commas after a comma-arrow (=>):
3559          n=0 break at all commas after =>
3560          n=1 stable: break unless this breaks an existing one-line container
3561          n=2 break only if a one-line container cannot be formed
3562          n=3 do not treat commas after => specially at all
3563
3564 Comment controls
3565  -ibc    indent block comments (default)
3566  -isbc   indent spaced block comments; may indent unless no leading space
3567  -msc=n  minimum desired spaces to side comment, default 4
3568  -fpsc=n fix position for side comments; default 0;
3569  -csc    add or update closing side comments after closing BLOCK brace
3570  -dcsc   delete closing side comments created by a -csc command
3571  -cscp=s change closing side comment prefix to be other than '## end'
3572  -cscl=s change closing side comment to apply to selected list of blocks
3573  -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3574  -csct=n maximum number of columns of appended text, default n=20
3575  -cscw   causes warning if old side comment is overwritten with -csc
3576
3577  -sbc    use 'static block comments' identified by leading '##' (default)
3578  -sbcp=s change static block comment identifier to be other than '##'
3579  -osbc   outdent static block comments
3580
3581  -ssc    use 'static side comments' identified by leading '##' (default)
3582  -sscp=s change static side comment identifier to be other than '##'
3583
3584 Delete selected text
3585  -dac    delete all comments AND pod
3586  -dbc    delete block comments
3587  -dsc    delete side comments
3588  -dp     delete pod
3589
3590 Send selected text to a '.TEE' file
3591  -tac    tee all comments AND pod
3592  -tbc    tee block comments
3593  -tsc    tee side comments
3594  -tp     tee pod
3595
3596 Outdenting
3597  -olq    outdent long quoted strings (default)
3598  -olc    outdent a long block comment line
3599  -ola    outdent statement labels
3600  -okw    outdent control keywords (redo, next, last, goto, return)
3601  -okwl=s specify alternative keywords for -okw command
3602
3603 Other controls
3604  -mft=n  maximum fields per table; default n=40
3605  -x      do not format lines before hash-bang line (i.e., for VMS)
3606  -asc    allows perltidy to add a ';' when missing (default)
3607  -dsm    allows perltidy to delete an unnecessary ';'  (default)
3608
3609 Combinations of other parameters
3610  -gnu     attempt to follow GNU Coding Standards as applied to perl
3611  -mangle  remove as many newlines as possible (but keep comments and pods)
3612  -extrude  insert as many newlines as possible
3613
3614 Dump and die, debugging
3615  -dop    dump options used in this run to standard output and quit
3616  -ddf    dump default options to standard output and quit
3617  -dsn    dump all option short names to standard output and quit
3618  -dln    dump option long names to standard output and quit
3619  -dpro   dump whatever configuration file is in effect to standard output
3620  -dtt    dump all token types to standard output and quit
3621
3622 HTML
3623  -html write an html file (see 'man perl2web' for many options)
3624        Note: when -html is used, no indentation or formatting are done.
3625        Hint: try perltidy -html -css=mystyle.css filename.pl
3626        and edit mystyle.css to change the appearance of filename.html.
3627        -nnn gives line numbers
3628        -pre only writes out <pre>..</pre> code section
3629        -toc places a table of contents to subs at the top (default)
3630        -pod passes pod text through pod2html (default)
3631        -frm write html as a frame (3 files)
3632        -text=s extra extension for table of contents if -frm, default='toc'
3633        -sext=s extra extension for file content if -frm, default='src'
3634
3635 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3636 negates the long forms.  For example, -nasc means don't add missing
3637 semicolons.
3638
3639 If you are unable to see this entire text, try "perltidy -h | more"
3640 For more detailed information, and additional options, try "man perltidy",
3641 or go to the perltidy home page at http://perltidy.sourceforge.net
3642 EOF
3643
3644     return;
3645 }
3646
3647 sub process_this_file {
3648
3649     my ( $tokenizer, $formatter ) = @_;
3650
3651     while ( my $line = $tokenizer->get_line() ) {
3652         $formatter->write_line($line);
3653     }
3654     my $severe_error = $tokenizer->report_tokenization_errors();
3655     eval { $formatter->finish_formatting($severe_error) };
3656
3657     return;
3658 }
3659
3660 sub check_syntax {
3661
3662     # Use 'perl -c' to make sure that we did not create bad syntax
3663     # This is a very good independent check for programming errors
3664     #
3665     # Given names of the input and output files, ($istream, $ostream),
3666     # we do the following:
3667     # - check syntax of the input file
3668     # - if bad, all done (could be an incomplete code snippet)
3669     # - if infile syntax ok, then check syntax of the output file;
3670     #   - if outfile syntax bad, issue warning; this implies a code bug!
3671     # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3672
3673     my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
3674     my $infile_syntax_ok = 0;
3675     my $line_of_dashes   = '-' x 42 . "\n";
3676
3677     my $flags = $rOpts->{'perl-syntax-check-flags'};
3678
3679     # be sure we invoke perl with -c
3680     # note: perl will accept repeated flags like '-c -c'.  It is safest
3681     # to append another -c than try to find an interior bundled c, as
3682     # in -Tc, because such a 'c' might be in a quoted string, for example.
3683     if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3684
3685     # be sure we invoke perl with -x if requested
3686     # same comments about repeated parameters applies
3687     if ( $rOpts->{'look-for-hash-bang'} ) {
3688         if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3689     }
3690
3691     # this shouldn't happen unless a temporary file couldn't be made
3692     if ( $istream eq '-' ) {
3693         $logger_object->write_logfile_entry(
3694             "Cannot run perl -c on STDIN and STDOUT\n");
3695         return $infile_syntax_ok;
3696     }
3697
3698     $logger_object->write_logfile_entry(
3699         "checking input file syntax with perl $flags\n");
3700
3701     # Not all operating systems/shells support redirection of the standard
3702     # error output.
3703     my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3704
3705     my ( $istream_filename, $perl_output ) =
3706       do_syntax_check( $istream, $flags, $error_redirection );
3707     $logger_object->write_logfile_entry(
3708         "Input stream passed to Perl as file $istream_filename\n");
3709     $logger_object->write_logfile_entry($line_of_dashes);
3710     $logger_object->write_logfile_entry("$perl_output\n");
3711
3712     if ( $perl_output =~ /syntax\s*OK/ ) {
3713         $infile_syntax_ok = 1;
3714         $logger_object->write_logfile_entry($line_of_dashes);
3715         $logger_object->write_logfile_entry(
3716             "checking output file syntax with perl $flags ...\n");
3717         my ( $ostream_filename, $perl_output ) =
3718           do_syntax_check( $ostream, $flags, $error_redirection );
3719         $logger_object->write_logfile_entry(
3720             "Output stream passed to Perl as file $ostream_filename\n");
3721         $logger_object->write_logfile_entry($line_of_dashes);
3722         $logger_object->write_logfile_entry("$perl_output\n");
3723
3724         unless ( $perl_output =~ /syntax\s*OK/ ) {
3725             $logger_object->write_logfile_entry($line_of_dashes);
3726             $logger_object->warning(
3727 "The output file has a syntax error when tested with perl $flags $ostream !\n"
3728             );
3729             $logger_object->warning(
3730                 "This implies an error in perltidy; the file $ostream is bad\n"
3731             );
3732             $logger_object->report_definite_bug();
3733
3734             # the perl version number will be helpful for diagnosing the problem
3735             $logger_object->write_logfile_entry(
3736                 qx/perl -v $error_redirection/ . "\n" );
3737         }
3738     }
3739     else {
3740
3741         # Only warn of perl -c syntax errors.  Other messages,
3742         # such as missing modules, are too common.  They can be
3743         # seen by running with perltidy -w
3744         $logger_object->complain("A syntax check using perl $flags\n");
3745         $logger_object->complain(
3746             "for the output in file $istream_filename gives:\n");
3747         $logger_object->complain($line_of_dashes);
3748         $logger_object->complain("$perl_output\n");
3749         $logger_object->complain($line_of_dashes);
3750         $infile_syntax_ok = -1;
3751         $logger_object->write_logfile_entry($line_of_dashes);
3752         $logger_object->write_logfile_entry(
3753 "The output file will not be checked because of input file problems\n"
3754         );
3755     }
3756     return $infile_syntax_ok;
3757 }
3758
3759 sub do_syntax_check {
3760     my ( $stream, $flags, $error_redirection ) = @_;
3761
3762     ############################################################
3763     # This code is not reachable because syntax check is deactivated,
3764     # but it is retained for reference.
3765     ############################################################
3766
3767     # We need a named input file for executing perl
3768     my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
3769
3770     # TODO: Need to add name of file to log somewhere
3771     # otherwise Perl output is hard to read
3772     if ( !$stream_filename ) { return $stream_filename, "" }
3773
3774     # We have to quote the filename in case it has unusual characters
3775     # or spaces.  Example: this filename #CM11.pm# gives trouble.
3776     my $quoted_stream_filename = '"' . $stream_filename . '"';
3777
3778     # Under VMS something like -T will become -t (and an error) so we
3779     # will put quotes around the flags.  Double quotes seem to work on
3780     # Unix/Windows/VMS, but this may not work on all systems.  (Single
3781     # quotes do not work under Windows).  It could become necessary to
3782     # put double quotes around each flag, such as:  -"c"  -"T"
3783     # We may eventually need some system-dependent coding here.
3784     $flags = '"' . $flags . '"';
3785
3786     # now wish for luck...
3787     my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
3788
3789     if ($is_tmpfile) {
3790         unlink $stream_filename
3791           or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n");
3792     }
3793     return $stream_filename, $msg;
3794 }
3795
3796 #####################################################################
3797 #
3798 # This is a stripped down version of IO::Scalar
3799 # Given a reference to a scalar, it supplies either:
3800 # a getline method which reads lines (mode='r'), or
3801 # a print method which reads lines (mode='w')
3802 #
3803 #####################################################################
3804 package Perl::Tidy::IOScalar;
3805 use Carp;
3806
3807 sub new {
3808     my ( $package, $rscalar, $mode ) = @_;
3809     my $ref = ref $rscalar;
3810     if ( $ref ne 'SCALAR' ) {
3811         confess <<EOM;
3812 ------------------------------------------------------------------------
3813 expecting ref to SCALAR but got ref to ($ref); trace follows:
3814 ------------------------------------------------------------------------
3815 EOM
3816
3817     }
3818     if ( $mode eq 'w' ) {
3819         ${$rscalar} = "";
3820         return bless [ $rscalar, $mode ], $package;
3821     }
3822     elsif ( $mode eq 'r' ) {
3823
3824         # Convert a scalar to an array.
3825         # This avoids looking for "\n" on each call to getline
3826         #
3827         # NOTES: The -1 count is needed to avoid loss of trailing blank lines
3828         # (which might be important in a DATA section).
3829         my @array;
3830         if ( $rscalar && ${$rscalar} ) {
3831             @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
3832
3833             # remove possible extra blank line introduced with split
3834             if ( @array && $array[-1] eq "\n" ) { pop @array }
3835         }
3836         my $i_next = 0;
3837         return bless [ \@array, $mode, $i_next ], $package;
3838     }
3839     else {
3840         confess <<EOM;
3841 ------------------------------------------------------------------------
3842 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3843 ------------------------------------------------------------------------
3844 EOM
3845     }
3846 }
3847
3848 sub getline {
3849     my $self = shift;
3850     my $mode = $self->[1];
3851     if ( $mode ne 'r' ) {
3852         confess <<EOM;
3853 ------------------------------------------------------------------------
3854 getline call requires mode = 'r' but mode = ($mode); trace follows:
3855 ------------------------------------------------------------------------
3856 EOM
3857     }
3858     my $i = $self->[2]++;
3859     return $self->[0]->[$i];
3860 }
3861
3862 sub print {
3863     my ( $self, $msg ) = @_;
3864     my $mode = $self->[1];
3865     if ( $mode ne 'w' ) {
3866         confess <<EOM;
3867 ------------------------------------------------------------------------
3868 print call requires mode = 'w' but mode = ($mode); trace follows:
3869 ------------------------------------------------------------------------
3870 EOM
3871     }
3872     ${ $self->[0] } .= $msg;
3873 }
3874 sub close { return }
3875
3876 #####################################################################
3877 #
3878 # This is a stripped down version of IO::ScalarArray
3879 # Given a reference to an array, it supplies either:
3880 # a getline method which reads lines (mode='r'), or
3881 # a print method which reads lines (mode='w')
3882 #
3883 # NOTE: this routine assumes that there aren't any embedded
3884 # newlines within any of the array elements.  There are no checks
3885 # for that.
3886 #
3887 #####################################################################
3888 package Perl::Tidy::IOScalarArray;
3889 use Carp;
3890
3891 sub new {
3892     my ( $package, $rarray, $mode ) = @_;
3893     my $ref = ref $rarray;
3894     if ( $ref ne 'ARRAY' ) {
3895         confess <<EOM;
3896 ------------------------------------------------------------------------
3897 expecting ref to ARRAY but got ref to ($ref); trace follows:
3898 ------------------------------------------------------------------------
3899 EOM
3900
3901     }
3902     if ( $mode eq 'w' ) {
3903         @{$rarray} = ();
3904         return bless [ $rarray, $mode ], $package;
3905     }
3906     elsif ( $mode eq 'r' ) {
3907         my $i_next = 0;
3908         return bless [ $rarray, $mode, $i_next ], $package;
3909     }
3910     else {
3911         confess <<EOM;
3912 ------------------------------------------------------------------------
3913 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3914 ------------------------------------------------------------------------
3915 EOM
3916     }
3917 }
3918
3919 sub getline {
3920     my $self = shift;
3921     my $mode = $self->[1];
3922     if ( $mode ne 'r' ) {
3923         confess <<EOM;
3924 ------------------------------------------------------------------------
3925 getline requires mode = 'r' but mode = ($mode); trace follows:
3926 ------------------------------------------------------------------------
3927 EOM
3928     }
3929     my $i = $self->[2]++;
3930     return $self->[0]->[$i];
3931 }
3932
3933 sub print {
3934     my ( $self, $msg ) = @_;
3935     my $mode = $self->[1];
3936     if ( $mode ne 'w' ) {
3937         confess <<EOM;
3938 ------------------------------------------------------------------------
3939 print requires mode = 'w' but mode = ($mode); trace follows:
3940 ------------------------------------------------------------------------
3941 EOM
3942     }
3943     push @{ $self->[0] }, $msg;
3944 }
3945 sub close { return }
3946
3947 #####################################################################
3948 #
3949 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3950 # which returns the next line to be parsed
3951 #
3952 #####################################################################
3953
3954 package Perl::Tidy::LineSource;
3955
3956 sub new {
3957
3958     my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3959
3960     my $input_line_ending;
3961     if ( $rOpts->{'preserve-line-endings'} ) {
3962         $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3963     }
3964
3965     ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3966     return unless $fh;
3967
3968     # in order to check output syntax when standard output is used,
3969     # or when it is an object, we have to make a copy of the file
3970     if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3971     {
3972
3973         # Turning off syntax check when input output is used.
3974         # The reason is that temporary files cause problems on
3975         # on many systems.
3976         $rOpts->{'check-syntax'} = 0;
3977
3978         ${$rpending_logfile_message} .= <<EOM;
3979 Note: --syntax check will be skipped because standard input is used
3980 EOM
3981
3982     }
3983
3984     return bless {
3985         _fh                => $fh,
3986         _filename          => $input_file,
3987         _input_line_ending => $input_line_ending,
3988         _rinput_buffer     => [],
3989         _started           => 0,
3990     }, $class;
3991 }
3992
3993 sub close_input_file {
3994     my $self = shift;
3995
3996     # Only close physical files, not STDIN and other objects
3997     my $filename = $self->{_filename};
3998     if ( $filename ne '-' && !ref $filename ) {
3999         eval { $self->{_fh}->close() };
4000     }
4001     return;
4002 }
4003
4004 sub get_line {
4005     my $self          = shift;
4006     my $line          = undef;
4007     my $fh            = $self->{_fh};
4008     my $rinput_buffer = $self->{_rinput_buffer};
4009
4010     if ( scalar( @{$rinput_buffer} ) ) {
4011         $line = shift @{$rinput_buffer};
4012     }
4013     else {
4014         $line = $fh->getline();
4015
4016         # patch to read raw mac files under unix, dos
4017         # see if the first line has embedded \r's
4018         if ( $line && !$self->{_started} ) {
4019             if ( $line =~ /[\015][^\015\012]/ ) {
4020
4021                 # found one -- break the line up and store in a buffer
4022                 @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line;
4023                 my $count = @{$rinput_buffer};
4024                 $line = shift @{$rinput_buffer};
4025             }
4026             $self->{_started}++;
4027         }
4028     }
4029     return $line;
4030 }
4031
4032 #####################################################################
4033 #
4034 # the Perl::Tidy::LineSink class supplies a write_line method for
4035 # actual file writing
4036 #
4037 #####################################################################
4038
4039 package Perl::Tidy::LineSink;
4040
4041 sub new {
4042
4043     my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
4044         $rpending_logfile_message, $binmode )
4045       = @_;
4046     my $fh     = undef;
4047     my $fh_tee = undef;
4048
4049     my $output_file_open = 0;
4050
4051     if ( $rOpts->{'format'} eq 'tidy' ) {
4052         ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
4053         unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
4054         $output_file_open = 1;
4055         if ($binmode) {
4056             if (   $rOpts->{'character-encoding'}
4057                 && $rOpts->{'character-encoding'} eq 'utf8' )
4058             {
4059                 if ( ref($fh) eq 'IO::File' ) {
4060                     $fh->binmode(":encoding(UTF-8)");
4061                 }
4062                 elsif ( $output_file eq '-' ) {
4063                     binmode STDOUT, ":encoding(UTF-8)";
4064                 }
4065             }
4066
4067             # Patch for RT 122030
4068             elsif ( ref($fh) eq 'IO::File' ) { $fh->binmode(); }
4069
4070             elsif ( $output_file eq '-' ) { binmode STDOUT }
4071         }
4072     }
4073
4074     # in order to check output syntax when standard output is used,
4075     # or when it is an object, we have to make a copy of the file
4076     if ( $output_file eq '-' || ref $output_file ) {
4077         if ( $rOpts->{'check-syntax'} ) {
4078
4079             # Turning off syntax check when standard output is used.
4080             # The reason is that temporary files cause problems on
4081             # on many systems.
4082             $rOpts->{'check-syntax'} = 0;
4083             ${$rpending_logfile_message} .= <<EOM;
4084 Note: --syntax check will be skipped because standard output is used
4085 EOM
4086
4087         }
4088     }
4089
4090     return bless {
4091         _fh               => $fh,
4092         _fh_tee           => $fh_tee,
4093         _output_file      => $output_file,
4094         _output_file_open => $output_file_open,
4095         _tee_flag         => 0,
4096         _tee_file         => $tee_file,
4097         _tee_file_opened  => 0,
4098         _line_separator   => $line_separator,
4099         _binmode          => $binmode,
4100     }, $class;
4101 }
4102
4103 sub write_line {
4104
4105     my ( $self, $line ) = @_;
4106     my $fh = $self->{_fh};
4107
4108     my $output_file_open = $self->{_output_file_open};
4109     chomp $line;
4110     $line .= $self->{_line_separator};
4111
4112     $fh->print($line) if ( $self->{_output_file_open} );
4113
4114     if ( $self->{_tee_flag} ) {
4115         unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
4116         my $fh_tee = $self->{_fh_tee};
4117         print $fh_tee $line;
4118     }
4119     return;
4120 }
4121
4122 sub tee_on {
4123     my $self = shift;
4124     $self->{_tee_flag} = 1;
4125     return;
4126 }
4127
4128 sub tee_off {
4129     my $self = shift;
4130     $self->{_tee_flag} = 0;
4131     return;
4132 }
4133
4134 sub really_open_tee_file {
4135     my $self     = shift;
4136     my $tee_file = $self->{_tee_file};
4137     my $fh_tee;
4138     $fh_tee = IO::File->new(">$tee_file")
4139       or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
4140     binmode $fh_tee if $self->{_binmode};
4141     $self->{_tee_file_opened} = 1;
4142     $self->{_fh_tee}          = $fh_tee;
4143     return;
4144 }
4145
4146 sub close_output_file {
4147     my $self = shift;
4148
4149     # Only close physical files, not STDOUT and other objects
4150     my $output_file = $self->{_output_file};
4151     if ( $output_file ne '-' && !ref $output_file ) {
4152         eval { $self->{_fh}->close() } if $self->{_output_file_open};
4153     }
4154     $self->close_tee_file();
4155     return;
4156 }
4157
4158 sub close_tee_file {
4159     my $self = shift;
4160
4161     # Only close physical files, not STDOUT and other objects
4162     if ( $self->{_tee_file_opened} ) {
4163         my $tee_file = $self->{_tee_file};
4164         if ( $tee_file ne '-' && !ref $tee_file ) {
4165             eval { $self->{_fh_tee}->close() };
4166             $self->{_tee_file_opened} = 0;
4167         }
4168     }
4169     return;
4170 }
4171
4172 #####################################################################
4173 #
4174 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
4175 # useful for program development.
4176 #
4177 # Only one such file is created regardless of the number of input
4178 # files processed.  This allows the results of processing many files
4179 # to be summarized in a single file.
4180
4181 # Output messages go to a file named DIAGNOSTICS, where
4182 # they are labeled by file and line.  This allows many files to be
4183 # scanned at once for some particular condition of interest.  It was
4184 # particularly useful for developing guessing strategies.
4185 #
4186 # NOTE: This feature is deactivated in final releases but can be
4187 # reactivated for debugging by un-commenting the 'I' options flag
4188 #
4189 #####################################################################
4190
4191 package Perl::Tidy::Diagnostics;
4192
4193 sub new {
4194
4195     my $class = shift;
4196     return bless {
4197         _write_diagnostics_count => 0,
4198         _last_diagnostic_file    => "",
4199         _input_file              => "",
4200         _fh                      => undef,
4201     }, $class;
4202 }
4203
4204 sub set_input_file {
4205     my ( $self, $input_file ) = @_;
4206     $self->{_input_file} = $input_file;
4207     return;
4208 }
4209
4210 sub write_diagnostics {
4211     my ( $self, $msg ) = @_;
4212
4213     unless ( $self->{_write_diagnostics_count} ) {
4214         open( $self->{_fh}, ">", "DIAGNOSTICS" )
4215           or Perl::Tidy::Die("couldn't open DIAGNOSTICS: $!\n");
4216     }
4217
4218     my $fh                   = $self->{_fh};
4219     my $last_diagnostic_file = $self->{_last_diagnostic_file};
4220     my $input_file           = $self->{_input_file};
4221     if ( $last_diagnostic_file ne $input_file ) {
4222         $fh->print("\nFILE:$input_file\n");
4223     }
4224     $self->{_last_diagnostic_file} = $input_file;
4225     my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
4226     $fh->print("$input_line_number:\t$msg");
4227     $self->{_write_diagnostics_count}++;
4228     return;
4229 }
4230
4231 #####################################################################
4232 #
4233 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
4234 #
4235 #####################################################################
4236
4237 package Perl::Tidy::Logger;
4238
4239 sub new {
4240
4241     my ( $class, $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude ) =
4242       @_;
4243
4244     my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
4245
4246     # remove any old error output file if we might write a new one
4247     unless ( $fh_warnings || ref($warning_file) ) {
4248         if ( -e $warning_file ) {
4249             unlink($warning_file)
4250               or Perl::Tidy::Die(
4251                 "couldn't unlink warning file $warning_file: $!\n");
4252         }
4253     }
4254
4255     my $logfile_gap =
4256       defined( $rOpts->{'logfile-gap'} )
4257       ? $rOpts->{'logfile-gap'}
4258       : 50;
4259     if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
4260
4261     return bless {
4262         _log_file                      => $log_file,
4263         _logfile_gap                   => $logfile_gap,
4264         _rOpts                         => $rOpts,
4265         _fh_warnings                   => $fh_warnings,
4266         _last_input_line_written       => 0,
4267         _at_end_of_file                => 0,
4268         _use_prefix                    => 1,
4269         _block_log_output              => 0,
4270         _line_of_tokens                => undef,
4271         _output_line_number            => undef,
4272         _wrote_line_information_string => 0,
4273         _wrote_column_headings         => 0,
4274         _warning_file                  => $warning_file,
4275         _warning_count                 => 0,
4276         _complaint_count               => 0,
4277         _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
4278         _saw_brace_error => 0,
4279         _saw_extrude     => $saw_extrude,
4280         _output_array    => [],
4281     }, $class;
4282 }
4283
4284 sub get_warning_count {
4285     my $self = shift;
4286     return $self->{_warning_count};
4287 }
4288
4289 sub get_use_prefix {
4290     my $self = shift;
4291     return $self->{_use_prefix};
4292 }
4293
4294 sub block_log_output {
4295     my $self = shift;
4296     $self->{_block_log_output} = 1;
4297     return;
4298 }
4299
4300 sub unblock_log_output {
4301     my $self = shift;
4302     $self->{_block_log_output} = 0;
4303     return;
4304 }
4305
4306 sub interrupt_logfile {
4307     my $self = shift;
4308     $self->{_use_prefix} = 0;
4309     $self->warning("\n");
4310     $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
4311     return;
4312 }
4313
4314 sub resume_logfile {
4315     my $self = shift;
4316     $self->write_logfile_entry( '#' x 60 . "\n" );
4317     $self->{_use_prefix} = 1;
4318     return;
4319 }
4320
4321 sub we_are_at_the_last_line {
4322     my $self = shift;
4323     unless ( $self->{_wrote_line_information_string} ) {
4324         $self->write_logfile_entry("Last line\n\n");
4325     }
4326     $self->{_at_end_of_file} = 1;
4327     return;
4328 }
4329
4330 # record some stuff in case we go down in flames
4331 sub black_box {
4332     my ( $self, $line_of_tokens, $output_line_number ) = @_;
4333     my $input_line        = $line_of_tokens->{_line_text};
4334     my $input_line_number = $line_of_tokens->{_line_number};
4335
4336     # save line information in case we have to write a logfile message
4337     $self->{_line_of_tokens}                = $line_of_tokens;
4338     $self->{_output_line_number}            = $output_line_number;
4339     $self->{_wrote_line_information_string} = 0;
4340
4341     my $last_input_line_written = $self->{_last_input_line_written};
4342     my $rOpts                   = $self->{_rOpts};
4343     if (
4344         (
4345             ( $input_line_number - $last_input_line_written ) >=
4346             $self->{_logfile_gap}
4347         )
4348         || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
4349       )
4350     {
4351         my $structural_indentation_level = $line_of_tokens->{_level_0};
4352         $structural_indentation_level = 0
4353           if ( $structural_indentation_level < 0 );
4354         $self->{_last_input_line_written} = $input_line_number;
4355         ( my $out_str = $input_line ) =~ s/^\s*//;
4356         chomp $out_str;
4357
4358         $out_str = ( '.' x $structural_indentation_level ) . $out_str;
4359
4360         if ( length($out_str) > 35 ) {
4361             $out_str = substr( $out_str, 0, 35 ) . " ....";
4362         }
4363         $self->logfile_output( "", "$out_str\n" );
4364     }
4365     return;
4366 }
4367
4368 sub write_logfile_entry {
4369
4370     my ( $self, @msg ) = @_;
4371
4372     # add leading >>> to avoid confusing error messages and code
4373     $self->logfile_output( ">>>", "@msg" );
4374     return;
4375 }
4376
4377 sub write_column_headings {
4378     my $self = shift;
4379
4380     $self->{_wrote_column_headings} = 1;
4381     my $routput_array = $self->{_output_array};
4382     push @{$routput_array}, <<EOM;
4383 The nesting depths in the table below are at the start of the lines.
4384 The indicated output line numbers are not always exact.
4385 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4386
4387 in:out indent c b  nesting   code + messages; (messages begin with >>>)
4388 lines  levels i k            (code begins with one '.' per indent level)
4389 ------  ----- - - --------   -------------------------------------------
4390 EOM
4391     return;
4392 }
4393
4394 sub make_line_information_string {
4395
4396     # make columns of information when a logfile message needs to go out
4397     my $self                    = shift;
4398     my $line_of_tokens          = $self->{_line_of_tokens};
4399     my $input_line_number       = $line_of_tokens->{_line_number};
4400     my $line_information_string = "";
4401     if ($input_line_number) {
4402
4403         my $output_line_number   = $self->{_output_line_number};
4404         my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
4405         my $paren_depth          = $line_of_tokens->{_paren_depth};
4406         my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
4407         my $guessed_indentation_level =
4408           $line_of_tokens->{_guessed_indentation_level};
4409         ##my $rtoken_array = $line_of_tokens->{_rtoken_array};
4410
4411         my $structural_indentation_level = $line_of_tokens->{_level_0};
4412
4413         $self->write_column_headings() unless $self->{_wrote_column_headings};
4414
4415         # keep logfile columns aligned for scripts up to 999 lines;
4416         # for longer scripts it doesn't really matter
4417         my $extra_space = "";
4418         $extra_space .=
4419             ( $input_line_number < 10 )  ? "  "
4420           : ( $input_line_number < 100 ) ? " "
4421           :                                "";
4422         $extra_space .=
4423             ( $output_line_number < 10 )  ? "  "
4424           : ( $output_line_number < 100 ) ? " "
4425           :                                 "";
4426
4427         # there are 2 possible nesting strings:
4428         # the original which looks like this:  (0 [1 {2
4429         # the new one, which looks like this:  {{[
4430         # the new one is easier to read, and shows the order, but
4431         # could be arbitrarily long, so we use it unless it is too long
4432         my $nesting_string =
4433           "($paren_depth [$square_bracket_depth {$brace_depth";
4434         my $nesting_string_new = $line_of_tokens->{_nesting_tokens_0};
4435         my $ci_level           = $line_of_tokens->{_ci_level_0};
4436         if ( $ci_level > 9 ) { $ci_level = '*' }
4437         my $bk = ( $line_of_tokens->{_nesting_blocks_0} =~ /1$/ ) ? '1' : '0';
4438
4439         if ( length($nesting_string_new) <= 8 ) {
4440             $nesting_string =
4441               $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
4442         }
4443         $line_information_string =
4444 "L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
4445     }
4446     return $line_information_string;
4447 }
4448
4449 sub logfile_output {
4450     my ( $self, $prompt, $msg ) = @_;
4451     return if ( $self->{_block_log_output} );
4452
4453     my $routput_array = $self->{_output_array};
4454     if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
4455         push @{$routput_array}, "$msg";
4456     }
4457     else {
4458         my $line_information_string = $self->make_line_information_string();
4459         $self->{_wrote_line_information_string} = 1;
4460
4461         if ($line_information_string) {
4462             push @{$routput_array}, "$line_information_string   $prompt$msg";
4463         }
4464         else {
4465             push @{$routput_array}, "$msg";
4466         }
4467     }
4468     return;
4469 }
4470
4471 sub get_saw_brace_error {
4472     my $self = shift;
4473     return $self->{_saw_brace_error};
4474 }
4475
4476 sub increment_brace_error {
4477     my $self = shift;
4478     $self->{_saw_brace_error}++;
4479     return;
4480 }
4481
4482 sub brace_warning {
4483     my ( $self, $msg ) = @_;
4484
4485     #use constant BRACE_WARNING_LIMIT => 10;
4486     my $BRACE_WARNING_LIMIT = 10;
4487     my $saw_brace_error     = $self->{_saw_brace_error};
4488
4489     if ( $saw_brace_error < $BRACE_WARNING_LIMIT ) {
4490         $self->warning($msg);
4491     }
4492     $saw_brace_error++;
4493     $self->{_saw_brace_error} = $saw_brace_error;
4494
4495     if ( $saw_brace_error == $BRACE_WARNING_LIMIT ) {
4496         $self->warning("No further warnings of this type will be given\n");
4497     }
4498     return;
4499 }
4500
4501 sub complain {
4502
4503     # handle non-critical warning messages based on input flag
4504     my ( $self, $msg ) = @_;
4505     my $rOpts = $self->{_rOpts};
4506
4507     # these appear in .ERR output only if -w flag is used
4508     if ( $rOpts->{'warning-output'} ) {
4509         $self->warning($msg);
4510     }
4511
4512     # otherwise, they go to the .LOG file
4513     else {
4514         $self->{_complaint_count}++;
4515         $self->write_logfile_entry($msg);
4516     }
4517     return;
4518 }
4519
4520 sub warning {
4521
4522     # report errors to .ERR file (or stdout)
4523     my ( $self, $msg ) = @_;
4524
4525     #use constant WARNING_LIMIT => 50;
4526     my $WARNING_LIMIT = 50;
4527
4528     my $rOpts = $self->{_rOpts};
4529     unless ( $rOpts->{'quiet'} ) {
4530
4531         my $warning_count = $self->{_warning_count};
4532         my $fh_warnings   = $self->{_fh_warnings};
4533         if ( !$fh_warnings ) {
4534             my $warning_file = $self->{_warning_file};
4535             ( $fh_warnings, my $filename ) =
4536               Perl::Tidy::streamhandle( $warning_file, 'w' );
4537             $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
4538             Perl::Tidy::Warn "## Please see file $filename\n"
4539               unless ref($warning_file);
4540             $self->{_fh_warnings} = $fh_warnings;
4541             $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
4542         }
4543
4544         if ( $warning_count < $WARNING_LIMIT ) {
4545             if ( $self->get_use_prefix() > 0 ) {
4546                 my $input_line_number =
4547                   Perl::Tidy::Tokenizer::get_input_line_number();
4548                 if ( !defined($input_line_number) ) { $input_line_number = -1 }
4549                 $fh_warnings->print("$input_line_number:\t$msg");
4550                 $self->write_logfile_entry("WARNING: $msg");
4551             }
4552             else {
4553                 $fh_warnings->print($msg);
4554                 $self->write_logfile_entry($msg);
4555             }
4556         }
4557         $warning_count++;
4558         $self->{_warning_count} = $warning_count;
4559
4560         if ( $warning_count == $WARNING_LIMIT ) {
4561             $fh_warnings->print("No further warnings will be given\n");
4562         }
4563     }
4564     return;
4565 }
4566
4567 # programming bug codes:
4568 #   -1 = no bug
4569 #    0 = maybe, not sure.
4570 #    1 = definitely
4571 sub report_possible_bug {
4572     my $self         = shift;
4573     my $saw_code_bug = $self->{_saw_code_bug};
4574     $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4575     return;
4576 }
4577
4578 sub report_definite_bug {
4579     my $self = shift;
4580     $self->{_saw_code_bug} = 1;
4581     return;
4582 }
4583
4584 sub ask_user_for_bug_report {
4585
4586     my ( $self, $infile_syntax_ok, $formatter ) = @_;
4587     my $saw_code_bug = $self->{_saw_code_bug};
4588     if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4589         $self->warning(<<EOM);
4590
4591 You may have encountered a code bug in perltidy.  If you think so, and
4592 the problem is not listed in the BUGS file at
4593 http://perltidy.sourceforge.net, please report it so that it can be
4594 corrected.  Include the smallest possible script which has the problem,
4595 along with the .LOG file. See the manual pages for contact information.
4596 Thank you!
4597 EOM
4598
4599     }
4600     elsif ( $saw_code_bug == 1 ) {
4601         if ( $self->{_saw_extrude} ) {
4602             $self->warning(<<EOM);
4603
4604 You may have encountered a bug in perltidy.  However, since you are using the
4605 -extrude option, the problem may be with perl or one of its modules, which have
4606 occasional problems with this type of file.  If you believe that the
4607 problem is with perltidy, and the problem is not listed in the BUGS file at
4608 http://perltidy.sourceforge.net, please report it so that it can be corrected.
4609 Include the smallest possible script which has the problem, along with the .LOG
4610 file. See the manual pages for contact information.
4611 Thank you!
4612 EOM
4613         }
4614         else {
4615             $self->warning(<<EOM);
4616
4617 Oops, you seem to have encountered a bug in perltidy.  Please check the
4618 BUGS file at http://perltidy.sourceforge.net.  If the problem is not
4619 listed there, please report it so that it can be corrected.  Include the
4620 smallest possible script which produces this message, along with the
4621 .LOG file if appropriate.  See the manual pages for contact information.
4622 Your efforts are appreciated.
4623 Thank you!
4624 EOM
4625             my $added_semicolon_count = 0;
4626             eval {
4627                 $added_semicolon_count =
4628                   $formatter->get_added_semicolon_count();
4629             };
4630             if ( $added_semicolon_count > 0 ) {
4631                 $self->warning(<<EOM);
4632
4633 The log file shows that perltidy added $added_semicolon_count semicolons.
4634 Please rerun with -nasc to see if that is the cause of the syntax error.  Even
4635 if that is the problem, please report it so that it can be fixed.
4636 EOM
4637
4638             }
4639         }
4640     }
4641     return;
4642 }
4643
4644 sub finish {
4645
4646     # called after all formatting to summarize errors
4647     my ( $self, $infile_syntax_ok, $formatter ) = @_;
4648
4649     my $rOpts         = $self->{_rOpts};
4650     my $warning_count = $self->{_warning_count};
4651     my $saw_code_bug  = $self->{_saw_code_bug};
4652
4653     my $save_logfile =
4654          ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4655       || $saw_code_bug == 1
4656       || $rOpts->{'logfile'};
4657     my $log_file = $self->{_log_file};
4658     if ($warning_count) {
4659         if ($save_logfile) {
4660             $self->block_log_output();    # avoid echoing this to the logfile
4661             $self->warning(
4662                 "The logfile $log_file may contain useful information\n");
4663             $self->unblock_log_output();
4664         }
4665
4666         if ( $self->{_complaint_count} > 0 ) {
4667             $self->warning(
4668 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4669             );
4670         }
4671
4672         if ( $self->{_saw_brace_error}
4673             && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
4674         {
4675             $self->warning("To save a full .LOG file rerun with -g\n");
4676         }
4677     }
4678     $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4679
4680     if ($save_logfile) {
4681         my $log_file = $self->{_log_file};
4682         my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4683         if ($fh) {
4684             my $routput_array = $self->{_output_array};
4685             foreach ( @{$routput_array} ) { $fh->print($_) }
4686             if ( $log_file ne '-' && !ref $log_file ) {
4687                 eval { $fh->close() };
4688             }
4689         }
4690     }
4691     return;
4692 }
4693
4694 #####################################################################
4695 #
4696 # The Perl::Tidy::DevNull class supplies a dummy print method
4697 #
4698 #####################################################################
4699
4700 package Perl::Tidy::DevNull;
4701 sub new { my $self = shift; return bless {}, $self }
4702 sub print { return }
4703 sub close { return }
4704
4705 #####################################################################
4706 #
4707 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4708 #
4709 #####################################################################
4710
4711 package Perl::Tidy::HtmlWriter;
4712
4713 use File::Basename;
4714
4715 # class variables
4716 use vars qw{
4717   %html_color
4718   %html_bold
4719   %html_italic
4720   %token_short_names
4721   %short_to_long_names
4722   $rOpts
4723   $css_filename
4724   $css_linkname
4725   $missing_html_entities
4726 };
4727
4728 # replace unsafe characters with HTML entity representation if HTML::Entities
4729 # is available
4730 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4731
4732 sub new {
4733
4734     my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4735         $html_src_extension )
4736       = @_;
4737
4738     my $html_file_opened = 0;
4739     my $html_fh;
4740     ( $html_fh, my $html_filename ) =
4741       Perl::Tidy::streamhandle( $html_file, 'w' );
4742     unless ($html_fh) {
4743         Perl::Tidy::Warn("can't open $html_file: $!\n");
4744         return;
4745     }
4746     $html_file_opened = 1;
4747
4748     if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4749         $input_file = "NONAME";
4750     }
4751
4752     # write the table of contents to a string
4753     my $toc_string;
4754     my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4755
4756     my $html_pre_fh;
4757     my @pre_string_stack;
4758     if ( $rOpts->{'html-pre-only'} ) {
4759
4760         # pre section goes directly to the output stream
4761         $html_pre_fh = $html_fh;
4762         $html_pre_fh->print( <<"PRE_END");
4763 <pre>
4764 PRE_END
4765     }
4766     else {
4767
4768         # pre section go out to a temporary string
4769         my $pre_string;
4770         $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4771         push @pre_string_stack, \$pre_string;
4772     }
4773
4774     # pod text gets diverted if the 'pod2html' is used
4775     my $html_pod_fh;
4776     my $pod_string;
4777     if ( $rOpts->{'pod2html'} ) {
4778         if ( $rOpts->{'html-pre-only'} ) {
4779             undef $rOpts->{'pod2html'};
4780         }
4781         else {
4782             eval "use Pod::Html";
4783             if ($@) {
4784                 Perl::Tidy::Warn
4785 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4786                 undef $rOpts->{'pod2html'};
4787             }
4788             else {
4789                 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4790             }
4791         }
4792     }
4793
4794     my $toc_filename;
4795     my $src_filename;
4796     if ( $rOpts->{'frames'} ) {
4797         unless ($extension) {
4798             Perl::Tidy::Warn
4799 "cannot use frames without a specified output extension; ignoring -frm\n";
4800             undef $rOpts->{'frames'};
4801         }
4802         else {
4803             $toc_filename = $input_file . $html_toc_extension . $extension;
4804             $src_filename = $input_file . $html_src_extension . $extension;
4805         }
4806     }
4807
4808     # ----------------------------------------------------------
4809     # Output is now directed as follows:
4810     # html_toc_fh <-- table of contents items
4811     # html_pre_fh <-- the <pre> section of formatted code, except:
4812     # html_pod_fh <-- pod goes here with the pod2html option
4813     # ----------------------------------------------------------
4814
4815     my $title = $rOpts->{'title'};
4816     unless ($title) {
4817         ( $title, my $path ) = fileparse($input_file);
4818     }
4819     my $toc_item_count = 0;
4820     my $in_toc_package = "";
4821     my $last_level     = 0;
4822     return bless {
4823         _input_file        => $input_file,          # name of input file
4824         _title             => $title,               # title, unescaped
4825         _html_file         => $html_file,           # name of .html output file
4826         _toc_filename      => $toc_filename,        # for frames option
4827         _src_filename      => $src_filename,        # for frames option
4828         _html_file_opened  => $html_file_opened,    # a flag
4829         _html_fh           => $html_fh,             # the output stream
4830         _html_pre_fh       => $html_pre_fh,         # pre section goes here
4831         _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
4832         _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
4833         _rpod_string       => \$pod_string,         # string holding pod
4834         _pod_cut_count     => 0,                    # how many =cut's?
4835         _html_toc_fh       => $html_toc_fh,         # fh for table of contents
4836         _rtoc_string       => \$toc_string,         # string holding toc
4837         _rtoc_item_count   => \$toc_item_count,     # how many toc items
4838         _rin_toc_package   => \$in_toc_package,     # package name
4839         _rtoc_name_count   => {},                   # hash to track unique names
4840         _rpackage_stack    => [],                   # stack to check for package
4841                                                     # name changes
4842         _rlast_level       => \$last_level,         # brace indentation level
4843     }, $class;
4844 }
4845
4846 sub add_toc_item {
4847
4848     # Add an item to the html table of contents.
4849     # This is called even if no table of contents is written,
4850     # because we still want to put the anchors in the <pre> text.
4851     # We are given an anchor name and its type; types are:
4852     #      'package', 'sub', '__END__', '__DATA__', 'EOF'
4853     # There must be an 'EOF' call at the end to wrap things up.
4854     my ( $self, $name, $type ) = @_;
4855     my $html_toc_fh     = $self->{_html_toc_fh};
4856     my $html_pre_fh     = $self->{_html_pre_fh};
4857     my $rtoc_name_count = $self->{_rtoc_name_count};
4858     my $rtoc_item_count = $self->{_rtoc_item_count};
4859     my $rlast_level     = $self->{_rlast_level};
4860     my $rin_toc_package = $self->{_rin_toc_package};
4861     my $rpackage_stack  = $self->{_rpackage_stack};
4862
4863     # packages contain sublists of subs, so to avoid errors all package
4864     # items are written and finished with the following routines
4865     my $end_package_list = sub {
4866         if ( ${$rin_toc_package} ) {
4867             $html_toc_fh->print("</ul>\n</li>\n");
4868             ${$rin_toc_package} = "";
4869         }
4870     };
4871
4872     my $start_package_list = sub {
4873         my ( $unique_name, $package ) = @_;
4874         if ( ${$rin_toc_package} ) { $end_package_list->() }
4875         $html_toc_fh->print(<<EOM);
4876 <li><a href=\"#$unique_name\">package $package</a>
4877 <ul>
4878 EOM
4879         ${$rin_toc_package} = $package;
4880     };
4881
4882     # start the table of contents on the first item
4883     unless ( ${$rtoc_item_count} ) {
4884
4885         # but just quit if we hit EOF without any other entries
4886         # in this case, there will be no toc
4887         return if ( $type eq 'EOF' );
4888         $html_toc_fh->print( <<"TOC_END");
4889 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4890 <ul>
4891 TOC_END
4892     }
4893     ${$rtoc_item_count}++;
4894
4895     # make a unique anchor name for this location:
4896     #   - packages get a 'package-' prefix
4897     #   - subs use their names
4898     my $unique_name = $name;
4899     if ( $type eq 'package' ) { $unique_name = "package-$name" }
4900
4901     # append '-1', '-2', etc if necessary to make unique; this will
4902     # be unique because subs and packages cannot have a '-'
4903     if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4904         $unique_name .= "-$count";
4905     }
4906
4907     #   - all names get terminal '-' if pod2html is used, to avoid
4908     #     conflicts with anchor names created by pod2html
4909     if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4910
4911     # start/stop lists of subs
4912     if ( $type eq 'sub' ) {
4913         my $package = $rpackage_stack->[ ${$rlast_level} ];
4914         unless ($package) { $package = 'main' }
4915
4916         # if we're already in a package/sub list, be sure its the right
4917         # package or else close it
4918         if ( ${$rin_toc_package} && ${$rin_toc_package} ne $package ) {
4919             $end_package_list->();
4920         }
4921
4922         # start a package/sub list if necessary
4923         unless ( ${$rin_toc_package} ) {
4924             $start_package_list->( $unique_name, $package );
4925         }
4926     }
4927
4928     # now write an entry in the toc for this item
4929     if ( $type eq 'package' ) {
4930         $start_package_list->( $unique_name, $name );
4931     }
4932     elsif ( $type eq 'sub' ) {
4933         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4934     }
4935     else {
4936         $end_package_list->();
4937         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4938     }
4939
4940     # write the anchor in the <pre> section
4941     $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4942
4943     # end the table of contents, if any, on the end of file
4944     if ( $type eq 'EOF' ) {
4945         $html_toc_fh->print( <<"TOC_END");
4946 </ul>
4947 <!-- END CODE INDEX -->
4948 TOC_END
4949     }
4950     return;
4951 }
4952
4953 BEGIN {
4954
4955     # This is the official list of tokens which may be identified by the
4956     # user.  Long names are used as getopt keys.  Short names are
4957     # convenient short abbreviations for specifying input.  Short names
4958     # somewhat resemble token type characters, but are often different
4959     # because they may only be alphanumeric, to allow command line
4960     # input.  Also, note that because of case insensitivity of html,
4961     # this table must be in a single case only (I've chosen to use all
4962     # lower case).
4963     # When adding NEW_TOKENS: update this hash table
4964     # short names => long names
4965     %short_to_long_names = (
4966         'n'  => 'numeric',
4967         'p'  => 'paren',
4968         'q'  => 'quote',
4969         's'  => 'structure',
4970         'c'  => 'comment',
4971         'v'  => 'v-string',
4972         'cm' => 'comma',
4973         'w'  => 'bareword',
4974         'co' => 'colon',
4975         'pu' => 'punctuation',
4976         'i'  => 'identifier',
4977         'j'  => 'label',
4978         'h'  => 'here-doc-target',
4979         'hh' => 'here-doc-text',
4980         'k'  => 'keyword',
4981         'sc' => 'semicolon',
4982         'm'  => 'subroutine',
4983         'pd' => 'pod-text',
4984     );
4985
4986     # Now we have to map actual token types into one of the above short
4987     # names; any token types not mapped will get 'punctuation'
4988     # properties.
4989
4990     # The values of this hash table correspond to the keys of the
4991     # previous hash table.
4992     # The keys of this hash table are token types and can be seen
4993     # by running with --dump-token-types (-dtt).
4994
4995     # When adding NEW_TOKENS: update this hash table
4996     # $type => $short_name
4997     %token_short_names = (
4998         '#'  => 'c',
4999         'n'  => 'n',
5000         'v'  => 'v',
5001         'k'  => 'k',
5002         'F'  => 'k',
5003         'Q'  => 'q',
5004         'q'  => 'q',
5005         'J'  => 'j',
5006         'j'  => 'j',
5007         'h'  => 'h',
5008         'H'  => 'hh',
5009         'w'  => 'w',
5010         ','  => 'cm',
5011         '=>' => 'cm',
5012         ';'  => 'sc',
5013         ':'  => 'co',
5014         'f'  => 'sc',
5015         '('  => 'p',
5016         ')'  => 'p',
5017         'M'  => 'm',
5018         'P'  => 'pd',
5019         'A'  => 'co',
5020     );
5021
5022     # These token types will all be called identifiers for now
5023     # FIXME: could separate user defined modules as separate type
5024     my @identifier = qw" i t U C Y Z G :: CORE::";
5025     @token_short_names{@identifier} = ('i') x scalar(@identifier);
5026
5027     # These token types will be called 'structure'
5028     my @structure = qw" { } ";
5029     @token_short_names{@structure} = ('s') x scalar(@structure);
5030
5031     # OLD NOTES: save for reference
5032     # Any of these could be added later if it would be useful.
5033     # For now, they will by default become punctuation
5034     #    my @list = qw" L R [ ] ";
5035     #    @token_long_names{@list} = ('non-structure') x scalar(@list);
5036     #
5037     #    my @list = qw"
5038     #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
5039     #      ";
5040     #    @token_long_names{@list} = ('math') x scalar(@list);
5041     #
5042     #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
5043     #    @token_long_names{@list} = ('bit') x scalar(@list);
5044     #
5045     #    my @list = qw" == != < > <= <=> ";
5046     #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
5047     #
5048     #    my @list = qw" && || ! &&= ||= //= ";
5049     #    @token_long_names{@list} = ('logical') x scalar(@list);
5050     #
5051     #    my @list = qw" . .= =~ !~ x x= ";
5052     #    @token_long_names{@list} = ('string-operators') x scalar(@list);
5053     #
5054     #    # Incomplete..
5055     #    my @list = qw" .. -> <> ... \ ? ";
5056     #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
5057
5058 }
5059
5060 sub make_getopt_long_names {
5061     my ( $class, $rgetopt_names ) = @_;
5062     while ( my ( $short_name, $name ) = each %short_to_long_names ) {
5063         push @{$rgetopt_names}, "html-color-$name=s";
5064         push @{$rgetopt_names}, "html-italic-$name!";
5065         push @{$rgetopt_names}, "html-bold-$name!";
5066     }
5067     push @{$rgetopt_names}, "html-color-background=s";
5068     push @{$rgetopt_names}, "html-linked-style-sheet=s";
5069     push @{$rgetopt_names}, "nohtml-style-sheets";
5070     push @{$rgetopt_names}, "html-pre-only";
5071     push @{$rgetopt_names}, "html-line-numbers";
5072     push @{$rgetopt_names}, "html-entities!";
5073     push @{$rgetopt_names}, "stylesheet";
5074     push @{$rgetopt_names}, "html-table-of-contents!";
5075     push @{$rgetopt_names}, "pod2html!";
5076     push @{$rgetopt_names}, "frames!";
5077     push @{$rgetopt_names}, "html-toc-extension=s";
5078     push @{$rgetopt_names}, "html-src-extension=s";
5079
5080     # Pod::Html parameters:
5081     push @{$rgetopt_names}, "backlink=s";
5082     push @{$rgetopt_names}, "cachedir=s";
5083     push @{$rgetopt_names}, "htmlroot=s";
5084     push @{$rgetopt_names}, "libpods=s";
5085     push @{$rgetopt_names}, "podpath=s";
5086     push @{$rgetopt_names}, "podroot=s";
5087     push @{$rgetopt_names}, "title=s";
5088
5089     # Pod::Html parameters with leading 'pod' which will be removed
5090     # before the call to Pod::Html
5091     push @{$rgetopt_names}, "podquiet!";
5092     push @{$rgetopt_names}, "podverbose!";
5093     push @{$rgetopt_names}, "podrecurse!";
5094     push @{$rgetopt_names}, "podflush";
5095     push @{$rgetopt_names}, "podheader!";
5096     push @{$rgetopt_names}, "podindex!";
5097     return;
5098 }
5099
5100 sub make_abbreviated_names {
5101
5102     # We're appending things like this to the expansion list:
5103     #      'hcc'    => [qw(html-color-comment)],
5104     #      'hck'    => [qw(html-color-keyword)],
5105     #  etc
5106     my ( $class, $rexpansion ) = @_;
5107
5108     # abbreviations for color/bold/italic properties
5109     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5110         ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
5111         ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
5112         ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
5113         ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
5114         ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
5115     }
5116
5117     # abbreviations for all other html options
5118     ${$rexpansion}{"hcbg"}  = ["html-color-background"];
5119     ${$rexpansion}{"pre"}   = ["html-pre-only"];
5120     ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
5121     ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
5122     ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
5123     ${$rexpansion}{"hent"}  = ["html-entities"];
5124     ${$rexpansion}{"nhent"} = ["nohtml-entities"];
5125     ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
5126     ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
5127     ${$rexpansion}{"ss"}    = ["stylesheet"];
5128     ${$rexpansion}{"pod"}   = ["pod2html"];
5129     ${$rexpansion}{"npod"}  = ["nopod2html"];
5130     ${$rexpansion}{"frm"}   = ["frames"];
5131     ${$rexpansion}{"nfrm"}  = ["noframes"];
5132     ${$rexpansion}{"text"}  = ["html-toc-extension"];
5133     ${$rexpansion}{"sext"}  = ["html-src-extension"];
5134     return;
5135 }
5136
5137 sub check_options {
5138
5139     # This will be called once after options have been parsed
5140     my ( $class, $rOpts ) = @_;
5141
5142     # X11 color names for default settings that seemed to look ok
5143     # (these color names are only used for programming clarity; the hex
5144     # numbers are actually written)
5145     use constant ForestGreen   => "#228B22";
5146     use constant SaddleBrown   => "#8B4513";
5147     use constant magenta4      => "#8B008B";
5148     use constant IndianRed3    => "#CD5555";
5149     use constant DeepSkyBlue4  => "#00688B";
5150     use constant MediumOrchid3 => "#B452CD";
5151     use constant black         => "#000000";
5152     use constant white         => "#FFFFFF";
5153     use constant red           => "#FF0000";
5154
5155     # set default color, bold, italic properties
5156     # anything not listed here will be given the default (punctuation) color --
5157     # these types currently not listed and get default: ws pu s sc cm co p
5158     # When adding NEW_TOKENS: add an entry here if you don't want defaults
5159
5160     # set_default_properties( $short_name, default_color, bold?, italic? );
5161     set_default_properties( 'c',  ForestGreen,   0, 0 );
5162     set_default_properties( 'pd', ForestGreen,   0, 1 );
5163     set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
5164     set_default_properties( 'q',  IndianRed3,    0, 0 );
5165     set_default_properties( 'hh', IndianRed3,    0, 1 );
5166     set_default_properties( 'h',  IndianRed3,    1, 0 );
5167     set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
5168     set_default_properties( 'w',  black,         0, 0 );
5169     set_default_properties( 'n',  MediumOrchid3, 0, 0 );
5170     set_default_properties( 'v',  MediumOrchid3, 0, 0 );
5171     set_default_properties( 'j',  IndianRed3,    1, 0 );
5172     set_default_properties( 'm',  red,           1, 0 );
5173
5174     set_default_color( 'html-color-background',  white );
5175     set_default_color( 'html-color-punctuation', black );
5176
5177     # setup property lookup tables for tokens based on their short names
5178     # every token type has a short name, and will use these tables
5179     # to do the html markup
5180     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5181         $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
5182         $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
5183         $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
5184     }
5185
5186     # write style sheet to STDOUT and die if requested
5187     if ( defined( $rOpts->{'stylesheet'} ) ) {
5188         write_style_sheet_file('-');
5189         Perl::Tidy::Exit 0;
5190     }
5191
5192     # make sure user gives a file name after -css
5193     if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
5194         $css_linkname = $rOpts->{'html-linked-style-sheet'};
5195         if ( $css_linkname =~ /^-/ ) {
5196             Perl::Tidy::Die "You must specify a valid filename after -css\n";
5197         }
5198     }
5199
5200     # check for conflict
5201     if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
5202         $rOpts->{'nohtml-style-sheets'} = 0;
5203         warning("You can't specify both -css and -nss; -nss ignored\n");
5204     }
5205
5206     # write a style sheet file if necessary
5207     if ($css_linkname) {
5208
5209         # if the selected filename exists, don't write, because user may
5210         # have done some work by hand to create it; use backup name instead
5211         # Also, this will avoid a potential disaster in which the user
5212         # forgets to specify the style sheet, like this:
5213         #    perltidy -html -css myfile1.pl myfile2.pl
5214         # This would cause myfile1.pl to parsed as the style sheet by GetOpts
5215         my $css_filename = $css_linkname;
5216         unless ( -e $css_filename ) {
5217             write_style_sheet_file($css_filename);
5218         }
5219     }
5220     $missing_html_entities = 1 unless $rOpts->{'html-entities'};
5221     return;
5222 }
5223
5224 sub write_style_sheet_file {
5225
5226     my $css_filename = shift;
5227     my $fh;
5228     unless ( $fh = IO::File->new("> $css_filename") ) {
5229         Perl::Tidy::Die "can't open $css_filename: $!\n";
5230     }
5231     write_style_sheet_data($fh);
5232     eval { $fh->close };
5233     return;
5234 }
5235
5236 sub write_style_sheet_data {
5237
5238     # write the style sheet data to an open file handle
5239     my $fh = shift;
5240
5241     my $bg_color   = $rOpts->{'html-color-background'};
5242     my $text_color = $rOpts->{'html-color-punctuation'};
5243
5244     # pre-bgcolor is new, and may not be defined
5245     my $pre_bg_color = $rOpts->{'html-pre-color-background'};
5246     $pre_bg_color = $bg_color unless $pre_bg_color;
5247
5248     $fh->print(<<"EOM");
5249 /* default style sheet generated by perltidy */
5250 body {background: $bg_color; color: $text_color}
5251 pre { color: $text_color;
5252       background: $pre_bg_color;
5253       font-family: courier;
5254     }
5255
5256 EOM
5257
5258     foreach my $short_name ( sort keys %short_to_long_names ) {
5259         my $long_name = $short_to_long_names{$short_name};
5260
5261         my $abbrev = '.' . $short_name;
5262         if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
5263         my $color = $html_color{$short_name};
5264         if ( !defined($color) ) { $color = $text_color }
5265         $fh->print("$abbrev \{ color: $color;");
5266
5267         if ( $html_bold{$short_name} ) {
5268             $fh->print(" font-weight:bold;");
5269         }
5270
5271         if ( $html_italic{$short_name} ) {
5272             $fh->print(" font-style:italic;");
5273         }
5274         $fh->print("} /* $long_name */\n");
5275     }
5276     return;
5277 }
5278
5279 sub set_default_color {
5280
5281     # make sure that options hash $rOpts->{$key} contains a valid color
5282     my ( $key, $color ) = @_;
5283     if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5284     $rOpts->{$key} = check_RGB($color);
5285     return;
5286 }
5287
5288 sub check_RGB {
5289
5290     # if color is a 6 digit hex RGB value, prepend a #, otherwise
5291     # assume that it is a valid ascii color name
5292     my ($color) = @_;
5293     if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5294     return $color;
5295 }
5296
5297 sub set_default_properties {
5298     my ( $short_name, $color, $bold, $italic ) = @_;
5299
5300     set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5301     my $key;
5302     $key = "html-bold-$short_to_long_names{$short_name}";
5303     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5304     $key = "html-italic-$short_to_long_names{$short_name}";
5305     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5306     return;
5307 }
5308
5309 sub pod_to_html {
5310
5311     # Use Pod::Html to process the pod and make the page
5312     # then merge the perltidy code sections into it.
5313     # return 1 if success, 0 otherwise
5314     my ( $self, $pod_string, $css_string, $toc_string, $rpre_string_stack ) =
5315       @_;
5316     my $input_file   = $self->{_input_file};
5317     my $title        = $self->{_title};
5318     my $success_flag = 0;
5319
5320     # don't try to use pod2html if no pod
5321     unless ($pod_string) {
5322         return $success_flag;
5323     }
5324
5325     # Pod::Html requires a real temporary filename
5326     my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
5327     unless ($fh_tmp) {
5328         Perl::Tidy::Warn
5329           "unable to open temporary file $tmpfile; cannot use pod2html\n";
5330         return $success_flag;
5331     }
5332
5333     #------------------------------------------------------------------
5334     # Warning: a temporary file is open; we have to clean up if
5335     # things go bad.  From here on all returns should be by going to
5336     # RETURN so that the temporary file gets unlinked.
5337     #------------------------------------------------------------------
5338
5339     # write the pod text to the temporary file
5340     $fh_tmp->print($pod_string);
5341     $fh_tmp->close();
5342
5343     # Hand off the pod to pod2html.
5344     # Note that we can use the same temporary filename for input and output
5345     # because of the way pod2html works.
5346     {
5347
5348         my @args;
5349         push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5350
5351         # Flags with string args:
5352         # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5353         # "podpath=s", "podroot=s"
5354         # Note: -css=s is handled by perltidy itself
5355         foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot))
5356         {
5357             if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5358         }
5359
5360         # Toggle switches; these have extra leading 'pod'
5361         # "header!", "index!", "recurse!", "quiet!", "verbose!"
5362         foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5363             my $kwd = $kw;    # allows us to strip 'pod'
5364             if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5365             elsif ( defined( $rOpts->{$kw} ) ) {
5366                 $kwd =~ s/^pod//;
5367                 push @args, "--no$kwd";
5368             }
5369         }
5370
5371         # "flush",
5372         my $kw = 'podflush';
5373         if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5374
5375         # Must clean up if pod2html dies (it can);
5376         # Be careful not to overwrite callers __DIE__ routine
5377         local $SIG{__DIE__} = sub {
5378             unlink $tmpfile if -e $tmpfile;
5379             Perl::Tidy::Die $_[0];
5380         };
5381
5382         pod2html(@args);
5383     }
5384     $fh_tmp = IO::File->new( $tmpfile, 'r' );
5385     unless ($fh_tmp) {
5386
5387         # this error shouldn't happen ... we just used this filename
5388         Perl::Tidy::Warn
5389           "unable to open temporary file $tmpfile; cannot use pod2html\n";
5390         goto RETURN;
5391     }
5392
5393     my $html_fh = $self->{_html_fh};
5394     my @toc;
5395     my $in_toc;
5396     my $ul_level = 0;
5397     my $no_print;
5398
5399     # This routine will write the html selectively and store the toc
5400     my $html_print = sub {
5401         foreach (@_) {
5402             $html_fh->print($_) unless ($no_print);
5403             if ($in_toc) { push @toc, $_ }
5404         }
5405     };
5406
5407     # loop over lines of html output from pod2html and merge in
5408     # the necessary perltidy html sections
5409     my ( $saw_body, $saw_index, $saw_body_end );
5410     while ( my $line = $fh_tmp->getline() ) {
5411
5412         if ( $line =~ /^\s*<html>\s*$/i ) {
5413             my $date = localtime;
5414             $html_print->("<!-- Generated by perltidy on $date -->\n");
5415             $html_print->($line);
5416         }
5417
5418         # Copy the perltidy css, if any, after <body> tag
5419         elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5420             $saw_body = 1;
5421             $html_print->($css_string) if $css_string;
5422             $html_print->($line);
5423
5424             # add a top anchor and heading
5425             $html_print->("<a name=\"-top-\"></a>\n");
5426             $title = escape_html($title);
5427             $html_print->("<h1>$title</h1>\n");
5428         }
5429
5430         # check for start of index, old pod2html
5431         # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
5432         #    <!-- INDEX BEGIN -->
5433         #    <ul>
5434         #     ...
5435         #    </ul>
5436         #    <!-- INDEX END -->
5437         #
5438         elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5439             $in_toc = 'INDEX';
5440
5441             # when frames are used, an extra table of contents in the
5442             # contents panel is confusing, so don't print it
5443             $no_print = $rOpts->{'frames'}
5444               || !$rOpts->{'html-table-of-contents'};
5445             $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5446             $html_print->($line);
5447         }
5448
5449         # check for start of index, new pod2html
5450         # After Pod::Html VERSION 1.15_02 it is delimited as:
5451         # <ul id="index">
5452         # ...
5453         # </ul>
5454         elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
5455             $in_toc   = 'UL';
5456             $ul_level = 1;
5457
5458             # when frames are used, an extra table of contents in the
5459             # contents panel is confusing, so don't print it
5460             $no_print = $rOpts->{'frames'}
5461               || !$rOpts->{'html-table-of-contents'};
5462             $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5463             $html_print->($line);
5464         }
5465
5466         # Check for end of index, old pod2html
5467         elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5468             $saw_index = 1;
5469             $html_print->($line);
5470
5471             # Copy the perltidy toc, if any, after the Pod::Html toc
5472             if ($toc_string) {
5473                 $html_print->("<hr />\n") if $rOpts->{'frames'};
5474                 $html_print->("<h2>Code Index:</h2>\n");
5475                 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5476                 $html_print->(@toc);
5477             }
5478             $in_toc   = "";
5479             $no_print = 0;
5480         }
5481
5482         # must track <ul> depth level for new pod2html
5483         elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
5484             $ul_level++;
5485             $html_print->($line);
5486         }
5487
5488         # Check for end of index, for new pod2html
5489         elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
5490             $ul_level--;
5491             $html_print->($line);
5492
5493             # Copy the perltidy toc, if any, after the Pod::Html toc
5494             if ( $ul_level <= 0 ) {
5495                 $saw_index = 1;
5496                 if ($toc_string) {
5497                     $html_print->("<hr />\n") if $rOpts->{'frames'};
5498                     $html_print->("<h2>Code Index:</h2>\n");
5499                     my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5500                     $html_print->(@toc);
5501                 }
5502                 $in_toc   = "";
5503                 $ul_level = 0;
5504                 $no_print = 0;
5505             }
5506         }
5507
5508         # Copy one perltidy section after each marker
5509         elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5510             $line = $2;
5511             $html_print->($1) if $1;
5512
5513             # Intermingle code and pod sections if we saw multiple =cut's.
5514             if ( $self->{_pod_cut_count} > 1 ) {
5515                 my $rpre_string = shift( @{$rpre_string_stack} );
5516                 if ( ${$rpre_string} ) {
5517                     $html_print->('<pre>');
5518                     $html_print->( ${$rpre_string} );
5519                     $html_print->('</pre>');
5520                 }
5521                 else {
5522
5523                     # shouldn't happen: we stored a string before writing
5524                     # each marker.
5525                     Perl::Tidy::Warn
5526 "Problem merging html stream with pod2html; order may be wrong\n";
5527                 }
5528                 $html_print->($line);
5529             }
5530
5531             # If didn't see multiple =cut lines, we'll put the pod out first
5532             # and then the code, because it's less confusing.
5533             else {
5534
5535                 # since we are not intermixing code and pod, we don't need
5536                 # or want any <hr> lines which separated pod and code
5537                 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5538             }
5539         }
5540
5541         # Copy any remaining code section before the </body> tag
5542         elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5543             $saw_body_end = 1;
5544             if ( @{$rpre_string_stack} ) {
5545                 unless ( $self->{_pod_cut_count} > 1 ) {
5546                     $html_print->('<hr />');
5547                 }
5548                 while ( my $rpre_string = shift( @{$rpre_string_stack} ) ) {
5549                     $html_print->('<pre>');
5550                     $html_print->( ${$rpre_string} );
5551                     $html_print->('</pre>');
5552                 }
5553             }
5554             $html_print->($line);
5555         }
5556         else {
5557             $html_print->($line);
5558         }
5559     }
5560
5561     $success_flag = 1;
5562     unless ($saw_body) {
5563         Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
5564         $success_flag = 0;
5565     }
5566     unless ($saw_body_end) {
5567         Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
5568         $success_flag = 0;
5569     }
5570     unless ($saw_index) {
5571         Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
5572         $success_flag = 0;
5573     }
5574
5575   RETURN:
5576     eval { $html_fh->close() };
5577
5578     # note that we have to unlink tmpfile before making frames
5579     # because the tmpfile may be one of the names used for frames
5580     if ( -e $tmpfile ) {
5581         unless ( unlink($tmpfile) ) {
5582             Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n");
5583             $success_flag = 0;
5584         }
5585     }
5586
5587     if ( $success_flag && $rOpts->{'frames'} ) {
5588         $self->make_frame( \@toc );
5589     }
5590     return $success_flag;
5591 }
5592
5593 sub make_frame {
5594
5595     # Make a frame with table of contents in the left panel
5596     # and the text in the right panel.
5597     # On entry:
5598     #  $html_filename contains the no-frames html output
5599     #  $rtoc is a reference to an array with the table of contents
5600     my ( $self, $rtoc ) = @_;
5601     my $input_file    = $self->{_input_file};
5602     my $html_filename = $self->{_html_file};
5603     my $toc_filename  = $self->{_toc_filename};
5604     my $src_filename  = $self->{_src_filename};
5605     my $title         = $self->{_title};
5606     $title = escape_html($title);
5607
5608     # FUTURE input parameter:
5609     my $top_basename = "";
5610
5611     # We need to produce 3 html files:
5612     # 1. - the table of contents
5613     # 2. - the contents (source code) itself
5614     # 3. - the frame which contains them
5615
5616     # get basenames for relative links
5617     my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5618     my ( $src_basename, $src_path ) = fileparse($src_filename);
5619
5620     # 1. Make the table of contents panel, with appropriate changes
5621     # to the anchor names
5622     my $src_frame_name = 'SRC';
5623     my $first_anchor =
5624       write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5625         $src_frame_name );
5626
5627     # 2. The current .html filename is renamed to be the contents panel
5628     rename( $html_filename, $src_filename )
5629       or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
5630
5631     # 3. Then use the original html filename for the frame
5632     write_frame_html(
5633         $title,        $html_filename, $top_basename,
5634         $toc_basename, $src_basename,  $src_frame_name
5635     );
5636     return;
5637 }
5638
5639 sub write_toc_html {
5640
5641     # write a separate html table of contents file for frames
5642     my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5643     my $fh = IO::File->new( $toc_filename, 'w' )
5644       or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
5645     $fh->print(<<EOM);
5646 <html>
5647 <head>
5648 <title>$title</title>
5649 </head>
5650 <body>
5651 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5652 EOM
5653
5654     my $first_anchor =
5655       change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5656     $fh->print( join "", @{$rtoc} );
5657
5658     $fh->print(<<EOM);
5659 </body>
5660 </html>
5661 EOM
5662
5663     return;
5664 }
5665
5666 sub write_frame_html {
5667
5668     # write an html file to be the table of contents frame
5669     my (
5670         $title,        $frame_filename, $top_basename,
5671         $toc_basename, $src_basename,   $src_frame_name
5672     ) = @_;
5673
5674     my $fh = IO::File->new( $frame_filename, 'w' )
5675       or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
5676
5677     $fh->print(<<EOM);
5678 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5679     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5680 <?xml version="1.0" encoding="iso-8859-1" ?>
5681 <html xmlns="http://www.w3.org/1999/xhtml">
5682 <head>
5683 <title>$title</title>
5684 </head>
5685 EOM
5686
5687     # two left panels, one right, if master index file
5688     if ($top_basename) {
5689         $fh->print(<<EOM);
5690 <frameset cols="20%,80%">
5691 <frameset rows="30%,70%">
5692 <frame src = "$top_basename" />
5693 <frame src = "$toc_basename" />
5694 </frameset>
5695 EOM
5696     }
5697
5698     # one left panels, one right, if no master index file
5699     else {
5700         $fh->print(<<EOM);
5701 <frameset cols="20%,*">
5702 <frame src = "$toc_basename" />
5703 EOM
5704     }
5705     $fh->print(<<EOM);
5706 <frame src = "$src_basename" name = "$src_frame_name" />
5707 <noframes>
5708 <body>
5709 <p>If you see this message, you are using a non-frame-capable web client.</p>
5710 <p>This document contains:</p>
5711 <ul>
5712 <li><a href="$toc_basename">A table of contents</a></li>
5713 <li><a href="$src_basename">The source code</a></li>
5714 </ul>
5715 </body>
5716 </noframes>
5717 </frameset>
5718 </html>
5719 EOM
5720     return;
5721 }
5722
5723 sub change_anchor_names {
5724
5725     # add a filename and target to anchors
5726     # also return the first anchor
5727     my ( $rlines, $filename, $target ) = @_;
5728     my $first_anchor;
5729     foreach my $line ( @{$rlines} ) {
5730
5731         #  We're looking for lines like this:
5732         #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5733         #  ----  -       --------  -----------------
5734         #  $1              $4            $5
5735         if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5736             my $pre  = $1;
5737             my $name = $4;
5738             my $post = $5;
5739             my $href = "$filename#$name";
5740             $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5741             unless ($first_anchor) { $first_anchor = $href }
5742         }
5743     }
5744     return $first_anchor;
5745 }
5746
5747 sub close_html_file {
5748     my $self = shift;
5749     return unless $self->{_html_file_opened};
5750
5751     my $html_fh     = $self->{_html_fh};
5752     my $rtoc_string = $self->{_rtoc_string};
5753
5754     # There are 3 basic paths to html output...
5755
5756     # ---------------------------------
5757     # Path 1: finish up if in -pre mode
5758     # ---------------------------------
5759     if ( $rOpts->{'html-pre-only'} ) {
5760         $html_fh->print( <<"PRE_END");
5761 </pre>
5762 PRE_END
5763         eval { $html_fh->close() };
5764         return;
5765     }
5766
5767     # Finish the index
5768     $self->add_toc_item( 'EOF', 'EOF' );
5769
5770     my $rpre_string_stack = $self->{_rpre_string_stack};
5771
5772     # Patch to darken the <pre> background color in case of pod2html and
5773     # interleaved code/documentation.  Otherwise, the distinction
5774     # between code and documentation is blurred.
5775     if (   $rOpts->{pod2html}
5776         && $self->{_pod_cut_count} >= 1
5777         && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5778     {
5779         $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5780     }
5781
5782     # put the css or its link into a string, if used
5783     my $css_string;
5784     my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5785
5786     # use css linked to another file
5787     if ( $rOpts->{'html-linked-style-sheet'} ) {
5788         $fh_css->print(
5789             qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />));
5790     }
5791
5792     # use css embedded in this file
5793     elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5794         $fh_css->print( <<'ENDCSS');
5795 <style type="text/css">
5796 <!--
5797 ENDCSS
5798         write_style_sheet_data($fh_css);
5799         $fh_css->print( <<"ENDCSS");
5800 -->
5801 </style>
5802 ENDCSS
5803     }
5804
5805     # -----------------------------------------------------------
5806     # path 2: use pod2html if requested
5807     #         If we fail for some reason, continue on to path 3
5808     # -----------------------------------------------------------
5809     if ( $rOpts->{'pod2html'} ) {
5810         my $rpod_string = $self->{_rpod_string};
5811         $self->pod_to_html(
5812             ${$rpod_string}, $css_string,
5813             ${$rtoc_string}, $rpre_string_stack
5814         ) && return;
5815     }
5816
5817     # --------------------------------------------------
5818     # path 3: write code in html, with pod only in italics
5819     # --------------------------------------------------
5820     my $input_file = $self->{_input_file};
5821     my $title      = escape_html($input_file);
5822     my $date       = localtime;
5823     $html_fh->print( <<"HTML_START");
5824 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5825    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5826 <!-- Generated by perltidy on $date -->
5827 <html xmlns="http://www.w3.org/1999/xhtml">
5828 <head>
5829 <title>$title</title>
5830 HTML_START
5831
5832     # output the css, if used
5833     if ($css_string) {
5834         $html_fh->print($css_string);
5835         $html_fh->print( <<"ENDCSS");
5836 </head>
5837 <body>
5838 ENDCSS
5839     }
5840     else {
5841
5842         $html_fh->print( <<"HTML_START");
5843 </head>
5844 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5845 HTML_START
5846     }
5847
5848     $html_fh->print("<a name=\"-top-\"></a>\n");
5849     $html_fh->print( <<"EOM");
5850 <h1>$title</h1>
5851 EOM
5852
5853     # copy the table of contents
5854     if (   ${$rtoc_string}
5855         && !$rOpts->{'frames'}
5856         && $rOpts->{'html-table-of-contents'} )
5857     {
5858         $html_fh->print( ${$rtoc_string} );
5859     }
5860
5861     # copy the pre section(s)
5862     my $fname_comment = $input_file;
5863     $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
5864     $html_fh->print( <<"END_PRE");
5865 <hr />
5866 <!-- contents of filename: $fname_comment -->
5867 <pre>
5868 END_PRE
5869
5870     foreach my $rpre_string ( @{$rpre_string_stack} ) {
5871         $html_fh->print( ${$rpre_string} );
5872     }
5873
5874     # and finish the html page
5875     $html_fh->print( <<"HTML_END");
5876 </pre>
5877 </body>
5878 </html>
5879 HTML_END
5880     eval { $html_fh->close() };    # could be object without close method
5881
5882     if ( $rOpts->{'frames'} ) {
5883         my @toc = map { $_ .= "\n" } split /\n/, ${$rtoc_string};
5884         $self->make_frame( \@toc );
5885     }
5886     return;
5887 }
5888
5889 sub markup_tokens {
5890     my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_;
5891     my ( @colored_tokens, $string, $type, $token, $level );
5892     my $rlast_level    = $self->{_rlast_level};
5893     my $rpackage_stack = $self->{_rpackage_stack};
5894
5895     for ( my $j = 0 ; $j < @{$rtoken_type} ; $j++ ) {
5896         $type  = $rtoken_type->[$j];
5897         $token = $rtokens->[$j];
5898         $level = $rlevels->[$j];
5899         $level = 0 if ( $level < 0 );
5900
5901         #-------------------------------------------------------
5902         # Update the package stack.  The package stack is needed to keep
5903         # the toc correct because some packages may be declared within
5904         # blocks and go out of scope when we leave the block.
5905         #-------------------------------------------------------
5906         if ( $level > ${$rlast_level} ) {
5907             unless ( $rpackage_stack->[ $level - 1 ] ) {
5908                 $rpackage_stack->[ $level - 1 ] = 'main';
5909             }
5910             $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5911         }
5912         elsif ( $level < ${$rlast_level} ) {
5913             my $package = $rpackage_stack->[$level];
5914             unless ($package) { $package = 'main' }
5915
5916             # if we change packages due to a nesting change, we
5917             # have to make an entry in the toc
5918             if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5919                 $self->add_toc_item( $package, 'package' );
5920             }
5921         }
5922         ${$rlast_level} = $level;
5923
5924         #-------------------------------------------------------
5925         # Intercept a sub name here; split it
5926         # into keyword 'sub' and sub name; and add an
5927         # entry in the toc
5928         #-------------------------------------------------------
5929         if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5930             $token = $self->markup_html_element( $1, 'k' );
5931             push @colored_tokens, $token;
5932             $token = $2;
5933             $type  = 'M';
5934
5935             # but don't include sub declarations in the toc;
5936             # these wlll have leading token types 'i;'
5937             my $signature = join "", @{$rtoken_type};
5938             unless ( $signature =~ /^i;/ ) {
5939                 my $subname = $token;
5940                 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5941                 $self->add_toc_item( $subname, 'sub' );
5942             }
5943         }
5944
5945         #-------------------------------------------------------
5946         # Intercept a package name here; split it
5947         # into keyword 'package' and name; add to the toc,
5948         # and update the package stack
5949         #-------------------------------------------------------
5950         if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5951             $token = $self->markup_html_element( $1, 'k' );
5952             push @colored_tokens, $token;
5953             $token = $2;
5954             $type  = 'i';
5955             $self->add_toc_item( "$token", 'package' );
5956             $rpackage_stack->[$level] = $token;
5957         }
5958
5959         $token = $self->markup_html_element( $token, $type );
5960         push @colored_tokens, $token;
5961     }
5962     return ( \@colored_tokens );
5963 }
5964
5965 sub markup_html_element {
5966     my ( $self, $token, $type ) = @_;
5967
5968     return $token if ( $type eq 'b' );         # skip a blank token
5969     return $token if ( $token =~ /^\s*$/ );    # skip a blank line
5970     $token = escape_html($token);
5971
5972     # get the short abbreviation for this token type
5973     my $short_name = $token_short_names{$type};
5974     if ( !defined($short_name) ) {
5975         $short_name = "pu";                    # punctuation is default
5976     }
5977
5978     # handle style sheets..
5979     if ( !$rOpts->{'nohtml-style-sheets'} ) {
5980         if ( $short_name ne 'pu' ) {
5981             $token = qq(<span class="$short_name">) . $token . "</span>";
5982         }
5983     }
5984
5985     # handle no style sheets..
5986     else {
5987         my $color = $html_color{$short_name};
5988
5989         if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5990             $token = qq(<font color="$color">) . $token . "</font>";
5991         }
5992         if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5993         if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
5994     }
5995     return $token;
5996 }
5997
5998 sub escape_html {
5999
6000     my $token = shift;
6001     if ($missing_html_entities) {
6002         $token =~ s/\&/&amp;/g;
6003         $token =~ s/\</&lt;/g;
6004         $token =~ s/\>/&gt;/g;
6005         $token =~ s/\"/&quot;/g;
6006     }
6007     else {
6008         HTML::Entities::encode_entities($token);
6009     }
6010     return $token;
6011 }
6012
6013 sub finish_formatting {
6014
6015     # called after last line
6016     my $self = shift;
6017     $self->close_html_file();
6018     return;
6019 }
6020
6021 sub write_line {
6022
6023     my ( $self, $line_of_tokens ) = @_;
6024     return unless $self->{_html_file_opened};
6025     my $html_pre_fh = $self->{_html_pre_fh};
6026     my $line_type   = $line_of_tokens->{_line_type};
6027     my $input_line  = $line_of_tokens->{_line_text};
6028     my $line_number = $line_of_tokens->{_line_number};
6029     chomp $input_line;
6030
6031     # markup line of code..
6032     my $html_line;
6033     if ( $line_type eq 'CODE' ) {
6034         my $rtoken_type = $line_of_tokens->{_rtoken_type};
6035         my $rtokens     = $line_of_tokens->{_rtokens};
6036         my $rlevels     = $line_of_tokens->{_rlevels};
6037
6038         if ( $input_line =~ /(^\s*)/ ) {
6039             $html_line = $1;
6040         }
6041         else {
6042             $html_line = "";
6043         }
6044         my ($rcolored_tokens) =
6045           $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
6046         $html_line .= join '', @{$rcolored_tokens};
6047     }
6048
6049     # markup line of non-code..
6050     else {
6051         my $line_character;
6052         if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
6053         elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
6054         elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
6055         elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
6056         elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
6057         elsif ( $line_type eq 'END_START' ) {
6058             $line_character = 'k';
6059             $self->add_toc_item( '__END__', '__END__' );
6060         }
6061         elsif ( $line_type eq 'DATA_START' ) {
6062             $line_character = 'k';
6063             $self->add_toc_item( '__DATA__', '__DATA__' );
6064         }
6065         elsif ( $line_type =~ /^POD/ ) {
6066             $line_character = 'P';
6067             if ( $rOpts->{'pod2html'} ) {
6068                 my $html_pod_fh = $self->{_html_pod_fh};
6069                 if ( $line_type eq 'POD_START' ) {
6070
6071                     my $rpre_string_stack = $self->{_rpre_string_stack};
6072                     my $rpre_string       = $rpre_string_stack->[-1];
6073
6074                     # if we have written any non-blank lines to the
6075                     # current pre section, start writing to a new output
6076                     # string
6077                     if ( ${$rpre_string} =~ /\S/ ) {
6078                         my $pre_string;
6079                         $html_pre_fh =
6080                           Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
6081                         $self->{_html_pre_fh} = $html_pre_fh;
6082                         push @{$rpre_string_stack}, \$pre_string;
6083
6084                         # leave a marker in the pod stream so we know
6085                         # where to put the pre section we just
6086                         # finished.
6087                         my $for_html = '=for html';    # don't confuse pod utils
6088                         $html_pod_fh->print(<<EOM);
6089
6090 $for_html
6091 <!-- pERLTIDY sECTION -->
6092
6093 EOM
6094                     }
6095
6096                     # otherwise, just clear the current string and start
6097                     # over
6098                     else {
6099                         ${$rpre_string} = "";
6100                         $html_pod_fh->print("\n");
6101                     }
6102                 }
6103                 $html_pod_fh->print( $input_line . "\n" );
6104                 if ( $line_type eq 'POD_END' ) {
6105                     $self->{_pod_cut_count}++;
6106                     $html_pod_fh->print("\n");
6107                 }
6108                 return;
6109             }
6110         }
6111         else { $line_character = 'Q' }
6112         $html_line = $self->markup_html_element( $input_line, $line_character );
6113     }
6114
6115     # add the line number if requested
6116     if ( $rOpts->{'html-line-numbers'} ) {
6117         my $extra_space =
6118             ( $line_number < 10 )   ? "   "
6119           : ( $line_number < 100 )  ? "  "
6120           : ( $line_number < 1000 ) ? " "
6121           :                           "";
6122         $html_line = $extra_space . $line_number . " " . $html_line;
6123     }
6124
6125     # write the line
6126     $html_pre_fh->print("$html_line\n");
6127     return;
6128 }
6129
6130 #####################################################################
6131 #
6132 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
6133 # line breaks to the token stream
6134 #
6135 # WARNING: This is not a real class for speed reasons.  Only one
6136 # Formatter may be used.
6137 #
6138 #####################################################################
6139
6140 package Perl::Tidy::Formatter;
6141
6142 BEGIN {
6143
6144     # Caution: these debug flags produce a lot of output
6145     # They should all be 0 except when debugging small scripts
6146     use constant FORMATTER_DEBUG_FLAG_RECOMBINE   => 0;
6147     use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
6148     use constant FORMATTER_DEBUG_FLAG_BOND        => 0;
6149     use constant FORMATTER_DEBUG_FLAG_BREAK       => 0;
6150     use constant FORMATTER_DEBUG_FLAG_CI          => 0;
6151     use constant FORMATTER_DEBUG_FLAG_FLUSH       => 0;
6152     use constant FORMATTER_DEBUG_FLAG_FORCE       => 0;
6153     use constant FORMATTER_DEBUG_FLAG_LIST        => 0;
6154     use constant FORMATTER_DEBUG_FLAG_NOBREAK     => 0;
6155     use constant FORMATTER_DEBUG_FLAG_OUTPUT      => 0;
6156     use constant FORMATTER_DEBUG_FLAG_SPARSE      => 0;
6157     use constant FORMATTER_DEBUG_FLAG_STORE       => 0;
6158     use constant FORMATTER_DEBUG_FLAG_UNDOBP      => 0;
6159     use constant FORMATTER_DEBUG_FLAG_WHITE       => 0;
6160
6161     my $debug_warning = sub {
6162         print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
6163     };
6164
6165     FORMATTER_DEBUG_FLAG_RECOMBINE   && $debug_warning->('RECOMBINE');
6166     FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
6167     FORMATTER_DEBUG_FLAG_BOND        && $debug_warning->('BOND');
6168     FORMATTER_DEBUG_FLAG_BREAK       && $debug_warning->('BREAK');
6169     FORMATTER_DEBUG_FLAG_CI          && $debug_warning->('CI');
6170     FORMATTER_DEBUG_FLAG_FLUSH       && $debug_warning->('FLUSH');
6171     FORMATTER_DEBUG_FLAG_FORCE       && $debug_warning->('FORCE');
6172     FORMATTER_DEBUG_FLAG_LIST        && $debug_warning->('LIST');
6173     FORMATTER_DEBUG_FLAG_NOBREAK     && $debug_warning->('NOBREAK');
6174     FORMATTER_DEBUG_FLAG_OUTPUT      && $debug_warning->('OUTPUT');
6175     FORMATTER_DEBUG_FLAG_SPARSE      && $debug_warning->('SPARSE');
6176     FORMATTER_DEBUG_FLAG_STORE       && $debug_warning->('STORE');
6177     FORMATTER_DEBUG_FLAG_UNDOBP      && $debug_warning->('UNDOBP');
6178     FORMATTER_DEBUG_FLAG_WHITE       && $debug_warning->('WHITE');
6179 }
6180
6181 use Carp;
6182 use vars qw{
6183
6184   @gnu_stack
6185   $max_gnu_stack_index
6186   $gnu_position_predictor
6187   $line_start_index_to_go
6188   $last_indentation_written
6189   $last_unadjusted_indentation
6190   $last_leading_token
6191   $last_output_short_opening_token
6192   $peak_batch_size
6193
6194   $saw_VERSION_in_this_file
6195   $saw_END_or_DATA_
6196
6197   @gnu_item_list
6198   $max_gnu_item_index
6199   $gnu_sequence_number
6200   $last_output_indentation
6201   %last_gnu_equals
6202   %gnu_comma_count
6203   %gnu_arrow_count
6204
6205   @block_type_to_go
6206   @type_sequence_to_go
6207   @container_environment_to_go
6208   @bond_strength_to_go
6209   @forced_breakpoint_to_go
6210   @token_lengths_to_go
6211   @summed_lengths_to_go
6212   @levels_to_go
6213   @leading_spaces_to_go
6214   @reduced_spaces_to_go
6215   @matching_token_to_go
6216   @mate_index_to_go
6217   @ci_levels_to_go
6218   @nesting_depth_to_go
6219   @nobreak_to_go
6220   @old_breakpoint_to_go
6221   @tokens_to_go
6222   @rtoken_vars_to_go
6223   @K_to_go
6224   @types_to_go
6225   @inext_to_go
6226   @iprev_to_go
6227
6228   %saved_opening_indentation
6229
6230   $max_index_to_go
6231   $comma_count_in_batch
6232   $last_nonblank_index_to_go
6233   $last_nonblank_type_to_go
6234   $last_nonblank_token_to_go
6235   $last_last_nonblank_index_to_go
6236   $last_last_nonblank_type_to_go
6237   $last_last_nonblank_token_to_go
6238   @nonblank_lines_at_depth
6239   $starting_in_quote
6240   $ending_in_quote
6241   @whitespace_level_stack
6242   $whitespace_last_level
6243
6244   $format_skipping_pattern_begin
6245   $format_skipping_pattern_end
6246
6247   $forced_breakpoint_count
6248   $forced_breakpoint_undo_count
6249   @forced_breakpoint_undo_stack
6250   %postponed_breakpoint
6251
6252   $tabbing
6253   $embedded_tab_count
6254   $first_embedded_tab_at
6255   $last_embedded_tab_at
6256   $deleted_semicolon_count
6257   $first_deleted_semicolon_at
6258   $last_deleted_semicolon_at
6259   $added_semicolon_count
6260   $first_added_semicolon_at
6261   $last_added_semicolon_at
6262   $first_tabbing_disagreement
6263   $last_tabbing_disagreement
6264   $in_tabbing_disagreement
6265   $tabbing_disagreement_count
6266   $input_line_tabbing
6267
6268   $last_line_leading_type
6269   $last_line_leading_level
6270   $last_last_line_leading_level
6271
6272   %block_leading_text
6273   %block_opening_line_number
6274   $csc_new_statement_ok
6275   $csc_last_label
6276   %csc_block_label
6277   $accumulating_text_for_block
6278   $leading_block_text
6279   $rleading_block_if_elsif_text
6280   $leading_block_text_level
6281   $leading_block_text_length_exceeded
6282   $leading_block_text_line_length
6283   $leading_block_text_line_number
6284   $closing_side_comment_prefix_pattern
6285   $closing_side_comment_list_pattern
6286
6287   $blank_lines_after_opening_block_pattern
6288   $blank_lines_before_closing_block_pattern
6289
6290   $last_nonblank_token
6291   $last_nonblank_type
6292   $last_last_nonblank_token
6293   $last_last_nonblank_type
6294   $last_nonblank_block_type
6295   $last_output_level
6296   %is_do_follower
6297   %is_if_brace_follower
6298   %space_after_keyword
6299   $rbrace_follower
6300   $looking_for_else
6301   %is_last_next_redo_return
6302   %is_other_brace_follower
6303   %is_else_brace_follower
6304   %is_anon_sub_brace_follower
6305   %is_anon_sub_1_brace_follower
6306   %is_sort_map_grep
6307   %is_sort_map_grep_eval
6308   %is_sort_map_grep_eval_do
6309   %is_block_without_semicolon
6310   %is_if_unless
6311   %is_and_or
6312   %is_assignment
6313   %is_chain_operator
6314   %is_if_unless_and_or_last_next_redo_return
6315   %ok_to_add_semicolon_for_block_type
6316
6317   @has_broken_sublist
6318   @dont_align
6319   @want_comma_break
6320
6321   $is_static_block_comment
6322   $index_start_one_line_block
6323   $semicolons_before_block_self_destruct
6324   $index_max_forced_break
6325   $input_line_number
6326   $diagnostics_object
6327   $vertical_aligner_object
6328   $logger_object
6329   $file_writer_object
6330   $formatter_self
6331   @ci_stack
6332   %want_break_before
6333   %outdent_keyword
6334   $static_block_comment_pattern
6335   $static_side_comment_pattern
6336   %opening_vertical_tightness
6337   %closing_vertical_tightness
6338   %closing_token_indentation
6339   $some_closing_token_indentation
6340
6341   %opening_token_right
6342   %stack_opening_token
6343   %stack_closing_token
6344
6345   $block_brace_vertical_tightness_pattern
6346
6347   $rOpts_add_newlines
6348   $rOpts_add_whitespace
6349   $rOpts_block_brace_tightness
6350   $rOpts_block_brace_vertical_tightness
6351   $rOpts_brace_left_and_indent
6352   $rOpts_comma_arrow_breakpoints
6353   $rOpts_break_at_old_keyword_breakpoints
6354   $rOpts_break_at_old_comma_breakpoints
6355   $rOpts_break_at_old_logical_breakpoints
6356   $rOpts_break_at_old_ternary_breakpoints
6357   $rOpts_break_at_old_attribute_breakpoints
6358   $rOpts_closing_side_comment_else_flag
6359   $rOpts_closing_side_comment_maximum_text
6360   $rOpts_continuation_indentation
6361   $rOpts_cuddled_else
6362   $rOpts_delete_old_whitespace
6363   $rOpts_fuzzy_line_length
6364   $rOpts_indent_columns
6365   $rOpts_line_up_parentheses
6366   $rOpts_maximum_fields_per_table
6367   $rOpts_maximum_line_length
6368   $rOpts_variable_maximum_line_length
6369   $rOpts_short_concatenation_item_length
6370   $rOpts_keep_old_blank_lines
6371   $rOpts_ignore_old_breakpoints
6372   $rOpts_format_skipping
6373   $rOpts_space_function_paren
6374   $rOpts_space_keyword_paren
6375   $rOpts_keep_interior_semicolons
6376   $rOpts_ignore_side_comment_lengths
6377   $rOpts_stack_closing_block_brace
6378   $rOpts_space_backslash_quote
6379   $rOpts_whitespace_cycle
6380
6381   %is_opening_type
6382   %is_closing_type
6383   %is_keyword_returning_list
6384   %tightness
6385   %matching_token
6386   $rOpts
6387   %right_bond_strength
6388   %left_bond_strength
6389   %binary_ws_rules
6390   %want_left_space
6391   %want_right_space
6392   %is_digraph
6393   %is_trigraph
6394   $bli_pattern
6395   $bli_list_string
6396   %is_closing_type
6397   %is_opening_type
6398   %is_closing_token
6399   %is_opening_token
6400
6401   %weld_len_left_closing
6402   %weld_len_right_closing
6403   %weld_len_left_opening
6404   %weld_len_right_opening
6405
6406   $rcuddled_block_types
6407
6408   $SUB_PATTERN
6409   $ASUB_PATTERN
6410
6411   $NVARS
6412
6413 };
6414
6415 BEGIN {
6416
6417     # Array index names for token vars
6418     my $i = 0;
6419     use constant {
6420         _BLOCK_TYPE_            => $i++,
6421         _CI_LEVEL_              => $i++,
6422         _CONTAINER_ENVIRONMENT_ => $i++,
6423         _CONTAINER_TYPE_        => $i++,
6424         _CUMULATIVE_LENGTH_     => $i++,
6425         _LINE_INDEX_            => $i++,
6426         _KNEXT_SEQ_ITEM_        => $i++,
6427         _LEVEL_                 => $i++,
6428         _LEVEL_TRUE_            => $i++,
6429         _SLEVEL_                => $i++,
6430         _TOKEN_                 => $i++,
6431         _TYPE_                  => $i++,
6432         _TYPE_SEQUENCE_         => $i++,
6433     };
6434     $NVARS = 1 + _TYPE_SEQUENCE_;
6435
6436     # default list of block types for which -bli would apply
6437     $bli_list_string = 'if else elsif unless while for foreach do : sub';
6438
6439     my @q;
6440
6441     @q = qw(
6442       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6443       <= >= == =~ !~ != ++ -- /= x=
6444     );
6445     @is_digraph{@q} = (1) x scalar(@q);
6446
6447     @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
6448     @is_trigraph{@q} = (1) x scalar(@q);
6449
6450     @q = qw(
6451       = **= += *= &= <<= &&=
6452       -= /= |= >>= ||= //=
6453       .= %= ^=
6454       x=
6455     );
6456     @is_assignment{@q} = (1) x scalar(@q);
6457
6458     @q = qw(
6459       grep
6460       keys
6461       map
6462       reverse
6463       sort
6464       split
6465     );
6466     @is_keyword_returning_list{@q} = (1) x scalar(@q);
6467
6468     @q = qw(is if unless and or err last next redo return);
6469     @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
6470
6471     @q = qw(last next redo return);
6472     @is_last_next_redo_return{@q} = (1) x scalar(@q);
6473
6474     @q = qw(sort map grep);
6475     @is_sort_map_grep{@q} = (1) x scalar(@q);
6476
6477     @q = qw(sort map grep eval);
6478     @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
6479
6480     @q = qw(sort map grep eval do);
6481     @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
6482
6483     @q = qw(if unless);
6484     @is_if_unless{@q} = (1) x scalar(@q);
6485
6486     @q = qw(and or err);
6487     @is_and_or{@q} = (1) x scalar(@q);
6488
6489     # Identify certain operators which often occur in chains.
6490     # Note: the minus (-) causes a side effect of padding of the first line in
6491     # something like this (by sub set_logical_padding):
6492     #    Checkbutton => 'Transmission checked',
6493     #   -variable    => \$TRANS
6494     # This usually improves appearance so it seems ok.
6495     @q = qw(&& || and or : ? . + - * /);
6496     @is_chain_operator{@q} = (1) x scalar(@q);
6497
6498     # We can remove semicolons after blocks preceded by these keywords
6499     @q =
6500       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6501       unless while until for foreach given when default);
6502     @is_block_without_semicolon{@q} = (1) x scalar(@q);
6503
6504     # We will allow semicolons to be added within these block types
6505     # as well as sub and package blocks.
6506     # NOTES:
6507     # 1. Note that these keywords are omitted:
6508     #     switch case given when default sort map grep
6509     # 2. It is also ok to add for sub and package blocks and a labeled block
6510     # 3. But not okay for other perltidy types including:
6511     #     { } ; G t
6512     # 4. Test files: blktype.t, blktype1.t, semicolon.t
6513     @q =
6514       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6515       unless do while until eval for foreach );
6516     @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
6517
6518     # 'L' is token for opening { at hash key
6519     @q = qw" L { ( [ ";
6520     @is_opening_type{@q} = (1) x scalar(@q);
6521
6522     # 'R' is token for closing } at hash key
6523     @q = qw" R } ) ] ";
6524     @is_closing_type{@q} = (1) x scalar(@q);
6525
6526     @q = qw" { ( [ ";
6527     @is_opening_token{@q} = (1) x scalar(@q);
6528
6529     @q = qw" } ) ] ";
6530     @is_closing_token{@q} = (1) x scalar(@q);
6531
6532     # Patterns for standardizing matches to block types for regular subs and
6533     # anonymous subs. Examples
6534     #  'sub process' is a named sub
6535     #  'sub ::m' is a named sub
6536     #  'sub' is an anonymous sub
6537     #  'sub:' is a label, not a sub
6538     #  'substr' is a keyword
6539     $SUB_PATTERN  = '^sub\s+(::|\w)';
6540     $ASUB_PATTERN = '^sub$';
6541 }
6542
6543 # whitespace codes
6544 use constant WS_YES      => 1;
6545 use constant WS_OPTIONAL => 0;
6546 use constant WS_NO       => -1;
6547
6548 # Token bond strengths.
6549 use constant NO_BREAK    => 10000;
6550 use constant VERY_STRONG => 100;
6551 use constant STRONG      => 2.1;
6552 use constant NOMINAL     => 1.1;
6553 use constant WEAK        => 0.8;
6554 use constant VERY_WEAK   => 0.55;
6555
6556 # values for testing indexes in output array
6557 use constant UNDEFINED_INDEX => -1;
6558
6559 # Maximum number of little messages; probably need not be changed.
6560 use constant MAX_NAG_MESSAGES => 6;
6561
6562 # increment between sequence numbers for each type
6563 # For example, ?: pairs might have numbers 7,11,15,...
6564 use constant TYPE_SEQUENCE_INCREMENT => 4;
6565
6566 {
6567
6568     # methods to count instances
6569     my $_count = 0;
6570     sub get_count        { return $_count; }
6571     sub _increment_count { return ++$_count }
6572     sub _decrement_count { return --$_count }
6573 }
6574
6575 sub trim {
6576
6577     # trim leading and trailing whitespace from a string
6578     my $str = shift;
6579     $str =~ s/\s+$//;
6580     $str =~ s/^\s+//;
6581     return $str;
6582 }
6583
6584 sub max {
6585     my @vals = @_;
6586     my $max  = shift @vals;
6587     foreach my $val (@vals) {
6588         $max = ( $max < $val ) ? $val : $max;
6589     }
6590     return $max;
6591 }
6592
6593 sub min {
6594     my @vals = @_;
6595     my $min  = shift @vals;
6596     foreach my $val (@vals) {
6597         $min = ( $min > $val ) ? $val : $min;
6598     }
6599     return $min;
6600 }
6601
6602 sub split_words {
6603
6604     # given a string containing words separated by whitespace,
6605     # return the list of words
6606     my ($str) = @_;
6607     return unless $str;
6608     $str =~ s/\s+$//;
6609     $str =~ s/^\s+//;
6610     return split( /\s+/, $str );
6611 }
6612
6613 sub check_keys {
6614     my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
6615
6616     # Check the keys of a hash:
6617     # $rtest     = ref to hash to test
6618     # $rexpected = ref to has with valid keys
6619
6620     # $msg = a message to write in case of error
6621     # $exact_match defines the type of check:
6622     #     = false: test hash must not have unknown key
6623     #     = true:  test hash must have exactly same keys as known hash
6624     my @unknown_keys =
6625       grep { !exists $rvalid->{$_} } keys %{$rtest};
6626     my @missing_keys =
6627       grep { !exists $rtest->{$_} } keys %{$rvalid};
6628     my $error = @unknown_keys;
6629     if ($exact_match) { $error ||= @missing_keys }
6630     if ($error) {
6631         local $" = ')(';
6632         my @expected_keys = sort keys %{$rvalid};
6633         @unknown_keys = sort @unknown_keys;
6634         Perl::Tidy::Die <<EOM;
6635 ------------------------------------------------------------------------
6636 Program error detected checking hash keys
6637 Message is: '$msg'
6638 Expected keys: (@expected_keys)
6639 Unknown key(s): (@unknown_keys)
6640 Missing key(s): (@missing_keys)
6641 ------------------------------------------------------------------------
6642 EOM
6643     }
6644 }
6645
6646 # interface to Perl::Tidy::Logger routines
6647 sub warning {
6648     my ($msg) = @_;
6649     if ($logger_object) { $logger_object->warning($msg); }
6650     return;
6651 }
6652
6653 sub complain {
6654     my ($msg) = @_;
6655     if ($logger_object) {
6656         $logger_object->complain($msg);
6657     }
6658     return;
6659 }
6660
6661 sub write_logfile_entry {
6662     my @msg = @_;
6663     if ($logger_object) {
6664         $logger_object->write_logfile_entry(@msg);
6665     }
6666     return;
6667 }
6668
6669 sub black_box {
6670     my @msg = @_;
6671     if ($logger_object) { $logger_object->black_box(@msg); }
6672     return;
6673 }
6674
6675 sub report_definite_bug {
6676     if ($logger_object) {
6677         $logger_object->report_definite_bug();
6678     }
6679     return;
6680 }
6681
6682 sub get_saw_brace_error {
6683     if ($logger_object) {
6684         $logger_object->get_saw_brace_error();
6685     }
6686     return;
6687 }
6688
6689 sub we_are_at_the_last_line {
6690     if ($logger_object) {
6691         $logger_object->we_are_at_the_last_line();
6692     }
6693     return;
6694 }
6695
6696 # interface to Perl::Tidy::Diagnostics routine
6697 sub write_diagnostics {
6698     my $msg = shift;
6699     if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
6700     return;
6701 }
6702
6703 sub get_added_semicolon_count {
6704     my $self = shift;
6705     return $added_semicolon_count;
6706 }
6707
6708 sub DESTROY {
6709     my $self = shift;
6710     $self->_decrement_count();
6711     return;
6712 }
6713
6714 sub get_output_line_number {
6715     return $vertical_aligner_object->get_output_line_number();
6716 }
6717
6718 sub new {
6719
6720     my $class = shift;
6721
6722     # we are given an object with a write_line() method to take lines
6723     my %defaults = (
6724         sink_object        => undef,
6725         diagnostics_object => undef,
6726         logger_object      => undef,
6727     );
6728     my %args = ( %defaults, @_ );
6729
6730     $logger_object      = $args{logger_object};
6731     $diagnostics_object = $args{diagnostics_object};
6732
6733     # we create another object with a get_line() and peek_ahead() method
6734     my $sink_object = $args{sink_object};
6735     $file_writer_object =
6736       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6737
6738     # initialize the leading whitespace stack to negative levels
6739     # so that we can never run off the end of the stack
6740     $peak_batch_size        = 0;    # flag to determine if we have output code
6741     $gnu_position_predictor = 0;    # where the current token is predicted to be
6742     $max_gnu_stack_index    = 0;
6743     $max_gnu_item_index     = -1;
6744     $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6745     @gnu_item_list                   = ();
6746     $last_output_indentation         = 0;
6747     $last_indentation_written        = 0;
6748     $last_unadjusted_indentation     = 0;
6749     $last_leading_token              = "";
6750     $last_output_short_opening_token = 0;
6751
6752     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6753     $saw_END_or_DATA_         = 0;
6754
6755     @block_type_to_go            = ();
6756     @type_sequence_to_go         = ();
6757     @container_environment_to_go = ();
6758     @bond_strength_to_go         = ();
6759     @forced_breakpoint_to_go     = ();
6760     @summed_lengths_to_go        = ();    # line length to start of ith token
6761     @token_lengths_to_go         = ();
6762     @levels_to_go                = ();
6763     @matching_token_to_go        = ();
6764     @mate_index_to_go            = ();
6765     @ci_levels_to_go             = ();
6766     @nesting_depth_to_go         = (0);
6767     @nobreak_to_go               = ();
6768     @old_breakpoint_to_go        = ();
6769     @tokens_to_go                = ();
6770     @rtoken_vars_to_go           = ();
6771     @K_to_go                     = ();
6772     @types_to_go                 = ();
6773     @leading_spaces_to_go        = ();
6774     @reduced_spaces_to_go        = ();
6775     @inext_to_go                 = ();
6776     @iprev_to_go                 = ();
6777
6778     @whitespace_level_stack = ();
6779     $whitespace_last_level  = -1;
6780
6781     @dont_align         = ();
6782     @has_broken_sublist = ();
6783     @want_comma_break   = ();
6784
6785     @ci_stack                   = ("");
6786     $first_tabbing_disagreement = 0;
6787     $last_tabbing_disagreement  = 0;
6788     $tabbing_disagreement_count = 0;
6789     $in_tabbing_disagreement    = 0;
6790     $input_line_tabbing         = undef;
6791
6792     $last_last_line_leading_level = 0;
6793     $last_line_leading_level      = 0;
6794     $last_line_leading_type       = '#';
6795
6796     $last_nonblank_token        = ';';
6797     $last_nonblank_type         = ';';
6798     $last_last_nonblank_token   = ';';
6799     $last_last_nonblank_type    = ';';
6800     $last_nonblank_block_type   = "";
6801     $last_output_level          = 0;
6802     $looking_for_else           = 0;
6803     $embedded_tab_count         = 0;
6804     $first_embedded_tab_at      = 0;
6805     $last_embedded_tab_at       = 0;
6806     $deleted_semicolon_count    = 0;
6807     $first_deleted_semicolon_at = 0;
6808     $last_deleted_semicolon_at  = 0;
6809     $added_semicolon_count      = 0;
6810     $first_added_semicolon_at   = 0;
6811     $last_added_semicolon_at    = 0;
6812     $is_static_block_comment    = 0;
6813     %postponed_breakpoint       = ();
6814
6815     # variables for adding side comments
6816     %block_leading_text        = ();
6817     %block_opening_line_number = ();
6818     $csc_new_statement_ok      = 1;
6819     %csc_block_label           = ();
6820
6821     %saved_opening_indentation = ();
6822
6823     reset_block_text_accumulator();
6824
6825     prepare_for_new_input_lines();
6826
6827     $vertical_aligner_object =
6828       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6829         $logger_object, $diagnostics_object );
6830
6831     if ( $rOpts->{'entab-leading-whitespace'} ) {
6832         write_logfile_entry(
6833 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6834         );
6835     }
6836     elsif ( $rOpts->{'tabs'} ) {
6837         write_logfile_entry("Indentation will be with a tab character\n");
6838     }
6839     else {
6840         write_logfile_entry(
6841             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6842     }
6843
6844     # This hash holds the main data structures for formatting
6845     # All hash keys must be defined here.
6846     $formatter_self = {
6847         rlines              => [],       # = ref to array of lines of the file
6848         rLL                 => [],       # = ref to array with all tokens
6849                                          # in the file. LL originally meant
6850                                          # 'Linked List'. Linked lists were a
6851                                          # bad idea but LL is easy to type.
6852         Klimit              => undef,    # = maximum K index for rLL. This is
6853                                          # needed to catch any autovivification
6854                                          # problems.
6855         rnested_pairs       => [],       # for welding decisions
6856         K_opening_container => {},       # for quickly traversing structure
6857         K_closing_container => {},       # for quickly traversing structure
6858         K_opening_ternary   => {},       # for quickly traversing structure
6859         K_closing_ternary   => {},       # for quickly traversing structure
6860         rK_phantom_semicolons =>
6861           undef,    # for undoing phantom semicolons if iterating
6862         rpaired_to_inner_container => {},
6863         rbreak_container           => {},    # prevent one-line blocks
6864         rvalid_self_keys           => [],    # for checking
6865     };
6866     my @valid_keys = keys %{$formatter_self};
6867     $formatter_self->{rvalid_self_keys} = \@valid_keys;
6868
6869     bless $formatter_self, $class;
6870
6871     # Safety check..this is not a class yet
6872     if ( _increment_count() > 1 ) {
6873         confess
6874 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
6875     }
6876     return $formatter_self;
6877 }
6878
6879 sub Fault {
6880     my ($msg) = @_;
6881
6882     # This routine is called for errors that really should not occur
6883     # except if there has been a bug introduced by a recent program change
6884     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
6885     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
6886     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
6887
6888     Perl::Tidy::Die(<<EOM);
6889 ==============================================================================
6890 Fault detected at line $line0 of sub '$subroutine1'
6891 in file '$filename1'
6892 which was called from line $line1 of sub '$subroutine2'
6893 Message: '$msg'
6894 This is probably an error introduced by a recent programming change.
6895 ==============================================================================
6896 EOM
6897 }
6898
6899 sub check_self_hash {
6900     my $self            = shift;
6901     my @valid_self_keys = @{ $self->{rvalid_self_keys} };
6902     my %valid_self_hash;
6903     @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
6904     check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
6905     return;
6906 }
6907
6908 sub check_token_array {
6909     my $self = shift;
6910
6911     # Check for errors in the array of tokens
6912     # Uses package variable $NVARS
6913     $self->check_self_hash();
6914     my $rLL = $self->{rLL};
6915     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
6916         my $nvars = @{ $rLL->[$KK] };
6917         if ( $nvars != $NVARS ) {
6918             my $type = $rLL->[$KK]->[_TYPE_];
6919             $type = '*' unless defined($type);
6920             Fault(
6921 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
6922             );
6923         }
6924         foreach my $var ( _TOKEN_, _TYPE_ ) {
6925             if ( !defined( $rLL->[$KK]->[$var] ) ) {
6926                 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
6927                 Fault("Undefined variable $var for K=$KK, line=$iline\n");
6928             }
6929         }
6930         return;
6931     }
6932 }
6933
6934 sub set_rLL_max_index {
6935     my $self = shift;
6936
6937     # Set the limit of the rLL array, assuming that it is correct.
6938     # This should only be called by routines after they make changes
6939     # to tokenization
6940     my $rLL = $self->{rLL};
6941     if ( !defined($rLL) ) {
6942
6943         # Shouldn't happen because rLL was initialized to be an array ref
6944         Fault("Undefined Memory rLL");
6945     }
6946     my $Klimit_old = $self->{Klimit};
6947     my $num        = @{$rLL};
6948     my $Klimit;
6949     if ( $num > 0 ) { $Klimit = $num - 1 }
6950     $self->{Klimit} = $Klimit;
6951     return ($Klimit);
6952 }
6953
6954 sub get_rLL_max_index {
6955     my $self = shift;
6956
6957     # the memory location $rLL and number of tokens should be obtained
6958     # from this routine so that any autovivication can be immediately caught.
6959     my $rLL    = $self->{rLL};
6960     my $Klimit = $self->{Klimit};
6961     if ( !defined($rLL) ) {
6962
6963         # Shouldn't happen because rLL was initialized to be an array ref
6964         Fault("Undefined Memory rLL");
6965     }
6966     my $num = @{$rLL};
6967     if (   $num == 0 && defined($Klimit)
6968         || $num > 0 && !defined($Klimit)
6969         || $num > 0 && $Klimit != $num - 1 )
6970     {
6971
6972         # Possible autovivification problem...
6973         if ( !defined($Klimit) ) { $Klimit = '*' }
6974         Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
6975     }
6976     return ($Klimit);
6977 }
6978
6979 sub prepare_for_new_input_lines {
6980
6981     # Remember the largest batch size processed. This is needed
6982     # by the pad routine to avoid padding the first nonblank token
6983     if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
6984         $peak_batch_size = $max_index_to_go;
6985     }
6986
6987     $gnu_sequence_number++;    # increment output batch counter
6988     %last_gnu_equals                = ();
6989     %gnu_comma_count                = ();
6990     %gnu_arrow_count                = ();
6991     $line_start_index_to_go         = 0;
6992     $max_gnu_item_index             = UNDEFINED_INDEX;
6993     $index_max_forced_break         = UNDEFINED_INDEX;
6994     $max_index_to_go                = UNDEFINED_INDEX;
6995     $last_nonblank_index_to_go      = UNDEFINED_INDEX;
6996     $last_nonblank_type_to_go       = '';
6997     $last_nonblank_token_to_go      = '';
6998     $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6999     $last_last_nonblank_type_to_go  = '';
7000     $last_last_nonblank_token_to_go = '';
7001     $forced_breakpoint_count        = 0;
7002     $forced_breakpoint_undo_count   = 0;
7003     $rbrace_follower                = undef;
7004     $summed_lengths_to_go[0]        = 0;
7005     $comma_count_in_batch           = 0;
7006     $starting_in_quote              = 0;
7007
7008     destroy_one_line_block();
7009     return;
7010 }
7011
7012 sub break_lines {
7013
7014     # Loop over old lines to set new line break points
7015
7016     my $self   = shift;
7017     my $rlines = $self->{rlines};
7018
7019     # Flag to prevent blank lines when POD occurs in a format skipping sect.
7020     my $in_format_skipping_section;
7021
7022     my $line_type = "";
7023     foreach my $line_of_tokens ( @{$rlines} ) {
7024
7025         my $last_line_type = $line_type;
7026         $line_type = $line_of_tokens->{_line_type};
7027         my $input_line = $line_of_tokens->{_line_text};
7028
7029         # _line_type codes are:
7030         #   SYSTEM         - system-specific code before hash-bang line
7031         #   CODE           - line of perl code (including comments)
7032         #   POD_START      - line starting pod, such as '=head'
7033         #   POD            - pod documentation text
7034         #   POD_END        - last line of pod section, '=cut'
7035         #   HERE           - text of here-document
7036         #   HERE_END       - last line of here-doc (target word)
7037         #   FORMAT         - format section
7038         #   FORMAT_END     - last line of format section, '.'
7039         #   DATA_START     - __DATA__ line
7040         #   DATA           - unidentified text following __DATA__
7041         #   END_START      - __END__ line
7042         #   END            - unidentified text following __END__
7043         #   ERROR          - we are in big trouble, probably not a perl script
7044
7045         # put a blank line after an =cut which comes before __END__ and __DATA__
7046         # (required by podchecker)
7047         if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
7048             $file_writer_object->reset_consecutive_blank_lines();
7049             if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
7050                 $self->want_blank_line();
7051             }
7052         }
7053
7054         # handle line of code..
7055         if ( $line_type eq 'CODE' ) {
7056
7057             my $CODE_type = $line_of_tokens->{_code_type};
7058             $in_format_skipping_section = $CODE_type eq 'FS';
7059
7060             # Handle blank lines
7061             if ( $CODE_type eq 'BL' ) {
7062
7063                 # If keep-old-blank-lines is zero, we delete all
7064                 # old blank lines and let the blank line rules generate any
7065                 # needed blanks.
7066                 if ($rOpts_keep_old_blank_lines) {
7067                     $self->flush();
7068                     $file_writer_object->write_blank_code_line(
7069                         $rOpts_keep_old_blank_lines == 2 );
7070                     $last_line_leading_type = 'b';
7071                 }
7072                 next;
7073             }
7074             else {
7075
7076                 # let logger see all non-blank lines of code
7077                 my $output_line_number = get_output_line_number();
7078                 ##$vertical_aligner_object->get_output_line_number();
7079                 black_box( $line_of_tokens, $output_line_number );
7080             }
7081
7082             # Handle Format Skipping (FS) and Verbatim (VB) Lines
7083             if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
7084                 $self->write_unindented_line("$input_line");
7085                 $file_writer_object->reset_consecutive_blank_lines();
7086                 next;
7087             }
7088
7089             # Handle all other lines of code
7090             $self->print_line_of_tokens($line_of_tokens);
7091         }
7092
7093         # handle line of non-code..
7094         else {
7095
7096             # set special flags
7097             my $skip_line = 0;
7098             my $tee_line  = 0;
7099             if ( $line_type =~ /^POD/ ) {
7100
7101                 # Pod docs should have a preceding blank line.  But stay
7102                 # out of __END__ and __DATA__ sections, because
7103                 # the user may be using this section for any purpose whatsoever
7104                 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
7105                 if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
7106                 if ( $rOpts->{'trim-pod'} )   { $input_line =~ s/\s+$// }
7107                 if (   !$skip_line
7108                     && !$in_format_skipping_section
7109                     && $line_type eq 'POD_START'
7110                     && !$saw_END_or_DATA_ )
7111                 {
7112                     $self->want_blank_line();
7113                 }
7114             }
7115
7116             # leave the blank counters in a predictable state
7117             # after __END__ or __DATA__
7118             elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
7119                 $file_writer_object->reset_consecutive_blank_lines();
7120                 $saw_END_or_DATA_ = 1;
7121             }
7122
7123             # write unindented non-code line
7124             if ( !$skip_line ) {
7125                 if ($tee_line) { $file_writer_object->tee_on() }
7126                 $self->write_unindented_line($input_line);
7127                 if ($tee_line) { $file_writer_object->tee_off() }
7128             }
7129         }
7130     }
7131     return;
7132 }
7133
7134 {    ## Beginning of routine to check line hashes
7135
7136     my %valid_line_hash;
7137
7138     BEGIN {
7139
7140         # These keys are defined for each line in the formatter
7141         # Each line must have exactly these quantities
7142         my @valid_line_keys = qw(
7143           _curly_brace_depth
7144           _ending_in_quote
7145           _guessed_indentation_level
7146           _line_number
7147           _line_text
7148           _line_type
7149           _paren_depth
7150           _quote_character
7151           _rK_range
7152           _square_bracket_depth
7153           _starting_in_quote
7154           _ended_in_blank_token
7155           _code_type
7156
7157           _ci_level_0
7158           _level_0
7159           _nesting_blocks_0
7160           _nesting_tokens_0
7161         );
7162
7163         @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
7164     }
7165
7166     sub check_line_hashes {
7167         my $self = shift;
7168         $self->check_self_hash();
7169         my $rlines = $self->{rlines};
7170         foreach my $rline ( @{$rlines} ) {
7171             my $iline     = $rline->{_line_number};
7172             my $line_type = $rline->{_line_type};
7173             check_keys( $rline, \%valid_line_hash,
7174                 "Checkpoint: line number =$iline,  line_type=$line_type", 1 );
7175         }
7176         return;
7177     }
7178
7179 }    ## End check line hashes
7180
7181 sub write_line {
7182
7183     # We are caching tokenized lines as they arrive and converting them to the
7184     # format needed for the final formatting.
7185     my ( $self, $line_of_tokens_old ) = @_;
7186     my $rLL        = $self->{rLL};
7187     my $Klimit     = $self->{Klimit};
7188     my $rlines_new = $self->{rlines};
7189
7190     my $Kfirst;
7191     my $line_of_tokens = {};
7192     foreach my $key (
7193         qw(
7194         _curly_brace_depth
7195         _ending_in_quote
7196         _guessed_indentation_level
7197         _line_number
7198         _line_text
7199         _line_type
7200         _paren_depth
7201         _quote_character
7202         _square_bracket_depth
7203         _starting_in_quote
7204         )
7205       )
7206     {
7207         $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
7208     }
7209
7210     # Data needed by Logger
7211     $line_of_tokens->{_level_0}          = 0;
7212     $line_of_tokens->{_ci_level_0}       = 0;
7213     $line_of_tokens->{_nesting_blocks_0} = "";
7214     $line_of_tokens->{_nesting_tokens_0} = "";
7215
7216     # Needed to avoid trimming quotes
7217     $line_of_tokens->{_ended_in_blank_token} = undef;
7218
7219     my $line_type     = $line_of_tokens_old->{_line_type};
7220     my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
7221     if ( $line_type eq 'CODE' ) {
7222
7223         my $rtokens         = $line_of_tokens_old->{_rtokens};
7224         my $rtoken_type     = $line_of_tokens_old->{_rtoken_type};
7225         my $rblock_type     = $line_of_tokens_old->{_rblock_type};
7226         my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
7227         my $rcontainer_environment =
7228           $line_of_tokens_old->{_rcontainer_environment};
7229         my $rtype_sequence  = $line_of_tokens_old->{_rtype_sequence};
7230         my $rlevels         = $line_of_tokens_old->{_rlevels};
7231         my $rslevels        = $line_of_tokens_old->{_rslevels};
7232         my $rci_levels      = $line_of_tokens_old->{_rci_levels};
7233         my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
7234         my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
7235
7236         my $jmax = @{$rtokens} - 1;
7237         if ( $jmax >= 0 ) {
7238             $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
7239             foreach my $j ( 0 .. $jmax ) {
7240                 my @tokary;
7241                 @tokary[
7242                   _TOKEN_,                 _TYPE_,
7243                   _BLOCK_TYPE_,            _CONTAINER_TYPE_,
7244                   _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
7245                   _LEVEL_,                 _LEVEL_TRUE_,
7246                   _SLEVEL_,                _CI_LEVEL_,
7247                   _LINE_INDEX_,
7248                   ]
7249                   = (
7250                     $rtokens->[$j],                $rtoken_type->[$j],
7251                     $rblock_type->[$j],            $rcontainer_type->[$j],
7252                     $rcontainer_environment->[$j], $rtype_sequence->[$j],
7253                     $rlevels->[$j],                $rlevels->[$j],
7254                     $rslevels->[$j],               $rci_levels->[$j],
7255                     $input_line_no,
7256                   );
7257                 ##push @token_array, \@tokary;
7258                 push @{$rLL}, \@tokary;
7259             }
7260
7261             #$Klast=@{$rLL}-1;
7262             $Klimit = @{$rLL} - 1;
7263
7264             # Need to remember if we can trim the input line
7265             $line_of_tokens->{_ended_in_blank_token} =
7266               $rtoken_type->[$jmax] eq 'b';
7267
7268             $line_of_tokens->{_level_0}          = $rlevels->[0];
7269             $line_of_tokens->{_ci_level_0}       = $rci_levels->[0];
7270             $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
7271             $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
7272         }
7273     }
7274
7275     $line_of_tokens->{_rK_range}  = [ $Kfirst, $Klimit ];
7276     $line_of_tokens->{_code_type} = "";
7277     $self->{Klimit}               = $Klimit;
7278
7279     push @{$rlines_new}, $line_of_tokens;
7280     return;
7281 }
7282
7283 BEGIN {
7284
7285     # initialize these global hashes, which control the use of
7286     # whitespace around tokens:
7287     #
7288     # %binary_ws_rules
7289     # %want_left_space
7290     # %want_right_space
7291     # %space_after_keyword
7292     #
7293     # Many token types are identical to the tokens themselves.
7294     # See the tokenizer for a complete list. Here are some special types:
7295     #   k = perl keyword
7296     #   f = semicolon in for statement
7297     #   m = unary minus
7298     #   p = unary plus
7299     # Note that :: is excluded since it should be contained in an identifier
7300     # Note that '->' is excluded because it never gets space
7301     # parentheses and brackets are excluded since they are handled specially
7302     # curly braces are included but may be overridden by logic, such as
7303     # newline logic.
7304
7305     # NEW_TOKENS: create a whitespace rule here.  This can be as
7306     # simple as adding your new letter to @spaces_both_sides, for
7307     # example.
7308
7309     my @q;
7310
7311     @q = qw" L { ( [ ";
7312     @is_opening_type{@q} = (1) x scalar(@q);
7313
7314     @q = qw" R } ) ] ";
7315     @is_closing_type{@q} = (1) x scalar(@q);
7316
7317     my @spaces_both_sides = qw"
7318       + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7319       .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7320       &&= ||= //= <=> A k f w F n C Y U G v
7321       ";
7322
7323     my @spaces_left_side = qw"
7324       t ! ~ m p { \ h pp mm Z j
7325       ";
7326     push( @spaces_left_side, '#' );    # avoids warning message
7327
7328     my @spaces_right_side = qw"
7329       ; } ) ] R J ++ -- **=
7330       ";
7331     push( @spaces_right_side, ',' );    # avoids warning message
7332
7333     # Note that we are in a BEGIN block here.  Later in processing
7334     # the values of %want_left_space and  %want_right_space
7335     # may be overridden by any user settings specified by the
7336     # -wls and -wrs parameters.  However the binary_whitespace_rules
7337     # are hardwired and have priority.
7338     @want_left_space{@spaces_both_sides} =
7339       (1) x scalar(@spaces_both_sides);
7340     @want_right_space{@spaces_both_sides} =
7341       (1) x scalar(@spaces_both_sides);
7342     @want_left_space{@spaces_left_side} =
7343       (1) x scalar(@spaces_left_side);
7344     @want_right_space{@spaces_left_side} =
7345       (-1) x scalar(@spaces_left_side);
7346     @want_left_space{@spaces_right_side} =
7347       (-1) x scalar(@spaces_right_side);
7348     @want_right_space{@spaces_right_side} =
7349       (1) x scalar(@spaces_right_side);
7350     $want_left_space{'->'}      = WS_NO;
7351     $want_right_space{'->'}     = WS_NO;
7352     $want_left_space{'**'}      = WS_NO;
7353     $want_right_space{'**'}     = WS_NO;
7354     $want_right_space{'CORE::'} = WS_NO;
7355
7356     # These binary_ws_rules are hardwired and have priority over the above
7357     # settings.  It would be nice to allow adjustment by the user,
7358     # but it would be complicated to specify.
7359     #
7360     # hash type information must stay tightly bound
7361     # as in :  ${xxxx}
7362     $binary_ws_rules{'i'}{'L'} = WS_NO;
7363     $binary_ws_rules{'i'}{'{'} = WS_YES;
7364     $binary_ws_rules{'k'}{'{'} = WS_YES;
7365     $binary_ws_rules{'U'}{'{'} = WS_YES;
7366     $binary_ws_rules{'i'}{'['} = WS_NO;
7367     $binary_ws_rules{'R'}{'L'} = WS_NO;
7368     $binary_ws_rules{'R'}{'{'} = WS_NO;
7369     $binary_ws_rules{'t'}{'L'} = WS_NO;
7370     $binary_ws_rules{'t'}{'{'} = WS_NO;
7371     $binary_ws_rules{'}'}{'L'} = WS_NO;
7372     $binary_ws_rules{'}'}{'{'} = WS_NO;
7373     $binary_ws_rules{'$'}{'L'} = WS_NO;
7374     $binary_ws_rules{'$'}{'{'} = WS_NO;
7375     $binary_ws_rules{'@'}{'L'} = WS_NO;
7376     $binary_ws_rules{'@'}{'{'} = WS_NO;
7377     $binary_ws_rules{'='}{'L'} = WS_YES;
7378     $binary_ws_rules{'J'}{'J'} = WS_YES;
7379
7380     # the following includes ') {'
7381     # as in :    if ( xxx ) { yyy }
7382     $binary_ws_rules{']'}{'L'} = WS_NO;
7383     $binary_ws_rules{']'}{'{'} = WS_NO;
7384     $binary_ws_rules{')'}{'{'} = WS_YES;
7385     $binary_ws_rules{')'}{'['} = WS_NO;
7386     $binary_ws_rules{']'}{'['} = WS_NO;
7387     $binary_ws_rules{']'}{'{'} = WS_NO;
7388     $binary_ws_rules{'}'}{'['} = WS_NO;
7389     $binary_ws_rules{'R'}{'['} = WS_NO;
7390
7391     $binary_ws_rules{']'}{'++'} = WS_NO;
7392     $binary_ws_rules{']'}{'--'} = WS_NO;
7393     $binary_ws_rules{')'}{'++'} = WS_NO;
7394     $binary_ws_rules{')'}{'--'} = WS_NO;
7395
7396     $binary_ws_rules{'R'}{'++'} = WS_NO;
7397     $binary_ws_rules{'R'}{'--'} = WS_NO;
7398
7399     $binary_ws_rules{'i'}{'Q'} = WS_YES;
7400     $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7401
7402     # FIXME: we could to split 'i' into variables and functions
7403     # and have no space for functions but space for variables.  For now,
7404     # I have a special patch in the special rules below
7405     $binary_ws_rules{'i'}{'('} = WS_NO;
7406
7407     $binary_ws_rules{'w'}{'('} = WS_NO;
7408     $binary_ws_rules{'w'}{'{'} = WS_YES;
7409 } ## end BEGIN block
7410
7411 sub set_whitespace_flags {
7412
7413     #    This routine examines each pair of nonblank tokens and
7414     #    sets a flag indicating if white space is needed.
7415     #
7416     #    $rwhitespace_flags->[$j] is a flag indicating whether a white space
7417     #    BEFORE token $j is needed, with the following values:
7418     #
7419     #             WS_NO      = -1 do not want a space before token $j
7420     #             WS_OPTIONAL=  0 optional space or $j is a whitespace
7421     #             WS_YES     =  1 want a space before token $j
7422     #
7423
7424     my $self = shift;
7425     my $rLL  = $self->{rLL};
7426
7427     my $rwhitespace_flags = [];
7428
7429     my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
7430         $token, $type, $block_type, $input_line_no );
7431     my $j_tight_closing_paren = -1;
7432
7433     $token              = ' ';
7434     $type               = 'b';
7435     $block_type         = '';
7436     $input_line_no      = 0;
7437     $last_token         = ' ';
7438     $last_type          = 'b';
7439     $last_block_type    = '';
7440     $last_input_line_no = 0;
7441
7442     my $jmax = @{$rLL} - 1;
7443
7444     my ($ws);
7445
7446     # This is some logic moved to a sub to avoid deep nesting of if stmts
7447     my $ws_in_container = sub {
7448
7449         my ($j) = @_;
7450         my $ws = WS_YES;
7451         if ( $j + 1 > $jmax ) { return (WS_NO) }
7452
7453         # Patch to count '-foo' as single token so that
7454         # each of  $a{-foo} and $a{foo} and $a{'foo'} do
7455         # not get spaces with default formatting.
7456         my $j_here = $j;
7457         ++$j_here
7458           if ( $token eq '-'
7459             && $last_token eq '{'
7460             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
7461
7462         # $j_next is where a closing token should be if
7463         # the container has a single token
7464         if ( $j_here + 1 > $jmax ) { return (WS_NO) }
7465         my $j_next =
7466           ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
7467           ? $j_here + 2
7468           : $j_here + 1;
7469
7470         if ( $j_next > $jmax ) { return WS_NO }
7471         my $tok_next  = $rLL->[$j_next]->[_TOKEN_];
7472         my $type_next = $rLL->[$j_next]->[_TYPE_];
7473
7474         # for tightness = 1, if there is just one token
7475         # within the matching pair, we will keep it tight
7476         if (
7477             $tok_next eq $matching_token{$last_token}
7478
7479             # but watch out for this: [ [ ]    (misc.t)
7480             && $last_token ne $token
7481
7482             # double diamond is usually spaced
7483             && $token ne '<<>>'
7484
7485           )
7486         {
7487
7488             # remember where to put the space for the closing paren
7489             $j_tight_closing_paren = $j_next;
7490             return (WS_NO);
7491         }
7492         return (WS_YES);
7493     };
7494
7495     # main loop over all tokens to define the whitespace flags
7496     for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
7497
7498         my $rtokh = $rLL->[$j];
7499
7500         # Set a default
7501         $rwhitespace_flags->[$j] = WS_OPTIONAL;
7502
7503         if ( $rtokh->[_TYPE_] eq 'b' ) {
7504             next;
7505         }
7506
7507         # set a default value, to be changed as needed
7508         $ws                 = undef;
7509         $last_token         = $token;
7510         $last_type          = $type;
7511         $last_block_type    = $block_type;
7512         $last_input_line_no = $input_line_no;
7513         $token              = $rtokh->[_TOKEN_];
7514         $type               = $rtokh->[_TYPE_];
7515         $block_type         = $rtokh->[_BLOCK_TYPE_];
7516         $input_line_no      = $rtokh->[_LINE_INDEX_];
7517
7518         #---------------------------------------------------------------
7519         # Whitespace Rules Section 1:
7520         # Handle space on the inside of opening braces.
7521         #---------------------------------------------------------------
7522
7523         #    /^[L\{\(\[]$/
7524         if ( $is_opening_type{$last_type} ) {
7525
7526             $j_tight_closing_paren = -1;
7527
7528             # let us keep empty matched braces together: () {} []
7529             # except for BLOCKS
7530             if ( $token eq $matching_token{$last_token} ) {
7531                 if ($block_type) {
7532                     $ws = WS_YES;
7533                 }
7534                 else {
7535                     $ws = WS_NO;
7536                 }
7537             }
7538             else {
7539
7540                 # we're considering the right of an opening brace
7541                 # tightness = 0 means always pad inside with space
7542                 # tightness = 1 means pad inside if "complex"
7543                 # tightness = 2 means never pad inside with space
7544
7545                 my $tightness;
7546                 if (   $last_type eq '{'
7547                     && $last_token eq '{'
7548                     && $last_block_type )
7549                 {
7550                     $tightness = $rOpts_block_brace_tightness;
7551                 }
7552                 else { $tightness = $tightness{$last_token} }
7553
7554                #=============================================================
7555                # Patch for test problem fabrice_bug.pl
7556                # We must always avoid spaces around a bare word beginning
7557                # with ^ as in:
7558                #    my $before = ${^PREMATCH};
7559                # Because all of the following cause an error in perl:
7560                #    my $before = ${ ^PREMATCH };
7561                #    my $before = ${ ^PREMATCH};
7562                #    my $before = ${^PREMATCH };
7563                # So if brace tightness flag is -bt=0 we must temporarily reset
7564                # to bt=1.  Note that here we must set tightness=1 and not 2 so
7565                # that the closing space
7566                # is also avoided (via the $j_tight_closing_paren flag in coding)
7567                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
7568
7569                 #=============================================================
7570
7571                 if ( $tightness <= 0 ) {
7572                     $ws = WS_YES;
7573                 }
7574                 elsif ( $tightness > 1 ) {
7575                     $ws = WS_NO;
7576                 }
7577                 else {
7578                     $ws = $ws_in_container->($j);
7579                 }
7580             }
7581         }    # end setting space flag inside opening tokens
7582         my $ws_1;
7583         $ws_1 = $ws
7584           if FORMATTER_DEBUG_FLAG_WHITE;
7585
7586         #---------------------------------------------------------------
7587         # Whitespace Rules Section 2:
7588         # Handle space on inside of closing brace pairs.
7589         #---------------------------------------------------------------
7590
7591         #   /[\}\)\]R]/
7592         if ( $is_closing_type{$type} ) {
7593
7594             if ( $j == $j_tight_closing_paren ) {
7595
7596                 $j_tight_closing_paren = -1;
7597                 $ws                    = WS_NO;
7598             }
7599             else {
7600
7601                 if ( !defined($ws) ) {
7602
7603                     my $tightness;
7604                     if ( $type eq '}' && $token eq '}' && $block_type ) {
7605                         $tightness = $rOpts_block_brace_tightness;
7606                     }
7607                     else { $tightness = $tightness{$token} }
7608
7609                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7610                 }
7611             }
7612         }    # end setting space flag inside closing tokens
7613
7614         my $ws_2;
7615         $ws_2 = $ws
7616           if FORMATTER_DEBUG_FLAG_WHITE;
7617
7618         #---------------------------------------------------------------
7619         # Whitespace Rules Section 3:
7620         # Use the binary rule table.
7621         #---------------------------------------------------------------
7622         if ( !defined($ws) ) {
7623             $ws = $binary_ws_rules{$last_type}{$type};
7624         }
7625         my $ws_3;
7626         $ws_3 = $ws
7627           if FORMATTER_DEBUG_FLAG_WHITE;
7628
7629         #---------------------------------------------------------------
7630         # Whitespace Rules Section 4:
7631         # Handle some special cases.
7632         #---------------------------------------------------------------
7633         if ( $token eq '(' ) {
7634
7635             # This will have to be tweaked as tokenization changes.
7636             # We usually want a space at '} (', for example:
7637             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7638             #
7639             # But not others:
7640             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7641             # At present, the above & block is marked as type L/R so this case
7642             # won't go through here.
7643             if ( $last_type eq '}' ) { $ws = WS_YES }
7644
7645             # NOTE: some older versions of Perl had occasional problems if
7646             # spaces are introduced between keywords or functions and opening
7647             # parens.  So the default is not to do this except is certain
7648             # cases.  The current Perl seems to tolerate spaces.
7649
7650             # Space between keyword and '('
7651             elsif ( $last_type eq 'k' ) {
7652                 $ws = WS_NO
7653                   unless ( $rOpts_space_keyword_paren
7654                     || $space_after_keyword{$last_token} );
7655             }
7656
7657             # Space between function and '('
7658             # -----------------------------------------------------
7659             # 'w' and 'i' checks for something like:
7660             #   myfun(    &myfun(   ->myfun(
7661             # -----------------------------------------------------
7662             elsif (( $last_type =~ /^[wUG]$/ )
7663                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7664             {
7665                 $ws = WS_NO unless ($rOpts_space_function_paren);
7666             }
7667
7668             # space between something like $i and ( in
7669             # for $i ( 0 .. 20 ) {
7670             # FIXME: eventually, type 'i' needs to be split into multiple
7671             # token types so this can be a hardwired rule.
7672             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7673                 $ws = WS_YES;
7674             }
7675
7676             # allow constant function followed by '()' to retain no space
7677             elsif ($last_type eq 'C'
7678                 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
7679             {
7680                 $ws = WS_NO;
7681             }
7682         }
7683
7684         # patch for SWITCH/CASE: make space at ']{' optional
7685         # since the '{' might begin a case or when block
7686         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
7687             $ws = WS_OPTIONAL;
7688         }
7689
7690         # keep space between 'sub' and '{' for anonymous sub definition
7691         if ( $type eq '{' ) {
7692             if ( $last_token eq 'sub' ) {
7693                 $ws = WS_YES;
7694             }
7695
7696             # this is needed to avoid no space in '){'
7697             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7698
7699             # avoid any space before the brace or bracket in something like
7700             #  @opts{'a','b',...}
7701             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7702                 $ws = WS_NO;
7703             }
7704         }
7705
7706         elsif ( $type eq 'i' ) {
7707
7708             # never a space before ->
7709             if ( $token =~ /^\-\>/ ) {
7710                 $ws = WS_NO;
7711             }
7712         }
7713
7714         # retain any space between '-' and bare word
7715         elsif ( $type eq 'w' || $type eq 'C' ) {
7716             $ws = WS_OPTIONAL if $last_type eq '-';
7717
7718             # never a space before ->
7719             if ( $token =~ /^\-\>/ ) {
7720                 $ws = WS_NO;
7721             }
7722         }
7723
7724         # retain any space between '-' and bare word
7725         # example: avoid space between 'USER' and '-' here:
7726         #   $myhash{USER-NAME}='steve';
7727         elsif ( $type eq 'm' || $type eq '-' ) {
7728             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
7729         }
7730
7731         # always space before side comment
7732         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
7733
7734         # always preserver whatever space was used after a possible
7735         # filehandle (except _) or here doc operator
7736         if (
7737             $type ne '#'
7738             && ( ( $last_type eq 'Z' && $last_token ne '_' )
7739                 || $last_type eq 'h' )
7740           )
7741         {
7742             $ws = WS_OPTIONAL;
7743         }
7744
7745         # space_backslash_quote; RT #123774
7746         # allow a space between a backslash and single or double quote
7747         # to avoid fooling html formatters
7748         elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
7749             if ($rOpts_space_backslash_quote) {
7750                 if ( $rOpts_space_backslash_quote == 1 ) {
7751                     $ws = WS_OPTIONAL;
7752                 }
7753                 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
7754                 else { }    # shouldnt happen
7755             }
7756             else {
7757                 $ws = WS_NO;
7758             }
7759         }
7760
7761         my $ws_4;
7762         $ws_4 = $ws
7763           if FORMATTER_DEBUG_FLAG_WHITE;
7764
7765         #---------------------------------------------------------------
7766         # Whitespace Rules Section 5:
7767         # Apply default rules not covered above.
7768         #---------------------------------------------------------------
7769
7770         # If we fall through to here, look at the pre-defined hash tables for
7771         # the two tokens, and:
7772         #  if (they are equal) use the common value
7773         #  if (either is zero or undef) use the other
7774         #  if (either is -1) use it
7775         # That is,
7776         # left  vs right
7777         #  1    vs    1     -->  1
7778         #  0    vs    0     -->  0
7779         # -1    vs   -1     --> -1
7780         #
7781         #  0    vs   -1     --> -1
7782         #  0    vs    1     -->  1
7783         #  1    vs    0     -->  1
7784         # -1    vs    0     --> -1
7785         #
7786         # -1    vs    1     --> -1
7787         #  1    vs   -1     --> -1
7788         if ( !defined($ws) ) {
7789             my $wl = $want_left_space{$type};
7790             my $wr = $want_right_space{$last_type};
7791             if ( !defined($wl) ) { $wl = 0 }
7792             if ( !defined($wr) ) { $wr = 0 }
7793             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
7794         }
7795
7796         if ( !defined($ws) ) {
7797             $ws = 0;
7798             write_diagnostics(
7799                 "WS flag is undefined for tokens $last_token $token\n");
7800         }
7801
7802         # Treat newline as a whitespace. Otherwise, we might combine
7803         # 'Send' and '-recipients' here according to the above rules:
7804         #    my $msg = new Fax::Send
7805         #      -recipients => $to,
7806         #      -data => $data;
7807         if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
7808
7809         if (   ( $ws == 0 )
7810             && $j > 0
7811             && $j < $jmax
7812             && ( $last_type !~ /^[Zh]$/ ) )
7813         {
7814
7815             # If this happens, we have a non-fatal but undesirable
7816             # hole in the above rules which should be patched.
7817             write_diagnostics(
7818                 "WS flag is zero for tokens $last_token $token\n");
7819         }
7820
7821         $rwhitespace_flags->[$j] = $ws;
7822
7823         FORMATTER_DEBUG_FLAG_WHITE && do {
7824             my $str = substr( $last_token, 0, 15 );
7825             $str .= ' ' x ( 16 - length($str) );
7826             if ( !defined($ws_1) ) { $ws_1 = "*" }
7827             if ( !defined($ws_2) ) { $ws_2 = "*" }
7828             if ( !defined($ws_3) ) { $ws_3 = "*" }
7829             if ( !defined($ws_4) ) { $ws_4 = "*" }
7830             print STDOUT
7831 "NEW WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
7832         };
7833     } ## end main loop
7834
7835     if ( $rOpts->{'tight-secret-operators'} ) {
7836         new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
7837     }
7838     return $rwhitespace_flags;
7839 } ## end sub set_whitespace_flags
7840
7841 sub respace_tokens {
7842
7843     my $self = shift;
7844     return if $rOpts->{'indent-only'};
7845
7846     # This routine makes all necessary changes to the tokenization after the
7847     # file has been read. This consists mostly of inserting and deleting spaces
7848     # according to the selected parameters. In a few cases non-space characters
7849     # are added, deleted or modified.
7850
7851     # The old tokens are copied one-by-one, with changes, from the old
7852     # linear storage array to a new array.
7853
7854     my $rLL                        = $self->{rLL};
7855     my $Klimit_old                 = $self->{Klimit};
7856     my $rlines                     = $self->{rlines};
7857     my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
7858
7859     my $rLL_new = [];    # This is the new array
7860     my $KK      = 0;
7861     my $rtoken_vars;
7862     my $Kmax = @{$rLL} - 1;
7863
7864     # Set the whitespace flags, which indicate the token spacing preference.
7865     my $rwhitespace_flags = $self->set_whitespace_flags();
7866
7867     # we will be setting token lengths as we go
7868     my $cumulative_length = 0;
7869
7870     # We also define these hash indexes giving container token array indexes
7871     # as a function of the container sequence numbers.  For example,
7872     my $K_opening_container = {};    # opening [ { or (
7873     my $K_closing_container = {};    # closing ] } or )
7874     my $K_opening_ternary   = {};    # opening ? of ternary
7875     my $K_closing_ternary   = {};    # closing : of ternary
7876
7877     # List of new K indexes of phantom semicolons
7878     # This will be needed if we want to undo them for iterations
7879     my $rK_phantom_semicolons = [];
7880
7881     # Temporary hashes for adding semicolons
7882     ##my $rKfirst_new               = {};
7883
7884     # a sub to link preceding nodes forward to a new node type
7885     my $link_back = sub {
7886         my ( $Ktop, $key ) = @_;
7887
7888         my $Kprev = $Ktop - 1;
7889         while ( $Kprev >= 0
7890             && !defined( $rLL_new->[$Kprev]->[$key] ) )
7891         {
7892             $rLL_new->[$Kprev]->[$key] = $Ktop;
7893             $Kprev -= 1;
7894         }
7895     };
7896
7897     # A sub to store each token in the new array
7898     # All new tokens must be stored by this sub so that it can update
7899     # all data structures on the fly.
7900     my $last_nonblank_type = ';';
7901     my $store_token        = sub {
7902         my ($item) = @_;
7903
7904         # This will be the index of this item in the new array
7905         my $KK_new = @{$rLL_new};
7906
7907         # check for a sequenced item (i.e., container or ?/:)
7908         my $type_sequence = $item->[_TYPE_SEQUENCE_];
7909         if ($type_sequence) {
7910
7911             $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
7912
7913             my $token = $item->[_TOKEN_];
7914             if ( $is_opening_token{$token} ) {
7915
7916                 $K_opening_container->{$type_sequence} = $KK_new;
7917             }
7918             elsif ( $is_closing_token{$token} ) {
7919
7920                 $K_closing_container->{$type_sequence} = $KK_new;
7921             }
7922
7923             # These are not yet used but could be useful
7924             else {
7925                 if ( $token eq '?' ) {
7926                     $K_opening_ternary->{$type_sequence} = $KK;
7927                 }
7928                 elsif ( $token eq ':' ) {
7929                     $K_closing_ternary->{$type_sequence} = $KK;
7930                 }
7931                 else {
7932                     # shouldn't happen
7933                     print STDERR "Ugh: shouldn't happen\n";
7934                 }
7935             }
7936         }
7937
7938         # Save the length sum to just BEFORE this token
7939         $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
7940
7941         # now set the length of this token
7942         my $token_length = length( $item->[_TOKEN_] );
7943
7944         # and update the cumulative length
7945         $cumulative_length += $token_length;
7946
7947         my $type = $item->[_TYPE_];
7948         if ( $type ne 'b' ) { $last_nonblank_type = $type }
7949
7950         # and finally, add this item to the new array
7951         push @{$rLL_new}, $item;
7952     };
7953
7954     my $add_phantom_semicolon = sub {
7955
7956         my ($KK) = @_;
7957
7958         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
7959         return unless ( defined($Kp) );
7960
7961         # we are only adding semicolons for certain block types
7962         my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
7963         return
7964           unless ( $ok_to_add_semicolon_for_block_type{$block_type}
7965             || $block_type =~ /^(sub|package)/
7966             || $block_type =~ /^\w+\:$/ );
7967
7968         my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
7969
7970         my $previous_nonblank_type  = $rLL_new->[$Kp]->[_TYPE_];
7971         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
7972
7973         # Do not add a semicolon if...
7974         return
7975           if (
7976
7977             # it would follow a comment (and be isolated)
7978             $previous_nonblank_type eq '#'
7979
7980             # it follows a code block ( because they are not always wanted
7981             # there and may add clutter)
7982             || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
7983
7984             # it would follow a label
7985             || $previous_nonblank_type eq 'J'
7986
7987             # it would be inside a 'format' statement (and cause syntax error)
7988             || (   $previous_nonblank_type eq 'k'
7989                 && $previous_nonblank_token =~ /format/ )
7990
7991             # if it would prevent welding two containers
7992             || $rpaired_to_inner_container->{$type_sequence}
7993
7994           );
7995
7996    # We will insert an empty semicolon here as a placeholder.
7997    # Later, if it becomes the last token on a line, we will bring it to life.
7998    # The advantage of doing this is that (1) we just have to check line endings,
7999    # and (2) the phantom semicolon has zero width and therefore won't cause
8000    # needless breaks of one-line blocks.
8001         my $Ktop = -1;
8002         if (   $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
8003             && $want_left_space{';'} == WS_NO )
8004         {
8005
8006             # convert the blank into a semicolon..
8007             # be careful: we are working on the new stack top
8008             # on a token which has been stored.
8009             my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
8010
8011             # Convert the existing blank to a semicolon
8012             $rLL_new->[$Ktop]->[_TOKEN_] = '';    # zero length
8013             $rLL_new->[$Ktop]->[_TYPE_]  = ';';
8014             $rLL_new->[$Ktop]->[_SLEVEL_] =
8015               $rLL->[$KK]->[_SLEVEL_];
8016
8017             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
8018
8019             # Then store a new blank
8020             $store_token->($rcopy);
8021         }
8022         else {
8023
8024             # insert a new token
8025             my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
8026             $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
8027             $store_token->($rcopy);
8028             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
8029         }
8030     };
8031
8032     my $check_Q = sub {
8033
8034         # Check that a quote looks okay
8035         # This works but needs to by sync'd with the log file output
8036         my ( $KK, $Kfirst ) = @_;
8037         my $token = $rLL->[$KK]->[_TOKEN_];
8038         note_embedded_tab() if ( $token =~ "\t" );
8039
8040         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
8041         return unless ( defined($Kp) );
8042         my $previous_nonblank_type  = $rLL_new->[$Kp]->[_TYPE_];
8043         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
8044
8045         my $previous_nonblank_type_2  = 'b';
8046         my $previous_nonblank_token_2 = "";
8047         my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
8048         if ( defined($Kpp) ) {
8049             $previous_nonblank_type_2  = $rLL_new->[$Kpp]->[_TYPE_];
8050             $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
8051         }
8052
8053         my $Kn                  = $self->K_next_nonblank($KK);
8054         my $next_nonblank_token = "";
8055         if ( defined($Kn) ) {
8056             $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
8057         }
8058
8059         my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
8060         my $type_0  = $rLL->[$Kfirst]->[_TYPE_];
8061
8062         # make note of something like '$var = s/xxx/yyy/;'
8063         # in case it should have been '$var =~ s/xxx/yyy/;'
8064         if (
8065                $token =~ /^(s|tr|y|m|\/)/
8066             && $previous_nonblank_token =~ /^(=|==|!=)$/
8067
8068             # preceded by simple scalar
8069             && $previous_nonblank_type_2 eq 'i'
8070             && $previous_nonblank_token_2 =~ /^\$/
8071
8072             # followed by some kind of termination
8073             # (but give complaint if we can not see far enough ahead)
8074             && $next_nonblank_token =~ /^[; \)\}]$/
8075
8076             # scalar is not declared
8077             && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
8078           )
8079         {
8080             my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8081             complain(
8082 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
8083             );
8084         }
8085     };
8086
8087     # Main loop over all lines of the file
8088     my $last_K_out;
8089     my $CODE_type = "";
8090     my $line_type = "";
8091     foreach my $line_of_tokens ( @{$rlines} ) {
8092
8093         $input_line_number = $line_of_tokens->{_line_number};
8094         my $last_line_type = $line_type;
8095         $line_type = $line_of_tokens->{_line_type};
8096         next unless ( $line_type eq 'CODE' );
8097         my $last_CODE_type = $CODE_type;
8098         $CODE_type = $line_of_tokens->{_code_type};
8099         my $rK_range = $line_of_tokens->{_rK_range};
8100         my ( $Kfirst, $Klast ) = @{$rK_range};
8101         next unless defined($Kfirst);
8102
8103         # Check for correct sequence of token indexes...
8104         # An error here means that sub write_line() did not correctly
8105         # package the tokenized lines as it received them.
8106         if ( defined($last_K_out) ) {
8107             if ( $Kfirst != $last_K_out + 1 ) {
8108                 Fault(
8109                     "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
8110                 );
8111             }
8112         }
8113         else {
8114             if ( $Kfirst != 0 ) {
8115                 Fault("Program Bug: first K is $Kfirst but should be 0");
8116             }
8117         }
8118         $last_K_out = $Klast;
8119
8120         # Handle special lines of code
8121         if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
8122
8123             # CODE_types are as follows.
8124             # 'BL' = Blank Line
8125             # 'VB' = Verbatim - line goes out verbatim
8126             # 'FS' = Format Skipping - line goes out verbatim, no blanks
8127             # 'IO' = Indent Only - only indentation may be changed
8128             # 'NIN' = No Internal Newlines - line does not get broken
8129             # 'HSC'=Hanging Side Comment - fix this hanging side comment
8130             # 'BC'=Block Comment - an ordinary full line comment
8131             # 'SBC'=Static Block Comment - a block comment which does not get
8132             #      indented
8133             # 'SBCX'=Static Block Comment Without Leading Space
8134             # 'DEL'=Delete this line
8135             # 'VER'=VERSION statement
8136             # '' or (undefined) - no restructions
8137
8138             # For a hanging side comment we insert an empty quote before
8139             # the comment so that it becomes a normal side comment and
8140             # will be aligned by the vertical aligner
8141             if ( $CODE_type eq 'HSC' ) {
8142
8143                 # Safety Check: This must be a line with one token (a comment)
8144                 my $rtoken_vars = $rLL->[$Kfirst];
8145                 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
8146
8147    # Note that even if the flag 'noadd-whitespace' is set, we will
8148    # make an exception here and allow a blank to be inserted to push the comment
8149    # to the right.  We can think of this as an adjustment of indentation
8150    # rather than whitespace between tokens. This will also prevent the hanging
8151    # side comment from getting converted to a block comment if whitespace
8152    # gets deleted, as for example with the -extrude and -mangle options.
8153                     my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
8154                     $store_token->($rcopy);
8155                     $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
8156                     $store_token->($rcopy);
8157                     $store_token->($rtoken_vars);
8158                     next;
8159                 }
8160                 else {
8161
8162                     # This line was mis-marked by sub scan_comment
8163                     Fault(
8164                         "Program bug. A hanging side comment has been mismarked"
8165                     );
8166                 }
8167             }
8168
8169             # Copy tokens unchanged
8170             foreach my $KK ( $Kfirst .. $Klast ) {
8171                 $store_token->( $rLL->[$KK] );
8172             }
8173             next;
8174         }
8175
8176         # Handle normal line..
8177
8178         # Insert any essential whitespace between lines
8179         # if last line was normal CODE
8180         my $type_next  = $rLL->[$Kfirst]->[_TYPE_];
8181         my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
8182         my $Kp         = $self->K_previous_nonblank( undef, $rLL_new );
8183         if (   $last_line_type eq 'CODE'
8184             && $type_next ne 'b'
8185             && defined($Kp) )
8186         {
8187             my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
8188             my $type_p  = $rLL_new->[$Kp]->[_TYPE_];
8189
8190             my ( $token_pp, $type_pp );
8191             my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
8192             if ( defined($Kpp) ) {
8193                 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
8194                 $type_pp  = $rLL_new->[$Kpp]->[_TYPE_];
8195             }
8196             else {
8197                 $token_pp = ";";
8198                 $type_pp  = ';';
8199             }
8200
8201             if (
8202                 is_essential_whitespace(
8203                     $token_pp, $type_pp,    $token_p,
8204                     $type_p,   $token_next, $type_next,
8205                 )
8206               )
8207             {
8208
8209                 # Copy this first token as blank, but use previous line number
8210                 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
8211                 $rcopy->[_LINE_INDEX_] =
8212                   $rLL_new->[-1]->[_LINE_INDEX_];
8213                 $store_token->($rcopy);
8214             }
8215         }
8216
8217         # loop to copy all tokens on this line, with any changes
8218         my $type_sequence;
8219         for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
8220             $rtoken_vars = $rLL->[$KK];
8221             my $token              = $rtoken_vars->[_TOKEN_];
8222             my $type               = $rtoken_vars->[_TYPE_];
8223             my $last_type_sequence = $type_sequence;
8224             $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8225
8226             # Handle a blank space ...
8227             if ( $type eq 'b' ) {
8228
8229                 # Delete it if not wanted by whitespace rules
8230                 # or we are deleting all whitespace
8231                 # Note that whitespace flag is a flag indicating whether a
8232                 # white space BEFORE the token is needed
8233                 next if ( $KK >= $Kmax );    # skip terminal blank
8234                 my $Knext = $KK + 1;
8235                 my $ws    = $rwhitespace_flags->[$Knext];
8236                 if (   $ws == -1
8237                     || $rOpts_delete_old_whitespace )
8238                 {
8239
8240                     # FIXME: maybe switch to using _new
8241                     my $Kp = $self->K_previous_nonblank($KK);
8242                     next unless defined($Kp);
8243                     my $token_p = $rLL->[$Kp]->[_TOKEN_];
8244                     my $type_p  = $rLL->[$Kp]->[_TYPE_];
8245
8246                     my ( $token_pp, $type_pp );
8247
8248                     #my $Kpp = $K_previous_nonblank->($Kp);
8249                     my $Kpp = $self->K_previous_nonblank($Kp);
8250                     if ( defined($Kpp) ) {
8251                         $token_pp = $rLL->[$Kpp]->[_TOKEN_];
8252                         $type_pp  = $rLL->[$Kpp]->[_TYPE_];
8253                     }
8254                     else {
8255                         $token_pp = ";";
8256                         $type_pp  = ';';
8257                     }
8258                     my $token_next = $rLL->[$Knext]->[_TOKEN_];
8259                     my $type_next  = $rLL->[$Knext]->[_TYPE_];
8260
8261                     my $do_not_delete = is_essential_whitespace(
8262                         $token_pp, $type_pp,    $token_p,
8263                         $type_p,   $token_next, $type_next,
8264                     );
8265
8266                     next unless ($do_not_delete);
8267                 }
8268
8269                 # make it just one character if allowed
8270                 if ($rOpts_add_whitespace) {
8271                     $rtoken_vars->[_TOKEN_] = ' ';
8272                 }
8273                 $store_token->($rtoken_vars);
8274                 next;
8275             }
8276
8277             # Handle a nonblank token...
8278
8279             # Modify certain tokens here for whitespace
8280             # The following is not yet done, but could be:
8281             #   sub (x x x)
8282             if ( $type =~ /^[wit]$/ ) {
8283
8284                 # Examples:
8285                 # change '$  var'  to '$var' etc
8286                 #        '-> new'  to '->new'
8287                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8288                     $token =~ s/\s*//g;
8289                     $rtoken_vars->[_TOKEN_] = $token;
8290                 }
8291
8292                 # Split identifiers with leading arrows, inserting blanks if
8293                 # necessary.  It is easier and safer here than in the
8294                 # tokenizer.  For example '->new' becomes two tokens, '->' and
8295                 # 'new' with a possible blank between.
8296                 #
8297                 # Note: there is a related patch in sub set_whitespace_flags
8298                 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
8299                     my $token_save = $1;
8300                     my $type_save  = $type;
8301
8302                     # store a blank to left of arrow if necessary
8303                     my $Kprev = $self->K_previous_nonblank($KK);
8304                     if (   defined($Kprev)
8305                         && $rLL->[$Kprev]->[_TYPE_] ne 'b'
8306                         && $rOpts_add_whitespace
8307                         && $want_left_space{'->'} == WS_YES )
8308                     {
8309                         my $rcopy =
8310                           copy_token_as_type( $rtoken_vars, 'b', ' ' );
8311                         $store_token->($rcopy);
8312                     }
8313
8314                     # then store the arrow
8315                     my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
8316                     $store_token->($rcopy);
8317
8318                     # then reset the current token to be the remainder,
8319                     # and reset the whitespace flag according to the arrow
8320                     $token = $rtoken_vars->[_TOKEN_] = $token_save;
8321                     $type  = $rtoken_vars->[_TYPE_]  = $type_save;
8322                     $store_token->($rtoken_vars);
8323                     next;
8324                 }
8325
8326                 if ( $token =~ /$SUB_PATTERN/ ) {
8327                     $token =~ s/\s+/ /g;
8328                     $rtoken_vars->[_TOKEN_] = $token;
8329                 }
8330
8331                 # trim identifiers of trailing blanks which can occur
8332                 # under some unusual circumstances, such as if the
8333                 # identifier 'witch' has trailing blanks on input here:
8334                 #
8335                 # sub
8336                 # witch
8337                 # ()   # prototype may be on new line ...
8338                 # ...
8339                 if ( $type eq 'i' ) {
8340                     $token =~ s/\s+$//g;
8341                     $rtoken_vars->[_TOKEN_] = $token;
8342                 }
8343             }
8344
8345             # change 'LABEL   :'   to 'LABEL:'
8346             elsif ( $type eq 'J' ) {
8347                 $token =~ s/\s+//g;
8348                 $rtoken_vars->[_TOKEN_] = $token;
8349             }
8350
8351             # patch to add space to something like "x10"
8352             # This avoids having to split this token in the pre-tokenizer
8353             elsif ( $type eq 'n' ) {
8354                 if ( $token =~ /^x\d+/ ) {
8355                     $token =~ s/x/x /;
8356                     $rtoken_vars->[_TOKEN_] = $token;
8357                 }
8358             }
8359
8360             # check a quote for problems
8361             elsif ( $type eq 'Q' ) {
8362
8363                 # This is ready to go but is commented out because there is
8364                 # still identical logic in sub break_lines.
8365                 # $check_Q->($KK, $Kfirst);
8366             }
8367
8368            # trim blanks from right of qw quotes
8369            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8370             elsif ( $type eq 'q' ) {
8371                 $token =~ s/\s*$//;
8372                 $rtoken_vars->[_TOKEN_] = $token;
8373                 note_embedded_tab() if ( $token =~ "\t" );
8374             }
8375
8376             elsif ($type_sequence) {
8377
8378                 #                if ( $is_opening_token{$token} ) {
8379                 #                }
8380
8381                 if ( $is_closing_token{$token} ) {
8382
8383                     # Insert a tentative missing semicolon if the next token is
8384                     # a closing block brace
8385                     if (
8386                            $type eq '}'
8387                         && $token eq '}'
8388
8389                         # not preceded by a ';'
8390                         && $last_nonblank_type ne ';'
8391
8392                    # and this is not a VERSION stmt (is all one line, we are not
8393                    # inserting semicolons on one-line blocks)
8394                         && $CODE_type ne 'VER'
8395
8396                         # and we are allowed to add semicolons
8397                         && $rOpts->{'add-semicolons'}
8398                       )
8399                     {
8400                         $add_phantom_semicolon->($KK);
8401                     }
8402                 }
8403             }
8404
8405             # Insert any needed whitespace
8406             if (   @{$rLL_new}
8407                 && $rLL_new->[-1]->[_TYPE_] ne 'b'
8408                 && $rOpts_add_whitespace )
8409             {
8410                 my $ws = $rwhitespace_flags->[$KK];
8411                 if ( $ws == 1 ) {
8412
8413                     my $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
8414                     $rcopy->[_LINE_INDEX_] =
8415                       $rLL_new->[-1]->[_LINE_INDEX_];
8416                     $store_token->($rcopy);
8417                 }
8418             }
8419             $store_token->($rtoken_vars);
8420         }    # End token loop
8421     }    # End line loop
8422
8423     # Reset memory to be the new array
8424     $self->{rLL} = $rLL_new;
8425     $self->set_rLL_max_index();
8426     $self->{K_opening_container}   = $K_opening_container;
8427     $self->{K_closing_container}   = $K_closing_container;
8428     $self->{K_opening_ternary}     = $K_opening_ternary;
8429     $self->{K_closing_ternary}     = $K_closing_ternary;
8430     $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
8431
8432     # make sure the new array looks okay
8433     $self->check_token_array();
8434
8435     # reset the token limits of each line
8436     $self->resync_lines_and_tokens();
8437
8438     return;
8439 }
8440
8441 {    # scan_comments
8442
8443     my $Last_line_had_side_comment;
8444     my $In_format_skipping_section;
8445     my $Saw_VERSION_in_this_file;
8446
8447     sub scan_comments {
8448         my $self   = shift;
8449         my $rlines = $self->{rlines};
8450
8451         $Last_line_had_side_comment = undef;
8452         $In_format_skipping_section = undef;
8453         $Saw_VERSION_in_this_file   = undef;
8454
8455         # Loop over all lines
8456         foreach my $line_of_tokens ( @{$rlines} ) {
8457             my $line_type = $line_of_tokens->{_line_type};
8458             next unless ( $line_type eq 'CODE' );
8459             my $CODE_type = $self->get_CODE_type($line_of_tokens);
8460             $line_of_tokens->{_code_type} = $CODE_type;
8461         }
8462         return;
8463     }
8464
8465     sub get_CODE_type {
8466         my ( $self, $line_of_tokens ) = @_;
8467
8468         # We are looking at a line of code and setting a flag to
8469         # describe any special processing that it requires
8470
8471         # Possible CODE_types are as follows.
8472         # 'BL' = Blank Line
8473         # 'VB' = Verbatim - line goes out verbatim
8474         # 'IO' = Indent Only - line goes out unchanged except for indentation
8475         # 'NIN' = No Internal Newlines - line does not get broken
8476         # 'HSC'=Hanging Side Comment - fix this hanging side comment
8477         # 'BC'=Block Comment - an ordinary full line comment
8478         # 'SBC'=Static Block Comment - a block comment which does not get
8479         #      indented
8480         # 'SBCX'=Static Block Comment Without Leading Space
8481         # 'DEL'=Delete this line
8482         # 'VER'=VERSION statement
8483         # '' or (undefined) - no restructions
8484
8485         my $rLL    = $self->{rLL};
8486         my $Klimit = $self->{Klimit};
8487
8488         my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
8489         my $no_internal_newlines = 1 - $rOpts_add_newlines;
8490         if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
8491
8492         # extract what we need for this line..
8493
8494         # Global value for error messages:
8495         $input_line_number = $line_of_tokens->{_line_number};
8496
8497         my $rK_range = $line_of_tokens->{_rK_range};
8498         my ( $Kfirst, $Klast ) = @{$rK_range};
8499         my $jmax = -1;
8500         if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
8501         my $input_line         = $line_of_tokens->{_line_text};
8502         my $in_continued_quote = my $starting_in_quote =
8503           $line_of_tokens->{_starting_in_quote};
8504         my $in_quote        = $line_of_tokens->{_ending_in_quote};
8505         my $ending_in_quote = $in_quote;
8506         my $guessed_indentation_level =
8507           $line_of_tokens->{_guessed_indentation_level};
8508
8509         my $is_static_block_comment = 0;
8510
8511         # Handle a continued quote..
8512         if ($in_continued_quote) {
8513
8514             # A line which is entirely a quote or pattern must go out
8515             # verbatim.  Note: the \n is contained in $input_line.
8516             if ( $jmax <= 0 ) {
8517                 if ( ( $input_line =~ "\t" ) ) {
8518                     note_embedded_tab();
8519                 }
8520                 $Last_line_had_side_comment = 0;
8521                 return 'VB';
8522             }
8523         }
8524
8525         my $is_block_comment =
8526           ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
8527
8528         # Write line verbatim if we are in a formatting skip section
8529         if ($In_format_skipping_section) {
8530             $Last_line_had_side_comment = 0;
8531
8532             # Note: extra space appended to comment simplifies pattern matching
8533             if ( $is_block_comment
8534                 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
8535                 /$format_skipping_pattern_end/o )
8536             {
8537                 $In_format_skipping_section = 0;
8538                 write_logfile_entry("Exiting formatting skip section\n");
8539             }
8540             return 'FS';
8541         }
8542
8543         # See if we are entering a formatting skip section
8544         if (   $rOpts_format_skipping
8545             && $is_block_comment
8546             && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
8547             /$format_skipping_pattern_begin/o )
8548         {
8549             $In_format_skipping_section = 1;
8550             write_logfile_entry("Entering formatting skip section\n");
8551             $Last_line_had_side_comment = 0;
8552             return 'FS';
8553         }
8554
8555         # ignore trailing blank tokens (they will get deleted later)
8556         if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
8557             $jmax--;
8558         }
8559
8560         # Handle a blank line..
8561         if ( $jmax < 0 ) {
8562             $Last_line_had_side_comment = 0;
8563             return 'BL';
8564         }
8565
8566         # see if this is a static block comment (starts with ## by default)
8567         my $is_static_block_comment_without_leading_space = 0;
8568         if (   $is_block_comment
8569             && $rOpts->{'static-block-comments'}
8570             && $input_line =~ /$static_block_comment_pattern/o )
8571         {
8572             $is_static_block_comment = 1;
8573             $is_static_block_comment_without_leading_space =
8574               substr( $input_line, 0, 1 ) eq '#';
8575         }
8576
8577         # Check for comments which are line directives
8578         # Treat exactly as static block comments without leading space
8579         # reference: perlsyn, near end, section Plain Old Comments (Not!)
8580         # example: '# line 42 "new_filename.plx"'
8581         if (
8582                $is_block_comment
8583             && $input_line =~ /^\#   \s*
8584                                line \s+ (\d+)   \s*
8585                                (?:\s("?)([^"]+)\2)? \s*
8586                                $/x
8587           )
8588         {
8589             $is_static_block_comment                       = 1;
8590             $is_static_block_comment_without_leading_space = 1;
8591         }
8592
8593         # look for hanging side comment
8594         if (
8595                $is_block_comment
8596             && $Last_line_had_side_comment  # last line had side comment
8597             && $input_line =~ /^\s/         # there is some leading space
8598             && !$is_static_block_comment    # do not make static comment hanging
8599             && $rOpts->{'hanging-side-comments'}    # user is allowing
8600                                                     # hanging side comments
8601                                                     # like this
8602           )
8603         {
8604             $Last_line_had_side_comment = 1;
8605             return 'HSC';
8606         }
8607
8608         # remember if this line has a side comment
8609         $Last_line_had_side_comment =
8610           ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
8611
8612         # Handle a block (full-line) comment..
8613         if ($is_block_comment) {
8614
8615             if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
8616
8617             # TRIM COMMENTS -- This could be turned off as a option
8618             $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//;    # trim right end
8619
8620             if ($is_static_block_comment_without_leading_space) {
8621                 return 'SBCX';
8622             }
8623             elsif ($is_static_block_comment) {
8624                 return 'SBC';
8625             }
8626             else {
8627                 return 'BC';
8628             }
8629         }
8630
8631 =pod
8632         # NOTE: This does not work yet. Version in print-line-of-tokens
8633         # is Still used until fixed
8634
8635         # compare input/output indentation except for continuation lines
8636         # (because they have an unknown amount of initial blank space)
8637         # and lines which are quotes (because they may have been outdented)
8638         # Note: this test is placed here because we know the continuation flag
8639         # at this point, which allows us to avoid non-meaningful checks.
8640         my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
8641         compare_indentation_levels( $guessed_indentation_level,
8642             $structural_indentation_level )
8643           unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
8644             || $guessed_indentation_level == 0
8645             && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
8646 =cut
8647
8648         #   Patch needed for MakeMaker.  Do not break a statement
8649         #   in which $VERSION may be calculated.  See MakeMaker.pm;
8650         #   this is based on the coding in it.
8651         #   The first line of a file that matches this will be eval'd:
8652         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8653         #   Examples:
8654         #     *VERSION = \'1.01';
8655         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
8656         #   We will pass such a line straight through without breaking
8657         #   it unless -npvl is used.
8658
8659         #   Patch for problem reported in RT #81866, where files
8660         #   had been flattened into a single line and couldn't be
8661         #   tidied without -npvl.  There are two parts to this patch:
8662         #   First, it is not done for a really long line (80 tokens for now).
8663         #   Second, we will only allow up to one semicolon
8664         #   before the VERSION.  We need to allow at least one semicolon
8665         #   for statements like this:
8666         #      require Exporter;  our $VERSION = $Exporter::VERSION;
8667         #   where both statements must be on a single line for MakeMaker
8668
8669         my $is_VERSION_statement = 0;
8670         if (  !$Saw_VERSION_in_this_file
8671             && $jmax < 80
8672             && $input_line =~
8673             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
8674         {
8675             $Saw_VERSION_in_this_file = 1;
8676             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8677             $CODE_type = 'VER';
8678         }
8679         return $CODE_type;
8680     }
8681 }
8682
8683 sub find_nested_pairs {
8684     my $self = shift;
8685
8686     my $rLL = $self->{rLL};
8687     return unless ( defined($rLL) && @{$rLL} );
8688
8689     # We define an array of pairs of nested containers
8690     my @nested_pairs;
8691
8692     # We also set the following hash values to identify container pairs for
8693     # which the opening and closing tokens are adjacent in the token stream:
8694     # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
8695     # $seqno_in are the seqence numbers of the outer and inner containers of
8696     # the pair We need these later to decide if we can insert a missing
8697     # semicolon
8698     my $rpaired_to_inner_container = {};
8699
8700     # This local hash remembers if an outer container has a close following
8701     # inner container;
8702     # The key is the outer sequence number
8703     # The value is the token_hash of the inner container
8704
8705     my %has_close_following_opening;
8706
8707     # Names of calling routines can either be marked as 'i' or 'w',
8708     # and they may invoke a sub call with an '->'. We will consider
8709     # any consecutive string of such types as a single unit when making
8710     # weld decisions.  We also allow a leading !
8711     my $is_name_type = {
8712         'i'  => 1,
8713         'w'  => 1,
8714         'U'  => 1,
8715         '->' => 1,
8716         '!'  => 1,
8717     };
8718
8719     my $is_name = sub {
8720         my $type = shift;
8721         return $type && $is_name_type->{$type};
8722     };
8723
8724     my $last_container;
8725     my $last_last_container;
8726     my $last_nonblank_token_vars;
8727     my $last_count;
8728
8729     my $nonblank_token_count = 0;
8730
8731     # loop over all tokens
8732     foreach my $rtoken_vars ( @{$rLL} ) {
8733
8734         my $type = $rtoken_vars->[_TYPE_];
8735
8736         next if ( $type eq 'b' );
8737
8738         # long identifier-like items are counted as a single item
8739         $nonblank_token_count++
8740           unless ( $is_name->($type)
8741             && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
8742
8743         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8744         if ($type_sequence) {
8745
8746             my $token = $rtoken_vars->[_TOKEN_];
8747
8748             if ( $is_opening_token{$token} ) {
8749
8750                 # following previous opening token ...
8751                 if (   $last_container
8752                     && $is_opening_token{ $last_container->[_TOKEN_] } )
8753                 {
8754
8755                     # adjacent to this one
8756                     my $tok_diff = $nonblank_token_count - $last_count;
8757
8758                     my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
8759
8760                     if (   $tok_diff == 1
8761                         || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
8762                     {
8763
8764                         # remember this pair...
8765                         my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
8766                         my $inner_seqno = $type_sequence;
8767                         $has_close_following_opening{$outer_seqno} =
8768                           $rtoken_vars;
8769                     }
8770                 }
8771             }
8772
8773             elsif ( $is_closing_token{$token} ) {
8774
8775                 # if the corresponding opening token had an adjacent opening
8776                 if (   $has_close_following_opening{$type_sequence}
8777                     && $is_closing_token{ $last_container->[_TOKEN_] }
8778                     && $has_close_following_opening{$type_sequence}
8779                     ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
8780                 {
8781
8782                     # The closing weld tokens must be adjacent
8783                     # NOTE: so intermediate commas and semicolons
8784                     # can currently block a weld.  This is something
8785                     # that could be fixed in the future by including
8786                     # a flag to delete un-necessary commas and semicolons.
8787                     my $tok_diff = $nonblank_token_count - $last_count;
8788
8789                     if ( $tok_diff == 1 ) {
8790
8791                         # This is a closely nested pair ..
8792                         my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
8793                         my $outer_seqno = $type_sequence;
8794                         $rpaired_to_inner_container->{$outer_seqno} =
8795                           $inner_seqno;
8796
8797                         push @nested_pairs, [ $inner_seqno, $outer_seqno ];
8798                     }
8799                 }
8800             }
8801
8802             $last_last_container = $last_container;
8803             $last_container      = $rtoken_vars;
8804             $last_count          = $nonblank_token_count;
8805         }
8806         $last_nonblank_token_vars = $rtoken_vars;
8807     }
8808     $self->{rnested_pairs}              = \@nested_pairs;
8809     $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
8810     return;
8811 }
8812
8813 sub dump_tokens {
8814
8815     # a debug routine, not normally used
8816     my ( $self, $msg ) = @_;
8817     my $rLL   = $self->{rLL};
8818     my $nvars = @{$rLL};
8819     print STDERR "$msg\n";
8820     print STDERR "ntokens=$nvars\n";
8821     print STDERR "K\t_TOKEN_\t_TYPE_\n";
8822     my $K = 0;
8823     foreach my $item ( @{$rLL} ) {
8824         print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
8825         $K++;
8826     }
8827 }
8828
8829 sub K_next_nonblank {
8830     my ( $self, $KK, $rLL ) = @_;
8831
8832     # return the index K of the next nonblank token
8833     return unless ( defined($KK) && $KK >= 0 );
8834     $rLL = $self->{rLL} unless ( defined($rLL) );
8835     my $Num  = @{$rLL};
8836     my $Knnb = $KK + 1;
8837     while ( $Knnb < $Num ) {
8838         if ( !defined( $rLL->[$Knnb] ) ) {
8839             Fault("Undefined entry for k=$Knnb");
8840         }
8841         if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
8842         $Knnb++;
8843     }
8844     return;
8845 }
8846
8847 sub K_previous_nonblank {
8848
8849     # return index of previous nonblank token before item K
8850     # Call with $KK=undef to start search at the top of the array
8851     my ( $self, $KK, $rLL ) = @_;
8852     $rLL = $self->{rLL} unless ( defined($rLL) );
8853     my $Num = @{$rLL};
8854     if ( !defined($KK) ) { $KK = $Num }
8855     elsif ( $KK > $Num ) {
8856
8857         # The caller should make the first call with KK_new=undef to
8858         # avoid this error
8859         Fault(
8860 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
8861         );
8862     }
8863     my $Kpnb = $KK - 1;
8864     while ( $Kpnb >= 0 ) {
8865         if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
8866         $Kpnb--;
8867     }
8868     return;
8869 }
8870
8871 sub weld_containers {
8872
8873     # do any welding operations
8874     my $self = shift;
8875
8876   # initialize weld length hashes needed later for checking line lengths
8877   # TODO: These should eventually be stored in $self rather than be package vars
8878     %weld_len_left_closing  = ();
8879     %weld_len_right_closing = ();
8880     %weld_len_left_opening  = ();
8881     %weld_len_right_opening = ();
8882
8883     return if ( $rOpts->{'indent-only'} );
8884     return unless ($rOpts_add_newlines);
8885
8886     $self->weld_nested_containers()
8887       if $rOpts->{'weld-nested-containers'};
8888
8889     # Note that these two calls are order-dependent.
8890     # sub weld_nested_containers() must be called before sub
8891     # weld_cuddled_blocks().  This is because it is more complex and could
8892     # overwrite the %weld_len_... hash values written by weld_cuddled_blocks().
8893     # sub weld_cuddled_blocks(), on the other hand, is much simpler and will
8894     # not overwrite the values written by weld_nested_containers.  But
8895     # note that weld_nested_containers() changes the _LEVEL_ values, so
8896     # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
8897
8898     # Here is a good test case to  Be sure that both cuddling and welding
8899     # are working and not interfering with each other:
8900
8901     #   perltidy -wn -cb -cbl='if-elsif-else'
8902
8903    # if ($BOLD_MATH) { (
8904    #     $labels, $comment,
8905    #     join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
8906    # ) } else { (
8907    #     &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
8908    #     $after
8909    # ) }
8910
8911     $self->weld_cuddled_blocks()
8912       if $rOpts->{'cuddled-blocks'};
8913
8914     return;
8915 }
8916
8917 sub weld_cuddled_blocks {
8918     my $self = shift;
8919
8920     # This routine implements the -cb flag by finding the appropriate
8921     # closing and opening block braces and welding them together.
8922
8923     my $rLL = $self->{rLL};
8924     return unless ( defined($rLL) && @{$rLL} );
8925     my $rbreak_container = $self->{rbreak_container};
8926
8927     my $K_opening_container = $self->{K_opening_container};
8928     my $K_closing_container = $self->{K_closing_container};
8929
8930     my $length_to_opening_seqno = sub {
8931         my ($seqno) = @_;
8932         my $KK      = $K_opening_container->{$seqno};
8933         my $lentot  = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
8934         return $lentot;
8935     };
8936     my $length_to_closing_seqno = sub {
8937         my ($seqno) = @_;
8938         my $KK      = $K_closing_container->{$seqno};
8939         my $lentot  = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
8940         return $lentot;
8941     };
8942
8943     my $is_broken_block = sub {
8944
8945         # a block is broken if the input line numbers of the braces differ
8946         # we can only cuddle between broken blocks
8947         my ($seqno) = @_;
8948         my $K_opening = $K_opening_container->{$seqno};
8949         return unless ( defined($K_opening) );
8950         my $K_closing = $K_closing_container->{$seqno};
8951         return unless ( defined($K_closing) );
8952         return $rbreak_container->{$seqno}
8953           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
8954           $rLL->[$K_opening]->[_LINE_INDEX_];
8955     };
8956
8957     # A stack to remember open chains at all levels:
8958     # $in_chain[$level] = [$chain_type, $type_sequence];
8959     my @in_chain;
8960     my $CBO = $rOpts->{'cuddled-break-option'};
8961
8962     # loop over structure items to find cuddled pairs
8963     my $level = 0;
8964     my $KK    = 0;
8965     while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
8966         my $rtoken_vars   = $rLL->[$KK];
8967         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
8968         if ( !$type_sequence ) {
8969             Fault("sequence = $type_sequence not defined");
8970         }
8971
8972         # We use the original levels because they get changed by sub
8973         # 'weld_nested_containers'. So if this were to be called before that
8974         # routine, the levels would be wrong and things would go bad.
8975         my $last_level = $level;
8976         $level = $rtoken_vars->[_LEVEL_TRUE_];
8977
8978         if    ( $level < $last_level ) { $in_chain[$last_level] = undef }
8979         elsif ( $level > $last_level ) { $in_chain[$level]      = undef }
8980
8981         # We are only looking at code blocks
8982         my $token = $rtoken_vars->[_TOKEN_];
8983         my $type  = $rtoken_vars->[_TYPE_];
8984         next unless ( $type eq $token );
8985
8986         if ( $token eq '{' ) {
8987
8988             my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
8989             if ( !$block_type ) {
8990
8991                 # patch for unrecognized block types which may not be labeled
8992                 my $Kp = $self->K_previous_nonblank($KK);
8993                 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
8994                     $Kp = $self->K_previous_nonblank($Kp);
8995                 }
8996                 next unless $Kp;
8997                 $block_type = $rLL->[$Kp]->[_TOKEN_];
8998             }
8999             if ( $in_chain[$level] ) {
9000
9001                 # we are in a chain and are at an opening block brace.
9002                 # See if we are welding this opening brace with the previous
9003                 # block brace.  Get their identification numbers:
9004                 my $closing_seqno = $in_chain[$level]->[1];
9005                 my $opening_seqno = $type_sequence;
9006
9007                 # The preceding block must be on multiple lines so that its
9008                 # closing brace will start a new line.
9009                 if ( !$is_broken_block->($closing_seqno) ) {
9010                     next unless ( $CBO == 2 );
9011                     $rbreak_container->{$closing_seqno} = 1;
9012                 }
9013
9014                 # we will let the trailing block be either broken or intact
9015                 ## && $is_broken_block->($opening_seqno);
9016
9017                 # We can weld the closing brace to its following word ..
9018                 my $Ko  = $K_closing_container->{$closing_seqno};
9019                 my $Kon = $self->K_next_nonblank($Ko);
9020
9021                 # ..unless it is a comment
9022                 if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
9023                     my $dlen =
9024                       $rLL->[ $Kon + 1 ]->[_CUMULATIVE_LENGTH_] -
9025                       $rLL->[$Ko]->[_CUMULATIVE_LENGTH_];
9026                     $weld_len_right_closing{$closing_seqno} = $dlen;
9027
9028                     # Set flag that we want to break the next container
9029                     # so that the cuddled line is balanced.
9030                     $rbreak_container->{$opening_seqno} = 1
9031                       if ($CBO);
9032                 }
9033
9034             }
9035             else {
9036
9037                 # We are not in a chain. Start a new chain if we see the
9038                 # starting block type.
9039                 if ( $rcuddled_block_types->{$block_type} ) {
9040                     $in_chain[$level] = [ $block_type, $type_sequence ];
9041                 }
9042                 else {
9043                     $block_type = '*';
9044                     $in_chain[$level] = [ $block_type, $type_sequence ];
9045                 }
9046             }
9047         }
9048         elsif ( $token eq '}' ) {
9049             if ( $in_chain[$level] ) {
9050
9051                 # We are in a chain at a closing brace.  See if this chain
9052                 # continues..
9053                 my $Knn = $self->K_next_nonblank($KK);
9054
9055                 # skip past comments
9056                 while ( $Knn && $rLL->[$Knn]->[_TYPE_] eq '#' ) {
9057                     $Knn = $self->K_next_nonblank($Knn);
9058                 }
9059                 next unless $Knn;
9060
9061                 my $chain_type          = $in_chain[$level]->[0];
9062                 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
9063                 if (
9064                     $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
9065                   )
9066                 {
9067
9068                     # Note that we do not weld yet because we must wait until
9069                     # we we are sure that an opening brace for this follows.
9070                     $in_chain[$level]->[1] = $type_sequence;
9071                 }
9072                 else { $in_chain[$level] = undef }
9073             }
9074         }
9075     }
9076
9077     return;
9078 }
9079
9080 sub weld_nested_containers {
9081     my $self = shift;
9082
9083     # This routine implements the -wn flag by "welding together"
9084     # the nested closing and opening tokens which were previously
9085     # identified by sub 'find_nested_pairs'.  "welding" simply
9086     # involves setting certain hash values which will be checked
9087     # later during formatting.
9088
9089     my $rLL                 = $self->{rLL};
9090     my $Klimit              = $self->get_rLL_max_index();
9091     my $rnested_pairs       = $self->{rnested_pairs};
9092     my $rlines              = $self->{rlines};
9093     my $K_opening_container = $self->{K_opening_container};
9094     my $K_closing_container = $self->{K_closing_container};
9095
9096     # Return unless there are nested pairs to weld
9097     return unless defined($rnested_pairs) && @{$rnested_pairs};
9098
9099     # This array will hold the sequence numbers of the tokens to be welded.
9100     my @welds;
9101
9102     # Variables needed for estimating line lengths
9103     my $starting_indent;
9104     my $starting_lentot;
9105
9106     # A tolerance to the length for length estimates.  In some rare cases
9107     # this can avoid problems where a final weld slightly exceeds the
9108     # line length and gets broken in a bad spot.
9109     my $length_tol = 1;
9110
9111     my $excess_length_to = sub {
9112         my ($rtoken_hash) = @_;
9113
9114         # Estimate the length from the line start to a given token
9115         my $length = $rtoken_hash->[_CUMULATIVE_LENGTH_] - $starting_lentot;
9116
9117         my $excess_length =
9118           $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
9119         return ($excess_length);
9120     };
9121     my $length_to_opening_seqno = sub {
9122         my ($seqno) = @_;
9123         my $KK      = $K_opening_container->{$seqno};
9124         my $lentot  = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
9125         return $lentot;
9126     };
9127     my $length_to_closing_seqno = sub {
9128         my ($seqno) = @_;
9129         my $KK      = $K_closing_container->{$seqno};
9130         my $lentot  = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
9131         return $lentot;
9132     };
9133
9134     # Abbreviations:
9135     #  _oo=outer opening, i.e. first of  { {
9136     #  _io=inner opening, i.e. second of { {
9137     #  _oc=outer closing, i.e. second of } {
9138     #  _ic=inner closing, i.e. first of  } }
9139
9140     my $previous_pair;
9141
9142     # We are working from outermost to innermost pairs so that
9143     # level changes will be complete when we arrive at the inner pairs.
9144
9145     while ( my $item = pop( @{$rnested_pairs} ) ) {
9146         my ( $inner_seqno, $outer_seqno ) = @{$item};
9147
9148         my $Kouter_opening = $K_opening_container->{$outer_seqno};
9149         my $Kinner_opening = $K_opening_container->{$inner_seqno};
9150         my $Kouter_closing = $K_closing_container->{$outer_seqno};
9151         my $Kinner_closing = $K_closing_container->{$inner_seqno};
9152
9153         my $outer_opening = $rLL->[$Kouter_opening];
9154         my $inner_opening = $rLL->[$Kinner_opening];
9155         my $outer_closing = $rLL->[$Kouter_closing];
9156         my $inner_closing = $rLL->[$Kinner_closing];
9157
9158         my $iline_oo = $outer_opening->[_LINE_INDEX_];
9159         my $iline_io = $inner_opening->[_LINE_INDEX_];
9160
9161         # Set flag saying if this pair starts a new weld
9162         my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
9163
9164         # Set flag saying if this pair is adjacent to the previous nesting pair
9165         # (even if previous pair was rejected as a weld)
9166         my $touch_previous_pair =
9167           defined($previous_pair) && $outer_seqno == $previous_pair->[0];
9168         $previous_pair = $item;
9169
9170         # Set a flag if we should not weld. It sometimes looks best not to weld
9171         # when the opening and closing tokens are very close.  However, there
9172         # is a danger that we will create a "blinker", which oscillates between
9173         # two semi-stable states, if we do not weld.  So the rules for
9174         # not welding have to be carefully defined and tested.
9175         my $do_not_weld;
9176         if ( !$touch_previous_pair ) {
9177
9178             # If this pair is not adjacent to the previous pair (skipped or
9179             # not), then measure lengths from the start of line of oo
9180
9181             my $rK_range = $rlines->[$iline_oo]->{_rK_range};
9182             my ( $Kfirst, $Klast ) = @{$rK_range};
9183             $starting_lentot = $rLL->[$Kfirst]->[_CUMULATIVE_LENGTH_];
9184             $starting_indent = 0;
9185             if ( !$rOpts_variable_maximum_line_length ) {
9186                 my $level = $rLL->[$Kfirst]->[_LEVEL_];
9187                 $starting_indent = $rOpts_indent_columns * $level;
9188             }
9189
9190             # DO-NOT-WELD RULE 1:
9191             # Do not weld something that looks like the start of a two-line
9192             # function call, like this:
9193             #    $trans->add_transformation(
9194             #        PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
9195             # We will look for a semicolon after the closing paren.
9196
9197             # We want to weld something complex, like this though
9198             # my $compass = uc( opposite_direction( line_to_canvas_direction(
9199             #     @{ $coords[0] }, @{ $coords[1] } ) ) );
9200             # Otherwise we will get a 'blinker'
9201
9202             my $iline_oc = $outer_closing->[_LINE_INDEX_];
9203             if ( $iline_oc <= $iline_oo + 1 ) {
9204
9205                 # Look for following semicolon...
9206                 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
9207                 my $next_nonblank_type =
9208                   defined($Knext_nonblank)
9209                   ? $rLL->[$Knext_nonblank]->[_TYPE_]
9210                   : 'b';
9211                 if ( $next_nonblank_type eq ';' ) {
9212
9213                     # Then do not weld if no other containers between inner
9214                     # opening and closing.
9215                     my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
9216                     if ( $Knext_seq_item == $Kinner_closing ) {
9217                         $do_not_weld ||= 1;
9218                     }
9219                 }
9220             }
9221         }
9222
9223         my $iline_ic = $inner_closing->[_LINE_INDEX_];
9224
9225         # DO-NOT-WELD RULE 2:
9226         # Do not weld an opening paren to an inner one line brace block
9227         # We will just use old line numbers for this test and require
9228         # iterations if necessary for convergence
9229
9230         # For example, otherwise we could cause the opening paren
9231         # in the following example to separate from the caller name
9232         # as here:
9233
9234         #    $_[0]->code_handler
9235         #       ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
9236
9237         # Here is another example where we do not want to weld:
9238         #  $wrapped->add_around_modifier(
9239         #    sub { push @tracelog => 'around 1'; $_[0]->(); } );
9240
9241         # If the one line sub block gets broken due to length or by the
9242         # user, then we can weld.  The result will then be:
9243         # $wrapped->add_around_modifier( sub {
9244         #    push @tracelog => 'around 1';
9245         #    $_[0]->();
9246         # } );
9247
9248         if ( $iline_ic == $iline_io ) {
9249
9250             my $token_oo      = $outer_opening->[_TOKEN_];
9251             my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
9252             my $token_io      = $inner_opening->[_TOKEN_];
9253             $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
9254         }
9255
9256         # DO-NOT-WELD RULE 3:
9257         # Do not weld if this makes our line too long
9258         $do_not_weld ||= $excess_length_to->($inner_opening) > 0;
9259
9260         if ($do_not_weld) {
9261
9262             # After neglecting a pair, we start measuring from start of point io
9263             $starting_lentot = $inner_opening->[_CUMULATIVE_LENGTH_];
9264             $starting_indent = 0;
9265             if ( !$rOpts_variable_maximum_line_length ) {
9266                 my $level = $inner_opening->[_LEVEL_];
9267                 $starting_indent = $rOpts_indent_columns * $level;
9268             }
9269
9270             # Normally, a broken pair should not decrease indentation of
9271             # intermediate tokens:
9272             ##      if ( $last_pair_broken ) { next }
9273             # However, for long strings of welded tokens, such as '{{{{{{...'
9274             # we will allow broken pairs to also remove indentation.
9275             # This will keep very long strings of opening and closing
9276             # braces from marching off to the right.  We will do this if the
9277             # number of tokens in a weld before the broken weld is 4 or more.
9278             # This rule will mainly be needed for test scripts, since typical
9279             # welds have fewer than about 4 welded tokens.
9280             if ( !@welds || @{ $welds[-1] } < 4 ) { next }
9281         }
9282
9283         # otherwise start new weld ...
9284         elsif ($starting_new_weld) {
9285             push @welds, $item;
9286         }
9287
9288         # ... or extend current weld
9289         else {
9290             unshift @{ $welds[-1] }, $inner_seqno;
9291         }
9292
9293         ########################################################################
9294         # After welding, reduce the indentation level if all intermediate tokens
9295         ########################################################################
9296
9297         my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
9298         if ( $dlevel != 0 ) {
9299             my $Kstart = $Kinner_opening;
9300             my $Kstop  = $Kinner_closing;
9301             for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
9302                 $rLL->[$KK]->[_LEVEL_] += $dlevel;
9303             }
9304         }
9305     }
9306
9307     #####################################################
9308     # Define weld lengths needed later to set line breaks
9309     #####################################################
9310     foreach my $item (@welds) {
9311
9312         # sweep from inner to outer
9313
9314         my $inner_seqno;
9315         my $len_close = 0;
9316         my $len_open  = 0;
9317         foreach my $outer_seqno ( @{$item} ) {
9318             if ($inner_seqno) {
9319
9320                 my $dlen_opening =
9321                   $length_to_opening_seqno->($inner_seqno) -
9322                   $length_to_opening_seqno->($outer_seqno);
9323
9324                 my $dlen_closing =
9325                   $length_to_closing_seqno->($outer_seqno) -
9326                   $length_to_closing_seqno->($inner_seqno);
9327
9328                 $len_open  += $dlen_opening;
9329                 $len_close += $dlen_closing;
9330
9331             }
9332
9333             $weld_len_left_closing{$outer_seqno}  = $len_close;
9334             $weld_len_right_opening{$outer_seqno} = $len_open;
9335
9336             $inner_seqno = $outer_seqno;
9337         }
9338
9339         # sweep from outer to inner
9340         foreach my $seqno ( reverse @{$item} ) {
9341             $weld_len_right_closing{$seqno} =
9342               $len_close - $weld_len_left_closing{$seqno};
9343             $weld_len_left_opening{$seqno} =
9344               $len_open - $weld_len_right_opening{$seqno};
9345         }
9346     }
9347
9348     #####################################
9349     # DEBUG
9350     #####################################
9351     if (0) {
9352         my $count = 0;
9353         local $" = ')(';
9354         foreach my $weld (@welds) {
9355             print "\nWeld number $count has seq: (@{$weld})\n";
9356             foreach my $seq ( @{$weld} ) {
9357                 print <<EOM;
9358         seq=$seq
9359         left_opening=$weld_len_left_opening{$seq};
9360         right_opening=$weld_len_right_opening{$seq};
9361         left_closing=$weld_len_left_closing{$seq};
9362         right_closing=$weld_len_right_closing{$seq};
9363 EOM
9364             }
9365
9366             $count++;
9367         }
9368     }
9369     return;
9370 }
9371
9372 sub weld_len_left {
9373
9374     my ( $seqno, $type_or_tok ) = @_;
9375
9376     # Given the sequence number of a token, and the token or its type,
9377     # return the length of any weld to its left
9378
9379     my $weld_len;
9380     if ($seqno) {
9381         if ( $is_closing_type{$type_or_tok} ) {
9382             $weld_len = $weld_len_left_closing{$seqno};
9383         }
9384         elsif ( $is_opening_type{$type_or_tok} ) {
9385             $weld_len = $weld_len_left_opening{$seqno};
9386         }
9387     }
9388     if ( !defined($weld_len) ) { $weld_len = 0 }
9389     return $weld_len;
9390 }
9391
9392 sub weld_len_right {
9393
9394     my ( $seqno, $type_or_tok ) = @_;
9395
9396     # Given the sequence number of a token, and the token or its type,
9397     # return the length of any weld to its right
9398
9399     my $weld_len;
9400     if ($seqno) {
9401         if ( $is_closing_type{$type_or_tok} ) {
9402             $weld_len = $weld_len_right_closing{$seqno};
9403         }
9404         elsif ( $is_opening_type{$type_or_tok} ) {
9405             $weld_len = $weld_len_right_opening{$seqno};
9406         }
9407     }
9408     if ( !defined($weld_len) ) { $weld_len = 0 }
9409     return $weld_len;
9410 }
9411
9412 sub weld_len_left_to_go {
9413     my ($i) = @_;
9414
9415     # Given the index of a token in the 'to_go' array
9416     # return the length of any weld to its left
9417     return if ( $i < 0 );
9418     my $weld_len =
9419       weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
9420     return $weld_len;
9421 }
9422
9423 sub weld_len_right_to_go {
9424     my ($i) = @_;
9425
9426     # Given the index of a token in the 'to_go' array
9427     # return the length of any weld to its right
9428     return if ( $i < 0 );
9429     if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
9430     my $weld_len =
9431       weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
9432     return $weld_len;
9433 }
9434
9435 sub link_sequence_items {
9436
9437     # This has been merged into 'respace_tokens' but retained for reference
9438     my $self   = shift;
9439     my $rlines = $self->{rlines};
9440     my $rLL    = $self->{rLL};
9441
9442     # We walk the token list and make links to the next sequence item.
9443     # We also define these hashes to container tokens using sequence number as
9444     # the key:
9445     my $K_opening_container = {};    # opening [ { or (
9446     my $K_closing_container = {};    # closing ] } or )
9447     my $K_opening_ternary   = {};    # opening ? of ternary
9448     my $K_closing_ternary   = {};    # closing : of ternary
9449
9450     # sub to link preceding nodes forward to a new node type
9451     my $link_back = sub {
9452         my ( $Ktop, $key ) = @_;
9453
9454         my $Kprev = $Ktop - 1;
9455         while ( $Kprev >= 0
9456             && !defined( $rLL->[$Kprev]->[$key] ) )
9457         {
9458             $rLL->[$Kprev]->[$key] = $Ktop;
9459             $Kprev -= 1;
9460         }
9461     };
9462
9463     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
9464
9465         $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
9466
9467         my $type = $rLL->[$KK]->[_TYPE_];
9468
9469         next if ( $type eq 'b' );
9470
9471         my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
9472         if ($type_sequence) {
9473
9474             $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
9475
9476             my $token = $rLL->[$KK]->[_TOKEN_];
9477             if ( $is_opening_token{$token} ) {
9478
9479                 $K_opening_container->{$type_sequence} = $KK;
9480             }
9481             elsif ( $is_closing_token{$token} ) {
9482
9483                 $K_closing_container->{$type_sequence} = $KK;
9484             }
9485
9486             # These are not yet used but could be useful
9487             else {
9488                 if ( $token eq '?' ) {
9489                     $K_opening_ternary->{$type_sequence} = $KK;
9490                 }
9491                 elsif ( $token eq ':' ) {
9492                     $K_closing_ternary->{$type_sequence} = $KK;
9493                 }
9494                 else {
9495                     Fault(<<EOM);
9496 Unknown sequenced token type '$type'.  Expecting one of '{[(?:)]}'
9497 EOM
9498                 }
9499             }
9500         }
9501     }
9502
9503     $self->{K_opening_container} = $K_opening_container;
9504     $self->{K_closing_container} = $K_closing_container;
9505     $self->{K_opening_ternary}   = $K_opening_ternary;
9506     $self->{K_closing_ternary}   = $K_closing_ternary;
9507     return;
9508 }
9509
9510 sub sum_token_lengths {
9511     my $self = shift;
9512
9513     # This has been merged into 'respace_tokens' but retained for reference
9514     my $rLL               = $self->{rLL};
9515     my $cumulative_length = 0;
9516     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
9517
9518         # Save the length sum to just BEFORE this token
9519         $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
9520
9521         # now set the length of this token
9522         my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
9523
9524         $cumulative_length += $token_length;
9525     }
9526     return;
9527 }
9528
9529 sub resync_lines_and_tokens {
9530
9531     my $self   = shift;
9532     my $rLL    = $self->{rLL};
9533     my $Klimit = $self->{Klimit};
9534     my $rlines = $self->{rlines};
9535
9536     # Re-construct the arrays of tokens associated with the original input lines
9537     # since they have probably changed due to inserting and deleting blanks
9538     # and a few other tokens.
9539
9540     my $Kmax = -1;
9541
9542     # This is the next token and its line index:
9543     my $Knext = 0;
9544     my $inext;
9545     if ( defined($rLL) && @{$rLL} ) {
9546         $Kmax  = @{$rLL} - 1;
9547         $inext = $rLL->[$Knext]->[_LINE_INDEX_];
9548     }
9549
9550     my $get_inext = sub {
9551         if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
9552         else {
9553             $inext = $rLL->[$Knext]->[_LINE_INDEX_];
9554         }
9555         return $inext;
9556     };
9557
9558     # Remember the most recently output token index
9559     my $Klast_out;
9560
9561     my $iline = -1;
9562     foreach my $line_of_tokens ( @{$rlines} ) {
9563         $iline++;
9564         my $line_type = $line_of_tokens->{_line_type};
9565         if ( $line_type eq 'CODE' ) {
9566
9567             my @K_array;
9568             my $rK_range;
9569             $inext = $get_inext->();
9570             while ( defined($inext) && $inext <= $iline ) {
9571                 push @{K_array}, $Knext;
9572                 $Knext += 1;
9573                 $inext = $get_inext->();
9574             }
9575
9576             # Delete any terminal blank token
9577             if (@K_array) {
9578                 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
9579                     pop @K_array;
9580                 }
9581             }
9582
9583             # Define the range of K indexes for the line:
9584             # $Kfirst = index of first token on line
9585             # $Klast_out = index of last token on line
9586             my ( $Kfirst, $Klast );
9587             if (@K_array) {
9588                 $Kfirst    = $K_array[0];
9589                 $Klast     = $K_array[-1];
9590                 $Klast_out = $Klast;
9591             }
9592
9593             # It is only safe to trim the actual line text if the input
9594             # line had a terminal blank token. Otherwise, we may be
9595             # in a quote.
9596             if ( $line_of_tokens->{_ended_in_blank_token} ) {
9597                 $line_of_tokens->{_line_text} =~ s/\s+$//;
9598             }
9599             $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
9600         }
9601     }
9602
9603     # There shouldn't be any nodes beyond the last one unless we start
9604     # allowing 'link_after' calls
9605     if ( defined($inext) ) {
9606
9607         Fault("unexpected tokens at end of file when reconstructing lines");
9608     }
9609
9610     return;
9611 }
9612
9613 sub dump_verbatim {
9614     my $self   = shift;
9615     my $rlines = $self->{rlines};
9616     foreach my $line ( @{$rlines} ) {
9617         my $input_line = $line->{_line_text};
9618         $self->write_unindented_line($input_line);
9619     }
9620     return;
9621 }
9622
9623 sub finish_formatting {
9624
9625     my ( $self, $severe_error ) = @_;
9626
9627     # The file has been tokenized and is ready to be formatted.
9628     # All of the relevant data is stored in $self, ready to go.
9629
9630     # output file verbatim if severe error or no formatting requested
9631     if ( $severe_error || $rOpts->{notidy} ) {
9632         $self->dump_verbatim();
9633         $self->wrapup();
9634         return;
9635     }
9636
9637     # Make a pass through the lines, looking at lines of CODE and identifying
9638     # special processing needs, such format skipping sections marked by
9639     # special comments
9640     $self->scan_comments();
9641
9642     # Find nested pairs of container tokens for any welding. This information
9643     # is also needed for adding semicolons, so it is split apart from the
9644     # welding step.
9645     $self->find_nested_pairs();
9646
9647     # Make sure everything looks good
9648     $self->check_line_hashes();
9649
9650     # Future: Place to Begin future Iteration Loop
9651     # foreach my $it_count(1..$maxit) {
9652
9653     # Future: We must reset some things after the first iteration.
9654     # This includes:
9655     #   - resetting levels if there was any welding
9656     #   - resetting any phantom semicolons
9657     #   - dealing with any line numbering issues so we can relate final lines
9658     #     line numbers with input line numbers.
9659     #
9660     # If ($it_count>1) {
9661     #   Copy {level_raw} to [_LEVEL_] if ($it_count>1)
9662     #   Renumber lines
9663     # }
9664
9665     # Make a pass through all tokens, adding or deleting any whitespace as
9666     # required.  Also make any other changes, such as adding semicolons.
9667     # All token changes must be made here so that the token data structure
9668     # remains fixed for the rest of this iteration.
9669     $self->respace_tokens();
9670
9671     # Implement any welding needed for the -wn or -cb options
9672     $self->weld_containers();
9673
9674     # Finishes formatting and write the result to the line sink.
9675     # Eventually this call should just change the 'rlines' data according to the
9676     # new line breaks and then return so that we can do an internal iteration
9677     # before continuing with the next stages of formatting.
9678     $self->break_lines();
9679
9680     ############################################################
9681     # A possible future decomposition of 'break_lines()' follows.
9682     # Benefits:
9683     # - allow perltidy to do an internal iteration which eliminates
9684     #   many unnecessary steps, such as re-parsing and vertical alignment.
9685     #   This will allow iterations to be automatic.
9686     # - consolidate all length calculations to allow utf8 alignment
9687     ############################################################
9688
9689     # Future: Check for convergence of beginning tokens on CODE lines
9690
9691     # Future: End of Iteration Loop
9692
9693     # Future: add_padding($rargs);
9694
9695     # Future: add_closing_side_comments($rargs);
9696
9697     # Future: vertical_alignment($rargs);
9698
9699     # Future: output results
9700
9701     # A final routine to tie up any loose ends
9702     $self->wrapup();
9703     return;
9704 }
9705
9706 sub create_one_line_block {
9707     ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
9708       @_;
9709     return;
9710 }
9711
9712 sub destroy_one_line_block {
9713     $index_start_one_line_block            = UNDEFINED_INDEX;
9714     $semicolons_before_block_self_destruct = 0;
9715     return;
9716 }
9717
9718 sub leading_spaces_to_go {
9719
9720     # return the number of indentation spaces for a token in the output stream;
9721     # these were previously stored by 'set_leading_whitespace'.
9722
9723     my $ii = shift;
9724     if ( $ii < 0 ) { $ii = 0 }
9725     return get_spaces( $leading_spaces_to_go[$ii] );
9726
9727 }
9728
9729 sub get_spaces {
9730
9731     # return the number of leading spaces associated with an indentation
9732     # variable $indentation is either a constant number of spaces or an object
9733     # with a get_spaces method.
9734     my $indentation = shift;
9735     return ref($indentation) ? $indentation->get_spaces() : $indentation;
9736 }
9737
9738 sub get_recoverable_spaces {
9739
9740     # return the number of spaces (+ means shift right, - means shift left)
9741     # that we would like to shift a group of lines with the same indentation
9742     # to get them to line up with their opening parens
9743     my $indentation = shift;
9744     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
9745 }
9746
9747 sub get_available_spaces_to_go {
9748
9749     my $ii   = shift;
9750     my $item = $leading_spaces_to_go[$ii];
9751
9752     # return the number of available leading spaces associated with an
9753     # indentation variable.  $indentation is either a constant number of
9754     # spaces or an object with a get_available_spaces method.
9755     return ref($item) ? $item->get_available_spaces() : 0;
9756 }
9757
9758 sub new_lp_indentation_item {
9759
9760     # this is an interface to the IndentationItem class
9761     my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
9762
9763     # A negative level implies not to store the item in the item_list
9764     my $index = 0;
9765     if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
9766
9767     my $item = Perl::Tidy::IndentationItem->new(
9768         $spaces,      $level,
9769         $ci_level,    $available_spaces,
9770         $index,       $gnu_sequence_number,
9771         $align_paren, $max_gnu_stack_index,
9772         $line_start_index_to_go,
9773     );
9774
9775     if ( $level >= 0 ) {
9776         $gnu_item_list[$max_gnu_item_index] = $item;
9777     }
9778
9779     return $item;
9780 }
9781
9782 sub set_leading_whitespace {
9783
9784     # This routine defines leading whitespace
9785     # given: the level and continuation_level of a token,
9786     # define: space count of leading string which would apply if it
9787     # were the first token of a new line.
9788
9789     my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
9790
9791     # Adjust levels if necessary to recycle whitespace:
9792     # given $level_abs, the absolute level
9793     # define $level, a possibly reduced level for whitespace
9794     my $level = $level_abs;
9795     if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
9796         if ( $level_abs < $whitespace_last_level ) {
9797             pop(@whitespace_level_stack);
9798         }
9799         if ( !@whitespace_level_stack ) {
9800             push @whitespace_level_stack, $level_abs;
9801         }
9802         elsif ( $level_abs > $whitespace_last_level ) {
9803             $level = $whitespace_level_stack[-1] +
9804               ( $level_abs - $whitespace_last_level );
9805
9806             if (
9807                 # 1 Try to break at a block brace
9808                 (
9809                        $level > $rOpts_whitespace_cycle
9810                     && $last_nonblank_type eq '{'
9811                     && $last_nonblank_token eq '{'
9812                 )
9813
9814                 # 2 Then either a brace or bracket
9815                 || (   $level > $rOpts_whitespace_cycle + 1
9816                     && $last_nonblank_token =~ /^[\{\[]$/ )
9817
9818                 # 3 Then a paren too
9819                 || $level > $rOpts_whitespace_cycle + 2
9820               )
9821             {
9822                 $level = 1;
9823             }
9824             push @whitespace_level_stack, $level;
9825         }
9826         $level = $whitespace_level_stack[-1];
9827     }
9828     $whitespace_last_level = $level_abs;
9829
9830     # modify for -bli, which adds one continuation indentation for
9831     # opening braces
9832     if (   $rOpts_brace_left_and_indent
9833         && $max_index_to_go == 0
9834         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
9835     {
9836         $ci_level++;
9837     }
9838
9839     # patch to avoid trouble when input file has negative indentation.
9840     # other logic should catch this error.
9841     if ( $level < 0 ) { $level = 0 }
9842
9843     #-------------------------------------------
9844     # handle the standard indentation scheme
9845     #-------------------------------------------
9846     unless ($rOpts_line_up_parentheses) {
9847         my $space_count =
9848           $ci_level * $rOpts_continuation_indentation +
9849           $level * $rOpts_indent_columns;
9850         my $ci_spaces =
9851           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
9852
9853         if ($in_continued_quote) {
9854             $space_count = 0;
9855             $ci_spaces   = 0;
9856         }
9857         $leading_spaces_to_go[$max_index_to_go] = $space_count;
9858         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
9859         return;
9860     }
9861
9862     #-------------------------------------------------------------
9863     # handle case of -lp indentation..
9864     #-------------------------------------------------------------
9865
9866     # The continued_quote flag means that this is the first token of a
9867     # line, and it is the continuation of some kind of multi-line quote
9868     # or pattern.  It requires special treatment because it must have no
9869     # added leading whitespace. So we create a special indentation item
9870     # which is not in the stack.
9871     if ($in_continued_quote) {
9872         my $space_count     = 0;
9873         my $available_space = 0;
9874         $level = -1;    # flag to prevent storing in item_list
9875         $leading_spaces_to_go[$max_index_to_go] =
9876           $reduced_spaces_to_go[$max_index_to_go] =
9877           new_lp_indentation_item( $space_count, $level, $ci_level,
9878             $available_space, 0 );
9879         return;
9880     }
9881
9882     # get the top state from the stack
9883     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_spaces();
9884     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_level();
9885     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
9886
9887     my $type        = $types_to_go[$max_index_to_go];
9888     my $token       = $tokens_to_go[$max_index_to_go];
9889     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
9890
9891     if ( $type eq '{' || $type eq '(' ) {
9892
9893         $gnu_comma_count{ $total_depth + 1 } = 0;
9894         $gnu_arrow_count{ $total_depth + 1 } = 0;
9895
9896         # If we come to an opening token after an '=' token of some type,
9897         # see if it would be helpful to 'break' after the '=' to save space
9898         my $last_equals = $last_gnu_equals{$total_depth};
9899         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
9900
9901             # find the position if we break at the '='
9902             my $i_test = $last_equals;
9903             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
9904
9905             # TESTING
9906             ##my $too_close = ($i_test==$max_index_to_go-1);
9907
9908             my $test_position = total_line_length( $i_test, $max_index_to_go );
9909             my $mll = maximum_line_length($i_test);
9910
9911             if (
9912
9913                 # the equals is not just before an open paren (testing)
9914                 ##!$too_close &&
9915
9916                 # if we are beyond the midpoint
9917                 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
9918
9919                 # or we are beyond the 1/4 point and there was an old
9920                 # break at the equals
9921                 || (
9922                     $gnu_position_predictor >
9923                     $mll - $rOpts_maximum_line_length * 3 / 4
9924                     && (
9925                         $old_breakpoint_to_go[$last_equals]
9926                         || (   $last_equals > 0
9927                             && $old_breakpoint_to_go[ $last_equals - 1 ] )
9928                         || (   $last_equals > 1
9929                             && $types_to_go[ $last_equals - 1 ] eq 'b'
9930                             && $old_breakpoint_to_go[ $last_equals - 2 ] )
9931                     )
9932                 )
9933               )
9934             {
9935
9936                 # then make the switch -- note that we do not set a real
9937                 # breakpoint here because we may not really need one; sub
9938                 # scan_list will do that if necessary
9939                 $line_start_index_to_go = $i_test + 1;
9940                 $gnu_position_predictor = $test_position;
9941             }
9942         }
9943     }
9944
9945     my $halfway =
9946       maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
9947
9948     # Check for decreasing depth ..
9949     # Note that one token may have both decreasing and then increasing
9950     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
9951     # in this example we would first go back to (1,0) then up to (2,0)
9952     # in a single call.
9953     if ( $level < $current_level || $ci_level < $current_ci_level ) {
9954
9955         # loop to find the first entry at or completely below this level
9956         my ( $lev, $ci_lev );
9957         while (1) {
9958             if ($max_gnu_stack_index) {
9959
9960                 # save index of token which closes this level
9961                 $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
9962
9963                 # Undo any extra indentation if we saw no commas
9964                 my $available_spaces =
9965                   $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
9966
9967                 my $comma_count = 0;
9968                 my $arrow_count = 0;
9969                 if ( $type eq '}' || $type eq ')' ) {
9970                     $comma_count = $gnu_comma_count{$total_depth};
9971                     $arrow_count = $gnu_arrow_count{$total_depth};
9972                     $comma_count = 0 unless $comma_count;
9973                     $arrow_count = 0 unless $arrow_count;
9974                 }
9975                 $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
9976                 $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
9977
9978                 if ( $available_spaces > 0 ) {
9979
9980                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
9981
9982                         my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
9983                         my $seqno =
9984                           $gnu_stack[$max_gnu_stack_index]
9985                           ->get_sequence_number();
9986
9987                         # Be sure this item was created in this batch.  This
9988                         # should be true because we delete any available
9989                         # space from open items at the end of each batch.
9990                         if (   $gnu_sequence_number != $seqno
9991                             || $i > $max_gnu_item_index )
9992                         {
9993                             warning(
9994 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
9995                             );
9996                             report_definite_bug();
9997                         }
9998
9999                         else {
10000                             if ( $arrow_count == 0 ) {
10001                                 $gnu_item_list[$i]
10002                                   ->permanently_decrease_available_spaces(
10003                                     $available_spaces);
10004                             }
10005                             else {
10006                                 $gnu_item_list[$i]
10007                                   ->tentatively_decrease_available_spaces(
10008                                     $available_spaces);
10009                             }
10010                             foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
10011                                 $gnu_item_list[$j]
10012                                   ->decrease_SPACES($available_spaces);
10013                             }
10014                         }
10015                     }
10016                 }
10017
10018                 # go down one level
10019                 --$max_gnu_stack_index;
10020                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_level();
10021                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
10022
10023                 # stop when we reach a level at or below the current level
10024                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
10025                     $space_count =
10026                       $gnu_stack[$max_gnu_stack_index]->get_spaces();
10027                     $current_level    = $lev;
10028                     $current_ci_level = $ci_lev;
10029                     last;
10030                 }
10031             }
10032
10033             # reached bottom of stack .. should never happen because
10034             # only negative levels can get here, and $level was forced
10035             # to be positive above.
10036             else {
10037                 warning(
10038 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
10039                 );
10040                 report_definite_bug();
10041                 last;
10042             }
10043         }
10044     }
10045
10046     # handle increasing depth
10047     if ( $level > $current_level || $ci_level > $current_ci_level ) {
10048
10049         # Compute the standard incremental whitespace.  This will be
10050         # the minimum incremental whitespace that will be used.  This
10051         # choice results in a smooth transition between the gnu-style
10052         # and the standard style.
10053         my $standard_increment =
10054           ( $level - $current_level ) * $rOpts_indent_columns +
10055           ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
10056
10057         # Now we have to define how much extra incremental space
10058         # ("$available_space") we want.  This extra space will be
10059         # reduced as necessary when long lines are encountered or when
10060         # it becomes clear that we do not have a good list.
10061         my $available_space = 0;
10062         my $align_paren     = 0;
10063         my $excess          = 0;
10064
10065         # initialization on empty stack..
10066         if ( $max_gnu_stack_index == 0 ) {
10067             $space_count = $level * $rOpts_indent_columns;
10068         }
10069
10070         # if this is a BLOCK, add the standard increment
10071         elsif ($last_nonblank_block_type) {
10072             $space_count += $standard_increment;
10073         }
10074
10075         # if last nonblank token was not structural indentation,
10076         # just use standard increment
10077         elsif ( $last_nonblank_type ne '{' ) {
10078             $space_count += $standard_increment;
10079         }
10080
10081         # otherwise use the space to the first non-blank level change token
10082         else {
10083
10084             $space_count = $gnu_position_predictor;
10085
10086             my $min_gnu_indentation =
10087               $gnu_stack[$max_gnu_stack_index]->get_spaces();
10088
10089             $available_space = $space_count - $min_gnu_indentation;
10090             if ( $available_space >= $standard_increment ) {
10091                 $min_gnu_indentation += $standard_increment;
10092             }
10093             elsif ( $available_space > 1 ) {
10094                 $min_gnu_indentation += $available_space + 1;
10095             }
10096             elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
10097                 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
10098                     $min_gnu_indentation += 2;
10099                 }
10100                 else {
10101                     $min_gnu_indentation += 1;
10102                 }
10103             }
10104             else {
10105                 $min_gnu_indentation += $standard_increment;
10106             }
10107             $available_space = $space_count - $min_gnu_indentation;
10108
10109             if ( $available_space < 0 ) {
10110                 $space_count     = $min_gnu_indentation;
10111                 $available_space = 0;
10112             }
10113             $align_paren = 1;
10114         }
10115
10116         # update state, but not on a blank token
10117         if ( $types_to_go[$max_index_to_go] ne 'b' ) {
10118
10119             $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
10120
10121             ++$max_gnu_stack_index;
10122             $gnu_stack[$max_gnu_stack_index] =
10123               new_lp_indentation_item( $space_count, $level, $ci_level,
10124                 $available_space, $align_paren );
10125
10126             # If the opening paren is beyond the half-line length, then
10127             # we will use the minimum (standard) indentation.  This will
10128             # help avoid problems associated with running out of space
10129             # near the end of a line.  As a result, in deeply nested
10130             # lists, there will be some indentations which are limited
10131             # to this minimum standard indentation. But the most deeply
10132             # nested container will still probably be able to shift its
10133             # parameters to the right for proper alignment, so in most
10134             # cases this will not be noticeable.
10135             if ( $available_space > 0 && $space_count > $halfway ) {
10136                 $gnu_stack[$max_gnu_stack_index]
10137                   ->tentatively_decrease_available_spaces($available_space);
10138             }
10139         }
10140     }
10141
10142     # Count commas and look for non-list characters.  Once we see a
10143     # non-list character, we give up and don't look for any more commas.
10144     if ( $type eq '=>' ) {
10145         $gnu_arrow_count{$total_depth}++;
10146
10147         # tentatively treating '=>' like '=' for estimating breaks
10148         # TODO: this could use some experimentation
10149         $last_gnu_equals{$total_depth} = $max_index_to_go;
10150     }
10151
10152     elsif ( $type eq ',' ) {
10153         $gnu_comma_count{$total_depth}++;
10154     }
10155
10156     elsif ( $is_assignment{$type} ) {
10157         $last_gnu_equals{$total_depth} = $max_index_to_go;
10158     }
10159
10160     # this token might start a new line
10161     # if this is a non-blank..
10162     if ( $type ne 'b' ) {
10163
10164         # and if ..
10165         if (
10166
10167             # this is the first nonblank token of the line
10168             $max_index_to_go == 1 && $types_to_go[0] eq 'b'
10169
10170             # or previous character was one of these:
10171             || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
10172
10173             # or previous character was opening and this does not close it
10174             || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
10175             || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
10176
10177             # or this token is one of these:
10178             || $type =~ /^([\.]|\|\||\&\&)$/
10179
10180             # or this is a closing structure
10181             || (   $last_nonblank_type_to_go eq '}'
10182                 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
10183
10184             # or previous token was keyword 'return'
10185             || ( $last_nonblank_type_to_go eq 'k'
10186                 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
10187
10188             # or starting a new line at certain keywords is fine
10189             || (   $type eq 'k'
10190                 && $is_if_unless_and_or_last_next_redo_return{$token} )
10191
10192             # or this is after an assignment after a closing structure
10193             || (
10194                 $is_assignment{$last_nonblank_type_to_go}
10195                 && (
10196                     $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
10197
10198                     # and it is significantly to the right
10199                     || $gnu_position_predictor > $halfway
10200                 )
10201             )
10202           )
10203         {
10204             check_for_long_gnu_style_lines();
10205             $line_start_index_to_go = $max_index_to_go;
10206
10207             # back up 1 token if we want to break before that type
10208             # otherwise, we may strand tokens like '?' or ':' on a line
10209             if ( $line_start_index_to_go > 0 ) {
10210                 if ( $last_nonblank_type_to_go eq 'k' ) {
10211
10212                     if ( $want_break_before{$last_nonblank_token_to_go} ) {
10213                         $line_start_index_to_go--;
10214                     }
10215                 }
10216                 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
10217                     $line_start_index_to_go--;
10218                 }
10219             }
10220         }
10221     }
10222
10223     # remember the predicted position of this token on the output line
10224     if ( $max_index_to_go > $line_start_index_to_go ) {
10225         $gnu_position_predictor =
10226           total_line_length( $line_start_index_to_go, $max_index_to_go );
10227     }
10228     else {
10229         $gnu_position_predictor =
10230           $space_count + $token_lengths_to_go[$max_index_to_go];
10231     }
10232
10233     # store the indentation object for this token
10234     # this allows us to manipulate the leading whitespace
10235     # (in case we have to reduce indentation to fit a line) without
10236     # having to change any token values
10237     $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
10238     $reduced_spaces_to_go[$max_index_to_go] =
10239       ( $max_gnu_stack_index > 0 && $ci_level )
10240       ? $gnu_stack[ $max_gnu_stack_index - 1 ]
10241       : $gnu_stack[$max_gnu_stack_index];
10242     return;
10243 }
10244
10245 sub check_for_long_gnu_style_lines {
10246
10247     # look at the current estimated maximum line length, and
10248     # remove some whitespace if it exceeds the desired maximum
10249
10250     # this is only for the '-lp' style
10251     return unless ($rOpts_line_up_parentheses);
10252
10253     # nothing can be done if no stack items defined for this line
10254     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
10255
10256     # see if we have exceeded the maximum desired line length
10257     # keep 2 extra free because they are needed in some cases
10258     # (result of trial-and-error testing)
10259     my $spaces_needed =
10260       $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
10261
10262     return if ( $spaces_needed <= 0 );
10263
10264     # We are over the limit, so try to remove a requested number of
10265     # spaces from leading whitespace.  We are only allowed to remove
10266     # from whitespace items created on this batch, since others have
10267     # already been used and cannot be undone.
10268     my @candidates = ();
10269     my $i;
10270
10271     # loop over all whitespace items created for the current batch
10272     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
10273         my $item = $gnu_item_list[$i];
10274
10275         # item must still be open to be a candidate (otherwise it
10276         # cannot influence the current token)
10277         next if ( $item->get_closed() >= 0 );
10278
10279         my $available_spaces = $item->get_available_spaces();
10280
10281         if ( $available_spaces > 0 ) {
10282             push( @candidates, [ $i, $available_spaces ] );
10283         }
10284     }
10285
10286     return unless (@candidates);
10287
10288     # sort by available whitespace so that we can remove whitespace
10289     # from the maximum available first
10290     @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
10291
10292     # keep removing whitespace until we are done or have no more
10293     foreach my $candidate (@candidates) {
10294         my ( $i, $available_spaces ) = @{$candidate};
10295         my $deleted_spaces =
10296           ( $available_spaces > $spaces_needed )
10297           ? $spaces_needed
10298           : $available_spaces;
10299
10300         # remove the incremental space from this item
10301         $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
10302
10303         my $i_debug = $i;
10304
10305         # update the leading whitespace of this item and all items
10306         # that came after it
10307         for ( ; $i <= $max_gnu_item_index ; $i++ ) {
10308
10309             my $old_spaces = $gnu_item_list[$i]->get_spaces();
10310             if ( $old_spaces >= $deleted_spaces ) {
10311                 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
10312             }
10313
10314             # shouldn't happen except for code bug:
10315             else {
10316                 my $level        = $gnu_item_list[$i_debug]->get_level();
10317                 my $ci_level     = $gnu_item_list[$i_debug]->get_ci_level();
10318                 my $old_level    = $gnu_item_list[$i]->get_level();
10319                 my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
10320                 warning(
10321 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
10322                 );
10323                 report_definite_bug();
10324             }
10325         }
10326         $gnu_position_predictor -= $deleted_spaces;
10327         $spaces_needed          -= $deleted_spaces;
10328         last unless ( $spaces_needed > 0 );
10329     }
10330     return;
10331 }
10332
10333 sub finish_lp_batch {
10334
10335     # This routine is called once after each output stream batch is
10336     # finished to undo indentation for all incomplete -lp
10337     # indentation levels.  It is too risky to leave a level open,
10338     # because then we can't backtrack in case of a long line to follow.
10339     # This means that comments and blank lines will disrupt this
10340     # indentation style.  But the vertical aligner may be able to
10341     # get the space back if there are side comments.
10342
10343     # this is only for the 'lp' style
10344     return unless ($rOpts_line_up_parentheses);
10345
10346     # nothing can be done if no stack items defined for this line
10347     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
10348
10349     # loop over all whitespace items created for the current batch
10350     foreach my $i ( 0 .. $max_gnu_item_index ) {
10351         my $item = $gnu_item_list[$i];
10352
10353         # only look for open items
10354         next if ( $item->get_closed() >= 0 );
10355
10356         # Tentatively remove all of the available space
10357         # (The vertical aligner will try to get it back later)
10358         my $available_spaces = $item->get_available_spaces();
10359         if ( $available_spaces > 0 ) {
10360
10361             # delete incremental space for this item
10362             $gnu_item_list[$i]
10363               ->tentatively_decrease_available_spaces($available_spaces);
10364
10365             # Reduce the total indentation space of any nodes that follow
10366             # Note that any such nodes must necessarily be dependents
10367             # of this node.
10368             foreach ( $i + 1 .. $max_gnu_item_index ) {
10369                 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
10370             }
10371         }
10372     }
10373     return;
10374 }
10375
10376 sub reduce_lp_indentation {
10377
10378     # reduce the leading whitespace at token $i if possible by $spaces_needed
10379     # (a large value of $spaces_needed will remove all excess space)
10380     # NOTE: to be called from scan_list only for a sequence of tokens
10381     # contained between opening and closing parens/braces/brackets
10382
10383     my ( $i, $spaces_wanted ) = @_;
10384     my $deleted_spaces = 0;
10385
10386     my $item             = $leading_spaces_to_go[$i];
10387     my $available_spaces = $item->get_available_spaces();
10388
10389     if (
10390         $available_spaces > 0
10391         && ( ( $spaces_wanted <= $available_spaces )
10392             || !$item->get_have_child() )
10393       )
10394     {
10395
10396         # we'll remove these spaces, but mark them as recoverable
10397         $deleted_spaces =
10398           $item->tentatively_decrease_available_spaces($spaces_wanted);
10399     }
10400
10401     return $deleted_spaces;
10402 }
10403
10404 sub token_sequence_length {
10405
10406     # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
10407     # returns 0 if $ibeg > $iend (shouldn't happen)
10408     my ( $ibeg, $iend ) = @_;
10409     return 0 if ( $iend < 0 || $ibeg > $iend );
10410     return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
10411     return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
10412 }
10413
10414 sub total_line_length {
10415
10416     # return length of a line of tokens ($ibeg .. $iend)
10417     my ( $ibeg, $iend ) = @_;
10418     return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
10419 }
10420
10421 sub maximum_line_length_for_level {
10422
10423     # return maximum line length for line starting with a given level
10424     my $maximum_line_length = $rOpts_maximum_line_length;
10425
10426     # Modify if -vmll option is selected
10427     if ($rOpts_variable_maximum_line_length) {
10428         my $level = shift;
10429         if ( $level < 0 ) { $level = 0 }
10430         $maximum_line_length += $level * $rOpts_indent_columns;
10431     }
10432     return $maximum_line_length;
10433 }
10434
10435 sub maximum_line_length {
10436
10437     # return maximum line length for line starting with the token at given index
10438     my $ii = shift;
10439     return maximum_line_length_for_level( $levels_to_go[$ii] );
10440 }
10441
10442 sub excess_line_length {
10443
10444     # return number of characters by which a line of tokens ($ibeg..$iend)
10445     # exceeds the allowable line length.
10446     my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
10447
10448     # Include left and right weld lengths unless requested not to
10449     my $wl = $ignore_left_weld  ? 0 : weld_len_left_to_go($iend);
10450     my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
10451
10452     return total_line_length( $ibeg, $iend ) + $wl + $wr -
10453       maximum_line_length($ibeg);
10454 }
10455
10456 sub wrapup {
10457
10458     # flush buffer and write any informative messages
10459     my $self = shift;
10460
10461     $self->flush();
10462     $file_writer_object->decrement_output_line_number()
10463       ;    # fix up line number since it was incremented
10464     we_are_at_the_last_line();
10465     if ( $added_semicolon_count > 0 ) {
10466         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
10467         my $what =
10468           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
10469         write_logfile_entry("$added_semicolon_count $what added:\n");
10470         write_logfile_entry(
10471             "  $first at input line $first_added_semicolon_at\n");
10472
10473         if ( $added_semicolon_count > 1 ) {
10474             write_logfile_entry(
10475                 "   Last at input line $last_added_semicolon_at\n");
10476         }
10477         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
10478         write_logfile_entry("\n");
10479     }
10480
10481     if ( $deleted_semicolon_count > 0 ) {
10482         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
10483         my $what =
10484           ( $deleted_semicolon_count > 1 )
10485           ? "semicolons were"
10486           : "semicolon was";
10487         write_logfile_entry(
10488             "$deleted_semicolon_count unnecessary $what deleted:\n");
10489         write_logfile_entry(
10490             "  $first at input line $first_deleted_semicolon_at\n");
10491
10492         if ( $deleted_semicolon_count > 1 ) {
10493             write_logfile_entry(
10494                 "   Last at input line $last_deleted_semicolon_at\n");
10495         }
10496         write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
10497         write_logfile_entry("\n");
10498     }
10499
10500     if ( $embedded_tab_count > 0 ) {
10501         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
10502         my $what =
10503           ( $embedded_tab_count > 1 )
10504           ? "quotes or patterns"
10505           : "quote or pattern";
10506         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
10507         write_logfile_entry(
10508 "This means the display of this script could vary with device or software\n"
10509         );
10510         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
10511
10512         if ( $embedded_tab_count > 1 ) {
10513             write_logfile_entry(
10514                 "   Last at input line $last_embedded_tab_at\n");
10515         }
10516         write_logfile_entry("\n");
10517     }
10518
10519     if ($first_tabbing_disagreement) {
10520         write_logfile_entry(
10521 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
10522         );
10523     }
10524
10525     if ($in_tabbing_disagreement) {
10526         write_logfile_entry(
10527 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
10528         );
10529     }
10530     else {
10531
10532         if ($last_tabbing_disagreement) {
10533
10534             write_logfile_entry(
10535 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
10536             );
10537         }
10538         else {
10539             write_logfile_entry("No indentation disagreement seen\n");
10540         }
10541     }
10542     if ($first_tabbing_disagreement) {
10543         write_logfile_entry(
10544 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
10545         );
10546     }
10547     write_logfile_entry("\n");
10548
10549     $vertical_aligner_object->report_anything_unusual();
10550
10551     $file_writer_object->report_line_length_errors();
10552
10553     return;
10554 }
10555
10556 sub check_options {
10557
10558     # This routine is called to check the Opts hash after it is defined
10559     $rOpts = shift;
10560
10561     make_static_block_comment_pattern();
10562     make_static_side_comment_pattern();
10563     make_closing_side_comment_prefix();
10564     make_closing_side_comment_list_pattern();
10565     $format_skipping_pattern_begin =
10566       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
10567     $format_skipping_pattern_end =
10568       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
10569
10570     # If closing side comments ARE selected, then we can safely
10571     # delete old closing side comments unless closing side comment
10572     # warnings are requested.  This is a good idea because it will
10573     # eliminate any old csc's which fall below the line count threshold.
10574     # We cannot do this if warnings are turned on, though, because we
10575     # might delete some text which has been added.  So that must
10576     # be handled when comments are created.
10577     if ( $rOpts->{'closing-side-comments'} ) {
10578         if ( !$rOpts->{'closing-side-comment-warnings'} ) {
10579             $rOpts->{'delete-closing-side-comments'} = 1;
10580         }
10581     }
10582
10583     # If closing side comments ARE NOT selected, but warnings ARE
10584     # selected and we ARE DELETING csc's, then we will pretend to be
10585     # adding with a huge interval.  This will force the comments to be
10586     # generated for comparison with the old comments, but not added.
10587     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
10588         if ( $rOpts->{'delete-closing-side-comments'} ) {
10589             $rOpts->{'delete-closing-side-comments'}  = 0;
10590             $rOpts->{'closing-side-comments'}         = 1;
10591             $rOpts->{'closing-side-comment-interval'} = 100000000;
10592         }
10593     }
10594
10595     make_bli_pattern();
10596     make_block_brace_vertical_tightness_pattern();
10597     make_blank_line_pattern();
10598
10599     prepare_cuddled_block_types();
10600     if ( $rOpts->{'dump-cuddled-block-list'} ) {
10601         dump_cuddled_block_list(*STDOUT);
10602         Perl::Tidy::Exit 0;
10603     }
10604
10605     if ( $rOpts->{'line-up-parentheses'} ) {
10606
10607         if (   $rOpts->{'indent-only'}
10608             || !$rOpts->{'add-newlines'}
10609             || !$rOpts->{'delete-old-newlines'} )
10610         {
10611             Perl::Tidy::Warn <<EOM;
10612 -----------------------------------------------------------------------
10613 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
10614
10615 The -lp indentation logic requires that perltidy be able to coordinate
10616 arbitrarily large numbers of line breakpoints.  This isn't possible
10617 with these flags. Sometimes an acceptable workaround is to use -wocb=3
10618 -----------------------------------------------------------------------
10619 EOM
10620             $rOpts->{'line-up-parentheses'} = 0;
10621         }
10622     }
10623
10624     # At present, tabs are not compatible with the line-up-parentheses style
10625     # (it would be possible to entab the total leading whitespace
10626     # just prior to writing the line, if desired).
10627     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
10628         Perl::Tidy::Warn <<EOM;
10629 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
10630 EOM
10631         $rOpts->{'tabs'} = 0;
10632     }
10633
10634     # Likewise, tabs are not compatible with outdenting..
10635     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
10636         Perl::Tidy::Warn <<EOM;
10637 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
10638 EOM
10639         $rOpts->{'tabs'} = 0;
10640     }
10641
10642     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
10643         Perl::Tidy::Warn <<EOM;
10644 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
10645 EOM
10646         $rOpts->{'tabs'} = 0;
10647     }
10648
10649     if ( !$rOpts->{'space-for-semicolon'} ) {
10650         $want_left_space{'f'} = -1;
10651     }
10652
10653     if ( $rOpts->{'space-terminal-semicolon'} ) {
10654         $want_left_space{';'} = 1;
10655     }
10656
10657     # implement outdenting preferences for keywords
10658     %outdent_keyword = ();
10659     unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
10660         @_ = qw(next last redo goto return);    # defaults
10661     }
10662
10663     # FUTURE: if not a keyword, assume that it is an identifier
10664     foreach (@_) {
10665         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
10666             $outdent_keyword{$_} = 1;
10667         }
10668         else {
10669             Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
10670         }
10671     }
10672
10673     # implement user whitespace preferences
10674     if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
10675         @want_left_space{@_} = (1) x scalar(@_);
10676     }
10677
10678     if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
10679         @want_right_space{@_} = (1) x scalar(@_);
10680     }
10681
10682     if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
10683         @want_left_space{@_} = (-1) x scalar(@_);
10684     }
10685
10686     if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
10687         @want_right_space{@_} = (-1) x scalar(@_);
10688     }
10689     if ( $rOpts->{'dump-want-left-space'} ) {
10690         dump_want_left_space(*STDOUT);
10691         Perl::Tidy::Exit 0;
10692     }
10693
10694     if ( $rOpts->{'dump-want-right-space'} ) {
10695         dump_want_right_space(*STDOUT);
10696         Perl::Tidy::Exit 0;
10697     }
10698
10699     # default keywords for which space is introduced before an opening paren
10700     # (at present, including them messes up vertical alignment)
10701     @_ = qw(my local our and or err eq ne if else elsif until
10702       unless while for foreach return switch case given when catch);
10703     @space_after_keyword{@_} = (1) x scalar(@_);
10704
10705     # first remove any or all of these if desired
10706     if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
10707
10708         # -nsak='*' selects all the above keywords
10709         if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
10710         @space_after_keyword{@_} = (0) x scalar(@_);
10711     }
10712
10713     # then allow user to add to these defaults
10714     if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
10715         @space_after_keyword{@_} = (1) x scalar(@_);
10716     }
10717
10718     # implement user break preferences
10719     my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
10720       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
10721       . : ? && || and or err xor
10722     );
10723
10724     my $break_after = sub {
10725         foreach my $tok (@_) {
10726             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
10727             my $lbs = $left_bond_strength{$tok};
10728             my $rbs = $right_bond_strength{$tok};
10729             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
10730                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
10731                   ( $lbs, $rbs );
10732             }
10733         }
10734     };
10735
10736     my $break_before = sub {
10737         foreach my $tok (@_) {
10738             my $lbs = $left_bond_strength{$tok};
10739             my $rbs = $right_bond_strength{$tok};
10740             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
10741                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
10742                   ( $lbs, $rbs );
10743             }
10744         }
10745     };
10746
10747     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
10748     $break_before->(@all_operators)
10749       if ( $rOpts->{'break-before-all-operators'} );
10750
10751     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
10752     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
10753
10754     # make note if breaks are before certain key types
10755     %want_break_before = ();
10756     foreach my $tok ( @all_operators, ',' ) {
10757         $want_break_before{$tok} =
10758           $left_bond_strength{$tok} < $right_bond_strength{$tok};
10759     }
10760
10761     # Coordinate ?/: breaks, which must be similar
10762     if ( !$want_break_before{':'} ) {
10763         $want_break_before{'?'}   = $want_break_before{':'};
10764         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
10765         $left_bond_strength{'?'}  = NO_BREAK;
10766     }
10767
10768     # Define here tokens which may follow the closing brace of a do statement
10769     # on the same line, as in:
10770     #   } while ( $something);
10771     @_ = qw(until while unless if ; : );
10772     push @_, ',';
10773     @is_do_follower{@_} = (1) x scalar(@_);
10774
10775     # These tokens may follow the closing brace of an if or elsif block.
10776     # In other words, for cuddled else we want code to look like:
10777     #   } elsif ( $something) {
10778     #   } else {
10779     if ( $rOpts->{'cuddled-else'} ) {
10780         @_ = qw(else elsif);
10781         @is_if_brace_follower{@_} = (1) x scalar(@_);
10782     }
10783     else {
10784         %is_if_brace_follower = ();
10785     }
10786
10787     # nothing can follow the closing curly of an else { } block:
10788     %is_else_brace_follower = ();
10789
10790     # what can follow a multi-line anonymous sub definition closing curly:
10791     @_ = qw# ; : => or and  && || ~~ !~~ ) #;
10792     push @_, ',';
10793     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
10794
10795     # what can follow a one-line anonymous sub closing curly:
10796     # one-line anonymous subs also have ']' here...
10797     # see tk3.t and PP.pm
10798     @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
10799     push @_, ',';
10800     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
10801
10802     # What can follow a closing curly of a block
10803     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
10804     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
10805     @_ = qw#  ; : => or and  && || ) #;
10806     push @_, ',';
10807
10808     # allow cuddled continue if cuddled else is specified
10809     if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
10810
10811     @is_other_brace_follower{@_} = (1) x scalar(@_);
10812
10813     $right_bond_strength{'{'} = WEAK;
10814     $left_bond_strength{'{'}  = VERY_STRONG;
10815
10816     # make -l=0  equal to -l=infinite
10817     if ( !$rOpts->{'maximum-line-length'} ) {
10818         $rOpts->{'maximum-line-length'} = 1000000;
10819     }
10820
10821     # make -lbl=0  equal to -lbl=infinite
10822     if ( !$rOpts->{'long-block-line-count'} ) {
10823         $rOpts->{'long-block-line-count'} = 1000000;
10824     }
10825
10826     my $enc = $rOpts->{'character-encoding'};
10827     if ( $enc && $enc !~ /^(none|utf8)$/i ) {
10828         Perl::Tidy::Die <<EOM;
10829 Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
10830 EOM
10831     }
10832
10833     my $ole = $rOpts->{'output-line-ending'};
10834     if ($ole) {
10835         my %endings = (
10836             dos  => "\015\012",
10837             win  => "\015\012",
10838             mac  => "\015",
10839             unix => "\012",
10840         );
10841
10842         # Patch for RT #99514, a memoization issue.
10843         # Normally, the user enters one of 'dos', 'win', etc, and we change the
10844         # value in the options parameter to be the corresponding line ending
10845         # character.  But, if we are using memoization, on later passes through
10846         # here the option parameter will already have the desired ending
10847         # character rather than the keyword 'dos', 'win', etc.  So
10848         # we must check to see if conversion has already been done and, if so,
10849         # bypass the conversion step.
10850         my %endings_inverted = (
10851             "\015\012" => 'dos',
10852             "\015\012" => 'win',
10853             "\015"     => 'mac',
10854             "\012"     => 'unix',
10855         );
10856
10857         if ( defined( $endings_inverted{$ole} ) ) {
10858
10859             # we already have valid line ending, nothing more to do
10860         }
10861         else {
10862             $ole = lc $ole;
10863             unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
10864                 my $str = join " ", keys %endings;
10865                 Perl::Tidy::Die <<EOM;
10866 Unrecognized line ending '$ole'; expecting one of: $str
10867 EOM
10868             }
10869             if ( $rOpts->{'preserve-line-endings'} ) {
10870                 Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
10871                 $rOpts->{'preserve-line-endings'} = undef;
10872             }
10873         }
10874     }
10875
10876     # hashes used to simplify setting whitespace
10877     %tightness = (
10878         '{' => $rOpts->{'brace-tightness'},
10879         '}' => $rOpts->{'brace-tightness'},
10880         '(' => $rOpts->{'paren-tightness'},
10881         ')' => $rOpts->{'paren-tightness'},
10882         '[' => $rOpts->{'square-bracket-tightness'},
10883         ']' => $rOpts->{'square-bracket-tightness'},
10884     );
10885     %matching_token = (
10886         '{' => '}',
10887         '(' => ')',
10888         '[' => ']',
10889         '?' => ':',
10890     );
10891
10892     # frequently used parameters
10893     $rOpts_add_newlines          = $rOpts->{'add-newlines'};
10894     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
10895     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
10896     $rOpts_block_brace_vertical_tightness =
10897       $rOpts->{'block-brace-vertical-tightness'};
10898     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
10899     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
10900     $rOpts_break_at_old_ternary_breakpoints =
10901       $rOpts->{'break-at-old-ternary-breakpoints'};
10902     $rOpts_break_at_old_attribute_breakpoints =
10903       $rOpts->{'break-at-old-attribute-breakpoints'};
10904     $rOpts_break_at_old_comma_breakpoints =
10905       $rOpts->{'break-at-old-comma-breakpoints'};
10906     $rOpts_break_at_old_keyword_breakpoints =
10907       $rOpts->{'break-at-old-keyword-breakpoints'};
10908     $rOpts_break_at_old_logical_breakpoints =
10909       $rOpts->{'break-at-old-logical-breakpoints'};
10910     $rOpts_closing_side_comment_else_flag =
10911       $rOpts->{'closing-side-comment-else-flag'};
10912     $rOpts_closing_side_comment_maximum_text =
10913       $rOpts->{'closing-side-comment-maximum-text'};
10914     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
10915     $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
10916     $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
10917     $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
10918     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
10919     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
10920     $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
10921     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
10922     $rOpts_whitespace_cycle         = $rOpts->{'whitespace-cycle'};
10923
10924     $rOpts_variable_maximum_line_length =
10925       $rOpts->{'variable-maximum-line-length'};
10926     $rOpts_short_concatenation_item_length =
10927       $rOpts->{'short-concatenation-item-length'};
10928
10929     $rOpts_keep_old_blank_lines     = $rOpts->{'keep-old-blank-lines'};
10930     $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
10931     $rOpts_format_skipping          = $rOpts->{'format-skipping'};
10932     $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
10933     $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
10934     $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
10935     $rOpts_ignore_side_comment_lengths =
10936       $rOpts->{'ignore-side-comment-lengths'};
10937
10938     # Note that both opening and closing tokens can access the opening
10939     # and closing flags of their container types.
10940     %opening_vertical_tightness = (
10941         '(' => $rOpts->{'paren-vertical-tightness'},
10942         '{' => $rOpts->{'brace-vertical-tightness'},
10943         '[' => $rOpts->{'square-bracket-vertical-tightness'},
10944         ')' => $rOpts->{'paren-vertical-tightness'},
10945         '}' => $rOpts->{'brace-vertical-tightness'},
10946         ']' => $rOpts->{'square-bracket-vertical-tightness'},
10947     );
10948
10949     %closing_vertical_tightness = (
10950         '(' => $rOpts->{'paren-vertical-tightness-closing'},
10951         '{' => $rOpts->{'brace-vertical-tightness-closing'},
10952         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
10953         ')' => $rOpts->{'paren-vertical-tightness-closing'},
10954         '}' => $rOpts->{'brace-vertical-tightness-closing'},
10955         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
10956     );
10957
10958     # assume flag for '>' same as ')' for closing qw quotes
10959     %closing_token_indentation = (
10960         ')' => $rOpts->{'closing-paren-indentation'},
10961         '}' => $rOpts->{'closing-brace-indentation'},
10962         ']' => $rOpts->{'closing-square-bracket-indentation'},
10963         '>' => $rOpts->{'closing-paren-indentation'},
10964     );
10965
10966     # flag indicating if any closing tokens are indented
10967     $some_closing_token_indentation =
10968          $rOpts->{'closing-paren-indentation'}
10969       || $rOpts->{'closing-brace-indentation'}
10970       || $rOpts->{'closing-square-bracket-indentation'}
10971       || $rOpts->{'indent-closing-brace'};
10972
10973     %opening_token_right = (
10974         '(' => $rOpts->{'opening-paren-right'},
10975         '{' => $rOpts->{'opening-hash-brace-right'},
10976         '[' => $rOpts->{'opening-square-bracket-right'},
10977     );
10978
10979     %stack_opening_token = (
10980         '(' => $rOpts->{'stack-opening-paren'},
10981         '{' => $rOpts->{'stack-opening-hash-brace'},
10982         '[' => $rOpts->{'stack-opening-square-bracket'},
10983     );
10984
10985     %stack_closing_token = (
10986         ')' => $rOpts->{'stack-closing-paren'},
10987         '}' => $rOpts->{'stack-closing-hash-brace'},
10988         ']' => $rOpts->{'stack-closing-square-bracket'},
10989     );
10990     $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
10991     $rOpts_space_backslash_quote     = $rOpts->{'space-backslash-quote'};
10992     return;
10993 }
10994
10995 sub bad_pattern {
10996
10997     # See if a pattern will compile. We have to use a string eval here,
10998     # but it should be safe because the pattern has been constructed
10999     # by this program.
11000     my ($pattern) = @_;
11001     eval "'##'=~/$pattern/";
11002     return $@;
11003 }
11004
11005 sub prepare_cuddled_block_types {
11006
11007     my $cuddled_string = $rOpts->{'cuddled-block-list'};
11008     $cuddled_string = "try-catch-finally" unless defined($cuddled_string);
11009
11010     # we have a cuddled string of the form
11011     #  'try-catch-finally'
11012
11013     # we want to prepare a hash of the form
11014
11015     # $rcuddled_block_types = {
11016     #    'try' => {
11017     #        'catch'   => 1,
11018     #        'finally' => 1
11019     #    },
11020     # };
11021
11022     # use -dcbl to dump this hash
11023
11024     # Multiple such strings are input as a space or comma separated list
11025
11026     # If we get two lists with the same leading type, such as
11027     #   -cbl = "-try-catch-finally  -try-catch-otherwise"
11028     # then they will get merged as follows:
11029     # $rcuddled_block_types = {
11030     #    'try' => {
11031     #        'catch'     => 1,
11032     #        'finally'   => 2,
11033     #        'otherwise' => 1,
11034     #    },
11035     # };
11036     # This will allow either type of chain to be followed.
11037
11038     $cuddled_string =~ s/,/ /g;    # allow space or comma separated lists
11039     my @cuddled_strings = split /\s+/, $cuddled_string;
11040
11041     $rcuddled_block_types = {};
11042
11043     # process each dash-separated string...
11044     my $string_count = 0;
11045     foreach my $string (@cuddled_strings) {
11046         next unless $string;
11047         my @words = split /-+/, $string;    # allow multiple dashes
11048
11049         # we could look for and report possible errors here...
11050         next unless ( @words && @words > 0 );
11051         my $start = shift @words;
11052
11053         # allow either '-continue' or *-continue' for arbitrary starting type
11054         $start = '*' unless $start;
11055
11056         # always make an entry for the leading word. If none follow, this
11057         # will still prevent a wildcard from matching this word.
11058         if ( !defined( $rcuddled_block_types->{$start} ) ) {
11059             $rcuddled_block_types->{$start} = {};
11060         }
11061
11062         # The count gives the original word order in case we ever want it.
11063         $string_count++;
11064         my $word_count = 0;
11065         foreach my $word (@words) {
11066             next unless $word;
11067             $word_count++;
11068             $rcuddled_block_types->{$start}->{$word} =
11069               1;    #"$string_count.$word_count";
11070         }
11071     }
11072
11073     return;
11074 }
11075
11076 sub dump_cuddled_block_list {
11077     my ($fh) = @_;
11078
11079     # Here is the format of the cuddled block type hash
11080     # which controls this routine
11081     #    my $rcuddled_block_types = {
11082     #        'if' => {
11083     #            'else'  => 1,
11084     #            'elsif' => 1
11085     #        },
11086     #        'try' => {
11087     #            'catch'   => 1,
11088     #            'finally' => 1
11089     #        },
11090     #    };
11091     #The numerical values are string.word,
11092     #where string = string number  and  word = word number in that string
11093
11094     my $cuddled_string = $rOpts->{'cuddled-block-list'};
11095     $cuddled_string = '' unless $cuddled_string;
11096     $fh->print(<<EOM);
11097 ------------------------------------------------------------------------
11098 Hash of cuddled block types created from
11099   -cbl='$cuddled_string'
11100 ------------------------------------------------------------------------
11101 EOM
11102
11103     use Data::Dumper;
11104     $fh->print( Dumper($rcuddled_block_types) );
11105
11106     $fh->print(<<EOM);
11107 ------------------------------------------------------------------------
11108 EOM
11109     return;
11110 }
11111
11112 sub make_static_block_comment_pattern {
11113
11114     # create the pattern used to identify static block comments
11115     $static_block_comment_pattern = '^\s*##';
11116
11117     # allow the user to change it
11118     if ( $rOpts->{'static-block-comment-prefix'} ) {
11119         my $prefix = $rOpts->{'static-block-comment-prefix'};
11120         $prefix =~ s/^\s*//;
11121         my $pattern = $prefix;
11122
11123         # user may give leading caret to force matching left comments only
11124         if ( $prefix !~ /^\^#/ ) {
11125             if ( $prefix !~ /^#/ ) {
11126                 Perl::Tidy::Die
11127 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
11128             }
11129             $pattern = '^\s*' . $prefix;
11130         }
11131         if ( bad_pattern($pattern) ) {
11132             Perl::Tidy::Die
11133 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
11134         }
11135         $static_block_comment_pattern = $pattern;
11136     }
11137     return;
11138 }
11139
11140 sub make_format_skipping_pattern {
11141     my ( $opt_name, $default ) = @_;
11142     my $param = $rOpts->{$opt_name};
11143     unless ($param) { $param = $default }
11144     $param =~ s/^\s*//;
11145     if ( $param !~ /^#/ ) {
11146         Perl::Tidy::Die
11147           "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
11148     }
11149     my $pattern = '^' . $param . '\s';
11150     if ( bad_pattern($pattern) ) {
11151         Perl::Tidy::Die
11152 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
11153     }
11154     return $pattern;
11155 }
11156
11157 sub make_closing_side_comment_list_pattern {
11158
11159     # turn any input list into a regex for recognizing selected block types
11160     $closing_side_comment_list_pattern = '^\w+';
11161     if ( defined( $rOpts->{'closing-side-comment-list'} )
11162         && $rOpts->{'closing-side-comment-list'} )
11163     {
11164         $closing_side_comment_list_pattern =
11165           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
11166     }
11167     return;
11168 }
11169
11170 sub make_bli_pattern {
11171
11172     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
11173         && $rOpts->{'brace-left-and-indent-list'} )
11174     {
11175         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
11176     }
11177
11178     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
11179     return;
11180 }
11181
11182 sub make_block_brace_vertical_tightness_pattern {
11183
11184     # turn any input list into a regex for recognizing selected block types
11185     $block_brace_vertical_tightness_pattern =
11186       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
11187     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
11188         && $rOpts->{'block-brace-vertical-tightness-list'} )
11189     {
11190         $block_brace_vertical_tightness_pattern =
11191           make_block_pattern( '-bbvtl',
11192             $rOpts->{'block-brace-vertical-tightness-list'} );
11193     }
11194     return;
11195 }
11196
11197 sub make_blank_line_pattern {
11198
11199     $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
11200     my $key = 'blank-lines-before-closing-block-list';
11201     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
11202         $blank_lines_before_closing_block_pattern =
11203           make_block_pattern( '-blbcl', $rOpts->{$key} );
11204     }
11205
11206     $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
11207     $key = 'blank-lines-after-opening-block-list';
11208     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
11209         $blank_lines_after_opening_block_pattern =
11210           make_block_pattern( '-blaol', $rOpts->{$key} );
11211     }
11212     return;
11213 }
11214
11215 sub make_block_pattern {
11216
11217     #  given a string of block-type keywords, return a regex to match them
11218     #  The only tricky part is that labels are indicated with a single ':'
11219     #  and the 'sub' token text may have additional text after it (name of
11220     #  sub).
11221     #
11222     #  Example:
11223     #
11224     #   input string: "if else elsif unless while for foreach do : sub";
11225     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
11226
11227     #  Minor Update:
11228     #
11229     #  To distinguish between anonymous subs and named subs, use 'sub' to
11230     #   indicate a named sub, and 'asub' to indicate an anonymous sub
11231
11232     my ( $abbrev, $string ) = @_;
11233     my @list  = split_words($string);
11234     my @words = ();
11235     my %seen;
11236     for my $i (@list) {
11237         if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
11238         next if $seen{$i};
11239         $seen{$i} = 1;
11240         if ( $i eq 'sub' ) {
11241         }
11242         elsif ( $i eq 'asub' ) {
11243         }
11244         elsif ( $i eq ';' ) {
11245             push @words, ';';
11246         }
11247         elsif ( $i eq '{' ) {
11248             push @words, '\{';
11249         }
11250         elsif ( $i eq ':' ) {
11251             push @words, '\w+:';
11252         }
11253         elsif ( $i =~ /^\w/ ) {
11254             push @words, $i;
11255         }
11256         else {
11257             Perl::Tidy::Warn
11258               "unrecognized block type $i after $abbrev, ignoring\n";
11259         }
11260     }
11261     my $pattern = '(' . join( '|', @words ) . ')$';
11262     my $sub_patterns = "";
11263     if ( $seen{'sub'} ) {
11264         $sub_patterns .= '|' . $SUB_PATTERN;
11265     }
11266     if ( $seen{'asub'} ) {
11267         $sub_patterns .= '|' . $ASUB_PATTERN;
11268     }
11269     if ($sub_patterns) {
11270         $pattern = '(' . $pattern . $sub_patterns . ')';
11271     }
11272     $pattern = '^' . $pattern;
11273     return $pattern;
11274 }
11275
11276 sub make_static_side_comment_pattern {
11277
11278     # create the pattern used to identify static side comments
11279     $static_side_comment_pattern = '^##';
11280
11281     # allow the user to change it
11282     if ( $rOpts->{'static-side-comment-prefix'} ) {
11283         my $prefix = $rOpts->{'static-side-comment-prefix'};
11284         $prefix =~ s/^\s*//;
11285         my $pattern = '^' . $prefix;
11286         if ( bad_pattern($pattern) ) {
11287             Perl::Tidy::Die
11288 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
11289         }
11290         $static_side_comment_pattern = $pattern;
11291     }
11292     return;
11293 }
11294
11295 sub make_closing_side_comment_prefix {
11296
11297     # Be sure we have a valid closing side comment prefix
11298     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
11299     my $csc_prefix_pattern;
11300     if ( !defined($csc_prefix) ) {
11301         $csc_prefix         = '## end';
11302         $csc_prefix_pattern = '^##\s+end';
11303     }
11304     else {
11305         my $test_csc_prefix = $csc_prefix;
11306         if ( $test_csc_prefix !~ /^#/ ) {
11307             $test_csc_prefix = '#' . $test_csc_prefix;
11308         }
11309
11310         # make a regex to recognize the prefix
11311         my $test_csc_prefix_pattern = $test_csc_prefix;
11312
11313         # escape any special characters
11314         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
11315
11316         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
11317
11318         # allow exact number of intermediate spaces to vary
11319         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
11320
11321         # make sure we have a good pattern
11322         # if we fail this we probably have an error in escaping
11323         # characters.
11324
11325         if ( bad_pattern($test_csc_prefix_pattern) ) {
11326
11327             # shouldn't happen..must have screwed up escaping, above
11328             report_definite_bug();
11329             Perl::Tidy::Warn
11330 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
11331
11332             # just warn and keep going with defaults
11333             Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
11334             Perl::Tidy::Warn
11335               "Using default -cscp instead; please check output\n";
11336         }
11337         else {
11338             $csc_prefix         = $test_csc_prefix;
11339             $csc_prefix_pattern = $test_csc_prefix_pattern;
11340         }
11341     }
11342     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
11343     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
11344     return;
11345 }
11346
11347 sub dump_want_left_space {
11348     my $fh = shift;
11349     local $" = "\n";
11350     print $fh <<EOM;
11351 These values are the main control of whitespace to the left of a token type;
11352 They may be altered with the -wls parameter.
11353 For a list of token types, use perltidy --dump-token-types (-dtt)
11354  1 means the token wants a space to its left
11355 -1 means the token does not want a space to its left
11356 ------------------------------------------------------------------------
11357 EOM
11358     foreach my $key ( sort keys %want_left_space ) {
11359         print $fh "$key\t$want_left_space{$key}\n";
11360     }
11361     return;
11362 }
11363
11364 sub dump_want_right_space {
11365     my $fh = shift;
11366     local $" = "\n";
11367     print $fh <<EOM;
11368 These values are the main control of whitespace to the right of a token type;
11369 They may be altered with the -wrs parameter.
11370 For a list of token types, use perltidy --dump-token-types (-dtt)
11371  1 means the token wants a space to its right
11372 -1 means the token does not want a space to its right
11373 ------------------------------------------------------------------------
11374 EOM
11375     foreach my $key ( sort keys %want_right_space ) {
11376         print $fh "$key\t$want_right_space{$key}\n";
11377     }
11378     return;
11379 }
11380
11381 {    # begin is_essential_whitespace
11382
11383     my %is_sort_grep_map;
11384     my %is_for_foreach;
11385
11386     BEGIN {
11387
11388         my @q;
11389         @q = qw(sort grep map);
11390         @is_sort_grep_map{@q} = (1) x scalar(@q);
11391
11392         @q = qw(for foreach);
11393         @is_for_foreach{@q} = (1) x scalar(@q);
11394
11395     }
11396
11397     sub is_essential_whitespace {
11398
11399         # Essential whitespace means whitespace which cannot be safely deleted
11400         # without risking the introduction of a syntax error.
11401         # We are given three tokens and their types:
11402         # ($tokenl, $typel) is the token to the left of the space in question
11403         # ($tokenr, $typer) is the token to the right of the space in question
11404         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
11405         #
11406         # This is a slow routine but is not needed too often except when -mangle
11407         # is used.
11408         #
11409         # Note: This routine should almost never need to be changed.  It is
11410         # for avoiding syntax problems rather than for formatting.
11411         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
11412
11413         my $result =
11414
11415           # never combine two bare words or numbers
11416           # examples:  and ::ok(1)
11417           #            return ::spw(...)
11418           #            for bla::bla:: abc
11419           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
11420           #            $input eq"quit" to make $inputeq"quit"
11421           #            my $size=-s::SINK if $file;  <==OK but we won't do it
11422           # don't join something like: for bla::bla:: abc
11423           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
11424           (      ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
11425               && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
11426
11427           # do not combine a number with a concatenation dot
11428           # example: pom.caputo:
11429           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
11430           || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
11431           || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
11432
11433           # do not join a minus with a bare word, because you might form
11434           # a file test operator.  Example from Complex.pm:
11435           # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
11436           || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
11437
11438           # and something like this could become ambiguous without space
11439           # after the '-':
11440           #   use constant III=>1;
11441           #   $a = $b - III;
11442           # and even this:
11443           #   $a = - III;
11444           || ( ( $tokenl eq '-' )
11445             && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
11446
11447           # '= -' should not become =- or you will get a warning
11448           # about reversed -=
11449           # || ($tokenr eq '-')
11450
11451           # keep a space between a quote and a bareword to prevent the
11452           # bareword from becoming a quote modifier.
11453           || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
11454
11455           # keep a space between a token ending in '$' and any word;
11456           # this caused trouble:  "die @$ if $@"
11457           || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
11458             && ( $tokenr =~ /^[a-zA-Z_]/ ) )
11459
11460           # perl is very fussy about spaces before <<
11461           || ( $tokenr =~ /^\<\</ )
11462
11463           # avoid combining tokens to create new meanings. Example:
11464           #     $a+ +$b must not become $a++$b
11465           || ( $is_digraph{ $tokenl . $tokenr } )
11466           || ( $is_trigraph{ $tokenl . $tokenr } )
11467
11468           # another example: do not combine these two &'s:
11469           #     allow_options & &OPT_EXECCGI
11470           || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
11471
11472           # don't combine $$ or $# with any alphanumeric
11473           # (testfile mangle.t with --mangle)
11474           || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
11475
11476           # retain any space after possible filehandle
11477           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
11478           || ( $typel eq 'Z' )
11479
11480           # Perl is sensitive to whitespace after the + here:
11481           #  $b = xvals $a + 0.1 * yvals $a;
11482           || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
11483
11484           # keep paren separate in 'use Foo::Bar ()'
11485           || ( $tokenr eq '('
11486             && $typel eq 'w'
11487             && $typell eq 'k'
11488             && $tokenll eq 'use' )
11489
11490           # keep any space between filehandle and paren:
11491           # file mangle.t with --mangle:
11492           || ( $typel eq 'Y' && $tokenr eq '(' )
11493
11494           # retain any space after here doc operator ( hereerr.t)
11495           || ( $typel eq 'h' )
11496
11497           # be careful with a space around ++ and --, to avoid ambiguity as to
11498           # which token it applies
11499           || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
11500           || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
11501
11502           # need space after foreach my; for example, this will fail in
11503           # older versions of Perl:
11504           # foreach my$ft(@filetypes)...
11505           || (
11506             $tokenl eq 'my'
11507
11508             #  /^(for|foreach)$/
11509             && $is_for_foreach{$tokenll}
11510             && $tokenr =~ /^\$/
11511           )
11512
11513           # must have space between grep and left paren; "grep(" will fail
11514           || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
11515
11516           # don't stick numbers next to left parens, as in:
11517           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
11518           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
11519
11520           # We must be sure that a space between a ? and a quoted string
11521           # remains if the space before the ? remains.  [Loca.pm, lockarea]
11522           # ie,
11523           #    $b=join $comma ? ',' : ':', @_;  # ok
11524           #    $b=join $comma?',' : ':', @_;    # ok!
11525           #    $b=join $comma ?',' : ':', @_;   # error!
11526           # Not really required:
11527           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
11528
11529           # do not remove space between an '&' and a bare word because
11530           # it may turn into a function evaluation, like here
11531           # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
11532           #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
11533           || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
11534
11535           # space stacked labels  (TODO: check if really necessary)
11536           || ( $typel eq 'J' && $typer eq 'J' )
11537
11538           ;    # the value of this long logic sequence is the result we want
11539 ##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
11540         return $result;
11541     }
11542 }
11543
11544 {
11545     my %secret_operators;
11546     my %is_leading_secret_token;
11547
11548     BEGIN {
11549
11550         # token lists for perl secret operators as compiled by Philippe Bruhat
11551         # at: https://metacpan.org/module/perlsecret
11552         %secret_operators = (
11553             'Goatse'             => [qw#= ( ) =#],        #=( )=
11554             'Venus1'             => [qw#0 +#],            # 0+
11555             'Venus2'             => [qw#+ 0#],            # +0
11556             'Enterprise'         => [qw#) x ! !#],        # ()x!!
11557             'Kite1'              => [qw#~ ~ <>#],         # ~~<>
11558             'Kite2'              => [qw#~~ <>#],          # ~~<>
11559             'Winking Fat Comma'  => [ ( ',', '=>' ) ],    # ,=>
11560             'Bang bang         ' => [qw#! !#],            # !!
11561         );
11562
11563         # The following operators and constants are not included because they
11564         # are normally kept tight by perltidy:
11565         # ~~ <~>
11566         #
11567
11568         # Make a lookup table indexed by the first token of each operator:
11569         # first token => [list, list, ...]
11570         foreach my $value ( values(%secret_operators) ) {
11571             my $tok = $value->[0];
11572             push @{ $is_leading_secret_token{$tok} }, $value;
11573         }
11574     }
11575
11576     sub new_secret_operator_whitespace {
11577
11578         my ( $rlong_array, $rwhitespace_flags ) = @_;
11579
11580         # Loop over all tokens in this line
11581         my ( $token, $type );
11582         my $jmax = @{$rlong_array} - 1;
11583         foreach my $j ( 0 .. $jmax ) {
11584
11585             $token = $rlong_array->[$j]->[_TOKEN_];
11586             $type  = $rlong_array->[$j]->[_TYPE_];
11587
11588             # Skip unless this token might start a secret operator
11589             next if ( $type eq 'b' );
11590             next unless ( $is_leading_secret_token{$token} );
11591
11592             #      Loop over all secret operators with this leading token
11593             foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
11594                 my $jend = $j - 1;
11595                 foreach my $tok ( @{$rpattern} ) {
11596                     $jend++;
11597                     $jend++
11598
11599                       if ( $jend <= $jmax
11600                         && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
11601                     if (   $jend > $jmax
11602                         || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
11603                     {
11604                         $jend = undef;
11605                         last;
11606                     }
11607                 }
11608
11609                 if ($jend) {
11610
11611                     # set flags to prevent spaces within this operator
11612                     foreach my $jj ( $j + 1 .. $jend ) {
11613                         $rwhitespace_flags->[$jj] = WS_NO;
11614                     }
11615                     $j = $jend;
11616                     last;
11617                 }
11618             }    ##      End Loop over all operators
11619         }    ## End loop over all tokens
11620         return;
11621     }    # End sub
11622 }
11623
11624 {        # begin print_line_of_tokens
11625
11626     my $rinput_token_array;    # Current working array
11627     my $rinput_K_array;        # Future working array
11628
11629     my $in_quote;
11630     my $guessed_indentation_level;
11631
11632     # This should be a return variable from extract_token
11633     # These local token variables are stored by store_token_to_go:
11634     my $rtoken_vars;
11635     my $Ktoken_vars;
11636     my $block_type;
11637     my $ci_level;
11638     my $container_environment;
11639     my $container_type;
11640     my $in_continued_quote;
11641     my $level;
11642     my $no_internal_newlines;
11643     my $slevel;
11644     my $token;
11645     my $type;
11646     my $type_sequence;
11647
11648     # routine to pull the jth token from the line of tokens
11649     sub extract_token {
11650         my ( $self, $j ) = @_;
11651
11652         my $rLL = $self->{rLL};
11653         $Ktoken_vars = $rinput_K_array->[$j];
11654         if ( !defined($Ktoken_vars) ) {
11655
11656        # Shouldn't happen: an error here would be due to a recent program change
11657             Fault("undefined index K for j=$j");
11658         }
11659         $rtoken_vars = $rLL->[$Ktoken_vars];
11660
11661         if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
11662
11663        # Shouldn't happen: an error here would be due to a recent program change
11664             Fault(<<EOM);
11665  j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
11666 EOM
11667         }
11668
11669         #########################################################
11670         # these are now redundant and can eventually be eliminated
11671
11672         $token                 = $rtoken_vars->[_TOKEN_];
11673         $type                  = $rtoken_vars->[_TYPE_];
11674         $block_type            = $rtoken_vars->[_BLOCK_TYPE_];
11675         $container_type        = $rtoken_vars->[_CONTAINER_TYPE_];
11676         $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
11677         $type_sequence         = $rtoken_vars->[_TYPE_SEQUENCE_];
11678         $level                 = $rtoken_vars->[_LEVEL_];
11679         $slevel                = $rtoken_vars->[_SLEVEL_];
11680         $ci_level              = $rtoken_vars->[_CI_LEVEL_];
11681         #########################################################
11682
11683         return;
11684     }
11685
11686     {
11687         my @saved_token;
11688
11689         sub save_current_token {
11690
11691             @saved_token = (
11692                 $block_type,            $ci_level,
11693                 $container_environment, $container_type,
11694                 $in_continued_quote,    $level,
11695                 $no_internal_newlines,  $slevel,
11696                 $token,                 $type,
11697                 $type_sequence,         $rtoken_vars,
11698                 $Ktoken_vars,
11699             );
11700             return;
11701         }
11702
11703         sub restore_current_token {
11704             (
11705                 $block_type,            $ci_level,
11706                 $container_environment, $container_type,
11707                 $in_continued_quote,    $level,
11708                 $no_internal_newlines,  $slevel,
11709                 $token,                 $type,
11710                 $type_sequence,         $rtoken_vars,
11711                 $Ktoken_vars,
11712             ) = @saved_token;
11713             return;
11714         }
11715     }
11716
11717     sub token_length {
11718
11719         # Returns the length of a token, given:
11720         #  $token=text of the token
11721         #  $type = type
11722         #  $not_first_token = should be TRUE if this is not the first token of
11723         #   the line.  It might the index of this token in an array.  It is
11724         #   used to test for a side comment vs a block comment.
11725         # Note: Eventually this should be the only routine determining the
11726         # length of a token in this package.
11727         my ( $token, $type, $not_first_token ) = @_;
11728         my $token_length = length($token);
11729
11730         # We mark lengths of side comments as just 1 if we are
11731         # ignoring their lengths when setting line breaks.
11732         $token_length = 1
11733           if ( $rOpts_ignore_side_comment_lengths
11734             && $not_first_token
11735             && $type eq '#' );
11736         return $token_length;
11737     }
11738
11739     sub rtoken_length {
11740
11741         # return length of ith token in @{$rtokens}
11742         my ($i) = @_;
11743         return token_length( $rinput_token_array->[$i]->[_TOKEN_],
11744             $rinput_token_array->[$i]->[_TYPE_], $i );
11745     }
11746
11747     # Routine to place the current token into the output stream.
11748     # Called once per output token.
11749     sub store_token_to_go {
11750
11751         my ( $self, $side_comment_follows ) = @_;
11752
11753         my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
11754
11755         ++$max_index_to_go;
11756         $K_to_go[$max_index_to_go]                     = $Ktoken_vars;
11757         $rtoken_vars_to_go[$max_index_to_go]           = $rtoken_vars;
11758         $tokens_to_go[$max_index_to_go]                = $token;
11759         $types_to_go[$max_index_to_go]                 = $type;
11760         $nobreak_to_go[$max_index_to_go]               = $flag;
11761         $old_breakpoint_to_go[$max_index_to_go]        = 0;
11762         $forced_breakpoint_to_go[$max_index_to_go]     = 0;
11763         $block_type_to_go[$max_index_to_go]            = $block_type;
11764         $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
11765         $container_environment_to_go[$max_index_to_go] = $container_environment;
11766         $ci_levels_to_go[$max_index_to_go]             = $ci_level;
11767         $mate_index_to_go[$max_index_to_go]            = -1;
11768         $matching_token_to_go[$max_index_to_go]        = '';
11769         $bond_strength_to_go[$max_index_to_go]         = 0;
11770
11771         # Note: negative levels are currently retained as a diagnostic so that
11772         # the 'final indentation level' is correctly reported for bad scripts.
11773         # But this means that every use of $level as an index must be checked.
11774         # If this becomes too much of a problem, we might give up and just clip
11775         # them at zero.
11776         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
11777         $levels_to_go[$max_index_to_go] = $level;
11778         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
11779
11780         # link the non-blank tokens
11781         my $iprev = $max_index_to_go - 1;
11782         $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
11783         $iprev_to_go[$max_index_to_go] = $iprev;
11784         $inext_to_go[$iprev]           = $max_index_to_go
11785           if ( $iprev >= 0 && $type ne 'b' );
11786         $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
11787
11788         $token_lengths_to_go[$max_index_to_go] =
11789           token_length( $token, $type, $max_index_to_go );
11790
11791         # We keep a running sum of token lengths from the start of this batch:
11792         #   summed_lengths_to_go[$i]   = total length to just before token $i
11793         #   summed_lengths_to_go[$i+1] = total length to just after token $i
11794         $summed_lengths_to_go[ $max_index_to_go + 1 ] =
11795           $summed_lengths_to_go[$max_index_to_go] +
11796           $token_lengths_to_go[$max_index_to_go];
11797
11798         # Define the indentation that this token would have if it started
11799         # a new line.  We have to do this now because we need to know this
11800         # when considering one-line blocks.
11801         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
11802
11803         # remember previous nonblank tokens seen
11804         if ( $type ne 'b' ) {
11805             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
11806             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
11807             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
11808             $last_nonblank_index_to_go      = $max_index_to_go;
11809             $last_nonblank_type_to_go       = $type;
11810             $last_nonblank_token_to_go      = $token;
11811             if ( $type eq ',' ) {
11812                 $comma_count_in_batch++;
11813             }
11814         }
11815
11816         FORMATTER_DEBUG_FLAG_STORE && do {
11817             my ( $a, $b, $c ) = caller();
11818             print STDOUT
11819 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
11820         };
11821         return;
11822     }
11823
11824     sub insert_new_token_to_go {
11825
11826         # insert a new token into the output stream.  use same level as
11827         # previous token; assumes a character at max_index_to_go.
11828         my $self = shift;
11829         my @args = @_;
11830         save_current_token();
11831         ( $token, $type, $slevel, $no_internal_newlines ) = @args;
11832
11833         if ( $max_index_to_go == UNDEFINED_INDEX ) {
11834             warning("code bug: bad call to insert_new_token_to_go\n");
11835         }
11836         $level = $levels_to_go[$max_index_to_go];
11837
11838         # FIXME: it seems to be necessary to use the next, rather than
11839         # previous, value of this variable when creating a new blank (align.t)
11840         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
11841         $ci_level              = $ci_levels_to_go[$max_index_to_go];
11842         $container_environment = $container_environment_to_go[$max_index_to_go];
11843         $in_continued_quote    = 0;
11844         $block_type            = "";
11845         $type_sequence         = "";
11846         $self->store_token_to_go();
11847         restore_current_token();
11848         return;
11849     }
11850
11851     sub copy_hash {
11852         my ($rold_token_hash) = @_;
11853         my %new_token_hash =
11854           map { $_, $rold_token_hash->{$_} } keys %{$rold_token_hash};
11855         return \%new_token_hash;
11856     }
11857
11858     sub copy_array {
11859         my ($rold) = @_;
11860         my @new = map { $_ } @{$rold};
11861         return \@new;
11862     }
11863
11864     sub copy_token_as_type {
11865         my ( $rold_token, $type, $token ) = @_;
11866         if ( $type eq 'b' ) {
11867             $token = " " unless defined($token);
11868         }
11869         elsif ( $type eq 'q' ) {
11870             $token = '' unless defined($token);
11871         }
11872         elsif ( $type eq '->' ) {
11873             $token = '->' unless defined($token);
11874         }
11875         elsif ( $type eq ';' ) {
11876             $token = ';' unless defined($token);
11877         }
11878         else {
11879             Fault(
11880 "Programming error: copy_token_as has type $type but should be 'b' or 'q'"
11881             );
11882         }
11883         my $rnew_token = copy_array($rold_token);
11884         $rnew_token->[_TYPE_]                  = $type;
11885         $rnew_token->[_TOKEN_]                 = $token;
11886         $rnew_token->[_BLOCK_TYPE_]            = '';
11887         $rnew_token->[_CONTAINER_TYPE_]        = '';
11888         $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
11889         $rnew_token->[_TYPE_SEQUENCE_]         = '';
11890         return $rnew_token;
11891     }
11892
11893     sub boolean_equals {
11894         my ( $val1, $val2 ) = @_;
11895         return ( $val1 && $val2 || !$val1 && !$val2 );
11896     }
11897
11898     sub print_line_of_tokens {
11899
11900         my ( $self, $line_of_tokens ) = @_;
11901
11902         # This routine is called once per input line to process all of
11903         # the tokens on that line.  This is the first stage of
11904         # beautification.
11905         #
11906         # Full-line comments and blank lines may be processed immediately.
11907         #
11908         # For normal lines of code, the tokens are stored one-by-one,
11909         # via calls to 'sub store_token_to_go', until a known line break
11910         # point is reached.  Then, the batch of collected tokens is
11911         # passed along to 'sub output_line_to_go' for further
11912         # processing.  This routine decides if there should be
11913         # whitespace between each pair of non-white tokens, so later
11914         # routines only need to decide on any additional line breaks.
11915         # Any whitespace is initially a single space character.  Later,
11916         # the vertical aligner may expand that to be multiple space
11917         # characters if necessary for alignment.
11918
11919         $input_line_number = $line_of_tokens->{_line_number};
11920         my $input_line = $line_of_tokens->{_line_text};
11921         my $CODE_type  = $line_of_tokens->{_code_type};
11922
11923         my $rK_range = $line_of_tokens->{_rK_range};
11924         my ( $K_first, $K_last ) = @{$rK_range};
11925
11926         my $rLL              = $self->{rLL};
11927         my $rbreak_container = $self->{rbreak_container};
11928
11929         if ( !defined($K_first) ) {
11930
11931             # Unexpected blank line..
11932             # Calling routine was supposed to handle this
11933             Perl::Tidy::Warn(
11934 "Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring"
11935             );
11936             return;
11937         }
11938
11939         $no_internal_newlines = 1 - $rOpts_add_newlines;
11940         my $is_comment =
11941           ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
11942         my $is_static_block_comment_without_leading_space =
11943           $CODE_type eq 'SBCX';
11944         $is_static_block_comment =
11945           $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
11946         my $is_hanging_side_comment = $CODE_type eq 'HSC';
11947         my $is_VERSION_statement    = $CODE_type eq 'VER';
11948         if ($is_VERSION_statement) {
11949             $saw_VERSION_in_this_file = 1;
11950             $no_internal_newlines     = 1;
11951         }
11952
11953         # Add interline blank if any
11954         my $last_old_nonblank_type   = "b";
11955         my $first_new_nonblank_type  = "b";
11956         my $first_new_nonblank_token = " ";
11957         if ( $max_index_to_go >= 0 ) {
11958             $last_old_nonblank_type   = $types_to_go[$max_index_to_go];
11959             $first_new_nonblank_type  = $rLL->[$K_first]->[_TYPE_];
11960             $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
11961             if (  !$is_comment
11962                 && $types_to_go[$max_index_to_go] ne 'b'
11963                 && $K_first > 0
11964                 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
11965             {
11966                 $K_first -= 1;
11967             }
11968         }
11969
11970         # Copy the tokens into local arrays
11971         $rinput_token_array = [];
11972         $rinput_K_array     = [];
11973         $rinput_K_array     = [ ( $K_first .. $K_last ) ];
11974         $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
11975         my $jmax = @{$rinput_K_array} - 1;
11976
11977         $in_continued_quote = $starting_in_quote =
11978           $line_of_tokens->{_starting_in_quote};
11979         $in_quote        = $line_of_tokens->{_ending_in_quote};
11980         $ending_in_quote = $in_quote;
11981         $guessed_indentation_level =
11982           $line_of_tokens->{_guessed_indentation_level};
11983
11984         my $j_next;
11985         my $next_nonblank_token;
11986         my $next_nonblank_token_type;
11987
11988         $block_type            = "";
11989         $container_type        = "";
11990         $container_environment = "";
11991         $type_sequence         = "";
11992
11993         ######################################
11994         # Handle a block (full-line) comment..
11995         ######################################
11996         if ($is_comment) {
11997
11998             if ( $rOpts->{'delete-block-comments'} ) { return }
11999
12000             if ( $rOpts->{'tee-block-comments'} ) {
12001                 $file_writer_object->tee_on();
12002             }
12003
12004             destroy_one_line_block();
12005             $self->output_line_to_go();
12006
12007             # output a blank line before block comments
12008             if (
12009                 # unless we follow a blank or comment line
12010                 $last_line_leading_type !~ /^[#b]$/
12011
12012                 # only if allowed
12013                 && $rOpts->{'blanks-before-comments'}
12014
12015                 # if this is NOT an empty comment line
12016                 && $rinput_token_array->[0]->[_TOKEN_] ne '#'
12017
12018                 # not after a short line ending in an opening token
12019                 # because we already have space above this comment.
12020                 # Note that the first comment in this if block, after
12021                 # the 'if (', does not get a blank line because of this.
12022                 && !$last_output_short_opening_token
12023
12024                 # never before static block comments
12025                 && !$is_static_block_comment
12026               )
12027             {
12028                 $self->flush();    # switching to new output stream
12029                 $file_writer_object->write_blank_code_line();
12030                 $last_line_leading_type = 'b';
12031             }
12032
12033             # TRIM COMMENTS -- This could be turned off as a option
12034             $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//;    # trim right end
12035
12036             if (
12037                 $rOpts->{'indent-block-comments'}
12038                 && (  !$rOpts->{'indent-spaced-block-comments'}
12039                     || $input_line =~ /^\s+/ )
12040                 && !$is_static_block_comment_without_leading_space
12041               )
12042             {
12043                 $self->extract_token(0);
12044                 $self->store_token_to_go();
12045                 $self->output_line_to_go();
12046             }
12047             else {
12048                 $self->flush();    # switching to new output stream
12049                 $file_writer_object->write_code_line(
12050                     $rinput_token_array->[0]->[_TOKEN_] . "\n" );
12051                 $last_line_leading_type = '#';
12052             }
12053             if ( $rOpts->{'tee-block-comments'} ) {
12054                 $file_writer_object->tee_off();
12055             }
12056             return;
12057         }
12058
12059         # TODO: Move to sub scan_comments
12060         # compare input/output indentation except for continuation lines
12061         # (because they have an unknown amount of initial blank space)
12062         # and lines which are quotes (because they may have been outdented)
12063         # Note: this test is placed here because we know the continuation flag
12064         # at this point, which allows us to avoid non-meaningful checks.
12065         my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
12066         compare_indentation_levels( $guessed_indentation_level,
12067             $structural_indentation_level )
12068           unless ( $is_hanging_side_comment
12069             || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
12070             || $guessed_indentation_level == 0
12071             && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
12072
12073         ##########################
12074         # Handle indentation-only
12075         ##########################
12076
12077         # NOTE: In previous versions we sent all qw lines out immediately here.
12078         # No longer doing this: also write a line which is entirely a 'qw' list
12079         # to allow stacking of opening and closing tokens.  Note that interior
12080         # qw lines will still go out at the end of this routine.
12081         ##if ( $rOpts->{'indent-only'} ) {
12082         if ( $CODE_type eq 'IO' ) {
12083             $self->flush();
12084             my $line = $input_line;
12085
12086             # delete side comments if requested with -io, but
12087             # we will not allow deleting of closing side comments with -io
12088             # because the coding would be more complex
12089             if (   $rOpts->{'delete-side-comments'}
12090                 && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
12091             {
12092
12093                 $line = "";
12094                 foreach my $jj ( 0 .. $jmax - 1 ) {
12095                     $line .= $rinput_token_array->[$jj]->[_TOKEN_];
12096                 }
12097             }
12098             $line = trim($line);
12099
12100             $self->extract_token(0);
12101             $token                 = $line;
12102             $type                  = 'q';
12103             $block_type            = "";
12104             $container_type        = "";
12105             $container_environment = "";
12106             $type_sequence         = "";
12107             $self->store_token_to_go();
12108             $self->output_line_to_go();
12109             return;
12110         }
12111
12112         ############################
12113         # Handle all other lines ...
12114         ############################
12115
12116         #######################################################
12117         # FIXME: this should become unnecessary
12118         # making $j+2 valid simplifies coding
12119         my $rnew_blank =
12120           copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
12121         push @{$rinput_token_array}, $rnew_blank;
12122         push @{$rinput_token_array}, $rnew_blank;
12123         #######################################################
12124
12125         # If we just saw the end of an elsif block, write nag message
12126         # if we do not see another elseif or an else.
12127         if ($looking_for_else) {
12128
12129             unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
12130                 write_logfile_entry("(No else block)\n");
12131             }
12132             $looking_for_else = 0;
12133         }
12134
12135         # This is a good place to kill incomplete one-line blocks
12136         if (
12137             (
12138                    ( $semicolons_before_block_self_destruct == 0 )
12139                 && ( $max_index_to_go >= 0 )
12140                 && ( $last_old_nonblank_type eq ';' )
12141                 && ( $first_new_nonblank_token ne '}' )
12142             )
12143
12144             # Patch for RT #98902. Honor request to break at old commas.
12145             || (   $rOpts_break_at_old_comma_breakpoints
12146                 && $max_index_to_go >= 0
12147                 && $last_old_nonblank_type eq ',' )
12148           )
12149         {
12150             $forced_breakpoint_to_go[$max_index_to_go] = 1
12151               if ($rOpts_break_at_old_comma_breakpoints);
12152             destroy_one_line_block();
12153             $self->output_line_to_go();
12154         }
12155
12156         # loop to process the tokens one-by-one
12157         $type  = 'b';
12158         $token = "";
12159
12160         # We do not want a leading blank if the previous batch just got output
12161         my $jmin = 0;
12162         if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
12163             $jmin = 1;
12164         }
12165
12166         foreach my $j ( $jmin .. $jmax ) {
12167
12168             # pull out the local values for this token
12169             $self->extract_token($j);
12170
12171             if ( $type eq '#' ) {
12172
12173                 # trim trailing whitespace
12174                 # (there is no option at present to prevent this)
12175                 $token =~ s/\s*$//;
12176
12177                 if (
12178                     $rOpts->{'delete-side-comments'}
12179
12180                     # delete closing side comments if necessary
12181                     || (   $rOpts->{'delete-closing-side-comments'}
12182                         && $token =~ /$closing_side_comment_prefix_pattern/o
12183                         && $last_nonblank_block_type =~
12184                         /$closing_side_comment_list_pattern/o )
12185                   )
12186                 {
12187                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
12188                         unstore_token_to_go();
12189                     }
12190                     last;
12191                 }
12192             }
12193
12194             # If we are continuing after seeing a right curly brace, flush
12195             # buffer unless we see what we are looking for, as in
12196             #   } else ...
12197             if ( $rbrace_follower && $type ne 'b' ) {
12198
12199                 unless ( $rbrace_follower->{$token} ) {
12200                     $self->output_line_to_go();
12201                 }
12202                 $rbrace_follower = undef;
12203             }
12204
12205             $j_next =
12206               ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
12207               ? $j + 2
12208               : $j + 1;
12209             $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
12210             $next_nonblank_token_type =
12211               $rinput_token_array->[$j_next]->[_TYPE_];
12212
12213             ######################
12214             # MAYBE MOVE ELSEWHERE?
12215             ######################
12216             if ( $type eq 'Q' ) {
12217                 note_embedded_tab() if ( $token =~ "\t" );
12218
12219                 # make note of something like '$var = s/xxx/yyy/;'
12220                 # in case it should have been '$var =~ s/xxx/yyy/;'
12221                 if (
12222                        $token =~ /^(s|tr|y|m|\/)/
12223                     && $last_nonblank_token =~ /^(=|==|!=)$/
12224
12225                     # preceded by simple scalar
12226                     && $last_last_nonblank_type eq 'i'
12227                     && $last_last_nonblank_token =~ /^\$/
12228
12229                     # followed by some kind of termination
12230                     # (but give complaint if we can's see far enough ahead)
12231                     && $next_nonblank_token =~ /^[; \)\}]$/
12232
12233                     # scalar is not declared
12234                     && !(
12235                            $types_to_go[0] eq 'k'
12236                         && $tokens_to_go[0] =~ /^(my|our|local)$/
12237                     )
12238                   )
12239                 {
12240                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
12241                     complain(
12242 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
12243                     );
12244                 }
12245             }
12246
12247             # Do not allow breaks which would promote a side comment to a
12248             # block comment.  In order to allow a break before an opening
12249             # or closing BLOCK, followed by a side comment, those sections
12250             # of code will handle this flag separately.
12251             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
12252             my $is_opening_BLOCK =
12253               (      $type eq '{'
12254                   && $token eq '{'
12255                   && $block_type
12256                   && $block_type ne 't' );
12257             my $is_closing_BLOCK =
12258               (      $type eq '}'
12259                   && $token eq '}'
12260                   && $block_type
12261                   && $block_type ne 't' );
12262
12263             if (   $side_comment_follows
12264                 && !$is_opening_BLOCK
12265                 && !$is_closing_BLOCK )
12266             {
12267                 $no_internal_newlines = 1;
12268             }
12269
12270             # We're only going to handle breaking for code BLOCKS at this
12271             # (top) level.  Other indentation breaks will be handled by
12272             # sub scan_list, which is better suited to dealing with them.
12273             if ($is_opening_BLOCK) {
12274
12275                 # Tentatively output this token.  This is required before
12276                 # calling starting_one_line_block.  We may have to unstore
12277                 # it, though, if we have to break before it.
12278                 $self->store_token_to_go($side_comment_follows);
12279
12280                 # Look ahead to see if we might form a one-line block..
12281                 my $too_long = 0;
12282
12283                 # But obey any flag set for cuddled blocks
12284                 if ( $rbreak_container->{$type_sequence} ) {
12285                     destroy_one_line_block();
12286                 }
12287                 else {
12288                     $too_long =
12289                       starting_one_line_block( $j, $jmax, $level, $slevel,
12290                         $ci_level, $rinput_token_array );
12291                 }
12292                 clear_breakpoint_undo_stack();
12293
12294                 # to simplify the logic below, set a flag to indicate if
12295                 # this opening brace is far from the keyword which introduces it
12296                 my $keyword_on_same_line = 1;
12297                 if (   ( $max_index_to_go >= 0 )
12298                     && ( $last_nonblank_type eq ')' ) )
12299                 {
12300                     if (   $block_type =~ /^(if|else|elsif)$/
12301                         && ( $tokens_to_go[0] eq '}' )
12302                         && $rOpts_cuddled_else )
12303                     {
12304                         $keyword_on_same_line = 1;
12305                     }
12306                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
12307                     {
12308                         $keyword_on_same_line = 0;
12309                     }
12310                 }
12311
12312                 # decide if user requested break before '{'
12313                 my $want_break =
12314
12315                   # use -bl flag if not a sub block of any type
12316                   $block_type !~ /^sub\b/
12317                   ? $rOpts->{'opening-brace-on-new-line'}
12318
12319                   # use -sbl flag for a named sub block
12320                   : $block_type !~ /$ASUB_PATTERN/
12321                   ? $rOpts->{'opening-sub-brace-on-new-line'}
12322
12323                   # use -asbl flag for an anonymous sub block
12324                   : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
12325
12326                 # Do not break if this token is welded to the left
12327                 if ( weld_len_left( $type_sequence, $token ) ) {
12328                     $want_break = 0;
12329                 }
12330
12331                 # Break before an opening '{' ...
12332                 if (
12333
12334                     # if requested
12335                     $want_break
12336
12337                     # and we were unable to start looking for a block,
12338                     && $index_start_one_line_block == UNDEFINED_INDEX
12339
12340                     # or if it will not be on same line as its keyword, so that
12341                     # it will be outdented (eval.t, overload.t), and the user
12342                     # has not insisted on keeping it on the right
12343                     || (   !$keyword_on_same_line
12344                         && !$rOpts->{'opening-brace-always-on-right'} )
12345
12346                   )
12347                 {
12348
12349                     # but only if allowed
12350                     unless ($no_internal_newlines) {
12351
12352                         # since we already stored this token, we must unstore it
12353                         $self->unstore_token_to_go();
12354
12355                         # then output the line
12356                         $self->output_line_to_go();
12357
12358                         # and now store this token at the start of a new line
12359                         $self->store_token_to_go($side_comment_follows);
12360                     }
12361                 }
12362
12363                 # Now update for side comment
12364                 if ($side_comment_follows) { $no_internal_newlines = 1 }
12365
12366                 # now output this line
12367                 unless ($no_internal_newlines) {
12368                     $self->output_line_to_go();
12369                 }
12370             }
12371
12372             elsif ($is_closing_BLOCK) {
12373
12374                 # If there is a pending one-line block ..
12375                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12376
12377                     # we have to terminate it if..
12378                     if (
12379
12380                         # it is too long (final length may be different from
12381                         # initial estimate). note: must allow 1 space for this
12382                         # token
12383                         excess_line_length( $index_start_one_line_block,
12384                             $max_index_to_go ) >= 0
12385
12386                         # or if it has too many semicolons
12387                         || (   $semicolons_before_block_self_destruct == 0
12388                             && $last_nonblank_type ne ';' )
12389                       )
12390                     {
12391                         destroy_one_line_block();
12392                     }
12393                 }
12394
12395                 # put a break before this closing curly brace if appropriate
12396                 unless ( $no_internal_newlines
12397                     || $index_start_one_line_block != UNDEFINED_INDEX )
12398                 {
12399
12400                     # write out everything before this closing curly brace
12401                     $self->output_line_to_go();
12402                 }
12403
12404                 # Now update for side comment
12405                 if ($side_comment_follows) { $no_internal_newlines = 1 }
12406
12407                 # store the closing curly brace
12408                 $self->store_token_to_go();
12409
12410                 # ok, we just stored a closing curly brace.  Often, but
12411                 # not always, we want to end the line immediately.
12412                 # So now we have to check for special cases.
12413
12414                 # if this '}' successfully ends a one-line block..
12415                 my $is_one_line_block = 0;
12416                 my $keep_going        = 0;
12417                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12418
12419                     # Remember the type of token just before the
12420                     # opening brace.  It would be more general to use
12421                     # a stack, but this will work for one-line blocks.
12422                     $is_one_line_block =
12423                       $types_to_go[$index_start_one_line_block];
12424
12425                     # we have to actually make it by removing tentative
12426                     # breaks that were set within it
12427                     undo_forced_breakpoint_stack(0);
12428                     set_nobreaks( $index_start_one_line_block,
12429                         $max_index_to_go - 1 );
12430
12431                     # then re-initialize for the next one-line block
12432                     destroy_one_line_block();
12433
12434                     # then decide if we want to break after the '}' ..
12435                     # We will keep going to allow certain brace followers as in:
12436                     #   do { $ifclosed = 1; last } unless $losing;
12437                     #
12438                     # But make a line break if the curly ends a
12439                     # significant block:
12440                     if (
12441                         (
12442                             $is_block_without_semicolon{$block_type}
12443
12444                             # Follow users break point for
12445                             # one line block types U & G, such as a 'try' block
12446                             || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
12447                         )
12448
12449                         # if needless semicolon follows we handle it later
12450                         && $next_nonblank_token ne ';'
12451                       )
12452                     {
12453                         $self->output_line_to_go()
12454                           unless ($no_internal_newlines);
12455                     }
12456                 }
12457
12458                 # set string indicating what we need to look for brace follower
12459                 # tokens
12460                 if ( $block_type eq 'do' ) {
12461                     $rbrace_follower = \%is_do_follower;
12462                 }
12463                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
12464                     $rbrace_follower = \%is_if_brace_follower;
12465                 }
12466                 elsif ( $block_type eq 'else' ) {
12467                     $rbrace_follower = \%is_else_brace_follower;
12468                 }
12469
12470                 # added eval for borris.t
12471                 elsif ($is_sort_map_grep_eval{$block_type}
12472                     || $is_one_line_block eq 'G' )
12473                 {
12474                     $rbrace_follower = undef;
12475                     $keep_going      = 1;
12476                 }
12477
12478                 # anonymous sub
12479                 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
12480
12481                     if ($is_one_line_block) {
12482                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
12483                     }
12484                     else {
12485                         $rbrace_follower = \%is_anon_sub_brace_follower;
12486                     }
12487                 }
12488
12489                 # None of the above: specify what can follow a closing
12490                 # brace of a block which is not an
12491                 # if/elsif/else/do/sort/map/grep/eval
12492                 # Testfiles:
12493                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
12494                 else {
12495                     $rbrace_follower = \%is_other_brace_follower;
12496                 }
12497
12498                 # See if an elsif block is followed by another elsif or else;
12499                 # complain if not.
12500                 if ( $block_type eq 'elsif' ) {
12501
12502                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
12503                         $looking_for_else = 1;    # ok, check on next line
12504                     }
12505                     else {
12506
12507                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
12508                             write_logfile_entry("No else block :(\n");
12509                         }
12510                     }
12511                 }
12512
12513                 # keep going after certain block types (map,sort,grep,eval)
12514                 # added eval for borris.t
12515                 if ($keep_going) {
12516
12517                     # keep going
12518                 }
12519
12520                 # if no more tokens, postpone decision until re-entring
12521                 elsif ( ( $next_nonblank_token_type eq 'b' )
12522                     && $rOpts_add_newlines )
12523                 {
12524                     unless ($rbrace_follower) {
12525                         $self->output_line_to_go()
12526                           unless ($no_internal_newlines);
12527                     }
12528                 }
12529
12530                 elsif ($rbrace_follower) {
12531
12532                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
12533                         $self->output_line_to_go()
12534                           unless ($no_internal_newlines);
12535                     }
12536                     $rbrace_follower = undef;
12537                 }
12538
12539                 else {
12540                     $self->output_line_to_go() unless ($no_internal_newlines);
12541                 }
12542
12543             }    # end treatment of closing block token
12544
12545             # handle semicolon
12546             elsif ( $type eq ';' ) {
12547
12548                 # kill one-line blocks with too many semicolons
12549                 $semicolons_before_block_self_destruct--;
12550                 if (
12551                     ( $semicolons_before_block_self_destruct < 0 )
12552                     || (   $semicolons_before_block_self_destruct == 0
12553                         && $next_nonblank_token_type !~ /^[b\}]$/ )
12554                   )
12555                 {
12556                     destroy_one_line_block();
12557                 }
12558
12559                 # Remove unnecessary semicolons, but not after bare
12560                 # blocks, where it could be unsafe if the brace is
12561                 # mistokenized.
12562                 if (
12563                     (
12564                         $last_nonblank_token eq '}'
12565                         && (
12566                             $is_block_without_semicolon{
12567                                 $last_nonblank_block_type}
12568                             || $last_nonblank_block_type =~ /$SUB_PATTERN/
12569                             || $last_nonblank_block_type =~ /^\w+:$/ )
12570                     )
12571                     || $last_nonblank_type eq ';'
12572                   )
12573                 {
12574
12575                     if (
12576                         $rOpts->{'delete-semicolons'}
12577
12578                         # don't delete ; before a # because it would promote it
12579                         # to a block comment
12580                         && ( $next_nonblank_token_type ne '#' )
12581                       )
12582                     {
12583                         note_deleted_semicolon();
12584                         $self->output_line_to_go()
12585                           unless ( $no_internal_newlines
12586                             || $index_start_one_line_block != UNDEFINED_INDEX );
12587                         next;
12588                     }
12589                     else {
12590                         write_logfile_entry("Extra ';'\n");
12591                     }
12592                 }
12593                 $self->store_token_to_go();
12594
12595                 $self->output_line_to_go()
12596                   unless ( $no_internal_newlines
12597                     || ( $rOpts_keep_interior_semicolons && $j < $jmax )
12598                     || ( $next_nonblank_token eq '}' ) );
12599
12600             }
12601
12602             # handle here_doc target string
12603             elsif ( $type eq 'h' ) {
12604
12605                 # no newlines after seeing here-target
12606                 $no_internal_newlines = 1;
12607                 destroy_one_line_block();
12608                 $self->store_token_to_go();
12609             }
12610
12611             # handle all other token types
12612             else {
12613
12614                 $self->store_token_to_go();
12615             }
12616
12617             # remember two previous nonblank OUTPUT tokens
12618             if ( $type ne '#' && $type ne 'b' ) {
12619                 $last_last_nonblank_token = $last_nonblank_token;
12620                 $last_last_nonblank_type  = $last_nonblank_type;
12621                 $last_nonblank_token      = $token;
12622                 $last_nonblank_type       = $type;
12623                 $last_nonblank_block_type = $block_type;
12624             }
12625
12626             # unset the continued-quote flag since it only applies to the
12627             # first token, and we want to resume normal formatting if
12628             # there are additional tokens on the line
12629             $in_continued_quote = 0;
12630
12631         }    # end of loop over all tokens in this 'line_of_tokens'
12632
12633         # we have to flush ..
12634         if (
12635
12636             # if there is a side comment
12637             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
12638
12639             # if this line ends in a quote
12640             # NOTE: This is critically important for insuring that quoted lines
12641             # do not get processed by things like -sot and -sct
12642             || $in_quote
12643
12644             # if this is a VERSION statement
12645             || $is_VERSION_statement
12646
12647             # to keep a label at the end of a line
12648             || $type eq 'J'
12649
12650             # if we are instructed to keep all old line breaks
12651             || !$rOpts->{'delete-old-newlines'}
12652           )
12653         {
12654             destroy_one_line_block();
12655             $self->output_line_to_go();
12656         }
12657
12658         # mark old line breakpoints in current output stream
12659         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
12660             my $jobp = $max_index_to_go;
12661             if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
12662             {
12663                 $jobp--;
12664             }
12665             $old_breakpoint_to_go[$jobp] = 1;
12666         }
12667         return;
12668     } ## end sub print_line_of_tokens
12669 } ## end block print_line_of_tokens
12670
12671 # sub output_line_to_go sends one logical line of tokens on down the
12672 # pipeline to the VerticalAligner package, breaking the line into continuation
12673 # lines as necessary.  The line of tokens is ready to go in the "to_go"
12674 # arrays.
12675 sub output_line_to_go {
12676
12677     my $self = shift;
12678
12679     # debug stuff; this routine can be called from many points
12680     FORMATTER_DEBUG_FLAG_OUTPUT && do {
12681         my ( $a, $b, $c ) = caller;
12682         write_diagnostics(
12683 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
12684         );
12685         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
12686         write_diagnostics("$output_str\n");
12687     };
12688
12689     # Do not end line in a weld
12690     # TODO: Move this fix into the routine?
12691     #my $jnb = $max_index_to_go;
12692     #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- }
12693     return if ( weld_len_right_to_go($max_index_to_go) );
12694
12695     # just set a tentative breakpoint if we might be in a one-line block
12696     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
12697         set_forced_breakpoint($max_index_to_go);
12698         return;
12699     }
12700
12701     my $cscw_block_comment;
12702     $cscw_block_comment = $self->add_closing_side_comment()
12703       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
12704
12705     my $comma_arrow_count_contained = match_opening_and_closing_tokens();
12706
12707     # tell the -lp option we are outputting a batch so it can close
12708     # any unfinished items in its stack
12709     finish_lp_batch();
12710
12711     # If this line ends in a code block brace, set breaks at any
12712     # previous closing code block braces to breakup a chain of code
12713     # blocks on one line.  This is very rare but can happen for
12714     # user-defined subs.  For example we might be looking at this:
12715     #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
12716     my $saw_good_break = 0;    # flag to force breaks even if short line
12717     if (
12718
12719         # looking for opening or closing block brace
12720         $block_type_to_go[$max_index_to_go]
12721
12722         # but not one of these which are never duplicated on a line:
12723         # until|while|for|if|elsif|else
12724         && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
12725       )
12726     {
12727         my $lev = $nesting_depth_to_go[$max_index_to_go];
12728
12729         # Walk backwards from the end and
12730         # set break at any closing block braces at the same level.
12731         # But quit if we are not in a chain of blocks.
12732         for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
12733             last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
12734             next if ( $levels_to_go[$i] > $lev );    # skip past higher level
12735
12736             if ( $block_type_to_go[$i] ) {
12737                 if ( $tokens_to_go[$i] eq '}' ) {
12738                     set_forced_breakpoint($i);
12739                     $saw_good_break = 1;
12740                 }
12741             }
12742
12743             # quit if we see anything besides words, function, blanks
12744             # at this level
12745             elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
12746         }
12747     }
12748
12749     my $imin = 0;
12750     my $imax = $max_index_to_go;
12751
12752     # trim any blank tokens
12753     if ( $max_index_to_go >= 0 ) {
12754         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
12755         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
12756     }
12757
12758     # anything left to write?
12759     if ( $imin <= $imax ) {
12760
12761         # add a blank line before certain key types but not after a comment
12762         if ( $last_line_leading_type !~ /^[#]/ ) {
12763             my $want_blank    = 0;
12764             my $leading_token = $tokens_to_go[$imin];
12765             my $leading_type  = $types_to_go[$imin];
12766
12767             # blank lines before subs except declarations and one-liners
12768             # MCONVERSION LOCATION - for sub tokenization change
12769             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
12770                 $want_blank = $rOpts->{'blank-lines-before-subs'}
12771                   if (
12772                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
12773                         $imax ) !~ /^[\;\}]$/
12774                   );
12775             }
12776
12777             # break before all package declarations
12778             # MCONVERSION LOCATION - for tokenizaton change
12779             elsif ($leading_token =~ /^(package\s)/
12780                 && $leading_type eq 'i' )
12781             {
12782                 $want_blank = $rOpts->{'blank-lines-before-packages'};
12783             }
12784
12785             # break before certain key blocks except one-liners
12786             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
12787                 $want_blank = $rOpts->{'blank-lines-before-subs'}
12788                   if (
12789                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
12790                         $imax ) ne '}'
12791                   );
12792             }
12793
12794             # Break before certain block types if we haven't had a
12795             # break at this level for a while.  This is the
12796             # difficult decision..
12797             elsif ($leading_type eq 'k'
12798                 && $last_line_leading_type ne 'b'
12799                 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
12800             {
12801                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
12802                 if ( !defined($lc) ) { $lc = 0 }
12803
12804                 $want_blank =
12805                      $rOpts->{'blanks-before-blocks'}
12806                   && $lc >= $rOpts->{'long-block-line-count'}
12807                   && $file_writer_object->get_consecutive_nonblank_lines() >=
12808                   $rOpts->{'long-block-line-count'}
12809                   && (
12810                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
12811                         $imax ) ne '}'
12812                   );
12813             }
12814
12815             # Check for blank lines wanted before a closing brace
12816             if ( $leading_token eq '}' ) {
12817                 if (   $rOpts->{'blank-lines-before-closing-block'}
12818                     && $block_type_to_go[$imin]
12819                     && $block_type_to_go[$imin] =~
12820                     /$blank_lines_before_closing_block_pattern/ )
12821                 {
12822                     my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
12823                     if ( $nblanks > $want_blank ) {
12824                         $want_blank = $nblanks;
12825                     }
12826                 }
12827             }
12828
12829             if ($want_blank) {
12830
12831                 # future: send blank line down normal path to VerticalAligner
12832                 Perl::Tidy::VerticalAligner::flush();
12833                 $file_writer_object->require_blank_code_lines($want_blank);
12834             }
12835         }
12836
12837         # update blank line variables and count number of consecutive
12838         # non-blank, non-comment lines at this level
12839         $last_last_line_leading_level = $last_line_leading_level;
12840         $last_line_leading_level      = $levels_to_go[$imin];
12841         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
12842         $last_line_leading_type = $types_to_go[$imin];
12843         if (   $last_line_leading_level == $last_last_line_leading_level
12844             && $last_line_leading_type ne 'b'
12845             && $last_line_leading_type ne '#'
12846             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
12847         {
12848             $nonblank_lines_at_depth[$last_line_leading_level]++;
12849         }
12850         else {
12851             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
12852         }
12853
12854         FORMATTER_DEBUG_FLAG_FLUSH && do {
12855             my ( $package, $file, $line ) = caller;
12856             print STDOUT
12857 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
12858         };
12859
12860         # add a couple of extra terminal blank tokens
12861         pad_array_to_go();
12862
12863         # set all forced breakpoints for good list formatting
12864         my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
12865
12866         my $old_line_count_in_batch =
12867           $rtoken_vars_to_go[$max_index_to_go]->[_LINE_INDEX_] -
12868           $rtoken_vars_to_go[0]->[_LINE_INDEX_] + 1;
12869
12870         if (
12871                $is_long_line
12872             || $old_line_count_in_batch > 1
12873
12874             # must always call scan_list() with unbalanced batches because it
12875             # is maintaining some stacks
12876             || is_unbalanced_batch()
12877
12878             # call scan_list if we might want to break at commas
12879             || (
12880                 $comma_count_in_batch
12881                 && (   $rOpts_maximum_fields_per_table > 0
12882                     || $rOpts_comma_arrow_breakpoints == 0 )
12883             )
12884
12885             # call scan_list if user may want to break open some one-line
12886             # hash references
12887             || (   $comma_arrow_count_contained
12888                 && $rOpts_comma_arrow_breakpoints != 3 )
12889           )
12890         {
12891             ## This caused problems in one version of perl for unknown reasons:
12892             ## $saw_good_break ||= scan_list();
12893             my $sgb = scan_list();
12894             $saw_good_break ||= $sgb;
12895         }
12896
12897         # let $ri_first and $ri_last be references to lists of
12898         # first and last tokens of line fragments to output..
12899         my ( $ri_first, $ri_last );
12900
12901         # write a single line if..
12902         if (
12903
12904             # we aren't allowed to add any newlines
12905             !$rOpts_add_newlines
12906
12907             # or, we don't already have an interior breakpoint
12908             # and we didn't see a good breakpoint
12909             || (
12910                    !$forced_breakpoint_count
12911                 && !$saw_good_break
12912
12913                 # and this line is 'short'
12914                 && !$is_long_line
12915             )
12916           )
12917         {
12918             @{$ri_first} = ($imin);
12919             @{$ri_last}  = ($imax);
12920         }
12921
12922         # otherwise use multiple lines
12923         else {
12924
12925             ( $ri_first, $ri_last, my $colon_count ) =
12926               set_continuation_breaks($saw_good_break);
12927
12928             break_all_chain_tokens( $ri_first, $ri_last );
12929
12930             break_equals( $ri_first, $ri_last );
12931
12932             # now we do a correction step to clean this up a bit
12933             # (The only time we would not do this is for debugging)
12934             if ( $rOpts->{'recombine'} ) {
12935                 ( $ri_first, $ri_last ) =
12936                   recombine_breakpoints( $ri_first, $ri_last );
12937             }
12938
12939             insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
12940         }
12941
12942         # do corrector step if -lp option is used
12943         my $do_not_pad = 0;
12944         if ($rOpts_line_up_parentheses) {
12945             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
12946         }
12947         $self->unmask_phantom_semicolons( $ri_first, $ri_last );
12948         $self->send_lines_to_vertical_aligner( $ri_first, $ri_last,
12949             $do_not_pad );
12950
12951         # Insert any requested blank lines after an opening brace.  We have to
12952         # skip back before any side comment to find the terminal token
12953         my $iterm;
12954         for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
12955             next if $types_to_go[$iterm] eq '#';
12956             next if $types_to_go[$iterm] eq 'b';
12957             last;
12958         }
12959
12960         # write requested number of blank lines after an opening block brace
12961         if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
12962             if (   $rOpts->{'blank-lines-after-opening-block'}
12963                 && $block_type_to_go[$iterm]
12964                 && $block_type_to_go[$iterm] =~
12965                 /$blank_lines_after_opening_block_pattern/ )
12966             {
12967                 my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
12968                 Perl::Tidy::VerticalAligner::flush();
12969                 $file_writer_object->require_blank_code_lines($nblanks);
12970             }
12971         }
12972     }
12973
12974     prepare_for_new_input_lines();
12975
12976     # output any new -cscw block comment
12977     if ($cscw_block_comment) {
12978         $self->flush();
12979         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
12980     }
12981     return;
12982 }
12983
12984 sub note_added_semicolon {
12985     my ($line_number) = @_;
12986     $last_added_semicolon_at = $line_number;
12987     if ( $added_semicolon_count == 0 ) {
12988         $first_added_semicolon_at = $last_added_semicolon_at;
12989     }
12990     $added_semicolon_count++;
12991     write_logfile_entry("Added ';' here\n");
12992     return;
12993 }
12994
12995 sub note_deleted_semicolon {
12996     $last_deleted_semicolon_at = $input_line_number;
12997     if ( $deleted_semicolon_count == 0 ) {
12998         $first_deleted_semicolon_at = $last_deleted_semicolon_at;
12999     }
13000     $deleted_semicolon_count++;
13001     write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
13002     return;
13003 }
13004
13005 sub note_embedded_tab {
13006     $embedded_tab_count++;
13007     $last_embedded_tab_at = $input_line_number;
13008     if ( !$first_embedded_tab_at ) {
13009         $first_embedded_tab_at = $last_embedded_tab_at;
13010     }
13011
13012     if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
13013         write_logfile_entry("Embedded tabs in quote or pattern\n");
13014     }
13015     return;
13016 }
13017
13018 sub starting_one_line_block {
13019
13020     # after seeing an opening curly brace, look for the closing brace
13021     # and see if the entire block will fit on a line.  This routine is
13022     # not always right because it uses the old whitespace, so a check
13023     # is made later (at the closing brace) to make sure we really
13024     # have a one-line block.  We have to do this preliminary check,
13025     # though, because otherwise we would always break at a semicolon
13026     # within a one-line block if the block contains multiple statements.
13027
13028     my ( $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
13029
13030     my $jmax_check = @{$rtoken_array};
13031     if ( $jmax_check < $jmax ) {
13032         print STDERR "jmax=$jmax > $jmax_check\n";
13033     }
13034
13035     # kill any current block - we can only go 1 deep
13036     destroy_one_line_block();
13037
13038     # return value:
13039     #  1=distance from start of block to opening brace exceeds line length
13040     #  0=otherwise
13041
13042     my $i_start = 0;
13043
13044     # shouldn't happen: there must have been a prior call to
13045     # store_token_to_go to put the opening brace in the output stream
13046     if ( $max_index_to_go < 0 ) {
13047         warning("program bug: store_token_to_go called incorrectly\n");
13048         report_definite_bug();
13049     }
13050     else {
13051
13052         # cannot use one-line blocks with cuddled else/elsif lines
13053         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
13054             return 0;
13055         }
13056     }
13057
13058     my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
13059
13060     # find the starting keyword for this block (such as 'if', 'else', ...)
13061
13062     if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
13063         $i_start = $max_index_to_go;
13064     }
13065
13066     # the previous nonblank token should start these block types
13067     elsif (( $last_last_nonblank_token_to_go eq $block_type )
13068         || ( $block_type =~ /^sub\b/ )
13069         || $block_type =~ /\(\)/ )
13070     {
13071         $i_start = $last_last_nonblank_index_to_go;
13072
13073         # For signatures and extended syntax ...
13074         # If this brace follows a parenthesized list, we should look back to
13075         # find the keyword before the opening paren because otherwise we might
13076         # form a one line block which stays intack, and cause the parenthesized
13077         # expression to break open. That looks bad.  However, actually
13078         # searching for the opening paren is slow and tedius.
13079         # The actual keyword is often at the start of a line, but might not be.
13080         # For example, we might have an anonymous sub with signature list
13081         # following a =>.  It is safe to mark the start anywhere before the
13082         # opening paren, so we just go back to the prevoious break (or start of
13083         # the line) if that is before the opening paren.  The minor downside is
13084         # that we may very occasionally break open a block unnecessarily.
13085         if ( $tokens_to_go[$i_start] eq ')' ) {
13086             $i_start = $index_max_forced_break + 1;
13087             if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
13088             my $lev = $levels_to_go[$i_start];
13089             if ( $lev > $level ) { return 0 }
13090         }
13091     }
13092
13093     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
13094
13095         # For something like "if (xxx) {", the keyword "if" will be
13096         # just after the most recent break. This will be 0 unless
13097         # we have just killed a one-line block and are starting another.
13098         # (doif.t)
13099         # Note: cannot use inext_index_to_go[] here because that array
13100         # is still being constructed.
13101         $i_start = $index_max_forced_break + 1;
13102         if ( $types_to_go[$i_start] eq 'b' ) {
13103             $i_start++;
13104         }
13105
13106         # Patch to avoid breaking short blocks defined with extended_syntax:
13107         # Strip off any trailing () which was added in the parser to mark
13108         # the opening keyword.  For example, in the following
13109         #    create( TypeFoo $e) {$bubba}
13110         # the blocktype would be marked as create()
13111         my $stripped_block_type = $block_type;
13112         $stripped_block_type =~ s/\(\)$//;
13113
13114         unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
13115             return 0;
13116         }
13117     }
13118
13119     # patch for SWITCH/CASE to retain one-line case/when blocks
13120     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
13121
13122         # Note: cannot use inext_index_to_go[] here because that array
13123         # is still being constructed.
13124         $i_start = $index_max_forced_break + 1;
13125         if ( $types_to_go[$i_start] eq 'b' ) {
13126             $i_start++;
13127         }
13128         unless ( $tokens_to_go[$i_start] eq $block_type ) {
13129             return 0;
13130         }
13131     }
13132
13133     else {
13134         return 1;
13135     }
13136
13137     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
13138
13139     # see if length is too long to even start
13140     if ( $pos > maximum_line_length($i_start) ) {
13141         return 1;
13142     }
13143
13144     foreach my $i ( $j + 1 .. $jmax ) {
13145
13146         # old whitespace could be arbitrarily large, so don't use it
13147         if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
13148         else { $pos += rtoken_length($i) }
13149
13150         # Return false result if we exceed the maximum line length,
13151         if ( $pos > maximum_line_length($i_start) ) {
13152             return 0;
13153         }
13154
13155         # or encounter another opening brace before finding the closing brace.
13156         elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
13157             && $rtoken_array->[$i]->[_TYPE_] eq '{'
13158             && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
13159         {
13160             return 0;
13161         }
13162
13163         # if we find our closing brace..
13164         elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
13165             && $rtoken_array->[$i]->[_TYPE_] eq '}'
13166             && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
13167         {
13168
13169             # be sure any trailing comment also fits on the line
13170             my $i_nonblank =
13171               ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
13172
13173             # Patch for one-line sort/map/grep/eval blocks with side comments:
13174             # We will ignore the side comment length for sort/map/grep/eval
13175             # because this can lead to statements which change every time
13176             # perltidy is run.  Here is an example from Denis Moskowitz which
13177             # oscillates between these two states without this patch:
13178
13179 ## --------
13180 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13181 ##  @baz;
13182 ##
13183 ## grep {
13184 ##     $_->foo ne 'bar'
13185 ##   }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
13186 ##   @baz;
13187 ## --------
13188
13189             # When the first line is input it gets broken apart by the main
13190             # line break logic in sub print_line_of_tokens.
13191             # When the second line is input it gets recombined by
13192             # print_line_of_tokens and passed to the output routines.  The
13193             # output routines (set_continuation_breaks) do not break it apart
13194             # because the bond strengths are set to the highest possible value
13195             # for grep/map/eval/sort blocks, so the first version gets output.
13196             # It would be possible to fix this by changing bond strengths,
13197             # but they are high to prevent errors in older versions of perl.
13198
13199             if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
13200                 && !$is_sort_map_grep{$block_type} )
13201             {
13202
13203                 $pos += rtoken_length($i_nonblank);
13204
13205                 if ( $i_nonblank > $i + 1 ) {
13206
13207                     # source whitespace could be anything, assume
13208                     # at least one space before the hash on output
13209                     if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
13210                         $pos += 1;
13211                     }
13212                     else { $pos += rtoken_length( $i + 1 ) }
13213                 }
13214
13215                 if ( $pos >= maximum_line_length($i_start) ) {
13216                     return 0;
13217                 }
13218             }
13219
13220             # ok, it's a one-line block
13221             create_one_line_block( $i_start, 20 );
13222             return 0;
13223         }
13224
13225         # just keep going for other characters
13226         else {
13227         }
13228     }
13229
13230     # Allow certain types of new one-line blocks to form by joining
13231     # input lines.  These can be safely done, but for other block types,
13232     # we keep old one-line blocks but do not form new ones. It is not
13233     # always a good idea to make as many one-line blocks as possible,
13234     # so other types are not done.  The user can always use -mangle.
13235     if ( $is_sort_map_grep_eval{$block_type} ) {
13236         create_one_line_block( $i_start, 1 );
13237     }
13238     return 0;
13239 }
13240
13241 sub unstore_token_to_go {
13242
13243     # remove most recent token from output stream
13244     my $self = shift;
13245     if ( $max_index_to_go > 0 ) {
13246         $max_index_to_go--;
13247     }
13248     else {
13249         $max_index_to_go = UNDEFINED_INDEX;
13250     }
13251     return;
13252 }
13253
13254 sub want_blank_line {
13255     my $self = shift;
13256     $self->flush();
13257     $file_writer_object->want_blank_line();
13258     return;
13259 }
13260
13261 sub write_unindented_line {
13262     my ( $self, $line ) = @_;
13263     $self->flush();
13264     $file_writer_object->write_line($line);
13265     return;
13266 }
13267
13268 sub undo_ci {
13269
13270     # Undo continuation indentation in certain sequences
13271     # For example, we can undo continuation indentation in sort/map/grep chains
13272     #    my $dat1 = pack( "n*",
13273     #        map { $_, $lookup->{$_} }
13274     #          sort { $a <=> $b }
13275     #          grep { $lookup->{$_} ne $default } keys %$lookup );
13276     # To align the map/sort/grep keywords like this:
13277     #    my $dat1 = pack( "n*",
13278     #        map { $_, $lookup->{$_} }
13279     #        sort { $a <=> $b }
13280     #        grep { $lookup->{$_} ne $default } keys %$lookup );
13281     my ( $ri_first, $ri_last ) = @_;
13282     my ( $line_1, $line_2, $lev_last );
13283     my $this_line_is_semicolon_terminated;
13284     my $max_line = @{$ri_first} - 1;
13285
13286     # looking at each line of this batch..
13287     # We are looking at leading tokens and looking for a sequence
13288     # all at the same level and higher level than enclosing lines.
13289     foreach my $line ( 0 .. $max_line ) {
13290
13291         my $ibeg = $ri_first->[$line];
13292         my $lev  = $levels_to_go[$ibeg];
13293         if ( $line > 0 ) {
13294
13295             # if we have started a chain..
13296             if ($line_1) {
13297
13298                 # see if it continues..
13299                 if ( $lev == $lev_last ) {
13300                     if (   $types_to_go[$ibeg] eq 'k'
13301                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
13302                     {
13303
13304                         # chain continues...
13305                         # check for chain ending at end of a statement
13306                         if ( $line == $max_line ) {
13307
13308                             # see of this line ends a statement
13309                             my $iend = $ri_last->[$line];
13310                             $this_line_is_semicolon_terminated =
13311                               $types_to_go[$iend] eq ';'
13312
13313                               # with possible side comment
13314                               || ( $types_to_go[$iend] eq '#'
13315                                 && $iend - $ibeg >= 2
13316                                 && $types_to_go[ $iend - 2 ] eq ';'
13317                                 && $types_to_go[ $iend - 1 ] eq 'b' );
13318                         }
13319                         $line_2 = $line if ($this_line_is_semicolon_terminated);
13320                     }
13321                     else {
13322
13323                         # kill chain
13324                         $line_1 = undef;
13325                     }
13326                 }
13327                 elsif ( $lev < $lev_last ) {
13328
13329                     # chain ends with previous line
13330                     $line_2 = $line - 1;
13331                 }
13332                 elsif ( $lev > $lev_last ) {
13333
13334                     # kill chain
13335                     $line_1 = undef;
13336                 }
13337
13338                 # undo the continuation indentation if a chain ends
13339                 if ( defined($line_2) && defined($line_1) ) {
13340                     my $continuation_line_count = $line_2 - $line_1 + 1;
13341                     @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
13342                       (0) x ($continuation_line_count)
13343                       if ( $continuation_line_count >= 0 );
13344                     @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
13345                       = @reduced_spaces_to_go[ @{$ri_first}
13346                       [ $line_1 .. $line_2 ] ];
13347                     $line_1 = undef;
13348                 }
13349             }
13350
13351             # not in a chain yet..
13352             else {
13353
13354                 # look for start of a new sort/map/grep chain
13355                 if ( $lev > $lev_last ) {
13356                     if (   $types_to_go[$ibeg] eq 'k'
13357                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
13358                     {
13359                         $line_1 = $line;
13360                     }
13361                 }
13362             }
13363         }
13364         $lev_last = $lev;
13365     }
13366     return;
13367 }
13368
13369 sub undo_lp_ci {
13370
13371     # If there is a single, long parameter within parens, like this:
13372     #
13373     #  $self->command( "/msg "
13374     #        . $infoline->chan
13375     #        . " You said $1, but did you know that it's square was "
13376     #        . $1 * $1 . " ?" );
13377     #
13378     # we can remove the continuation indentation of the 2nd and higher lines
13379     # to achieve this effect, which is more pleasing:
13380     #
13381     #  $self->command("/msg "
13382     #                 . $infoline->chan
13383     #                 . " You said $1, but did you know that it's square was "
13384     #                 . $1 * $1 . " ?");
13385
13386     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
13387     my $max_line = @{$ri_first} - 1;
13388
13389     # must be multiple lines
13390     return unless $max_line > $line_open;
13391
13392     my $lev_start     = $levels_to_go[$i_start];
13393     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
13394
13395     # see if all additional lines in this container have continuation
13396     # indentation
13397     my $n;
13398     my $line_1 = 1 + $line_open;
13399     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
13400         my $ibeg = $ri_first->[$n];
13401         my $iend = $ri_last->[$n];
13402         if ( $ibeg eq $closing_index ) { $n--; last }
13403         return if ( $lev_start != $levels_to_go[$ibeg] );
13404         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
13405         last   if ( $closing_index <= $iend );
13406     }
13407
13408     # we can reduce the indentation of all continuation lines
13409     my $continuation_line_count = $n - $line_open;
13410     @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
13411       (0) x ($continuation_line_count);
13412     @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
13413       @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
13414     return;
13415 }
13416
13417 sub pad_token {
13418
13419     # insert $pad_spaces before token number $ipad
13420     my ( $ipad, $pad_spaces ) = @_;
13421     if ( $pad_spaces > 0 ) {
13422         $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
13423     }
13424     elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
13425         $tokens_to_go[$ipad] = "";
13426     }
13427     else {
13428
13429         # shouldn't happen
13430         return;
13431     }
13432
13433     $token_lengths_to_go[$ipad] += $pad_spaces;
13434     foreach my $i ( $ipad .. $max_index_to_go ) {
13435         $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
13436     }
13437     return;
13438 }
13439
13440 {
13441     my %is_math_op;
13442
13443     BEGIN {
13444
13445         my @q = qw( + - * / );
13446         @is_math_op{@q} = (1) x scalar(@q);
13447     }
13448
13449     sub set_logical_padding {
13450
13451         # Look at a batch of lines and see if extra padding can improve the
13452         # alignment when there are certain leading operators. Here is an
13453         # example, in which some extra space is introduced before
13454         # '( $year' to make it line up with the subsequent lines:
13455         #
13456         #       if (   ( $Year < 1601 )
13457         #           || ( $Year > 2899 )
13458         #           || ( $EndYear < 1601 )
13459         #           || ( $EndYear > 2899 ) )
13460         #       {
13461         #           &Error_OutOfRange;
13462         #       }
13463         #
13464         my ( $ri_first, $ri_last ) = @_;
13465         my $max_line = @{$ri_first} - 1;
13466
13467         # FIXME: move these declarations below
13468         my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
13469             $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
13470
13471         # looking at each line of this batch..
13472         foreach my $line ( 0 .. $max_line - 1 ) {
13473
13474             # see if the next line begins with a logical operator
13475             $ibeg      = $ri_first->[$line];
13476             $iend      = $ri_last->[$line];
13477             $ibeg_next = $ri_first->[ $line + 1 ];
13478             $tok_next  = $tokens_to_go[$ibeg_next];
13479             $type_next = $types_to_go[$ibeg_next];
13480
13481             $has_leading_op_next = ( $tok_next =~ /^\w/ )
13482               ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
13483               : $is_chain_operator{$type_next};    # and, or
13484
13485             next unless ($has_leading_op_next);
13486
13487             # next line must not be at lesser depth
13488             next
13489               if ( $nesting_depth_to_go[$ibeg] >
13490                 $nesting_depth_to_go[$ibeg_next] );
13491
13492             # identify the token in this line to be padded on the left
13493             $ipad = undef;
13494
13495             # handle lines at same depth...
13496             if ( $nesting_depth_to_go[$ibeg] ==
13497                 $nesting_depth_to_go[$ibeg_next] )
13498             {
13499
13500                 # if this is not first line of the batch ...
13501                 if ( $line > 0 ) {
13502
13503                     # and we have leading operator..
13504                     next if $has_leading_op;
13505
13506                     # Introduce padding if..
13507                     # 1. the previous line is at lesser depth, or
13508                     # 2. the previous line ends in an assignment
13509                     # 3. the previous line ends in a 'return'
13510                     # 4. the previous line ends in a comma
13511                     # Example 1: previous line at lesser depth
13512                     #       if (   ( $Year < 1601 )      # <- we are here but
13513                     #           || ( $Year > 2899 )      #  list has not yet
13514                     #           || ( $EndYear < 1601 )   # collapsed vertically
13515                     #           || ( $EndYear > 2899 ) )
13516                     #       {
13517                     #
13518                     # Example 2: previous line ending in assignment:
13519                     #    $leapyear =
13520                     #        $year % 4   ? 0     # <- We are here
13521                     #      : $year % 100 ? 1
13522                     #      : $year % 400 ? 0
13523                     #      : 1;
13524                     #
13525                     # Example 3: previous line ending in comma:
13526                     #    push @expr,
13527                     #        /test/   ? undef
13528                     #      : eval($_) ? 1
13529                     #      : eval($_) ? 1
13530                     #      :            0;
13531
13532                    # be sure levels agree (do not indent after an indented 'if')
13533                     next
13534                       if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
13535
13536                     # allow padding on first line after a comma but only if:
13537                     # (1) this is line 2 and
13538                     # (2) there are at more than three lines and
13539                     # (3) lines 3 and 4 have the same leading operator
13540                     # These rules try to prevent padding within a long
13541                     # comma-separated list.
13542                     my $ok_comma;
13543                     if (   $types_to_go[$iendm] eq ','
13544                         && $line == 1
13545                         && $max_line > 2 )
13546                     {
13547                         my $ibeg_next_next = $ri_first->[ $line + 2 ];
13548                         my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
13549                         $ok_comma = $tok_next_next eq $tok_next;
13550                     }
13551
13552                     next
13553                       unless (
13554                            $is_assignment{ $types_to_go[$iendm] }
13555                         || $ok_comma
13556                         || ( $nesting_depth_to_go[$ibegm] <
13557                             $nesting_depth_to_go[$ibeg] )
13558                         || (   $types_to_go[$iendm] eq 'k'
13559                             && $tokens_to_go[$iendm] eq 'return' )
13560                       );
13561
13562                     # we will add padding before the first token
13563                     $ipad = $ibeg;
13564                 }
13565
13566                 # for first line of the batch..
13567                 else {
13568
13569                     # WARNING: Never indent if first line is starting in a
13570                     # continued quote, which would change the quote.
13571                     next if $starting_in_quote;
13572
13573                     # if this is text after closing '}'
13574                     # then look for an interior token to pad
13575                     if ( $types_to_go[$ibeg] eq '}' ) {
13576
13577                     }
13578
13579                     # otherwise, we might pad if it looks really good
13580                     else {
13581
13582                         # we might pad token $ibeg, so be sure that it
13583                         # is at the same depth as the next line.
13584                         next
13585                           if ( $nesting_depth_to_go[$ibeg] !=
13586                             $nesting_depth_to_go[$ibeg_next] );
13587
13588                         # We can pad on line 1 of a statement if at least 3
13589                         # lines will be aligned. Otherwise, it
13590                         # can look very confusing.
13591
13592                  # We have to be careful not to pad if there are too few
13593                  # lines.  The current rule is:
13594                  # (1) in general we require at least 3 consecutive lines
13595                  # with the same leading chain operator token,
13596                  # (2) but an exception is that we only require two lines
13597                  # with leading colons if there are no more lines.  For example,
13598                  # the first $i in the following snippet would get padding
13599                  # by the second rule:
13600                  #
13601                  #   $i == 1 ? ( "First", "Color" )
13602                  # : $i == 2 ? ( "Then",  "Rarity" )
13603                  # :           ( "Then",  "Name" );
13604
13605                         if ( $max_line > 1 ) {
13606                             my $leading_token = $tokens_to_go[$ibeg_next];
13607                             my $tokens_differ;
13608
13609                             # never indent line 1 of a '.' series because
13610                             # previous line is most likely at same level.
13611                             # TODO: we should also look at the leasing_spaces
13612                             # of the last output line and skip if it is same
13613                             # as this line.
13614                             next if ( $leading_token eq '.' );
13615
13616                             my $count = 1;
13617                             foreach my $l ( 2 .. 3 ) {
13618                                 last if ( $line + $l > $max_line );
13619                                 my $ibeg_next_next = $ri_first->[ $line + $l ];
13620                                 if ( $tokens_to_go[$ibeg_next_next] ne
13621                                     $leading_token )
13622                                 {
13623                                     $tokens_differ = 1;
13624                                     last;
13625                                 }
13626                                 $count++;
13627                             }
13628                             next if ($tokens_differ);
13629                             next if ( $count < 3 && $leading_token ne ':' );
13630                             $ipad = $ibeg;
13631                         }
13632                         else {
13633                             next;
13634                         }
13635                     }
13636                 }
13637             }
13638
13639             # find interior token to pad if necessary
13640             if ( !defined($ipad) ) {
13641
13642                 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
13643
13644                     # find any unclosed container
13645                     next
13646                       unless ( $type_sequence_to_go[$i]
13647                         && $mate_index_to_go[$i] > $iend );
13648
13649                     # find next nonblank token to pad
13650                     $ipad = $inext_to_go[$i];
13651                     last if ( $ipad > $iend );
13652                 }
13653                 last unless $ipad;
13654             }
13655
13656             # We cannot pad the first leading token of a file because
13657             # it could cause a bug in which the starting indentation
13658             # level is guessed incorrectly each time the code is run
13659             # though perltidy, thus causing the code to march off to
13660             # the right.  For example, the following snippet would have
13661             # this problem:
13662
13663 ##     ov_method mycan( $package, '(""' ),       $package
13664 ##  or ov_method mycan( $package, '(0+' ),       $package
13665 ##  or ov_method mycan( $package, '(bool' ),     $package
13666 ##  or ov_method mycan( $package, '(nomethod' ), $package;
13667
13668             # If this snippet is within a block this won't happen
13669             # unless the user just processes the snippet alone within
13670             # an editor.  In that case either the user will see and
13671             # fix the problem or it will be corrected next time the
13672             # entire file is processed with perltidy.
13673             ##next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
13674             next if ( $ipad == 0 && $peak_batch_size <= 1 );
13675
13676 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
13677 ## IT DID MORE HARM THAN GOOD
13678 ##            ceil(
13679 ##                      $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
13680 ##                    / $upem
13681 ##            ),
13682 ##?            # do not put leading padding for just 2 lines of math
13683 ##?            if (   $ipad == $ibeg
13684 ##?                && $line > 0
13685 ##?                && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
13686 ##?                && $is_math_op{$type_next}
13687 ##?                && $line + 2 <= $max_line )
13688 ##?            {
13689 ##?                my $ibeg_next_next = $ri_first->[ $line + 2 ];
13690 ##?                my $type_next_next = $types_to_go[$ibeg_next_next];
13691 ##?                next if !$is_math_op{$type_next_next};
13692 ##?            }
13693
13694             # next line must not be at greater depth
13695             my $iend_next = $ri_last->[ $line + 1 ];
13696             next
13697               if ( $nesting_depth_to_go[ $iend_next + 1 ] >
13698                 $nesting_depth_to_go[$ipad] );
13699
13700             # lines must be somewhat similar to be padded..
13701             my $inext_next = $inext_to_go[$ibeg_next];
13702             my $type       = $types_to_go[$ipad];
13703             my $type_next  = $types_to_go[ $ipad + 1 ];
13704
13705             # see if there are multiple continuation lines
13706             my $logical_continuation_lines = 1;
13707             if ( $line + 2 <= $max_line ) {
13708                 my $leading_token  = $tokens_to_go[$ibeg_next];
13709                 my $ibeg_next_next = $ri_first->[ $line + 2 ];
13710                 if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
13711                     && $nesting_depth_to_go[$ibeg_next] eq
13712                     $nesting_depth_to_go[$ibeg_next_next] )
13713                 {
13714                     $logical_continuation_lines++;
13715                 }
13716             }
13717
13718             # see if leading types match
13719             my $types_match = $types_to_go[$inext_next] eq $type;
13720             my $matches_without_bang;
13721
13722             # if first line has leading ! then compare the following token
13723             if ( !$types_match && $type eq '!' ) {
13724                 $types_match = $matches_without_bang =
13725                   $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
13726             }
13727
13728             if (
13729
13730                 # either we have multiple continuation lines to follow
13731                 # and we are not padding the first token
13732                 ( $logical_continuation_lines > 1 && $ipad > 0 )
13733
13734                 # or..
13735                 || (
13736
13737                     # types must match
13738                     $types_match
13739
13740                     # and keywords must match if keyword
13741                     && !(
13742                            $type eq 'k'
13743                         && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
13744                     )
13745                 )
13746               )
13747             {
13748
13749                 #----------------------begin special checks--------------
13750                 #
13751                 # SPECIAL CHECK 1:
13752                 # A check is needed before we can make the pad.
13753                 # If we are in a list with some long items, we want each
13754                 # item to stand out.  So in the following example, the
13755                 # first line beginning with '$casefold->' would look good
13756                 # padded to align with the next line, but then it
13757                 # would be indented more than the last line, so we
13758                 # won't do it.
13759                 #
13760                 #  ok(
13761                 #      $casefold->{code}         eq '0041'
13762                 #        && $casefold->{status}  eq 'C'
13763                 #        && $casefold->{mapping} eq '0061',
13764                 #      'casefold 0x41'
13765                 #  );
13766                 #
13767                 # Note:
13768                 # It would be faster, and almost as good, to use a comma
13769                 # count, and not pad if comma_count > 1 and the previous
13770                 # line did not end with a comma.
13771                 #
13772                 my $ok_to_pad = 1;
13773
13774                 my $ibg   = $ri_first->[ $line + 1 ];
13775                 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
13776
13777                 # just use simplified formula for leading spaces to avoid
13778                 # needless sub calls
13779                 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
13780
13781                 # look at each line beyond the next ..
13782                 my $l = $line + 1;
13783                 foreach my $ltest ( $line + 2 .. $max_line ) {
13784                     $l = $ltest;
13785                     my $ibg = $ri_first->[$l];
13786
13787                     # quit looking at the end of this container
13788                     last
13789                       if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
13790                       || ( $nesting_depth_to_go[$ibg] < $depth );
13791
13792                     # cannot do the pad if a later line would be
13793                     # outdented more
13794                     if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
13795                         $ok_to_pad = 0;
13796                         last;
13797                     }
13798                 }
13799
13800                 # don't pad if we end in a broken list
13801                 if ( $l == $max_line ) {
13802                     my $i2 = $ri_last->[$l];
13803                     if ( $types_to_go[$i2] eq '#' ) {
13804                         my $i1 = $ri_first->[$l];
13805                         next
13806                           if (
13807                             terminal_type( \@types_to_go, \@block_type_to_go,
13808                                 $i1, $i2 ) eq ','
13809                           );
13810                     }
13811                 }
13812
13813                 # SPECIAL CHECK 2:
13814                 # a minus may introduce a quoted variable, and we will
13815                 # add the pad only if this line begins with a bare word,
13816                 # such as for the word 'Button' here:
13817                 #    [
13818                 #         Button      => "Print letter \"~$_\"",
13819                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
13820                 #        -accelerator => "Meta+$_"
13821                 #    ];
13822                 #
13823                 #  On the other hand, if 'Button' is quoted, it looks best
13824                 #  not to pad:
13825                 #    [
13826                 #        'Button'     => "Print letter \"~$_\"",
13827                 #        -command     => [ sub { print "$_[0]\n" }, $_ ],
13828                 #        -accelerator => "Meta+$_"
13829                 #    ];
13830                 if ( $types_to_go[$ibeg_next] eq 'm' ) {
13831                     $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
13832                 }
13833
13834                 next unless $ok_to_pad;
13835
13836                 #----------------------end special check---------------
13837
13838                 my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
13839                 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
13840                 $pad_spaces = $length_2 - $length_1;
13841
13842                 # If the first line has a leading ! and the second does
13843                 # not, then remove one space to try to align the next
13844                 # leading characters, which are often the same.  For example:
13845                 #  if (  !$ts
13846                 #      || $ts == $self->Holder
13847                 #      || $self->Holder->Type eq "Arena" )
13848                 #
13849                 # This usually helps readability, but if there are subsequent
13850                 # ! operators things will still get messed up.  For example:
13851                 #
13852                 #  if (  !exists $Net::DNS::typesbyname{$qtype}
13853                 #      && exists $Net::DNS::classesbyname{$qtype}
13854                 #      && !exists $Net::DNS::classesbyname{$qclass}
13855                 #      && exists $Net::DNS::typesbyname{$qclass} )
13856                 # We can't fix that.
13857                 if ($matches_without_bang) { $pad_spaces-- }
13858
13859                 # make sure this won't change if -lp is used
13860                 my $indentation_1 = $leading_spaces_to_go[$ibeg];
13861                 if ( ref($indentation_1) ) {
13862                     if ( $indentation_1->get_recoverable_spaces() == 0 ) {
13863                         my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
13864                         unless ( $indentation_2->get_recoverable_spaces() == 0 )
13865                         {
13866                             $pad_spaces = 0;
13867                         }
13868                     }
13869                 }
13870
13871                 # we might be able to handle a pad of -1 by removing a blank
13872                 # token
13873                 if ( $pad_spaces < 0 ) {
13874
13875                     if ( $pad_spaces == -1 ) {
13876                         if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
13877                         {
13878                             pad_token( $ipad - 1, $pad_spaces );
13879                         }
13880                     }
13881                     $pad_spaces = 0;
13882                 }
13883
13884                 # now apply any padding for alignment
13885                 if ( $ipad >= 0 && $pad_spaces ) {
13886
13887                     my $length_t = total_line_length( $ibeg, $iend );
13888                     if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
13889                     {
13890                         pad_token( $ipad, $pad_spaces );
13891                     }
13892                 }
13893             }
13894         }
13895         continue {
13896             $iendm          = $iend;
13897             $ibegm          = $ibeg;
13898             $has_leading_op = $has_leading_op_next;
13899         }    # end of loop over lines
13900         return;
13901     }
13902 }
13903
13904 sub correct_lp_indentation {
13905
13906     # When the -lp option is used, we need to make a last pass through
13907     # each line to correct the indentation positions in case they differ
13908     # from the predictions.  This is necessary because perltidy uses a
13909     # predictor/corrector method for aligning with opening parens.  The
13910     # predictor is usually good, but sometimes stumbles.  The corrector
13911     # tries to patch things up once the actual opening paren locations
13912     # are known.
13913     my ( $ri_first, $ri_last ) = @_;
13914     my $do_not_pad = 0;
13915
13916     #  Note on flag '$do_not_pad':
13917     #  We want to avoid a situation like this, where the aligner inserts
13918     #  whitespace before the '=' to align it with a previous '=', because
13919     #  otherwise the parens might become mis-aligned in a situation like
13920     #  this, where the '=' has become aligned with the previous line,
13921     #  pushing the opening '(' forward beyond where we want it.
13922     #
13923     #  $mkFloor::currentRoom = '';
13924     #  $mkFloor::c_entry     = $c->Entry(
13925     #                                 -width        => '10',
13926     #                                 -relief       => 'sunken',
13927     #                                 ...
13928     #                                 );
13929     #
13930     #  We leave it to the aligner to decide how to do this.
13931
13932     # first remove continuation indentation if appropriate
13933     my $max_line = @{$ri_first} - 1;
13934
13935     # looking at each line of this batch..
13936     my ( $ibeg, $iend );
13937     foreach my $line ( 0 .. $max_line ) {
13938         $ibeg = $ri_first->[$line];
13939         $iend = $ri_last->[$line];
13940
13941         # looking at each token in this output line..
13942         foreach my $i ( $ibeg .. $iend ) {
13943
13944             # How many space characters to place before this token
13945             # for special alignment.  Actual padding is done in the
13946             # continue block.
13947
13948             # looking for next unvisited indentation item
13949             my $indentation = $leading_spaces_to_go[$i];
13950             if ( !$indentation->get_marked() ) {
13951                 $indentation->set_marked(1);
13952
13953                 # looking for indentation item for which we are aligning
13954                 # with parens, braces, and brackets
13955                 next unless ( $indentation->get_align_paren() );
13956
13957                 # skip closed container on this line
13958                 if ( $i > $ibeg ) {
13959                     my $im = max( $ibeg, $iprev_to_go[$i] );
13960                     if (   $type_sequence_to_go[$im]
13961                         && $mate_index_to_go[$im] <= $iend )
13962                     {
13963                         next;
13964                     }
13965                 }
13966
13967                 if ( $line == 1 && $i == $ibeg ) {
13968                     $do_not_pad = 1;
13969                 }
13970
13971                 # Ok, let's see what the error is and try to fix it
13972                 my $actual_pos;
13973                 my $predicted_pos = $indentation->get_spaces();
13974                 if ( $i > $ibeg ) {
13975
13976                     # token is mid-line - use length to previous token
13977                     $actual_pos = total_line_length( $ibeg, $i - 1 );
13978
13979                     # for mid-line token, we must check to see if all
13980                     # additional lines have continuation indentation,
13981                     # and remove it if so.  Otherwise, we do not get
13982                     # good alignment.
13983                     my $closing_index = $indentation->get_closed();
13984                     if ( $closing_index > $iend ) {
13985                         my $ibeg_next = $ri_first->[ $line + 1 ];
13986                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
13987                             undo_lp_ci( $line, $i, $closing_index, $ri_first,
13988                                 $ri_last );
13989                         }
13990                     }
13991                 }
13992                 elsif ( $line > 0 ) {
13993
13994                     # handle case where token starts a new line;
13995                     # use length of previous line
13996                     my $ibegm = $ri_first->[ $line - 1 ];
13997                     my $iendm = $ri_last->[ $line - 1 ];
13998                     $actual_pos = total_line_length( $ibegm, $iendm );
13999
14000                     # follow -pt style
14001                     ++$actual_pos
14002                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
14003                 }
14004                 else {
14005
14006                     # token is first character of first line of batch
14007                     $actual_pos = $predicted_pos;
14008                 }
14009
14010                 my $move_right = $actual_pos - $predicted_pos;
14011
14012                 # done if no error to correct (gnu2.t)
14013                 if ( $move_right == 0 ) {
14014                     $indentation->set_recoverable_spaces($move_right);
14015                     next;
14016                 }
14017
14018                 # if we have not seen closure for this indentation in
14019                 # this batch, we can only pass on a request to the
14020                 # vertical aligner
14021                 my $closing_index = $indentation->get_closed();
14022
14023                 if ( $closing_index < 0 ) {
14024                     $indentation->set_recoverable_spaces($move_right);
14025                     next;
14026                 }
14027
14028                 # If necessary, look ahead to see if there is really any
14029                 # leading whitespace dependent on this whitespace, and
14030                 # also find the longest line using this whitespace.
14031                 # Since it is always safe to move left if there are no
14032                 # dependents, we only need to do this if we may have
14033                 # dependent nodes or need to move right.
14034
14035                 my $right_margin = 0;
14036                 my $have_child   = $indentation->get_have_child();
14037
14038                 my %saw_indentation;
14039                 my $line_count = 1;
14040                 $saw_indentation{$indentation} = $indentation;
14041
14042                 if ( $have_child || $move_right > 0 ) {
14043                     $have_child = 0;
14044                     my $max_length = 0;
14045                     if ( $i == $ibeg ) {
14046                         $max_length = total_line_length( $ibeg, $iend );
14047                     }
14048
14049                     # look ahead at the rest of the lines of this batch..
14050                     foreach my $line_t ( $line + 1 .. $max_line ) {
14051                         my $ibeg_t = $ri_first->[$line_t];
14052                         my $iend_t = $ri_last->[$line_t];
14053                         last if ( $closing_index <= $ibeg_t );
14054
14055                         # remember all different indentation objects
14056                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
14057                         $saw_indentation{$indentation_t} = $indentation_t;
14058                         $line_count++;
14059
14060                         # remember longest line in the group
14061                         my $length_t = total_line_length( $ibeg_t, $iend_t );
14062                         if ( $length_t > $max_length ) {
14063                             $max_length = $length_t;
14064                         }
14065                     }
14066                     $right_margin = maximum_line_length($ibeg) - $max_length;
14067                     if ( $right_margin < 0 ) { $right_margin = 0 }
14068                 }
14069
14070                 my $first_line_comma_count =
14071                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
14072                 my $comma_count = $indentation->get_comma_count();
14073                 my $arrow_count = $indentation->get_arrow_count();
14074
14075                 # This is a simple approximate test for vertical alignment:
14076                 # if we broke just after an opening paren, brace, bracket,
14077                 # and there are 2 or more commas in the first line,
14078                 # and there are no '=>'s,
14079                 # then we are probably vertically aligned.  We could set
14080                 # an exact flag in sub scan_list, but this is good
14081                 # enough.
14082                 my $indentation_count = keys %saw_indentation;
14083                 my $is_vertically_aligned =
14084                   (      $i == $ibeg
14085                       && $first_line_comma_count > 1
14086                       && $indentation_count == 1
14087                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
14088
14089                 # Make the move if possible ..
14090                 if (
14091
14092                     # we can always move left
14093                     $move_right < 0
14094
14095                     # but we should only move right if we are sure it will
14096                     # not spoil vertical alignment
14097                     || ( $comma_count == 0 )
14098                     || ( $comma_count > 0 && !$is_vertically_aligned )
14099                   )
14100                 {
14101                     my $move =
14102                       ( $move_right <= $right_margin )
14103                       ? $move_right
14104                       : $right_margin;
14105
14106                     foreach ( keys %saw_indentation ) {
14107                         $saw_indentation{$_}
14108                           ->permanently_decrease_available_spaces( -$move );
14109                     }
14110                 }
14111
14112                 # Otherwise, record what we want and the vertical aligner
14113                 # will try to recover it.
14114                 else {
14115                     $indentation->set_recoverable_spaces($move_right);
14116                 }
14117             }
14118         }
14119     }
14120     return $do_not_pad;
14121 }
14122
14123 # flush is called to output any tokens in the pipeline, so that
14124 # an alternate source of lines can be written in the correct order
14125
14126 sub flush {
14127     my $self = shift;
14128     destroy_one_line_block();
14129     $self->output_line_to_go();
14130     Perl::Tidy::VerticalAligner::flush();
14131     return;
14132 }
14133
14134 sub reset_block_text_accumulator {
14135
14136     # save text after 'if' and 'elsif' to append after 'else'
14137     if ($accumulating_text_for_block) {
14138
14139         if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
14140             push @{$rleading_block_if_elsif_text}, $leading_block_text;
14141         }
14142     }
14143     $accumulating_text_for_block        = "";
14144     $leading_block_text                 = "";
14145     $leading_block_text_level           = 0;
14146     $leading_block_text_length_exceeded = 0;
14147     $leading_block_text_line_number     = 0;
14148     $leading_block_text_line_length     = 0;
14149     return;
14150 }
14151
14152 sub set_block_text_accumulator {
14153     my $i = shift;
14154     $accumulating_text_for_block = $tokens_to_go[$i];
14155     if ( $accumulating_text_for_block !~ /^els/ ) {
14156         $rleading_block_if_elsif_text = [];
14157     }
14158     $leading_block_text             = "";
14159     $leading_block_text_level       = $levels_to_go[$i];
14160     $leading_block_text_line_number = get_output_line_number();
14161     ##$vertical_aligner_object->get_output_line_number();
14162     $leading_block_text_length_exceeded = 0;
14163
14164     # this will contain the column number of the last character
14165     # of the closing side comment
14166     $leading_block_text_line_length =
14167       length($csc_last_label) +
14168       length($accumulating_text_for_block) +
14169       length( $rOpts->{'closing-side-comment-prefix'} ) +
14170       $leading_block_text_level * $rOpts_indent_columns + 3;
14171     return;
14172 }
14173
14174 sub accumulate_block_text {
14175     my $i = shift;
14176
14177     # accumulate leading text for -csc, ignoring any side comments
14178     if (   $accumulating_text_for_block
14179         && !$leading_block_text_length_exceeded
14180         && $types_to_go[$i] ne '#' )
14181     {
14182
14183         my $added_length = $token_lengths_to_go[$i];
14184         $added_length += 1 if $i == 0;
14185         my $new_line_length = $leading_block_text_line_length + $added_length;
14186
14187         # we can add this text if we don't exceed some limits..
14188         if (
14189
14190             # we must not have already exceeded the text length limit
14191             length($leading_block_text) <
14192             $rOpts_closing_side_comment_maximum_text
14193
14194             # and either:
14195             # the new total line length must be below the line length limit
14196             # or the new length must be below the text length limit
14197             # (ie, we may allow one token to exceed the text length limit)
14198             && (
14199                 $new_line_length <
14200                 maximum_line_length_for_level($leading_block_text_level)
14201
14202                 || length($leading_block_text) + $added_length <
14203                 $rOpts_closing_side_comment_maximum_text
14204             )
14205
14206             # UNLESS: we are adding a closing paren before the brace we seek.
14207             # This is an attempt to avoid situations where the ... to be
14208             # added are longer than the omitted right paren, as in:
14209
14210             #   foreach my $item (@a_rather_long_variable_name_here) {
14211             #      &whatever;
14212             #   } ## end foreach my $item (@a_rather_long_variable_name_here...
14213
14214             || (
14215                 $tokens_to_go[$i] eq ')'
14216                 && (
14217                     (
14218                            $i + 1 <= $max_index_to_go
14219                         && $block_type_to_go[ $i + 1 ] eq
14220                         $accumulating_text_for_block
14221                     )
14222                     || (   $i + 2 <= $max_index_to_go
14223                         && $block_type_to_go[ $i + 2 ] eq
14224                         $accumulating_text_for_block )
14225                 )
14226             )
14227           )
14228         {
14229
14230             # add an extra space at each newline
14231             if ( $i == 0 ) { $leading_block_text .= ' ' }
14232
14233             # add the token text
14234             $leading_block_text .= $tokens_to_go[$i];
14235             $leading_block_text_line_length = $new_line_length;
14236         }
14237
14238         # show that text was truncated if necessary
14239         elsif ( $types_to_go[$i] ne 'b' ) {
14240             $leading_block_text_length_exceeded = 1;
14241             $leading_block_text .= '...';
14242         }
14243     }
14244     return;
14245 }
14246
14247 {
14248     my %is_if_elsif_else_unless_while_until_for_foreach;
14249
14250     BEGIN {
14251
14252         # These block types may have text between the keyword and opening
14253         # curly.  Note: 'else' does not, but must be included to allow trailing
14254         # if/elsif text to be appended.
14255         # patch for SWITCH/CASE: added 'case' and 'when'
14256         my @q =
14257           qw(if elsif else unless while until for foreach case when catch);
14258         @is_if_elsif_else_unless_while_until_for_foreach{@q} =
14259           (1) x scalar(@q);
14260     }
14261
14262     sub accumulate_csc_text {
14263
14264         # called once per output buffer when -csc is used. Accumulates
14265         # the text placed after certain closing block braces.
14266         # Defines and returns the following for this buffer:
14267
14268         my $block_leading_text = "";    # the leading text of the last '}'
14269         my $rblock_leading_if_elsif_text;
14270         my $i_block_leading_text =
14271           -1;    # index of token owning block_leading_text
14272         my $block_line_count    = 100;    # how many lines the block spans
14273         my $terminal_type       = 'b';    # type of last nonblank token
14274         my $i_terminal          = 0;      # index of last nonblank token
14275         my $terminal_block_type = "";
14276
14277         # update most recent statement label
14278         $csc_last_label = "" unless ($csc_last_label);
14279         if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
14280         my $block_label = $csc_last_label;
14281
14282         # Loop over all tokens of this batch
14283         for my $i ( 0 .. $max_index_to_go ) {
14284             my $type       = $types_to_go[$i];
14285             my $block_type = $block_type_to_go[$i];
14286             my $token      = $tokens_to_go[$i];
14287
14288             # remember last nonblank token type
14289             if ( $type ne '#' && $type ne 'b' ) {
14290                 $terminal_type       = $type;
14291                 $terminal_block_type = $block_type;
14292                 $i_terminal          = $i;
14293             }
14294
14295             my $type_sequence = $type_sequence_to_go[$i];
14296             if ( $block_type && $type_sequence ) {
14297
14298                 if ( $token eq '}' ) {
14299
14300                     # restore any leading text saved when we entered this block
14301                     if ( defined( $block_leading_text{$type_sequence} ) ) {
14302                         ( $block_leading_text, $rblock_leading_if_elsif_text )
14303                           = @{ $block_leading_text{$type_sequence} };
14304                         $i_block_leading_text = $i;
14305                         delete $block_leading_text{$type_sequence};
14306                         $rleading_block_if_elsif_text =
14307                           $rblock_leading_if_elsif_text;
14308                     }
14309
14310                     if ( defined( $csc_block_label{$type_sequence} ) ) {
14311                         $block_label = $csc_block_label{$type_sequence};
14312                         delete $csc_block_label{$type_sequence};
14313                     }
14314
14315                     # if we run into a '}' then we probably started accumulating
14316                     # at something like a trailing 'if' clause..no harm done.
14317                     if (   $accumulating_text_for_block
14318                         && $levels_to_go[$i] <= $leading_block_text_level )
14319                     {
14320                         my $lev = $levels_to_go[$i];
14321                         reset_block_text_accumulator();
14322                     }
14323
14324                     if ( defined( $block_opening_line_number{$type_sequence} ) )
14325                     {
14326                         my $output_line_number = get_output_line_number();
14327                         ##$vertical_aligner_object->get_output_line_number();
14328                         $block_line_count =
14329                           $output_line_number -
14330                           $block_opening_line_number{$type_sequence} + 1;
14331                         delete $block_opening_line_number{$type_sequence};
14332                     }
14333                     else {
14334
14335                         # Error: block opening line undefined for this line..
14336                         # This shouldn't be possible, but it is not a
14337                         # significant problem.
14338                     }
14339                 }
14340
14341                 elsif ( $token eq '{' ) {
14342
14343                     my $line_number = get_output_line_number();
14344                     ##$vertical_aligner_object->get_output_line_number();
14345                     $block_opening_line_number{$type_sequence} = $line_number;
14346
14347                     # set a label for this block, except for
14348                     # a bare block which already has the label
14349                     # A label can only be used on the next {
14350                     if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
14351                     $csc_block_label{$type_sequence} = $csc_last_label;
14352                     $csc_last_label = "";
14353
14354                     if (   $accumulating_text_for_block
14355                         && $levels_to_go[$i] == $leading_block_text_level )
14356                     {
14357
14358                         if ( $accumulating_text_for_block eq $block_type ) {
14359
14360                             # save any leading text before we enter this block
14361                             $block_leading_text{$type_sequence} = [
14362                                 $leading_block_text,
14363                                 $rleading_block_if_elsif_text
14364                             ];
14365                             $block_opening_line_number{$type_sequence} =
14366                               $leading_block_text_line_number;
14367                             reset_block_text_accumulator();
14368                         }
14369                         else {
14370
14371                             # shouldn't happen, but not a serious error.
14372                             # We were accumulating -csc text for block type
14373                             # $accumulating_text_for_block and unexpectedly
14374                             # encountered a '{' for block type $block_type.
14375                         }
14376                     }
14377                 }
14378             }
14379
14380             if (   $type eq 'k'
14381                 && $csc_new_statement_ok
14382                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
14383                 && $token =~ /$closing_side_comment_list_pattern/o )
14384             {
14385                 set_block_text_accumulator($i);
14386             }
14387             else {
14388
14389                 # note: ignoring type 'q' because of tricks being played
14390                 # with 'q' for hanging side comments
14391                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
14392                     $csc_new_statement_ok =
14393                       ( $block_type || $type eq 'J' || $type eq ';' );
14394                 }
14395                 if (   $type eq ';'
14396                     && $accumulating_text_for_block
14397                     && $levels_to_go[$i] == $leading_block_text_level )
14398                 {
14399                     reset_block_text_accumulator();
14400                 }
14401                 else {
14402                     accumulate_block_text($i);
14403                 }
14404             }
14405         }
14406
14407         # Treat an 'else' block specially by adding preceding 'if' and
14408         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
14409         # especially for cuddled-else formatting.
14410         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
14411             $block_leading_text =
14412               make_else_csc_text( $i_terminal, $terminal_block_type,
14413                 $block_leading_text, $rblock_leading_if_elsif_text );
14414         }
14415
14416         # if this line ends in a label then remember it for the next pass
14417         $csc_last_label = "";
14418         if ( $terminal_type eq 'J' ) {
14419             $csc_last_label = $tokens_to_go[$i_terminal];
14420         }
14421
14422         return ( $terminal_type, $i_terminal, $i_block_leading_text,
14423             $block_leading_text, $block_line_count, $block_label );
14424     }
14425 }
14426
14427 sub make_else_csc_text {
14428
14429     # create additional -csc text for an 'else' and optionally 'elsif',
14430     # depending on the value of switch
14431     # $rOpts_closing_side_comment_else_flag:
14432     #
14433     #  = 0 add 'if' text to trailing else
14434     #  = 1 same as 0 plus:
14435     #      add 'if' to 'elsif's if can fit in line length
14436     #      add last 'elsif' to trailing else if can fit in one line
14437     #  = 2 same as 1 but do not check if exceed line length
14438     #
14439     # $rif_elsif_text = a reference to a list of all previous closing
14440     # side comments created for this if block
14441     #
14442     my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
14443     my $csc_text = $block_leading_text;
14444
14445     if (   $block_type eq 'elsif'
14446         && $rOpts_closing_side_comment_else_flag == 0 )
14447     {
14448         return $csc_text;
14449     }
14450
14451     my $count = @{$rif_elsif_text};
14452     return $csc_text unless ($count);
14453
14454     my $if_text = '[ if' . $rif_elsif_text->[0];
14455
14456     # always show the leading 'if' text on 'else'
14457     if ( $block_type eq 'else' ) {
14458         $csc_text .= $if_text;
14459     }
14460
14461     # see if that's all
14462     if ( $rOpts_closing_side_comment_else_flag == 0 ) {
14463         return $csc_text;
14464     }
14465
14466     my $last_elsif_text = "";
14467     if ( $count > 1 ) {
14468         $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
14469         if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
14470     }
14471
14472     # tentatively append one more item
14473     my $saved_text = $csc_text;
14474     if ( $block_type eq 'else' ) {
14475         $csc_text .= $last_elsif_text;
14476     }
14477     else {
14478         $csc_text .= ' ' . $if_text;
14479     }
14480
14481     # all done if no length checks requested
14482     if ( $rOpts_closing_side_comment_else_flag == 2 ) {
14483         return $csc_text;
14484     }
14485
14486     # undo it if line length exceeded
14487     my $length =
14488       length($csc_text) +
14489       length($block_type) +
14490       length( $rOpts->{'closing-side-comment-prefix'} ) +
14491       $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
14492     if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
14493         $csc_text = $saved_text;
14494     }
14495     return $csc_text;
14496 }
14497
14498 {    # sub balance_csc_text
14499
14500     my %matching_char;
14501
14502     BEGIN {
14503         %matching_char = (
14504             '{' => '}',
14505             '(' => ')',
14506             '[' => ']',
14507             '}' => '{',
14508             ')' => '(',
14509             ']' => '[',
14510         );
14511     }
14512
14513     sub balance_csc_text {
14514
14515         # Append characters to balance a closing side comment so that editors
14516         # such as vim can correctly jump through code.
14517         # Simple Example:
14518         #  input  = ## end foreach my $foo ( sort { $b  ...
14519         #  output = ## end foreach my $foo ( sort { $b  ...})
14520
14521         # NOTE: This routine does not currently filter out structures within
14522         # quoted text because the bounce algorithms in text editors do not
14523         # necessarily do this either (a version of vim was checked and
14524         # did not do this).
14525
14526         # Some complex examples which will cause trouble for some editors:
14527         #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
14528         #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
14529         #  if ( $1 eq '{' ) {
14530         # test file test1/braces.pl has many such examples.
14531
14532         my ($csc) = @_;
14533
14534         # loop to examine characters one-by-one, RIGHT to LEFT and
14535         # build a balancing ending, LEFT to RIGHT.
14536         for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
14537
14538             my $char = substr( $csc, $pos, 1 );
14539
14540             # ignore everything except structural characters
14541             next unless ( $matching_char{$char} );
14542
14543             # pop most recently appended character
14544             my $top = chop($csc);
14545
14546             # push it back plus the mate to the newest character
14547             # unless they balance each other.
14548             $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
14549         }
14550
14551         # return the balanced string
14552         return $csc;
14553     }
14554 }
14555
14556 sub add_closing_side_comment {
14557
14558     my $self = shift;
14559
14560     # add closing side comments after closing block braces if -csc used
14561     my $cscw_block_comment;
14562
14563     #---------------------------------------------------------------
14564     # Step 1: loop through all tokens of this line to accumulate
14565     # the text needed to create the closing side comments. Also see
14566     # how the line ends.
14567     #---------------------------------------------------------------
14568
14569     my ( $terminal_type, $i_terminal, $i_block_leading_text,
14570         $block_leading_text, $block_line_count, $block_label )
14571       = accumulate_csc_text();
14572
14573     #---------------------------------------------------------------
14574     # Step 2: make the closing side comment if this ends a block
14575     #---------------------------------------------------------------
14576     ##my $have_side_comment = $i_terminal != $max_index_to_go;
14577     my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
14578
14579     # if this line might end in a block closure..
14580     if (
14581         $terminal_type eq '}'
14582
14583         # ..and either
14584         && (
14585
14586             # the block is long enough
14587             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
14588
14589             # or there is an existing comment to check
14590             || (   $have_side_comment
14591                 && $rOpts->{'closing-side-comment-warnings'} )
14592         )
14593
14594         # .. and if this is one of the types of interest
14595         && $block_type_to_go[$i_terminal] =~
14596         /$closing_side_comment_list_pattern/o
14597
14598         # .. but not an anonymous sub
14599         # These are not normally of interest, and their closing braces are
14600         # often followed by commas or semicolons anyway.  This also avoids
14601         # possible erratic output due to line numbering inconsistencies
14602         # in the cases where their closing braces terminate a line.
14603         && $block_type_to_go[$i_terminal] ne 'sub'
14604
14605         # ..and the corresponding opening brace must is not in this batch
14606         # (because we do not need to tag one-line blocks, although this
14607         # should also be caught with a positive -csci value)
14608         && $mate_index_to_go[$i_terminal] < 0
14609
14610         # ..and either
14611         && (
14612
14613             # this is the last token (line doesn't have a side comment)
14614             !$have_side_comment
14615
14616             # or the old side comment is a closing side comment
14617             || $tokens_to_go[$max_index_to_go] =~
14618             /$closing_side_comment_prefix_pattern/o
14619         )
14620       )
14621     {
14622
14623         # then make the closing side comment text
14624         if ($block_label) { $block_label .= " " }
14625         my $token =
14626 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
14627
14628         # append any extra descriptive text collected above
14629         if ( $i_block_leading_text == $i_terminal ) {
14630             $token .= $block_leading_text;
14631         }
14632
14633         $token = balance_csc_text($token)
14634           if $rOpts->{'closing-side-comments-balanced'};
14635
14636         $token =~ s/\s*$//;    # trim any trailing whitespace
14637
14638         # handle case of existing closing side comment
14639         if ($have_side_comment) {
14640
14641             # warn if requested and tokens differ significantly
14642             if ( $rOpts->{'closing-side-comment-warnings'} ) {
14643                 my $old_csc = $tokens_to_go[$max_index_to_go];
14644                 my $new_csc = $token;
14645                 $new_csc =~ s/\s+//g;            # trim all whitespace
14646                 $old_csc =~ s/\s+//g;            # trim all whitespace
14647                 $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
14648                 $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
14649                 $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
14650                 my $new_trailing_dots = $1;
14651                 $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
14652
14653                 # Patch to handle multiple closing side comments at
14654                 # else and elsif's.  These have become too complicated
14655                 # to check, so if we see an indication of
14656                 # '[ if' or '[ # elsif', then assume they were made
14657                 # by perltidy.
14658                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
14659                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
14660                 }
14661                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
14662                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
14663                 }
14664
14665                 # if old comment is contained in new comment,
14666                 # only compare the common part.
14667                 if ( length($new_csc) > length($old_csc) ) {
14668                     $new_csc = substr( $new_csc, 0, length($old_csc) );
14669                 }
14670
14671                 # if the new comment is shorter and has been limited,
14672                 # only compare the common part.
14673                 if ( length($new_csc) < length($old_csc)
14674                     && $new_trailing_dots )
14675                 {
14676                     $old_csc = substr( $old_csc, 0, length($new_csc) );
14677                 }
14678
14679                 # any remaining difference?
14680                 if ( $new_csc ne $old_csc ) {
14681
14682                     # just leave the old comment if we are below the threshold
14683                     # for creating side comments
14684                     if ( $block_line_count <
14685                         $rOpts->{'closing-side-comment-interval'} )
14686                     {
14687                         $token = undef;
14688                     }
14689
14690                     # otherwise we'll make a note of it
14691                     else {
14692
14693                         warning(
14694 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
14695                         );
14696
14697                      # save the old side comment in a new trailing block comment
14698                         my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
14699                         $year  += 1900;
14700                         $month += 1;
14701                         $cscw_block_comment =
14702 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
14703                     }
14704                 }
14705                 else {
14706
14707                     # No differences.. we can safely delete old comment if we
14708                     # are below the threshold
14709                     if ( $block_line_count <
14710                         $rOpts->{'closing-side-comment-interval'} )
14711                     {
14712                         $token = undef;
14713                         $self->unstore_token_to_go()
14714                           if ( $types_to_go[$max_index_to_go] eq '#' );
14715                         $self->unstore_token_to_go()
14716                           if ( $types_to_go[$max_index_to_go] eq 'b' );
14717                     }
14718                 }
14719             }
14720
14721             # switch to the new csc (unless we deleted it!)
14722             $tokens_to_go[$max_index_to_go] = $token if $token;
14723         }
14724
14725         # handle case of NO existing closing side comment
14726         else {
14727
14728         # Remove any existing blank and add another below.
14729         # This is a tricky point. A side comment needs to have the same level
14730         # as the preceding closing brace or else the line will not get the right
14731         # indentation. So even if we have a blank, we are going to replace it.
14732             if ( $types_to_go[$max_index_to_go] eq 'b' ) {
14733                 unstore_token_to_go();
14734             }
14735
14736             # insert the new side comment into the output token stream
14737             my $type          = '#';
14738             my $block_type    = '';
14739             my $type_sequence = '';
14740             my $container_environment =
14741               $container_environment_to_go[$max_index_to_go];
14742             my $level                = $levels_to_go[$max_index_to_go];
14743             my $slevel               = $nesting_depth_to_go[$max_index_to_go];
14744             my $no_internal_newlines = 0;
14745
14746             my $ci_level           = $ci_levels_to_go[$max_index_to_go];
14747             my $in_continued_quote = 0;
14748
14749             # insert a blank token
14750             $self->insert_new_token_to_go( ' ', 'b', $slevel,
14751                 $no_internal_newlines );
14752
14753             # then the side comment
14754             $self->insert_new_token_to_go( $token, $type, $slevel,
14755                 $no_internal_newlines );
14756         }
14757     }
14758     return $cscw_block_comment;
14759 }
14760
14761 sub previous_nonblank_token {
14762     my ($i)  = @_;
14763     my $name = "";
14764     my $im   = $i - 1;
14765     return "" if ( $im < 0 );
14766     if ( $types_to_go[$im] eq 'b' ) { $im--; }
14767     return "" if ( $im < 0 );
14768     $name = $tokens_to_go[$im];
14769
14770     # prepend any sub name to an isolated -> to avoid unwanted alignments
14771     # [test case is test8/penco.pl]
14772     if ( $name eq '->' ) {
14773         $im--;
14774         if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
14775             $name = $tokens_to_go[$im] . $name;
14776         }
14777     }
14778     return $name;
14779 }
14780
14781 sub send_lines_to_vertical_aligner {
14782
14783     my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_;
14784
14785     my $rindentation_list = [0];    # ref to indentations for each line
14786
14787     # define the array @matching_token_to_go for the output tokens
14788     # which will be non-blank for each special token (such as =>)
14789     # for which alignment is required.
14790     set_vertical_alignment_markers( $ri_first, $ri_last );
14791
14792     # flush if necessary to avoid unwanted alignment
14793     my $must_flush = 0;
14794     if ( @{$ri_first} > 1 ) {
14795
14796         # flush before a long if statement
14797         if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
14798             $must_flush = 1;
14799         }
14800     }
14801     if ($must_flush) {
14802         Perl::Tidy::VerticalAligner::flush();
14803     }
14804
14805     undo_ci( $ri_first, $ri_last );
14806
14807     set_logical_padding( $ri_first, $ri_last );
14808
14809     # loop to prepare each line for shipment
14810     my $n_last_line = @{$ri_first} - 1;
14811     my $in_comma_list;
14812     for my $n ( 0 .. $n_last_line ) {
14813         my $ibeg = $ri_first->[$n];
14814         my $iend = $ri_last->[$n];
14815
14816         my ( $rtokens, $rfields, $rpatterns ) =
14817           make_alignment_patterns( $ibeg, $iend );
14818
14819         # Set flag to show how much level changes between this line
14820         # and the next line, if we have it.
14821         my $ljump = 0;
14822         if ( $n < $n_last_line ) {
14823             my $ibegp = $ri_first->[ $n + 1 ];
14824             $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
14825         }
14826
14827         my ( $indentation, $lev, $level_end, $terminal_type,
14828             $is_semicolon_terminated, $is_outdented_line )
14829           = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
14830             $ri_first, $ri_last, $rindentation_list, $ljump );
14831
14832         # we will allow outdenting of long lines..
14833         my $outdent_long_lines = (
14834
14835             # which are long quotes, if allowed
14836             ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
14837
14838             # which are long block comments, if allowed
14839               || (
14840                    $types_to_go[$ibeg] eq '#'
14841                 && $rOpts->{'outdent-long-comments'}
14842
14843                 # but not if this is a static block comment
14844                 && !$is_static_block_comment
14845               )
14846         );
14847
14848         my $level_jump =
14849           $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
14850
14851         my $rvertical_tightness_flags =
14852           set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
14853             $ri_first, $ri_last );
14854
14855         # flush an outdented line to avoid any unwanted vertical alignment
14856         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
14857
14858         # Set a flag at the final ':' of a ternary chain to request
14859         # vertical alignment of the final term.  Here is a
14860         # slightly complex example:
14861         #
14862         # $self->{_text} = (
14863         #    !$section        ? ''
14864         #   : $type eq 'item' ? "the $section entry"
14865         #   :                   "the section on $section"
14866         # )
14867         # . (
14868         #   $page
14869         #   ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
14870         #   : ' elsewhere in this document'
14871         # );
14872         #
14873         my $is_terminal_ternary = 0;
14874         if (   $tokens_to_go[$ibeg] eq ':'
14875             || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' )
14876         {
14877             my $last_leading_type = ":";
14878             if ( $n > 0 ) {
14879                 my $iprev = $ri_first->[ $n - 1 ];
14880                 $last_leading_type = $types_to_go[$iprev];
14881             }
14882             if (   $terminal_type ne ';'
14883                 && $n_last_line > $n
14884                 && $level_end == $lev )
14885             {
14886                 my $inext = $ri_first->[ $n + 1 ];
14887                 $level_end     = $levels_to_go[$inext];
14888                 $terminal_type = $types_to_go[$inext];
14889             }
14890
14891             $is_terminal_ternary = $last_leading_type eq ':'
14892               && ( ( $terminal_type eq ';' && $level_end <= $lev )
14893                 || ( $terminal_type ne ':' && $level_end < $lev ) )
14894
14895               # the terminal term must not contain any ternary terms, as in
14896               # my $ECHO = (
14897               #       $Is_MSWin32 ? ".\\echo$$"
14898               #     : $Is_MacOS   ? ":echo$$"
14899               #     : ( $Is_NetWare ? "echo$$" : "./echo$$" )
14900               # );
14901               && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
14902         }
14903
14904         # send this new line down the pipe
14905         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
14906         Perl::Tidy::VerticalAligner::valign_input(
14907             $lev,
14908             $level_end,
14909             $indentation,
14910             $rfields,
14911             $rtokens,
14912             $rpatterns,
14913             $forced_breakpoint_to_go[$iend] || $in_comma_list,
14914             $outdent_long_lines,
14915             $is_terminal_ternary,
14916             $is_semicolon_terminated,
14917             $do_not_pad,
14918             $rvertical_tightness_flags,
14919             $level_jump,
14920         );
14921         $in_comma_list =
14922           $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
14923
14924         # flush an outdented line to avoid any unwanted vertical alignment
14925         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
14926
14927         $do_not_pad = 0;
14928
14929         # Set flag indicating if this line ends in an opening
14930         # token and is very short, so that a blank line is not
14931         # needed if the subsequent line is a comment.
14932         # Examples of what we are looking for:
14933         #   {
14934         #   && (
14935         #   BEGIN {
14936         #   default {
14937         #   sub {
14938         $last_output_short_opening_token
14939
14940           # line ends in opening token
14941           = $types_to_go[$iend] =~ /^[\{\(\[L]$/
14942
14943           # and either
14944           && (
14945             # line has either single opening token
14946             $iend == $ibeg
14947
14948             # or is a single token followed by opening token.
14949             # Note that sub identifiers have blanks like 'sub doit'
14950             || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
14951           )
14952
14953           # and limit total to 10 character widths
14954           && token_sequence_length( $ibeg, $iend ) <= 10;
14955
14956     }    # end of loop to output each line
14957
14958     # remember indentation of lines containing opening containers for
14959     # later use by sub set_adjusted_indentation
14960     save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
14961     return;
14962 }
14963
14964 {    # begin make_alignment_patterns
14965
14966     my %block_type_map;
14967     my %keyword_map;
14968
14969     BEGIN {
14970
14971         # map related block names into a common name to
14972         # allow alignment
14973         %block_type_map = (
14974             'unless'  => 'if',
14975             'else'    => 'if',
14976             'elsif'   => 'if',
14977             'when'    => 'if',
14978             'default' => 'if',
14979             'case'    => 'if',
14980             'sort'    => 'map',
14981             'grep'    => 'map',
14982         );
14983
14984         # map certain keywords to the same 'if' class to align
14985         # long if/elsif sequences. [elsif.pl]
14986         %keyword_map = (
14987             'unless'  => 'if',
14988             'else'    => 'if',
14989             'elsif'   => 'if',
14990             'when'    => 'given',
14991             'default' => 'given',
14992             'case'    => 'switch',
14993
14994             # treat an 'undef' similar to numbers and quotes
14995             'undef' => 'Q',
14996         );
14997     }
14998
14999     sub make_alignment_patterns {
15000
15001         # Here we do some important preliminary work for the
15002         # vertical aligner.  We create three arrays for one
15003         # output line. These arrays contain strings that can
15004         # be tested by the vertical aligner to see if
15005         # consecutive lines can be aligned vertically.
15006         #
15007         # The three arrays are indexed on the vertical
15008         # alignment fields and are:
15009         # @tokens - a list of any vertical alignment tokens for this line.
15010         #   These are tokens, such as '=' '&&' '#' etc which
15011         #   we want to might align vertically.  These are
15012         #   decorated with various information such as
15013         #   nesting depth to prevent unwanted vertical
15014         #   alignment matches.
15015         # @fields - the actual text of the line between the vertical alignment
15016         #   tokens.
15017         # @patterns - a modified list of token types, one for each alignment
15018         #   field.  These should normally each match before alignment is
15019         #   allowed, even when the alignment tokens match.
15020         my ( $ibeg, $iend ) = @_;
15021         my @tokens   = ();
15022         my @fields   = ();
15023         my @patterns = ();
15024         my $i_start  = $ibeg;
15025
15026         my $depth                 = 0;
15027         my @container_name        = ("");
15028         my @multiple_comma_arrows = (undef);
15029
15030         my $j = 0;    # field index
15031
15032         $patterns[0] = "";
15033         for my $i ( $ibeg .. $iend ) {
15034
15035             # Keep track of containers balanced on this line only.
15036             # These are used below to prevent unwanted cross-line alignments.
15037             # Unbalanced containers already avoid aligning across
15038             # container boundaries.
15039             if ( $tokens_to_go[$i] eq '(' ) {
15040
15041                 # if container is balanced on this line...
15042                 my $i_mate = $mate_index_to_go[$i];
15043                 if ( $i_mate > $i && $i_mate <= $iend ) {
15044                     $depth++;
15045                     my $seqno = $type_sequence_to_go[$i];
15046                     my $count = comma_arrow_count($seqno);
15047                     $multiple_comma_arrows[$depth] = $count && $count > 1;
15048
15049                     # Append the previous token name to make the container name
15050                     # more unique.  This name will also be given to any commas
15051                     # within this container, and it helps avoid undesirable
15052                     # alignments of different types of containers.
15053                     my $name = previous_nonblank_token($i);
15054                     $name =~ s/^->//;
15055                     $container_name[$depth] = "+" . $name;
15056
15057                     # Make the container name even more unique if necessary.
15058                     # If we are not vertically aligning this opening paren,
15059                     # append a character count to avoid bad alignment because
15060                     # it usually looks bad to align commas within containers
15061                     # for which the opening parens do not align.  Here
15062                     # is an example very BAD alignment of commas (because
15063                     # the atan2 functions are not all aligned):
15064                     #    $XY =
15065                     #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
15066                     #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
15067                     #      $X * atan2( $X,            1 ) -
15068                     #      $Y * atan2( $Y,            1 );
15069                     #
15070                     # On the other hand, it is usually okay to align commas if
15071                     # opening parens align, such as:
15072                     #    glVertex3d( $cx + $s * $xs, $cy,            $z );
15073                     #    glVertex3d( $cx,            $cy + $s * $ys, $z );
15074                     #    glVertex3d( $cx - $s * $xs, $cy,            $z );
15075                     #    glVertex3d( $cx,            $cy - $s * $ys, $z );
15076                     #
15077                     # To distinguish between these situations, we will
15078                     # append the length of the line from the previous matching
15079                     # token, or beginning of line, to the function name.  This
15080                     # will allow the vertical aligner to reject undesirable
15081                     # matches.
15082
15083                     # if we are not aligning on this paren...
15084                     if ( $matching_token_to_go[$i] eq '' ) {
15085
15086                         # Sum length from previous alignment, or start of line.
15087                         my $len =
15088                           ( $i_start == $ibeg )
15089                           ? total_line_length( $i_start, $i - 1 )
15090                           : token_sequence_length( $i_start, $i - 1 );
15091
15092                         # tack length onto the container name to make unique
15093                         $container_name[$depth] .= "-" . $len;
15094                     }
15095                 }
15096             }
15097             elsif ( $tokens_to_go[$i] eq ')' ) {
15098                 $depth-- if $depth > 0;
15099             }
15100
15101             # if we find a new synchronization token, we are done with
15102             # a field
15103             if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
15104
15105                 my $tok = my $raw_tok = $matching_token_to_go[$i];
15106
15107                 # make separators in different nesting depths unique
15108                 # by appending the nesting depth digit.
15109                 if ( $raw_tok ne '#' ) {
15110                     $tok .= "$nesting_depth_to_go[$i]";
15111                 }
15112
15113                 # also decorate commas with any container name to avoid
15114                 # unwanted cross-line alignments.
15115                 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
15116                     if ( $container_name[$depth] ) {
15117                         $tok .= $container_name[$depth];
15118                     }
15119                 }
15120
15121                 # Patch to avoid aligning leading and trailing if, unless.
15122                 # Mark trailing if, unless statements with container names.
15123                 # This makes them different from leading if, unless which
15124                 # are not so marked at present.  If we ever need to name
15125                 # them too, we could use ci to distinguish them.
15126                 # Example problem to avoid:
15127                 #    return ( 2, "DBERROR" )
15128                 #      if ( $retval == 2 );
15129                 #    if   ( scalar @_ ) {
15130                 #        my ( $a, $b, $c, $d, $e, $f ) = @_;
15131                 #    }
15132                 if ( $raw_tok eq '(' ) {
15133                     my $ci = $ci_levels_to_go[$ibeg];
15134                     if (   $container_name[$depth] =~ /^\+(if|unless)/
15135                         && $ci )
15136                     {
15137                         $tok .= $container_name[$depth];
15138                     }
15139                 }
15140
15141                 # Decorate block braces with block types to avoid
15142                 # unwanted alignments such as the following:
15143                 # foreach ( @{$routput_array} ) { $fh->print($_) }
15144                 # eval                          { $fh->close() };
15145                 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
15146                     my $block_type = $block_type_to_go[$i];
15147
15148                     # map certain related block types to allow
15149                     # else blocks to align
15150                     $block_type = $block_type_map{$block_type}
15151                       if ( defined( $block_type_map{$block_type} ) );
15152
15153                     # remove sub names to allow one-line sub braces to align
15154                     # regardless of name
15155                     #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
15156                     if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
15157
15158                     # allow all control-type blocks to align
15159                     if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
15160
15161                     $tok .= $block_type;
15162                 }
15163
15164                 # concatenate the text of the consecutive tokens to form
15165                 # the field
15166                 push( @fields,
15167                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
15168
15169                 # store the alignment token for this field
15170                 push( @tokens, $tok );
15171
15172                 # get ready for the next batch
15173                 $i_start = $i;
15174                 $j++;
15175                 $patterns[$j] = "";
15176             }
15177
15178             # continue accumulating tokens
15179             # handle non-keywords..
15180             if ( $types_to_go[$i] ne 'k' ) {
15181                 my $type = $types_to_go[$i];
15182
15183                 # Mark most things before arrows as a quote to
15184                 # get them to line up. Testfile: mixed.pl.
15185                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
15186                     my $next_type = $types_to_go[ $i + 1 ];
15187                     my $i_next_nonblank =
15188                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
15189
15190                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
15191                         $type = 'Q';
15192
15193                         # Patch to ignore leading minus before words,
15194                         # by changing pattern 'mQ' into just 'Q',
15195                         # so that we can align things like this:
15196                         #  Button   => "Print letter \"~$_\"",
15197                         #  -command => [ sub { print "$_[0]\n" }, $_ ],
15198                         if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
15199                     }
15200                 }
15201
15202                 # patch to make numbers and quotes align
15203                 if ( $type eq 'n' ) { $type = 'Q' }
15204
15205                 # patch to ignore any ! in patterns
15206                 if ( $type eq '!' ) { $type = '' }
15207
15208                 $patterns[$j] .= $type;
15209             }
15210
15211             # for keywords we have to use the actual text
15212             else {
15213
15214                 my $tok = $tokens_to_go[$i];
15215
15216                 # but map certain keywords to a common string to allow
15217                 # alignment.
15218                 $tok = $keyword_map{$tok}
15219                   if ( defined( $keyword_map{$tok} ) );
15220                 $patterns[$j] .= $tok;
15221             }
15222         }
15223
15224         # done with this line .. join text of tokens to make the last field
15225         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
15226         return ( \@tokens, \@fields, \@patterns );
15227     }
15228
15229 }    # end make_alignment_patterns
15230
15231 {    # begin unmatched_indexes
15232
15233     # closure to keep track of unbalanced containers.
15234     # arrays shared by the routines in this block:
15235     my @unmatched_opening_indexes_in_this_batch;
15236     my @unmatched_closing_indexes_in_this_batch;
15237     my %comma_arrow_count;
15238
15239     sub is_unbalanced_batch {
15240         return @unmatched_opening_indexes_in_this_batch +
15241           @unmatched_closing_indexes_in_this_batch;
15242     }
15243
15244     sub comma_arrow_count {
15245         my $seqno = shift;
15246         return $comma_arrow_count{$seqno};
15247     }
15248
15249     sub match_opening_and_closing_tokens {
15250
15251         # Match up indexes of opening and closing braces, etc, in this batch.
15252         # This has to be done after all tokens are stored because unstoring
15253         # of tokens would otherwise cause trouble.
15254
15255         @unmatched_opening_indexes_in_this_batch = ();
15256         @unmatched_closing_indexes_in_this_batch = ();
15257         %comma_arrow_count                       = ();
15258         my $comma_arrow_count_contained = 0;
15259
15260         foreach my $i ( 0 .. $max_index_to_go ) {
15261             if ( $type_sequence_to_go[$i] ) {
15262                 my $token = $tokens_to_go[$i];
15263                 if ( $token =~ /^[\(\[\{\?]$/ ) {
15264                     push @unmatched_opening_indexes_in_this_batch, $i;
15265                 }
15266                 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
15267
15268                     my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
15269                     if ( defined($i_mate) && $i_mate >= 0 ) {
15270                         if ( $type_sequence_to_go[$i_mate] ==
15271                             $type_sequence_to_go[$i] )
15272                         {
15273                             $mate_index_to_go[$i]      = $i_mate;
15274                             $mate_index_to_go[$i_mate] = $i;
15275                             my $seqno = $type_sequence_to_go[$i];
15276                             if ( $comma_arrow_count{$seqno} ) {
15277                                 $comma_arrow_count_contained +=
15278                                   $comma_arrow_count{$seqno};
15279                             }
15280                         }
15281                         else {
15282                             push @unmatched_opening_indexes_in_this_batch,
15283                               $i_mate;
15284                             push @unmatched_closing_indexes_in_this_batch, $i;
15285                         }
15286                     }
15287                     else {
15288                         push @unmatched_closing_indexes_in_this_batch, $i;
15289                     }
15290                 }
15291             }
15292             elsif ( $tokens_to_go[$i] eq '=>' ) {
15293                 if (@unmatched_opening_indexes_in_this_batch) {
15294                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
15295                     my $seqno = $type_sequence_to_go[$j];
15296                     $comma_arrow_count{$seqno}++;
15297                 }
15298             }
15299         }
15300         return $comma_arrow_count_contained;
15301     }
15302
15303     sub save_opening_indentation {
15304
15305         # This should be called after each batch of tokens is output. It
15306         # saves indentations of lines of all unmatched opening tokens.
15307         # These will be used by sub get_opening_indentation.
15308
15309         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
15310
15311         # we no longer need indentations of any saved indentations which
15312         # are unmatched closing tokens in this batch, because we will
15313         # never encounter them again.  So we can delete them to keep
15314         # the hash size down.
15315         foreach (@unmatched_closing_indexes_in_this_batch) {
15316             my $seqno = $type_sequence_to_go[$_];
15317             delete $saved_opening_indentation{$seqno};
15318         }
15319
15320         # we need to save indentations of any unmatched opening tokens
15321         # in this batch because we may need them in a subsequent batch.
15322         foreach (@unmatched_opening_indexes_in_this_batch) {
15323             my $seqno = $type_sequence_to_go[$_];
15324             $saved_opening_indentation{$seqno} = [
15325                 lookup_opening_indentation(
15326                     $_, $ri_first, $ri_last, $rindentation_list
15327                 )
15328             ];
15329         }
15330         return;
15331     }
15332 }    # end unmatched_indexes
15333
15334 sub get_opening_indentation {
15335
15336     # get the indentation of the line which output the opening token
15337     # corresponding to a given closing token in the current output batch.
15338     #
15339     # given:
15340     # $i_closing - index in this line of a closing token ')' '}' or ']'
15341     #
15342     # $ri_first - reference to list of the first index $i for each output
15343     #               line in this batch
15344     # $ri_last - reference to list of the last index $i for each output line
15345     #              in this batch
15346     # $rindentation_list - reference to a list containing the indentation
15347     #            used for each line.
15348     #
15349     # return:
15350     #   -the indentation of the line which contained the opening token
15351     #    which matches the token at index $i_opening
15352     #   -and its offset (number of columns) from the start of the line
15353     #
15354     my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
15355
15356     # first, see if the opening token is in the current batch
15357     my $i_opening = $mate_index_to_go[$i_closing];
15358     my ( $indent, $offset, $is_leading, $exists );
15359     $exists = 1;
15360     if ( $i_opening >= 0 ) {
15361
15362         # it is..look up the indentation
15363         ( $indent, $offset, $is_leading ) =
15364           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
15365             $rindentation_list );
15366     }
15367
15368     # if not, it should have been stored in the hash by a previous batch
15369     else {
15370         my $seqno = $type_sequence_to_go[$i_closing];
15371         if ($seqno) {
15372             if ( $saved_opening_indentation{$seqno} ) {
15373                 ( $indent, $offset, $is_leading ) =
15374                   @{ $saved_opening_indentation{$seqno} };
15375             }
15376
15377             # some kind of serious error
15378             # (example is badfile.t)
15379             else {
15380                 $indent     = 0;
15381                 $offset     = 0;
15382                 $is_leading = 0;
15383                 $exists     = 0;
15384             }
15385         }
15386
15387         # if no sequence number it must be an unbalanced container
15388         else {
15389             $indent     = 0;
15390             $offset     = 0;
15391             $is_leading = 0;
15392             $exists     = 0;
15393         }
15394     }
15395     return ( $indent, $offset, $is_leading, $exists );
15396 }
15397
15398 sub lookup_opening_indentation {
15399
15400     # get the indentation of the line in the current output batch
15401     # which output a selected opening token
15402     #
15403     # given:
15404     #   $i_opening - index of an opening token in the current output batch
15405     #                whose line indentation we need
15406     #   $ri_first - reference to list of the first index $i for each output
15407     #               line in this batch
15408     #   $ri_last - reference to list of the last index $i for each output line
15409     #              in this batch
15410     #   $rindentation_list - reference to a list containing the indentation
15411     #            used for each line.  (NOTE: the first slot in
15412     #            this list is the last returned line number, and this is
15413     #            followed by the list of indentations).
15414     #
15415     # return
15416     #   -the indentation of the line which contained token $i_opening
15417     #   -and its offset (number of columns) from the start of the line
15418
15419     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
15420
15421     my $nline = $rindentation_list->[0];    # line number of previous lookup
15422
15423     # reset line location if necessary
15424     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
15425
15426     # find the correct line
15427     unless ( $i_opening > $ri_last->[-1] ) {
15428         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
15429     }
15430
15431     # error - token index is out of bounds - shouldn't happen
15432     else {
15433         warning(
15434 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
15435         );
15436         report_definite_bug();
15437         $nline = $#{$ri_last};
15438     }
15439
15440     $rindentation_list->[0] =
15441       $nline;    # save line number to start looking next call
15442     my $ibeg       = $ri_start->[$nline];
15443     my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
15444     my $is_leading = ( $ibeg == $i_opening );
15445     return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
15446 }
15447
15448 {
15449     my %is_if_elsif_else_unless_while_until_for_foreach;
15450
15451     BEGIN {
15452
15453         # These block types may have text between the keyword and opening
15454         # curly.  Note: 'else' does not, but must be included to allow trailing
15455         # if/elsif text to be appended.
15456         # patch for SWITCH/CASE: added 'case' and 'when'
15457         my @q = qw(if elsif else unless while until for foreach case when);
15458         @is_if_elsif_else_unless_while_until_for_foreach{@q} =
15459           (1) x scalar(@q);
15460     }
15461
15462     sub set_adjusted_indentation {
15463
15464         # This routine has the final say regarding the actual indentation of
15465         # a line.  It starts with the basic indentation which has been
15466         # defined for the leading token, and then takes into account any
15467         # options that the user has set regarding special indenting and
15468         # outdenting.
15469
15470         my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
15471             $rindentation_list, $level_jump )
15472           = @_;
15473
15474         # we need to know the last token of this line
15475         my ( $terminal_type, $i_terminal ) =
15476           terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
15477
15478         my $is_outdented_line = 0;
15479
15480         my $is_semicolon_terminated = $terminal_type eq ';'
15481           && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
15482
15483         # NOTE: A future improvement would be to make it semicolon terminated
15484         # even if it does not have a semicolon but is followed by a closing
15485         # block brace. This would undo ci even for something like the
15486         # following, in which the final paren does not have a semicolon because
15487         # it is a possible weld location:
15488
15489         # if ($BOLD_MATH) {
15490         #     (
15491         #         $labels, $comment,
15492         #         join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
15493         #     )
15494         # }
15495         #
15496
15497         # MOJO: Set a flag if this lines begins with ')->'
15498         my $leading_paren_arrow = (
15499                  $types_to_go[$ibeg] eq '}'
15500               && $tokens_to_go[$ibeg] eq ')'
15501               && (
15502                 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
15503                 || (   $ibeg < $i_terminal - 1
15504                     && $types_to_go[ $ibeg + 1 ] eq 'b'
15505                     && $types_to_go[ $ibeg + 2 ] eq '->' )
15506               )
15507         );
15508
15509         ##########################################################
15510         # Section 1: set a flag and a default indentation
15511         #
15512         # Most lines are indented according to the initial token.
15513         # But it is common to outdent to the level just after the
15514         # terminal token in certain cases...
15515         # adjust_indentation flag:
15516         #       0 - do not adjust
15517         #       1 - outdent
15518         #       2 - vertically align with opening token
15519         #       3 - indent
15520         ##########################################################
15521         my $adjust_indentation         = 0;
15522         my $default_adjust_indentation = $adjust_indentation;
15523
15524         my (
15525             $opening_indentation, $opening_offset,
15526             $is_leading,          $opening_exists
15527         );
15528
15529         # if we are at a closing token of some type..
15530         if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
15531
15532             # get the indentation of the line containing the corresponding
15533             # opening token
15534             (
15535                 $opening_indentation, $opening_offset,
15536                 $is_leading,          $opening_exists
15537               )
15538               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15539                 $rindentation_list );
15540
15541             # First set the default behavior:
15542             if (
15543
15544                 # default behavior is to outdent closing lines
15545                 # of the form:   ");  };  ];  )->xxx;"
15546                 $is_semicolon_terminated
15547
15548                 # and 'cuddled parens' of the form:   ")->pack("
15549                 # Bug fix for RT #123749]: the types here were
15550                 # incorrectly '(' and ')'.  Corrected to be '{' and '}'
15551                 || (
15552                        $terminal_type eq '{'
15553                     && $types_to_go[$ibeg] eq '}'
15554                     && ( $nesting_depth_to_go[$iend] + 1 ==
15555                         $nesting_depth_to_go[$ibeg] )
15556                 )
15557
15558                 # remove continuation indentation for any line like
15559                 #       } ... {
15560                 # or without ending '{' and unbalanced, such as
15561                 #       such as '}->{$operator}'
15562                 || (
15563                     $types_to_go[$ibeg] eq '}'
15564
15565                     && (   $types_to_go[$iend] eq '{'
15566                         || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
15567                 )
15568
15569                 # and when the next line is at a lower indentation level
15570                 # PATCH: and only if the style allows undoing continuation
15571                 # for all closing token types. We should really wait until
15572                 # the indentation of the next line is known and then make
15573                 # a decision, but that would require another pass.
15574                 || ( $level_jump < 0 && !$some_closing_token_indentation )
15575
15576                 # Patch for -wn=2, multiple welded closing tokens
15577                 || (   $i_terminal > $ibeg
15578                     && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
15579
15580               )
15581             {
15582                 $adjust_indentation = 1;
15583             }
15584
15585             # outdent something like '),'
15586             if (
15587                 $terminal_type eq ','
15588
15589                 # Removed this constraint for -wn
15590                 # OLD: allow just one character before the comma
15591                 # && $i_terminal == $ibeg + 1
15592
15593                 # require LIST environment; otherwise, we may outdent too much -
15594                 # this can happen in calls without parentheses (overload.t);
15595                 && $container_environment_to_go[$i_terminal] eq 'LIST'
15596               )
15597             {
15598                 $adjust_indentation = 1;
15599             }
15600
15601             # undo continuation indentation of a terminal closing token if
15602             # it is the last token before a level decrease.  This will allow
15603             # a closing token to line up with its opening counterpart, and
15604             # avoids a indentation jump larger than 1 level.
15605             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
15606                 && $i_terminal == $ibeg )
15607             {
15608                 my $ci        = $ci_levels_to_go[$ibeg];
15609                 my $lev       = $levels_to_go[$ibeg];
15610                 my $next_type = $types_to_go[ $ibeg + 1 ];
15611                 my $i_next_nonblank =
15612                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
15613                 if (   $i_next_nonblank <= $max_index_to_go
15614                     && $levels_to_go[$i_next_nonblank] < $lev )
15615                 {
15616                     $adjust_indentation = 1;
15617                 }
15618
15619                 # Patch for RT #96101, in which closing brace of anonymous subs
15620                 # was not outdented.  We should look ahead and see if there is
15621                 # a level decrease at the next token (i.e., a closing token),
15622                 # but right now we do not have that information.  For now
15623                 # we see if we are in a list, and this works well.
15624                 # See test files 'sub*.t' for good test cases.
15625                 if (   $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
15626                     && $container_environment_to_go[$i_terminal] eq 'LIST'
15627                     && !$rOpts->{'indent-closing-brace'} )
15628                 {
15629                     (
15630                         $opening_indentation, $opening_offset,
15631                         $is_leading,          $opening_exists
15632                       )
15633                       = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15634                         $rindentation_list );
15635                     my $indentation = $leading_spaces_to_go[$ibeg];
15636                     if ( defined($opening_indentation)
15637                         && get_spaces($indentation) >
15638                         get_spaces($opening_indentation) )
15639                     {
15640                         $adjust_indentation = 1;
15641                     }
15642                 }
15643             }
15644
15645             # YVES patch 1 of 2:
15646             # Undo ci of line with leading closing eval brace,
15647             # but not beyond the indention of the line with
15648             # the opening brace.
15649             if (   $block_type_to_go[$ibeg] eq 'eval'
15650                 && !$rOpts->{'line-up-parentheses'}
15651                 && !$rOpts->{'indent-closing-brace'} )
15652             {
15653                 (
15654                     $opening_indentation, $opening_offset,
15655                     $is_leading,          $opening_exists
15656                   )
15657                   = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15658                     $rindentation_list );
15659                 my $indentation = $leading_spaces_to_go[$ibeg];
15660                 if ( defined($opening_indentation)
15661                     && get_spaces($indentation) >
15662                     get_spaces($opening_indentation) )
15663                 {
15664                     $adjust_indentation = 1;
15665                 }
15666             }
15667
15668             $default_adjust_indentation = $adjust_indentation;
15669
15670             # Now modify default behavior according to user request:
15671             # handle option to indent non-blocks of the form );  };  ];
15672             # But don't do special indentation to something like ')->pack('
15673             if ( !$block_type_to_go[$ibeg] ) {
15674                 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
15675                 if ( $cti == 1 ) {
15676                     if (   $i_terminal <= $ibeg + 1
15677                         || $is_semicolon_terminated )
15678                     {
15679                         $adjust_indentation = 2;
15680                     }
15681                     else {
15682                         $adjust_indentation = 0;
15683                     }
15684                 }
15685                 elsif ( $cti == 2 ) {
15686                     if ($is_semicolon_terminated) {
15687                         $adjust_indentation = 3;
15688                     }
15689                     else {
15690                         $adjust_indentation = 0;
15691                     }
15692                 }
15693                 elsif ( $cti == 3 ) {
15694                     $adjust_indentation = 3;
15695                 }
15696             }
15697
15698             # handle option to indent blocks
15699             else {
15700                 if (
15701                     $rOpts->{'indent-closing-brace'}
15702                     && (
15703                         $i_terminal == $ibeg    #  isolated terminal '}'
15704                         || $is_semicolon_terminated
15705                     )
15706                   )                             #  } xxxx ;
15707                 {
15708                     $adjust_indentation = 3;
15709                 }
15710             }
15711         }
15712
15713         # if at ');', '};', '>;', and '];' of a terminal qw quote
15714         elsif ($rpatterns->[0] =~ /^qb*;$/
15715             && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
15716         {
15717             if ( $closing_token_indentation{$1} == 0 ) {
15718                 $adjust_indentation = 1;
15719             }
15720             else {
15721                 $adjust_indentation = 3;
15722             }
15723         }
15724
15725         # if line begins with a ':', align it with any
15726         # previous line leading with corresponding ?
15727         elsif ( $types_to_go[$ibeg] eq ':' ) {
15728             (
15729                 $opening_indentation, $opening_offset,
15730                 $is_leading,          $opening_exists
15731               )
15732               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
15733                 $rindentation_list );
15734             if ($is_leading) { $adjust_indentation = 2; }
15735         }
15736
15737         ##########################################################
15738         # Section 2: set indentation according to flag set above
15739         #
15740         # Select the indentation object to define leading
15741         # whitespace.  If we are outdenting something like '} } );'
15742         # then we want to use one level below the last token
15743         # ($i_terminal) in order to get it to fully outdent through
15744         # all levels.
15745         ##########################################################
15746         my $indentation;
15747         my $lev;
15748         my $level_end = $levels_to_go[$iend];
15749
15750         if ( $adjust_indentation == 0 ) {
15751             $indentation = $leading_spaces_to_go[$ibeg];
15752             $lev         = $levels_to_go[$ibeg];
15753         }
15754         elsif ( $adjust_indentation == 1 ) {
15755
15756             # Change the indentation to be that of a different token on the line
15757             # Previously, the indentation of the terminal token was used:
15758             # OLD CODING:
15759             # $indentation = $reduced_spaces_to_go[$i_terminal];
15760             # $lev         = $levels_to_go[$i_terminal];
15761
15762             # Generalization for MOJO:
15763             # Use the lowest level indentation of the tokens on the line.
15764             # For example, here we can use the indentation of the ending ';':
15765             #    } until ($selection > 0 and $selection < 10);   # ok to use ';'
15766             # But this will not outdent if we use the terminal indentation:
15767             #    )->then( sub {      # use indentation of the ->, not the {
15768             # Warning: reduced_spaces_to_go[] may be a reference, do not
15769             # do numerical checks with it
15770
15771             my $i_ind = $ibeg;
15772             $indentation = $reduced_spaces_to_go[$i_ind];
15773             $lev         = $levels_to_go[$i_ind];
15774             while ( $i_ind < $i_terminal ) {
15775                 $i_ind++;
15776                 if ( $levels_to_go[$i_ind] < $lev ) {
15777                     $indentation = $reduced_spaces_to_go[$i_ind];
15778                     $lev         = $levels_to_go[$i_ind];
15779                 }
15780             }
15781         }
15782
15783         # handle indented closing token which aligns with opening token
15784         elsif ( $adjust_indentation == 2 ) {
15785
15786             # handle option to align closing token with opening token
15787             $lev = $levels_to_go[$ibeg];
15788
15789             # calculate spaces needed to align with opening token
15790             my $space_count =
15791               get_spaces($opening_indentation) + $opening_offset;
15792
15793             # Indent less than the previous line.
15794             #
15795             # Problem: For -lp we don't exactly know what it was if there
15796             # were recoverable spaces sent to the aligner.  A good solution
15797             # would be to force a flush of the vertical alignment buffer, so
15798             # that we would know.  For now, this rule is used for -lp:
15799             #
15800             # When the last line did not start with a closing token we will
15801             # be optimistic that the aligner will recover everything wanted.
15802             #
15803             # This rule will prevent us from breaking a hierarchy of closing
15804             # tokens, and in a worst case will leave a closing paren too far
15805             # indented, but this is better than frequently leaving it not
15806             # indented enough.
15807             my $last_spaces = get_spaces($last_indentation_written);
15808             if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
15809                 $last_spaces +=
15810                   get_recoverable_spaces($last_indentation_written);
15811             }
15812
15813             # reset the indentation to the new space count if it works
15814             # only options are all or none: nothing in-between looks good
15815             $lev = $levels_to_go[$ibeg];
15816             if ( $space_count < $last_spaces ) {
15817                 if ($rOpts_line_up_parentheses) {
15818                     my $lev = $levels_to_go[$ibeg];
15819                     $indentation =
15820                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
15821                 }
15822                 else {
15823                     $indentation = $space_count;
15824                 }
15825             }
15826
15827             # revert to default if it doesn't work
15828             else {
15829                 $space_count = leading_spaces_to_go($ibeg);
15830                 if ( $default_adjust_indentation == 0 ) {
15831                     $indentation = $leading_spaces_to_go[$ibeg];
15832                 }
15833                 elsif ( $default_adjust_indentation == 1 ) {
15834                     $indentation = $reduced_spaces_to_go[$i_terminal];
15835                     $lev         = $levels_to_go[$i_terminal];
15836                 }
15837             }
15838         }
15839
15840         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
15841         else {
15842
15843             # handle -icb (indented closing code block braces)
15844             # Updated method for indented block braces: indent one full level if
15845             # there is no continuation indentation.  This will occur for major
15846             # structures such as sub, if, else, but not for things like map
15847             # blocks.
15848             #
15849             # Note: only code blocks without continuation indentation are
15850             # handled here (if, else, unless, ..). In the following snippet,
15851             # the terminal brace of the sort block will have continuation
15852             # indentation as shown so it will not be handled by the coding
15853             # here.  We would have to undo the continuation indentation to do
15854             # this, but it probably looks ok as is.  This is a possible future
15855             # update for semicolon terminated lines.
15856             #
15857             #     if ($sortby eq 'date' or $sortby eq 'size') {
15858             #         @files = sort {
15859             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
15860             #                 or $a cmp $b
15861             #                 } @files;
15862             #         }
15863             #
15864             if (   $block_type_to_go[$ibeg]
15865                 && $ci_levels_to_go[$i_terminal] == 0 )
15866             {
15867                 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
15868                 $indentation = $spaces + $rOpts_indent_columns;
15869
15870                 # NOTE: for -lp we could create a new indentation object, but
15871                 # there is probably no need to do it
15872             }
15873
15874             # handle -icp and any -icb block braces which fall through above
15875             # test such as the 'sort' block mentioned above.
15876             else {
15877
15878                 # There are currently two ways to handle -icp...
15879                 # One way is to use the indentation of the previous line:
15880                 # $indentation = $last_indentation_written;
15881
15882                 # The other way is to use the indentation that the previous line
15883                 # would have had if it hadn't been adjusted:
15884                 $indentation = $last_unadjusted_indentation;
15885
15886                 # Current method: use the minimum of the two. This avoids
15887                 # inconsistent indentation.
15888                 if ( get_spaces($last_indentation_written) <
15889                     get_spaces($indentation) )
15890                 {
15891                     $indentation = $last_indentation_written;
15892                 }
15893             }
15894
15895             # use previous indentation but use own level
15896             # to cause list to be flushed properly
15897             $lev = $levels_to_go[$ibeg];
15898         }
15899
15900         # remember indentation except for multi-line quotes, which get
15901         # no indentation
15902         unless ( $ibeg == 0 && $starting_in_quote ) {
15903             $last_indentation_written    = $indentation;
15904             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
15905             $last_leading_token          = $tokens_to_go[$ibeg];
15906         }
15907
15908         # be sure lines with leading closing tokens are not outdented more
15909         # than the line which contained the corresponding opening token.
15910
15911         #############################################################
15912         # updated per bug report in alex_bug.pl: we must not
15913         # mess with the indentation of closing logical braces so
15914         # we must treat something like '} else {' as if it were
15915         # an isolated brace my $is_isolated_block_brace = (
15916         # $iend == $ibeg ) && $block_type_to_go[$ibeg];
15917         #############################################################
15918         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
15919           && ( $iend == $ibeg
15920             || $is_if_elsif_else_unless_while_until_for_foreach{
15921                 $block_type_to_go[$ibeg]
15922             } );
15923
15924         # only do this for a ':; which is aligned with its leading '?'
15925         my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
15926
15927         if (
15928             defined($opening_indentation)
15929             && !$leading_paren_arrow    # MOJO
15930             && !$is_isolated_block_brace
15931             && !$is_unaligned_colon
15932           )
15933         {
15934             if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
15935                 $indentation = $opening_indentation;
15936             }
15937         }
15938
15939         # remember the indentation of each line of this batch
15940         push @{$rindentation_list}, $indentation;
15941
15942         # outdent lines with certain leading tokens...
15943         if (
15944
15945             # must be first word of this batch
15946             $ibeg == 0
15947
15948             # and ...
15949             && (
15950
15951                 # certain leading keywords if requested
15952                 (
15953                        $rOpts->{'outdent-keywords'}
15954                     && $types_to_go[$ibeg] eq 'k'
15955                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
15956                 )
15957
15958                 # or labels if requested
15959                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
15960
15961                 # or static block comments if requested
15962                 || (   $types_to_go[$ibeg] eq '#'
15963                     && $rOpts->{'outdent-static-block-comments'}
15964                     && $is_static_block_comment )
15965             )
15966           )
15967
15968         {
15969             my $space_count = leading_spaces_to_go($ibeg);
15970             if ( $space_count > 0 ) {
15971                 $space_count -= $rOpts_continuation_indentation;
15972                 $is_outdented_line = 1;
15973                 if ( $space_count < 0 ) { $space_count = 0 }
15974
15975                 # do not promote a spaced static block comment to non-spaced;
15976                 # this is not normally necessary but could be for some
15977                 # unusual user inputs (such as -ci = -i)
15978                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
15979                     $space_count = 1;
15980                 }
15981
15982                 if ($rOpts_line_up_parentheses) {
15983                     $indentation =
15984                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
15985                 }
15986                 else {
15987                     $indentation = $space_count;
15988                 }
15989             }
15990         }
15991
15992         return ( $indentation, $lev, $level_end, $terminal_type,
15993             $is_semicolon_terminated, $is_outdented_line );
15994     }
15995 }
15996
15997 sub set_vertical_tightness_flags {
15998
15999     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
16000
16001     # Define vertical tightness controls for the nth line of a batch.
16002     # We create an array of parameters which tell the vertical aligner
16003     # if we should combine this line with the next line to achieve the
16004     # desired vertical tightness.  The array of parameters contains:
16005     #
16006     #   [0] type: 1=opening non-block    2=closing non-block
16007     #             3=opening block brace  4=closing block brace
16008     #
16009     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
16010     #             if closing: spaces of padding to use
16011     #   [2] sequence number of container
16012     #   [3] valid flag: do not append if this flag is false. Will be
16013     #       true if appropriate -vt flag is set.  Otherwise, Will be
16014     #       made true only for 2 line container in parens with -lp
16015     #
16016     # These flags are used by sub set_leading_whitespace in
16017     # the vertical aligner
16018
16019     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
16020
16021     #--------------------------------------------------------------
16022     # Vertical Tightness Flags Section 1:
16023     # Handle Lines 1 .. n-1 but not the last line
16024     # For non-BLOCK tokens, we will need to examine the next line
16025     # too, so we won't consider the last line.
16026     #--------------------------------------------------------------
16027     if ( $n < $n_last_line ) {
16028
16029         #--------------------------------------------------------------
16030         # Vertical Tightness Flags Section 1a:
16031         # Look for Type 1, last token of this line is a non-block opening token
16032         #--------------------------------------------------------------
16033         my $ibeg_next = $ri_first->[ $n + 1 ];
16034         my $token_end = $tokens_to_go[$iend];
16035         my $iend_next = $ri_last->[ $n + 1 ];
16036         if (
16037                $type_sequence_to_go[$iend]
16038             && !$block_type_to_go[$iend]
16039             && $is_opening_token{$token_end}
16040             && (
16041                 $opening_vertical_tightness{$token_end} > 0
16042
16043                 # allow 2-line method call to be closed up
16044                 || (   $rOpts_line_up_parentheses
16045                     && $token_end eq '('
16046                     && $iend > $ibeg
16047                     && $types_to_go[ $iend - 1 ] ne 'b' )
16048             )
16049           )
16050         {
16051
16052             # avoid multiple jumps in nesting depth in one line if
16053             # requested
16054             my $ovt       = $opening_vertical_tightness{$token_end};
16055             my $iend_next = $ri_last->[ $n + 1 ];
16056             unless (
16057                 $ovt < 2
16058                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
16059                     $nesting_depth_to_go[$ibeg_next] )
16060               )
16061             {
16062
16063                 # If -vt flag has not been set, mark this as invalid
16064                 # and aligner will validate it if it sees the closing paren
16065                 # within 2 lines.
16066                 my $valid_flag = $ovt;
16067                 @{$rvertical_tightness_flags} =
16068                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
16069             }
16070         }
16071
16072         #--------------------------------------------------------------
16073         # Vertical Tightness Flags Section 1b:
16074         # Look for Type 2, first token of next line is a non-block closing
16075         # token .. and be sure this line does not have a side comment
16076         #--------------------------------------------------------------
16077         my $token_next = $tokens_to_go[$ibeg_next];
16078         if (   $type_sequence_to_go[$ibeg_next]
16079             && !$block_type_to_go[$ibeg_next]
16080             && $is_closing_token{$token_next}
16081             && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
16082         {
16083             my $ovt = $opening_vertical_tightness{$token_next};
16084             my $cvt = $closing_vertical_tightness{$token_next};
16085             if (
16086
16087                 # never append a trailing line like   )->pack(
16088                 # because it will throw off later alignment
16089                 (
16090                     $nesting_depth_to_go[$ibeg_next] ==
16091                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
16092                 )
16093                 && (
16094                     $cvt == 2
16095                     || (
16096                         $container_environment_to_go[$ibeg_next] ne 'LIST'
16097                         && (
16098                             $cvt == 1
16099
16100                             # allow closing up 2-line method calls
16101                             || (   $rOpts_line_up_parentheses
16102                                 && $token_next eq ')' )
16103                         )
16104                     )
16105                 )
16106               )
16107             {
16108
16109                 # decide which trailing closing tokens to append..
16110                 my $ok = 0;
16111                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
16112                 else {
16113                     my $str = join( '',
16114                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
16115
16116                     # append closing token if followed by comment or ';'
16117                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
16118                 }
16119
16120                 if ($ok) {
16121                     my $valid_flag = $cvt;
16122                     @{$rvertical_tightness_flags} = (
16123                         2,
16124                         $tightness{$token_next} == 2 ? 0 : 1,
16125                         $type_sequence_to_go[$ibeg_next], $valid_flag,
16126                     );
16127                 }
16128             }
16129         }
16130
16131         #--------------------------------------------------------------
16132         # Vertical Tightness Flags Section 1c:
16133         # Implement the Opening Token Right flag (Type 2)..
16134         # If requested, move an isolated trailing opening token to the end of
16135         # the previous line which ended in a comma.  We could do this
16136         # in sub recombine_breakpoints but that would cause problems
16137         # with -lp formatting.  The problem is that indentation will
16138         # quickly move far to the right in nested expressions.  By
16139         # doing it after indentation has been set, we avoid changes
16140         # to the indentation.  Actual movement of the token takes place
16141         # in sub valign_output_step_B.
16142         #--------------------------------------------------------------
16143         if (
16144             $opening_token_right{ $tokens_to_go[$ibeg_next] }
16145
16146             # previous line is not opening
16147             # (use -sot to combine with it)
16148             && !$is_opening_token{$token_end}
16149
16150             # previous line ended in one of these
16151             # (add other cases if necessary; '=>' and '.' are not necessary
16152             && !$block_type_to_go[$ibeg_next]
16153
16154             # this is a line with just an opening token
16155             && (   $iend_next == $ibeg_next
16156                 || $iend_next == $ibeg_next + 2
16157                 && $types_to_go[$iend_next] eq '#' )
16158
16159             # looks bad if we align vertically with the wrong container
16160             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
16161           )
16162         {
16163             my $valid_flag = 1;
16164             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
16165             @{$rvertical_tightness_flags} =
16166               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
16167         }
16168
16169         #--------------------------------------------------------------
16170         # Vertical Tightness Flags Section 1d:
16171         # Stacking of opening and closing tokens (Type 2)
16172         #--------------------------------------------------------------
16173         my $stackable;
16174         my $token_beg_next = $tokens_to_go[$ibeg_next];
16175
16176         # patch to make something like 'qw(' behave like an opening paren
16177         # (aran.t)
16178         if ( $types_to_go[$ibeg_next] eq 'q' ) {
16179             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
16180                 $token_beg_next = $1;
16181             }
16182         }
16183
16184         if (   $is_closing_token{$token_end}
16185             && $is_closing_token{$token_beg_next} )
16186         {
16187             $stackable = $stack_closing_token{$token_beg_next}
16188               unless ( $block_type_to_go[$ibeg_next] )
16189               ;    # shouldn't happen; just checking
16190         }
16191         elsif ($is_opening_token{$token_end}
16192             && $is_opening_token{$token_beg_next} )
16193         {
16194             $stackable = $stack_opening_token{$token_beg_next}
16195               unless ( $block_type_to_go[$ibeg_next] )
16196               ;    # shouldn't happen; just checking
16197         }
16198
16199         if ($stackable) {
16200
16201             my $is_semicolon_terminated;
16202             if ( $n + 1 == $n_last_line ) {
16203                 my ( $terminal_type, $i_terminal ) = terminal_type(
16204                     \@types_to_go, \@block_type_to_go,
16205                     $ibeg_next,    $iend_next
16206                 );
16207                 $is_semicolon_terminated = $terminal_type eq ';'
16208                   && $nesting_depth_to_go[$iend_next] <
16209                   $nesting_depth_to_go[$ibeg_next];
16210             }
16211
16212             # this must be a line with just an opening token
16213             # or end in a semicolon
16214             if (
16215                 $is_semicolon_terminated
16216                 || (   $iend_next == $ibeg_next
16217                     || $iend_next == $ibeg_next + 2
16218                     && $types_to_go[$iend_next] eq '#' )
16219               )
16220             {
16221                 my $valid_flag = 1;
16222                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
16223                 @{$rvertical_tightness_flags} =
16224                   ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
16225                   );
16226             }
16227         }
16228     }
16229
16230     #--------------------------------------------------------------
16231     # Vertical Tightness Flags Section 2:
16232     # Handle type 3, opening block braces on last line of the batch
16233     # Check for a last line with isolated opening BLOCK curly
16234     #--------------------------------------------------------------
16235     elsif ($rOpts_block_brace_vertical_tightness
16236         && $ibeg eq $iend
16237         && $types_to_go[$iend] eq '{'
16238         && $block_type_to_go[$iend] =~
16239         /$block_brace_vertical_tightness_pattern/o )
16240     {
16241         @{$rvertical_tightness_flags} =
16242           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
16243     }
16244
16245     #--------------------------------------------------------------
16246     # Vertical Tightness Flags Section 3:
16247     # Handle type 4, a closing block brace on the last line of the batch Check
16248     # for a last line with isolated closing BLOCK curly
16249     #--------------------------------------------------------------
16250     elsif ($rOpts_stack_closing_block_brace
16251         && $ibeg eq $iend
16252         && $block_type_to_go[$iend]
16253         && $types_to_go[$iend] eq '}' )
16254     {
16255         my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
16256         @{$rvertical_tightness_flags} =
16257           ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
16258     }
16259
16260     # pack in the sequence numbers of the ends of this line
16261     $rvertical_tightness_flags->[4] = get_seqno($ibeg);
16262     $rvertical_tightness_flags->[5] = get_seqno($iend);
16263     return $rvertical_tightness_flags;
16264 }
16265
16266 sub get_seqno {
16267
16268     # get opening and closing sequence numbers of a token for the vertical
16269     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
16270     # to be treated somewhat like opening and closing tokens for stacking
16271     # tokens by the vertical aligner.
16272     my ($ii) = @_;
16273     my $seqno = $type_sequence_to_go[$ii];
16274     if ( $types_to_go[$ii] eq 'q' ) {
16275         my $SEQ_QW = -1;
16276         if ( $ii > 0 ) {
16277             $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
16278         }
16279         else {
16280             if ( !$ending_in_quote ) {
16281                 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
16282             }
16283         }
16284     }
16285     return ($seqno);
16286 }
16287
16288 {
16289     my %is_vertical_alignment_type;
16290     my %is_vertical_alignment_keyword;
16291     my %is_terminal_alignment_type;
16292
16293     BEGIN {
16294
16295         my @q;
16296
16297         # Removed =~ from list to improve chances of alignment
16298         # Removed // from list to improve chances of alignment (RT# 119588)
16299         @q = qw#
16300           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
16301           { ? : => && || ~~ !~~
16302           #;
16303         @is_vertical_alignment_type{@q} = (1) x scalar(@q);
16304
16305         # only align these at end of line
16306         @q = qw(&& ||);
16307         @is_terminal_alignment_type{@q} = (1) x scalar(@q);
16308
16309         # eq and ne were removed from this list to improve alignment chances
16310         @q = qw(if unless and or err for foreach while until);
16311         @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
16312     }
16313
16314     sub set_vertical_alignment_markers {
16315
16316         # This routine takes the first step toward vertical alignment of the
16317         # lines of output text.  It looks for certain tokens which can serve as
16318         # vertical alignment markers (such as an '=').
16319         #
16320         # Method: We look at each token $i in this output batch and set
16321         # $matching_token_to_go[$i] equal to those tokens at which we would
16322         # accept vertical alignment.
16323
16324         my ( $ri_first, $ri_last ) = @_;
16325
16326         # nothing to do if we aren't allowed to change whitespace
16327         if ( !$rOpts_add_whitespace ) {
16328             for my $i ( 0 .. $max_index_to_go ) {
16329                 $matching_token_to_go[$i] = '';
16330             }
16331             return;
16332         }
16333
16334         # remember the index of last nonblank token before any sidecomment
16335         my $i_terminal = $max_index_to_go;
16336         if ( $types_to_go[$i_terminal] eq '#' ) {
16337             if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
16338                 if ( $i_terminal > 0 ) { --$i_terminal }
16339             }
16340         }
16341
16342         # look at each line of this batch..
16343         my $last_vertical_alignment_before_index;
16344         my $vert_last_nonblank_type;
16345         my $vert_last_nonblank_token;
16346         my $vert_last_nonblank_block_type;
16347         my $max_line = @{$ri_first} - 1;
16348
16349         foreach my $line ( 0 .. $max_line ) {
16350             my $ibeg = $ri_first->[$line];
16351             my $iend = $ri_last->[$line];
16352             $last_vertical_alignment_before_index = -1;
16353             $vert_last_nonblank_type              = '';
16354             $vert_last_nonblank_token             = '';
16355             $vert_last_nonblank_block_type        = '';
16356
16357             # look at each token in this output line..
16358             foreach my $i ( $ibeg .. $iend ) {
16359                 my $alignment_type = '';
16360                 my $type           = $types_to_go[$i];
16361                 my $block_type     = $block_type_to_go[$i];
16362                 my $token          = $tokens_to_go[$i];
16363
16364                 # check for flag indicating that we should not align
16365                 # this token
16366                 if ( $matching_token_to_go[$i] ) {
16367                     $matching_token_to_go[$i] = '';
16368                     next;
16369                 }
16370
16371                 #--------------------------------------------------------
16372                 # First see if we want to align BEFORE this token
16373                 #--------------------------------------------------------
16374
16375                 # The first possible token that we can align before
16376                 # is index 2 because: 1) it doesn't normally make sense to
16377                 # align before the first token and 2) the second
16378                 # token must be a blank if we are to align before
16379                 # the third
16380                 if ( $i < $ibeg + 2 ) { }
16381
16382                 # must follow a blank token
16383                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
16384
16385                 # align a side comment --
16386                 elsif ( $type eq '#' ) {
16387
16388                     unless (
16389
16390                         # it is a static side comment
16391                         (
16392                                $rOpts->{'static-side-comments'}
16393                             && $token =~ /$static_side_comment_pattern/o
16394                         )
16395
16396                         # or a closing side comment
16397                         || (   $vert_last_nonblank_block_type
16398                             && $token =~
16399                             /$closing_side_comment_prefix_pattern/o )
16400                       )
16401                     {
16402                         $alignment_type = $type;
16403                     }    ## Example of a static side comment
16404                 }
16405
16406                 # otherwise, do not align two in a row to create a
16407                 # blank field
16408                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
16409
16410                 # align before one of these keywords
16411                 # (within a line, since $i>1)
16412                 elsif ( $type eq 'k' ) {
16413
16414                     #  /^(if|unless|and|or|eq|ne)$/
16415                     if ( $is_vertical_alignment_keyword{$token} ) {
16416                         $alignment_type = $token;
16417                     }
16418                 }
16419
16420                 # align before one of these types..
16421                 # Note: add '.' after new vertical aligner is operational
16422                 elsif ( $is_vertical_alignment_type{$type} ) {
16423                     $alignment_type = $token;
16424
16425                     # Do not align a terminal token.  Although it might
16426                     # occasionally look ok to do this, this has been found to be
16427                     # a good general rule.  The main problems are:
16428                     # (1) that the terminal token (such as an = or :) might get
16429                     # moved far to the right where it is hard to see because
16430                     # nothing follows it, and
16431                     # (2) doing so may prevent other good alignments.
16432                     # Current exceptions are && and ||
16433                     if ( $i == $iend || $i >= $i_terminal ) {
16434                         $alignment_type = ""
16435                           unless ( $is_terminal_alignment_type{$type} );
16436                     }
16437
16438                     # Do not align leading ': (' or '. ('.  This would prevent
16439                     # alignment in something like the following:
16440                     #   $extra_space .=
16441                     #       ( $input_line_number < 10 )  ? "  "
16442                     #     : ( $input_line_number < 100 ) ? " "
16443                     #     :                                "";
16444                     # or
16445                     #  $code =
16446                     #      ( $case_matters ? $accessor : " lc($accessor) " )
16447                     #    . ( $yesno        ? " eq "       : " ne " )
16448                     if (   $i == $ibeg + 2
16449                         && $types_to_go[$ibeg] =~ /^[\.\:]$/
16450                         && $types_to_go[ $i - 1 ] eq 'b' )
16451                     {
16452                         $alignment_type = "";
16453                     }
16454
16455                     # For a paren after keyword, only align something like this:
16456                     #    if    ( $a ) { &a }
16457                     #    elsif ( $b ) { &b }
16458                     if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
16459                         $alignment_type = ""
16460                           unless $vert_last_nonblank_token =~
16461                           /^(if|unless|elsif)$/;
16462                     }
16463
16464                     # be sure the alignment tokens are unique
16465                     # This didn't work well: reason not determined
16466                     # if ($token ne $type) {$alignment_type .= $type}
16467                 }
16468
16469                 # NOTE: This is deactivated because it causes the previous
16470                 # if/elsif alignment to fail
16471                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
16472                 #{ $alignment_type = $type; }
16473
16474                 if ($alignment_type) {
16475                     $last_vertical_alignment_before_index = $i;
16476                 }
16477
16478                 #--------------------------------------------------------
16479                 # Next see if we want to align AFTER the previous nonblank
16480                 #--------------------------------------------------------
16481
16482                 # We want to line up ',' and interior ';' tokens, with the added
16483                 # space AFTER these tokens.  (Note: interior ';' is included
16484                 # because it may occur in short blocks).
16485                 if (
16486
16487                     # we haven't already set it
16488                     !$alignment_type
16489
16490                     # and its not the first token of the line
16491                     && ( $i > $ibeg )
16492
16493                     # and it follows a blank
16494                     && $types_to_go[ $i - 1 ] eq 'b'
16495
16496                     # and previous token IS one of these:
16497                     && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
16498
16499                     # and it's NOT one of these
16500                     && ( $type !~ /^[b\#\)\]\}]$/ )
16501
16502                     # then go ahead and align
16503                   )
16504
16505                 {
16506                     $alignment_type = $vert_last_nonblank_type;
16507                 }
16508
16509                 #--------------------------------------------------------
16510                 # then store the value
16511                 #--------------------------------------------------------
16512                 $matching_token_to_go[$i] = $alignment_type;
16513                 if ( $type ne 'b' ) {
16514                     $vert_last_nonblank_type       = $type;
16515                     $vert_last_nonblank_token      = $token;
16516                     $vert_last_nonblank_block_type = $block_type;
16517                 }
16518             }
16519         }
16520         return;
16521     }
16522 }
16523
16524 sub terminal_type {
16525
16526     #    returns type of last token on this line (terminal token), as follows:
16527     #    returns # for a full-line comment
16528     #    returns ' ' for a blank line
16529     #    otherwise returns final token type
16530
16531     my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
16532
16533     # check for full-line comment..
16534     if ( $rtype->[$ibeg] eq '#' ) {
16535         return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg];
16536     }
16537     else {
16538
16539         # start at end and walk backwards..
16540         for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
16541
16542             # skip past any side comment and blanks
16543             next if ( $rtype->[$i] eq 'b' );
16544             next if ( $rtype->[$i] eq '#' );
16545
16546             # found it..make sure it is a BLOCK termination,
16547             # but hide a terminal } after sort/grep/map because it is not
16548             # necessarily the end of the line.  (terminal.t)
16549             my $terminal_type = $rtype->[$i];
16550             if (
16551                 $terminal_type eq '}'
16552                 && ( !$rblock_type->[$i]
16553                     || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) )
16554               )
16555             {
16556                 $terminal_type = 'b';
16557             }
16558             return wantarray ? ( $terminal_type, $i ) : $terminal_type;
16559         }
16560
16561         # empty line
16562         return wantarray ? ( ' ', $ibeg ) : ' ';
16563     }
16564 }
16565
16566 {    # set_bond_strengths
16567
16568     my %is_good_keyword_breakpoint;
16569     my %is_lt_gt_le_ge;
16570
16571     my %binary_bond_strength;
16572     my %nobreak_lhs;
16573     my %nobreak_rhs;
16574
16575     my @bias_tokens;
16576     my $delta_bias;
16577
16578     sub bias_table_key {
16579         my ( $type, $token ) = @_;
16580         my $bias_table_key = $type;
16581         if ( $type eq 'k' ) {
16582             $bias_table_key = $token;
16583             if ( $token eq 'err' ) { $bias_table_key = 'or' }
16584         }
16585         return $bias_table_key;
16586     }
16587
16588     sub set_bond_strengths {
16589
16590         BEGIN {
16591
16592             my @q;
16593             @q = qw(if unless while until for foreach);
16594             @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
16595
16596             @q = qw(lt gt le ge);
16597             @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
16598             #
16599             # The decision about where to break a line depends upon a "bond
16600             # strength" between tokens.  The LOWER the bond strength, the MORE
16601             # likely a break.  A bond strength may be any value but to simplify
16602             # things there are several pre-defined strength levels:
16603
16604             #    NO_BREAK    => 10000;
16605             #    VERY_STRONG => 100;
16606             #    STRONG      => 2.1;
16607             #    NOMINAL     => 1.1;
16608             #    WEAK        => 0.8;
16609             #    VERY_WEAK   => 0.55;
16610
16611             # The strength values are based on trial-and-error, and need to be
16612             # tweaked occasionally to get desired results.  Some comments:
16613             #
16614             #   1. Only relative strengths are important.  small differences
16615             #      in strengths can make big formatting differences.
16616             #   2. Each indentation level adds one unit of bond strength.
16617             #   3. A value of NO_BREAK makes an unbreakable bond
16618             #   4. A value of VERY_WEAK is the strength of a ','
16619             #   5. Values below NOMINAL are considered ok break points.
16620             #   6. Values above NOMINAL are considered poor break points.
16621             #
16622             # The bond strengths should roughly follow precedence order where
16623             # possible.  If you make changes, please check the results very
16624             # carefully on a variety of scripts.  Testing with the -extrude
16625             # options is particularly helpful in exercising all of the rules.
16626
16627             # Wherever possible, bond strengths are defined in the following
16628             # tables.  There are two main stages to setting bond strengths and
16629             # two types of tables:
16630             #
16631             # The first stage involves looking at each token individually and
16632             # defining left and right bond strengths, according to if we want
16633             # to break to the left or right side, and how good a break point it
16634             # is.  For example tokens like =, ||, && make good break points and
16635             # will have low strengths, but one might want to break on either
16636             # side to put them at the end of one line or beginning of the next.
16637             #
16638             # The second stage involves looking at certain pairs of tokens and
16639             # defining a bond strength for that particular pair.  This second
16640             # stage has priority.
16641
16642             #---------------------------------------------------------------
16643             # Bond Strength BEGIN Section 1.
16644             # Set left and right bond strengths of individual tokens.
16645             #---------------------------------------------------------------
16646
16647             # NOTE: NO_BREAK's set in this section first are HINTS which will
16648             # probably not be honored. Essential NO_BREAKS's should be set in
16649             # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
16650             # of this subroutine.
16651
16652             # Note that we are setting defaults in this section.  The user
16653             # cannot change bond strengths but can cause the left and right
16654             # bond strengths of any token type to be swapped through the use of
16655             # the -wba and -wbb flags. In this way the user can determine if a
16656             # breakpoint token should appear at the end of one line or the
16657             # beginning of the next line.
16658
16659             # The hash keys in this section are token types, plus the text of
16660             # certain keywords like 'or', 'and'.
16661
16662             # no break around possible filehandle
16663             $left_bond_strength{'Z'}  = NO_BREAK;
16664             $right_bond_strength{'Z'} = NO_BREAK;
16665
16666             # never put a bare word on a new line:
16667             # example print (STDERR, "bla"); will fail with break after (
16668             $left_bond_strength{'w'} = NO_BREAK;
16669
16670             # blanks always have infinite strength to force breaks after
16671             # real tokens
16672             $right_bond_strength{'b'} = NO_BREAK;
16673
16674             # try not to break on exponentation
16675             @q                       = qw" ** .. ... <=> ";
16676             @left_bond_strength{@q}  = (STRONG) x scalar(@q);
16677             @right_bond_strength{@q} = (STRONG) x scalar(@q);
16678
16679             # The comma-arrow has very low precedence but not a good break point
16680             $left_bond_strength{'=>'}  = NO_BREAK;
16681             $right_bond_strength{'=>'} = NOMINAL;
16682
16683             # ok to break after label
16684             $left_bond_strength{'J'}  = NO_BREAK;
16685             $right_bond_strength{'J'} = NOMINAL;
16686             $left_bond_strength{'j'}  = STRONG;
16687             $right_bond_strength{'j'} = STRONG;
16688             $left_bond_strength{'A'}  = STRONG;
16689             $right_bond_strength{'A'} = STRONG;
16690
16691             $left_bond_strength{'->'}  = STRONG;
16692             $right_bond_strength{'->'} = VERY_STRONG;
16693
16694             $left_bond_strength{'CORE::'}  = NOMINAL;
16695             $right_bond_strength{'CORE::'} = NO_BREAK;
16696
16697             # breaking AFTER modulus operator is ok:
16698             @q = qw" % ";
16699             @left_bond_strength{@q} = (STRONG) x scalar(@q);
16700             @right_bond_strength{@q} =
16701               ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
16702
16703             # Break AFTER math operators * and /
16704             @q                       = qw" * / x  ";
16705             @left_bond_strength{@q}  = (STRONG) x scalar(@q);
16706             @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
16707
16708             # Break AFTER weakest math operators + and -
16709             # Make them weaker than * but a bit stronger than '.'
16710             @q = qw" + - ";
16711             @left_bond_strength{@q} = (STRONG) x scalar(@q);
16712             @right_bond_strength{@q} =
16713               ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
16714
16715             # breaking BEFORE these is just ok:
16716             @q                       = qw" >> << ";
16717             @right_bond_strength{@q} = (STRONG) x scalar(@q);
16718             @left_bond_strength{@q}  = (NOMINAL) x scalar(@q);
16719
16720             # breaking before the string concatenation operator seems best
16721             # because it can be hard to see at the end of a line
16722             $right_bond_strength{'.'} = STRONG;
16723             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
16724
16725             @q                       = qw"} ] ) R";
16726             @left_bond_strength{@q}  = (STRONG) x scalar(@q);
16727             @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
16728
16729             # make these a little weaker than nominal so that they get
16730             # favored for end-of-line characters
16731             @q = qw"!= == =~ !~ ~~ !~~";
16732             @left_bond_strength{@q} = (STRONG) x scalar(@q);
16733             @right_bond_strength{@q} =
16734               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
16735
16736             # break AFTER these
16737             @q = qw" < >  | & >= <=";
16738             @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
16739             @right_bond_strength{@q} =
16740               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
16741
16742             # breaking either before or after a quote is ok
16743             # but bias for breaking before a quote
16744             $left_bond_strength{'Q'}  = NOMINAL;
16745             $right_bond_strength{'Q'} = NOMINAL + 0.02;
16746             $left_bond_strength{'q'}  = NOMINAL;
16747             $right_bond_strength{'q'} = NOMINAL;
16748
16749             # starting a line with a keyword is usually ok
16750             $left_bond_strength{'k'} = NOMINAL;
16751
16752             # we usually want to bond a keyword strongly to what immediately
16753             # follows, rather than leaving it stranded at the end of a line
16754             $right_bond_strength{'k'} = STRONG;
16755
16756             $left_bond_strength{'G'}  = NOMINAL;
16757             $right_bond_strength{'G'} = STRONG;
16758
16759             # assignment operators
16760             @q = qw(
16761               = **= += *= &= <<= &&=
16762               -= /= |= >>= ||= //=
16763               .= %= ^=
16764               x=
16765             );
16766
16767             # Default is to break AFTER various assignment operators
16768             @left_bond_strength{@q} = (STRONG) x scalar(@q);
16769             @right_bond_strength{@q} =
16770               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
16771
16772             # Default is to break BEFORE '&&' and '||' and '//'
16773             # set strength of '||' to same as '=' so that chains like
16774             # $a = $b || $c || $d   will break before the first '||'
16775             $right_bond_strength{'||'} = NOMINAL;
16776             $left_bond_strength{'||'}  = $right_bond_strength{'='};
16777
16778             # same thing for '//'
16779             $right_bond_strength{'//'} = NOMINAL;
16780             $left_bond_strength{'//'}  = $right_bond_strength{'='};
16781
16782             # set strength of && a little higher than ||
16783             $right_bond_strength{'&&'} = NOMINAL;
16784             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
16785
16786             $left_bond_strength{';'}  = VERY_STRONG;
16787             $right_bond_strength{';'} = VERY_WEAK;
16788             $left_bond_strength{'f'}  = VERY_STRONG;
16789
16790             # make right strength of for ';' a little less than '='
16791             # to make for contents break after the ';' to avoid this:
16792             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
16793             #     $number_of_fields )
16794             # and make it weaker than ',' and 'and' too
16795             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
16796
16797             # The strengths of ?/: should be somewhere between
16798             # an '=' and a quote (NOMINAL),
16799             # make strength of ':' slightly less than '?' to help
16800             # break long chains of ? : after the colons
16801             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
16802             $right_bond_strength{':'} = NO_BREAK;
16803             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
16804             $right_bond_strength{'?'} = NO_BREAK;
16805
16806             $left_bond_strength{','}  = VERY_STRONG;
16807             $right_bond_strength{','} = VERY_WEAK;
16808
16809             # remaining digraphs and trigraphs not defined above
16810             @q                       = qw( :: <> ++ --);
16811             @left_bond_strength{@q}  = (WEAK) x scalar(@q);
16812             @right_bond_strength{@q} = (STRONG) x scalar(@q);
16813
16814             # Set bond strengths of certain keywords
16815             # make 'or', 'err', 'and' slightly weaker than a ','
16816             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
16817             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
16818             $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
16819             $left_bond_strength{'xor'}  = NOMINAL;
16820             $right_bond_strength{'and'} = NOMINAL;
16821             $right_bond_strength{'or'}  = NOMINAL;
16822             $right_bond_strength{'err'} = NOMINAL;
16823             $right_bond_strength{'xor'} = STRONG;
16824
16825             #---------------------------------------------------------------
16826             # Bond Strength BEGIN Section 2.
16827             # Set binary rules for bond strengths between certain token types.
16828             #---------------------------------------------------------------
16829
16830             #  We have a little problem making tables which apply to the
16831             #  container tokens.  Here is a list of container tokens and
16832             #  their types:
16833             #
16834             #   type    tokens // meaning
16835             #      {    {, [, ( // indent
16836             #      }    }, ], ) // outdent
16837             #      [    [ // left non-structural [ (enclosing an array index)
16838             #      ]    ] // right non-structural square bracket
16839             #      (    ( // left non-structural paren
16840             #      )    ) // right non-structural paren
16841             #      L    { // left non-structural curly brace (enclosing a key)
16842             #      R    } // right non-structural curly brace
16843             #
16844             #  Some rules apply to token types and some to just the token
16845             #  itself.  We solve the problem by combining type and token into a
16846             #  new hash key for the container types.
16847             #
16848             #  If a rule applies to a token 'type' then we need to make rules
16849             #  for each of these 'type.token' combinations:
16850             #  Type    Type.Token
16851             #  {       {{, {[, {(
16852             #  [       [[
16853             #  (       ((
16854             #  L       L{
16855             #  }       }}, }], })
16856             #  ]       ]]
16857             #  )       ))
16858             #  R       R}
16859             #
16860             #  If a rule applies to a token then we need to make rules for
16861             #  these 'type.token' combinations:
16862             #  Token   Type.Token
16863             #  {       {{, L{
16864             #  [       {[, [[
16865             #  (       {(, ((
16866             #  }       }}, R}
16867             #  ]       }], ]]
16868             #  )       }), ))
16869
16870             # allow long lines before final { in an if statement, as in:
16871             #    if (..........
16872             #      ..........)
16873             #    {
16874             #
16875             # Otherwise, the line before the { tends to be too short.
16876
16877             $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
16878             $binary_bond_strength{'(('}{'{{'} = NOMINAL;
16879
16880             # break on something like '} (', but keep this stronger than a ','
16881             # example is in 'howe.pl'
16882             $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
16883             $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
16884
16885             # keep matrix and hash indices together
16886             # but make them a little below STRONG to allow breaking open
16887             # something like {'some-word'}{'some-very-long-word'} at the }{
16888             # (bracebrk.t)
16889             $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
16890             $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
16891             $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
16892             $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
16893
16894             # increase strength to the point where a break in the following
16895             # will be after the opening paren rather than at the arrow:
16896             #    $a->$b($c);
16897             $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
16898
16899             $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16900             $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16901             $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16902             $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16903             $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16904             $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
16905
16906             $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
16907             $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
16908             $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
16909             $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
16910
16911             #---------------------------------------------------------------
16912             # Binary NO_BREAK rules
16913             #---------------------------------------------------------------
16914
16915             # use strict requires that bare word and => not be separated
16916             $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
16917             $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
16918
16919             # Never break between a bareword and a following paren because
16920             # perl may give an error.  For example, if a break is placed
16921             # between 'to_filehandle' and its '(' the following line will
16922             # give a syntax error [Carp.pm]: my( $no) =fileno(
16923             # to_filehandle( $in)) ;
16924             $binary_bond_strength{'C'}{'(('} = NO_BREAK;
16925             $binary_bond_strength{'C'}{'{('} = NO_BREAK;
16926             $binary_bond_strength{'U'}{'(('} = NO_BREAK;
16927             $binary_bond_strength{'U'}{'{('} = NO_BREAK;
16928
16929             # use strict requires that bare word within braces not start new
16930             # line
16931             $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
16932
16933             $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
16934
16935             # use strict requires that bare word and => not be separated
16936             $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
16937
16938             # use strict does not allow separating type info from trailing { }
16939             # testfile is readmail.pl
16940             $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
16941             $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
16942
16943             # As a defensive measure, do not break between a '(' and a
16944             # filehandle.  In some cases, this can cause an error.  For
16945             # example, the following program works:
16946             #    my $msg="hi!\n";
16947             #    print
16948             #    ( STDOUT
16949             #    $msg
16950             #    );
16951             #
16952             # But this program fails:
16953             #    my $msg="hi!\n";
16954             #    print
16955             #    (
16956             #    STDOUT
16957             #    $msg
16958             #    );
16959             #
16960             # This is normally only a problem with the 'extrude' option
16961             $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
16962             $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
16963
16964             # never break between sub name and opening paren
16965             $binary_bond_strength{'w'}{'(('} = NO_BREAK;
16966             $binary_bond_strength{'w'}{'{('} = NO_BREAK;
16967
16968             # keep '}' together with ';'
16969             $binary_bond_strength{'}}'}{';'} = NO_BREAK;
16970
16971             # Breaking before a ++ can cause perl to guess wrong. For
16972             # example the following line will cause a syntax error
16973             # with -extrude if we break between '$i' and '++' [fixstyle2]
16974             #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
16975             $nobreak_lhs{'++'} = NO_BREAK;
16976
16977             # Do not break before a possible file handle
16978             $nobreak_lhs{'Z'} = NO_BREAK;
16979
16980             # use strict hates bare words on any new line.  For
16981             # example, a break before the underscore here provokes the
16982             # wrath of use strict:
16983             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
16984             $nobreak_rhs{'F'}      = NO_BREAK;
16985             $nobreak_rhs{'CORE::'} = NO_BREAK;
16986
16987             #---------------------------------------------------------------
16988             # Bond Strength BEGIN Section 3.
16989             # Define tables and values for applying a small bias to the above
16990             # values.
16991             #---------------------------------------------------------------
16992             # Adding a small 'bias' to strengths is a simple way to make a line
16993             # break at the first of a sequence of identical terms.  For
16994             # example, to force long string of conditional operators to break
16995             # with each line ending in a ':', we can add a small number to the
16996             # bond strength of each ':' (colon.t)
16997             @bias_tokens = qw( : && || f and or . );    # tokens which get bias
16998             $delta_bias = 0.0001;    # a very small strength level
16999
17000         } ## end BEGIN
17001
17002         # patch-its always ok to break at end of line
17003         $nobreak_to_go[$max_index_to_go] = 0;
17004
17005         # we start a new set of bias values for each line
17006         my %bias;
17007         @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
17008         my $code_bias = -.01;        # bias for closing block braces
17009
17010         my $type  = 'b';
17011         my $token = ' ';
17012         my $last_type;
17013         my $last_nonblank_type  = $type;
17014         my $last_nonblank_token = $token;
17015         my $list_str            = $left_bond_strength{'?'};
17016
17017         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
17018             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
17019         );
17020
17021         # main loop to compute bond strengths between each pair of tokens
17022         foreach my $i ( 0 .. $max_index_to_go ) {
17023             $last_type = $type;
17024             if ( $type ne 'b' ) {
17025                 $last_nonblank_type  = $type;
17026                 $last_nonblank_token = $token;
17027             }
17028             $type = $types_to_go[$i];
17029
17030             # strength on both sides of a blank is the same
17031             if ( $type eq 'b' && $last_type ne 'b' ) {
17032                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
17033                 next;
17034             }
17035
17036             $token               = $tokens_to_go[$i];
17037             $block_type          = $block_type_to_go[$i];
17038             $i_next              = $i + 1;
17039             $next_type           = $types_to_go[$i_next];
17040             $next_token          = $tokens_to_go[$i_next];
17041             $total_nesting_depth = $nesting_depth_to_go[$i_next];
17042             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
17043             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17044             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17045
17046             # We are computing the strength of the bond between the current
17047             # token and the NEXT token.
17048
17049             #---------------------------------------------------------------
17050             # Bond Strength Section 1:
17051             # First Approximation.
17052             # Use minimum of individual left and right tabulated bond
17053             # strengths.
17054             #---------------------------------------------------------------
17055             my $bsr = $right_bond_strength{$type};
17056             my $bsl = $left_bond_strength{$next_nonblank_type};
17057
17058             # define right bond strengths of certain keywords
17059             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
17060                 $bsr = $right_bond_strength{$token};
17061             }
17062             elsif ( $token eq 'ne' or $token eq 'eq' ) {
17063                 $bsr = NOMINAL;
17064             }
17065
17066             # set terminal bond strength to the nominal value
17067             # this will cause good preceding breaks to be retained
17068             if ( $i_next_nonblank > $max_index_to_go ) {
17069                 $bsl = NOMINAL;
17070             }
17071
17072             # define right bond strengths of certain keywords
17073             if ( $next_nonblank_type eq 'k'
17074                 && defined( $left_bond_strength{$next_nonblank_token} ) )
17075             {
17076                 $bsl = $left_bond_strength{$next_nonblank_token};
17077             }
17078             elsif ($next_nonblank_token eq 'ne'
17079                 or $next_nonblank_token eq 'eq' )
17080             {
17081                 $bsl = NOMINAL;
17082             }
17083             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
17084                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
17085             }
17086
17087             # Use the minimum of the left and right strengths.  Note: it might
17088             # seem that we would want to keep a NO_BREAK if either token has
17089             # this value.  This didn't work, for example because in an arrow
17090             # list, it prevents the comma from separating from the following
17091             # bare word (which is probably quoted by its arrow).  So necessary
17092             # NO_BREAK's have to be handled as special cases in the final
17093             # section.
17094             if ( !defined($bsr) ) { $bsr = VERY_STRONG }
17095             if ( !defined($bsl) ) { $bsl = VERY_STRONG }
17096             my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
17097             my $bond_str_1 = $bond_str;
17098
17099             #---------------------------------------------------------------
17100             # Bond Strength Section 2:
17101             # Apply hardwired rules..
17102             #---------------------------------------------------------------
17103
17104             # Patch to put terminal or clauses on a new line: Weaken the bond
17105             # at an || followed by die or similar keyword to make the terminal
17106             # or clause fall on a new line, like this:
17107             #
17108             #   my $class = shift
17109             #     || die "Cannot add broadcast:  No class identifier found";
17110             #
17111             # Otherwise the break will be at the previous '=' since the || and
17112             # = have the same starting strength and the or is biased, like
17113             # this:
17114             #
17115             # my $class =
17116             #   shift || die "Cannot add broadcast:  No class identifier found";
17117             #
17118             # In any case if the user places a break at either the = or the ||
17119             # it should remain there.
17120             if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
17121                 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
17122                     if ( $want_break_before{$token} && $i > 0 ) {
17123                         $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
17124                     }
17125                     else {
17126                         $bond_str -= $delta_bias;
17127                     }
17128                 }
17129             }
17130
17131             # good to break after end of code blocks
17132             if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
17133
17134                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
17135                 $code_bias += $delta_bias;
17136             }
17137
17138             if ( $type eq 'k' ) {
17139
17140                 # allow certain control keywords to stand out
17141                 if (   $next_nonblank_type eq 'k'
17142                     && $is_last_next_redo_return{$token} )
17143                 {
17144                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
17145                 }
17146
17147                 # Don't break after keyword my.  This is a quick fix for a
17148                 # rare problem with perl. An example is this line from file
17149                 # Container.pm:
17150
17151                 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
17152                 # $this->{'question'} ) )
17153
17154                 if ( $token eq 'my' ) {
17155                     $bond_str = NO_BREAK;
17156                 }
17157
17158             }
17159
17160             # good to break before 'if', 'unless', etc
17161             if ( $is_if_brace_follower{$next_nonblank_token} ) {
17162                 $bond_str = VERY_WEAK;
17163             }
17164
17165             if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
17166
17167                 # FIXME: needs more testing
17168                 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
17169                     $bond_str = $list_str if ( $bond_str > $list_str );
17170                 }
17171
17172                 # keywords like 'unless', 'if', etc, within statements
17173                 # make good breaks
17174                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
17175                     $bond_str = VERY_WEAK / 1.05;
17176                 }
17177             }
17178
17179             # try not to break before a comma-arrow
17180             elsif ( $next_nonblank_type eq '=>' ) {
17181                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
17182             }
17183
17184             #---------------------------------------------------------------
17185             # Additional hardwired NOBREAK rules
17186             #---------------------------------------------------------------
17187
17188             # map1.t -- correct for a quirk in perl
17189             if (   $token eq '('
17190                 && $next_nonblank_type eq 'i'
17191                 && $last_nonblank_type eq 'k'
17192                 && $is_sort_map_grep{$last_nonblank_token} )
17193
17194               #     /^(sort|map|grep)$/ )
17195             {
17196                 $bond_str = NO_BREAK;
17197             }
17198
17199             # extrude.t: do not break before paren at:
17200             #    -l pid_filename(
17201             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
17202                 $bond_str = NO_BREAK;
17203             }
17204
17205             # in older version of perl, use strict can cause problems with
17206             # breaks before bare words following opening parens.  For example,
17207             # this will fail under older versions if a break is made between
17208             # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
17209             # command"); close MAIL;
17210             if ( $type eq '{' ) {
17211
17212                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
17213
17214                     # but it's fine to break if the word is followed by a '=>'
17215                     # or if it is obviously a sub call
17216                     my $i_next_next_nonblank = $i_next_nonblank + 1;
17217                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
17218                     if (   $next_next_type eq 'b'
17219                         && $i_next_nonblank < $max_index_to_go )
17220                     {
17221                         $i_next_next_nonblank++;
17222                         $next_next_type = $types_to_go[$i_next_next_nonblank];
17223                     }
17224
17225                     # We'll check for an old breakpoint and keep a leading
17226                     # bareword if it was that way in the input file.
17227                     # Presumably it was ok that way.  For example, the
17228                     # following would remain unchanged:
17229                     #
17230                     # @months = (
17231                     #   January,   February, March,    April,
17232                     #   May,       June,     July,     August,
17233                     #   September, October,  November, December,
17234                     # );
17235                     #
17236                     # This should be sufficient:
17237                     if (
17238                         !$old_breakpoint_to_go[$i]
17239                         && (   $next_next_type eq ','
17240                             || $next_next_type eq '}' )
17241                       )
17242                     {
17243                         $bond_str = NO_BREAK;
17244                     }
17245                 }
17246             }
17247
17248             # Do not break between a possible filehandle and a ? or / and do
17249             # not introduce a break after it if there is no blank
17250             # (extrude.t)
17251             elsif ( $type eq 'Z' ) {
17252
17253                 # don't break..
17254                 if (
17255
17256                     # if there is no blank and we do not want one. Examples:
17257                     #    print $x++    # do not break after $x
17258                     #    print HTML"HELLO"   # break ok after HTML
17259                     (
17260                            $next_type ne 'b'
17261                         && defined( $want_left_space{$next_type} )
17262                         && $want_left_space{$next_type} == WS_NO
17263                     )
17264
17265                     # or we might be followed by the start of a quote
17266                     || $next_nonblank_type =~ /^[\/\?]$/
17267                   )
17268                 {
17269                     $bond_str = NO_BREAK;
17270                 }
17271             }
17272
17273             # Breaking before a ? before a quote can cause trouble if
17274             # they are not separated by a blank.
17275             # Example: a syntax error occurs if you break before the ? here
17276             #  my$logic=join$all?' && ':' || ',@regexps;
17277             # From: Professional_Perl_Programming_Code/multifind.pl
17278             if ( $next_nonblank_type eq '?' ) {
17279                 $bond_str = NO_BREAK
17280                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
17281             }
17282
17283             # Breaking before a . followed by a number
17284             # can cause trouble if there is no intervening space
17285             # Example: a syntax error occurs if you break before the .2 here
17286             #  $str .= pack($endian.2, ensurrogate($ord));
17287             # From: perl58/Unicode.pm
17288             elsif ( $next_nonblank_type eq '.' ) {
17289                 $bond_str = NO_BREAK
17290                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
17291             }
17292
17293             # patch to put cuddled elses back together when on multiple
17294             # lines, as in: } \n else \n { \n
17295             if ($rOpts_cuddled_else) {
17296
17297                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
17298                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
17299                 {
17300                     $bond_str = NO_BREAK;
17301                 }
17302             }
17303             my $bond_str_2 = $bond_str;
17304
17305             #---------------------------------------------------------------
17306             # End of hardwired rules
17307             #---------------------------------------------------------------
17308
17309             #---------------------------------------------------------------
17310             # Bond Strength Section 3:
17311             # Apply table rules. These have priority over the above
17312             # hardwired rules.
17313             #---------------------------------------------------------------
17314
17315             my $tabulated_bond_str;
17316             my $ltype = $type;
17317             my $rtype = $next_nonblank_type;
17318             if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
17319             if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
17320                 $rtype = $next_nonblank_type . $next_nonblank_token;
17321             }
17322
17323             if ( $binary_bond_strength{$ltype}{$rtype} ) {
17324                 $bond_str           = $binary_bond_strength{$ltype}{$rtype};
17325                 $tabulated_bond_str = $bond_str;
17326             }
17327
17328             if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
17329                 $bond_str           = NO_BREAK;
17330                 $tabulated_bond_str = $bond_str;
17331             }
17332             my $bond_str_3 = $bond_str;
17333
17334             # If the hardwired rules conflict with the tabulated bond
17335             # strength then there is an inconsistency that should be fixed
17336             FORMATTER_DEBUG_FLAG_BOND_TABLES
17337               && $tabulated_bond_str
17338               && $bond_str_1
17339               && $bond_str_1 != $bond_str_2
17340               && $bond_str_2 != $tabulated_bond_str
17341               && do {
17342                 print STDERR
17343 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
17344               };
17345
17346            #-----------------------------------------------------------------
17347            # Bond Strength Section 4:
17348            # Modify strengths of certain tokens which often occur in sequence
17349            # by adding a small bias to each one in turn so that the breaks
17350            # occur from left to right.
17351            #
17352            # Note that we only changing strengths by small amounts here,
17353            # and usually increasing, so we should not be altering any NO_BREAKs.
17354            # Other routines which check for NO_BREAKs will use a tolerance
17355            # of one to avoid any problem.
17356            #-----------------------------------------------------------------
17357
17358             # The bias tables use special keys
17359             my $left_key = bias_table_key( $type, $token );
17360             my $right_key =
17361               bias_table_key( $next_nonblank_type, $next_nonblank_token );
17362
17363             # add any bias set by sub scan_list at old comma break points.
17364             if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
17365
17366             # bias left token
17367             elsif ( defined( $bias{$left_key} ) ) {
17368                 if ( !$want_break_before{$left_key} ) {
17369                     $bias{$left_key} += $delta_bias;
17370                     $bond_str += $bias{$left_key};
17371                 }
17372             }
17373
17374             # bias right token
17375             if ( defined( $bias{$right_key} ) ) {
17376                 if ( $want_break_before{$right_key} ) {
17377
17378                     # for leading '.' align all but 'short' quotes; the idea
17379                     # is to not place something like "\n" on a single line.
17380                     if ( $right_key eq '.' ) {
17381                         unless (
17382                             $last_nonblank_type eq '.'
17383                             && (
17384                                 length($token) <=
17385                                 $rOpts_short_concatenation_item_length )
17386                             && ( !$is_closing_token{$token} )
17387                           )
17388                         {
17389                             $bias{$right_key} += $delta_bias;
17390                         }
17391                     }
17392                     else {
17393                         $bias{$right_key} += $delta_bias;
17394                     }
17395                     $bond_str += $bias{$right_key};
17396                 }
17397             }
17398             my $bond_str_4 = $bond_str;
17399
17400             #---------------------------------------------------------------
17401             # Bond Strength Section 5:
17402             # Fifth Approximation.
17403             # Take nesting depth into account by adding the nesting depth
17404             # to the bond strength.
17405             #---------------------------------------------------------------
17406             my $strength;
17407
17408             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
17409                 if ( $total_nesting_depth > 0 ) {
17410                     $strength = $bond_str + $total_nesting_depth;
17411                 }
17412                 else {
17413                     $strength = $bond_str;
17414                 }
17415             }
17416             else {
17417                 $strength = NO_BREAK;
17418             }
17419
17420             #---------------------------------------------------------------
17421             # Bond Strength Section 6:
17422             # Sixth Approximation. Welds.
17423             #---------------------------------------------------------------
17424
17425             # Do not allow a break within welds,
17426             if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
17427
17428             # But encourage breaking after opening welded tokens
17429             elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
17430                 $strength -= 1;
17431             }
17432
17433 ##          # TESTING: weaken before first weld closing token
17434 ##          # This did not help
17435 ##            elsif ($i_next_nonblank <= $max_index_to_go
17436 ##                && weld_len_right_to_go($i_next_nonblank)
17437 ##                && $next_nonblank_token =~ /^[\}\]\)]$/ )
17438 ##            {
17439 ##                $strength -= 0.9;
17440 ##            }
17441
17442             # always break after side comment
17443             if ( $type eq '#' ) { $strength = 0 }
17444
17445             $bond_strength_to_go[$i] = $strength;
17446
17447             FORMATTER_DEBUG_FLAG_BOND && do {
17448                 my $str = substr( $token, 0, 15 );
17449                 $str .= ' ' x ( 16 - length($str) );
17450                 print STDOUT
17451 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
17452             };
17453         } ## end main loop
17454         return;
17455     } ## end sub set_bond_strengths
17456 }
17457
17458 sub pad_array_to_go {
17459
17460     # to simplify coding in scan_list and set_bond_strengths, it helps
17461     # to create some extra blank tokens at the end of the arrays
17462     $tokens_to_go[ $max_index_to_go + 1 ] = '';
17463     $tokens_to_go[ $max_index_to_go + 2 ] = '';
17464     $types_to_go[ $max_index_to_go + 1 ]  = 'b';
17465     $types_to_go[ $max_index_to_go + 2 ]  = 'b';
17466     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
17467       $nesting_depth_to_go[$max_index_to_go];
17468
17469     #    /^[R\}\)\]]$/
17470     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
17471         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
17472
17473             # shouldn't happen:
17474             unless ( get_saw_brace_error() ) {
17475                 warning(
17476 "Program bug in scan_list: hit nesting error which should have been caught\n"
17477                 );
17478                 report_definite_bug();
17479             }
17480         }
17481         else {
17482             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
17483         }
17484     }
17485
17486     #       /^[L\{\(\[]$/
17487     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
17488         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
17489     }
17490     return;
17491 }
17492
17493 {    # begin scan_list
17494
17495     my (
17496         $block_type,               $current_depth,
17497         $depth,                    $i,
17498         $i_last_nonblank_token,    $last_colon_sequence_number,
17499         $last_nonblank_token,      $last_nonblank_type,
17500         $last_nonblank_block_type, $last_old_breakpoint_count,
17501         $minimum_depth,            $next_nonblank_block_type,
17502         $next_nonblank_token,      $next_nonblank_type,
17503         $old_breakpoint_count,     $starting_breakpoint_count,
17504         $starting_depth,           $token,
17505         $type,                     $type_sequence,
17506     );
17507
17508     my (
17509         @breakpoint_stack,              @breakpoint_undo_stack,
17510         @comma_index,                   @container_type,
17511         @identifier_count_stack,        @index_before_arrow,
17512         @interrupted_list,              @item_count_stack,
17513         @last_comma_index,              @last_dot_index,
17514         @last_nonblank_type,            @old_breakpoint_count_stack,
17515         @opening_structure_index_stack, @rfor_semicolon_list,
17516         @has_old_logical_breakpoints,   @rand_or_list,
17517         @i_equals,
17518     );
17519
17520     # routine to define essential variables when we go 'up' to
17521     # a new depth
17522     sub check_for_new_minimum_depth {
17523         my $depth = shift;
17524         if ( $depth < $minimum_depth ) {
17525
17526             $minimum_depth = $depth;
17527
17528             # these arrays need not retain values between calls
17529             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
17530             $container_type[$depth]                = "";
17531             $identifier_count_stack[$depth]        = 0;
17532             $index_before_arrow[$depth]            = -1;
17533             $interrupted_list[$depth]              = 1;
17534             $item_count_stack[$depth]              = 0;
17535             $last_nonblank_type[$depth]            = "";
17536             $opening_structure_index_stack[$depth] = -1;
17537
17538             $breakpoint_undo_stack[$depth]       = undef;
17539             $comma_index[$depth]                 = undef;
17540             $last_comma_index[$depth]            = undef;
17541             $last_dot_index[$depth]              = undef;
17542             $old_breakpoint_count_stack[$depth]  = undef;
17543             $has_old_logical_breakpoints[$depth] = 0;
17544             $rand_or_list[$depth]                = [];
17545             $rfor_semicolon_list[$depth]         = [];
17546             $i_equals[$depth]                    = -1;
17547
17548             # these arrays must retain values between calls
17549             if ( !defined( $has_broken_sublist[$depth] ) ) {
17550                 $dont_align[$depth]         = 0;
17551                 $has_broken_sublist[$depth] = 0;
17552                 $want_comma_break[$depth]   = 0;
17553             }
17554         }
17555         return;
17556     }
17557
17558     # routine to decide which commas to break at within a container;
17559     # returns:
17560     #   $bp_count = number of comma breakpoints set
17561     #   $do_not_break_apart = a flag indicating if container need not
17562     #     be broken open
17563     sub set_comma_breakpoints {
17564
17565         my $dd                 = shift;
17566         my $bp_count           = 0;
17567         my $do_not_break_apart = 0;
17568
17569         # anything to do?
17570         if ( $item_count_stack[$dd] ) {
17571
17572             # handle commas not in containers...
17573             if ( $dont_align[$dd] ) {
17574                 do_uncontained_comma_breaks($dd);
17575             }
17576
17577             # handle commas within containers...
17578             else {
17579                 my $fbc = $forced_breakpoint_count;
17580
17581                 # always open comma lists not preceded by keywords,
17582                 # barewords, identifiers (that is, anything that doesn't
17583                 # look like a function call)
17584                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
17585
17586                 set_comma_breakpoints_do(
17587                     $dd,
17588                     $opening_structure_index_stack[$dd],
17589                     $i,
17590                     $item_count_stack[$dd],
17591                     $identifier_count_stack[$dd],
17592                     $comma_index[$dd],
17593                     $next_nonblank_type,
17594                     $container_type[$dd],
17595                     $interrupted_list[$dd],
17596                     \$do_not_break_apart,
17597                     $must_break_open,
17598                 );
17599                 $bp_count = $forced_breakpoint_count - $fbc;
17600                 $do_not_break_apart = 0 if $must_break_open;
17601             }
17602         }
17603         return ( $bp_count, $do_not_break_apart );
17604     }
17605
17606     sub do_uncontained_comma_breaks {
17607
17608         # Handle commas not in containers...
17609         # This is a catch-all routine for commas that we
17610         # don't know what to do with because the don't fall
17611         # within containers.  We will bias the bond strength
17612         # to break at commas which ended lines in the input
17613         # file.  This usually works better than just trying
17614         # to put as many items on a line as possible.  A
17615         # downside is that if the input file is garbage it
17616         # won't work very well. However, the user can always
17617         # prevent following the old breakpoints with the
17618         # -iob flag.
17619         my $dd                    = shift;
17620         my $bias                  = -.01;
17621         my $old_comma_break_count = 0;
17622         foreach my $ii ( @{ $comma_index[$dd] } ) {
17623             if ( $old_breakpoint_to_go[$ii] ) {
17624                 $old_comma_break_count++;
17625                 $bond_strength_to_go[$ii] = $bias;
17626
17627                 # reduce bias magnitude to force breaks in order
17628                 $bias *= 0.99;
17629             }
17630         }
17631
17632         # Also put a break before the first comma if
17633         # (1) there was a break there in the input, and
17634         # (2) there was exactly one old break before the first comma break
17635         # (3) OLD: there are multiple old comma breaks
17636         # (3) NEW: there are one or more old comma breaks (see return example)
17637         #
17638         # For example, we will follow the user and break after
17639         # 'print' in this snippet:
17640         #    print
17641         #      "conformability (Not the same dimension)\n",
17642         #      "\t", $have, " is ", text_unit($hu), "\n",
17643         #      "\t", $want, " is ", text_unit($wu), "\n",
17644         #      ;
17645         #
17646         # Another example, just one comma, where we will break after
17647         # the return:
17648         #  return
17649         #    $x * cos($a) - $y * sin($a),
17650         #    $x * sin($a) + $y * cos($a);
17651
17652         # Breaking a print statement:
17653         # print SAVEOUT
17654         #   ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
17655         #   ( $? & 128 ) ? " -- core dumped" : "", "\n";
17656         #
17657         #  But we will not force a break after the opening paren here
17658         #  (causes a blinker):
17659         #        $heap->{stream}->set_output_filter(
17660         #            poe::filter::reference->new('myotherfreezer') ),
17661         #          ;
17662         #
17663         my $i_first_comma = $comma_index[$dd]->[0];
17664         if ( $old_breakpoint_to_go[$i_first_comma] ) {
17665             my $level_comma = $levels_to_go[$i_first_comma];
17666             my $ibreak      = -1;
17667             my $obp_count   = 0;
17668             for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
17669                 if ( $old_breakpoint_to_go[$ii] ) {
17670                     $obp_count++;
17671                     last if ( $obp_count > 1 );
17672                     $ibreak = $ii
17673                       if ( $levels_to_go[$ii] == $level_comma );
17674                 }
17675             }
17676
17677             # Changed rule from multiple old commas to just one here:
17678             if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
17679             {
17680                 # Do not to break before an opening token because
17681                 # it can lead to "blinkers".
17682                 my $ibreakm = $ibreak;
17683                 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
17684                 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
17685                 {
17686                     set_forced_breakpoint($ibreak);
17687                 }
17688             }
17689         }
17690         return;
17691     }
17692
17693     my %is_logical_container;
17694
17695     BEGIN {
17696         my @q = qw# if elsif unless while and or err not && | || ? : ! #;
17697         @is_logical_container{@q} = (1) x scalar(@q);
17698     }
17699
17700     sub set_for_semicolon_breakpoints {
17701         my $dd = shift;
17702         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
17703             set_forced_breakpoint($_);
17704         }
17705         return;
17706     }
17707
17708     sub set_logical_breakpoints {
17709         my $dd = shift;
17710         if (
17711                $item_count_stack[$dd] == 0
17712             && $is_logical_container{ $container_type[$dd] }
17713
17714             || $has_old_logical_breakpoints[$dd]
17715           )
17716         {
17717
17718             # Look for breaks in this order:
17719             # 0   1    2   3
17720             # or  and  ||  &&
17721             foreach my $i ( 0 .. 3 ) {
17722                 if ( $rand_or_list[$dd][$i] ) {
17723                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
17724                         set_forced_breakpoint($_);
17725                     }
17726
17727                     # break at any 'if' and 'unless' too
17728                     foreach ( @{ $rand_or_list[$dd][4] } ) {
17729                         set_forced_breakpoint($_);
17730                     }
17731                     $rand_or_list[$dd] = [];
17732                     last;
17733                 }
17734             }
17735         }
17736         return;
17737     }
17738
17739     sub is_unbreakable_container {
17740
17741         # never break a container of one of these types
17742         # because bad things can happen (map1.t)
17743         my $dd = shift;
17744         return $is_sort_map_grep{ $container_type[$dd] };
17745     }
17746
17747     sub scan_list {
17748
17749         # This routine is responsible for setting line breaks for all lists,
17750         # so that hierarchical structure can be displayed and so that list
17751         # items can be vertically aligned.  The output of this routine is
17752         # stored in the array @forced_breakpoint_to_go, which is used to set
17753         # final breakpoints.
17754
17755         $starting_depth = $nesting_depth_to_go[0];
17756
17757         $block_type                 = ' ';
17758         $current_depth              = $starting_depth;
17759         $i                          = -1;
17760         $last_colon_sequence_number = -1;
17761         $last_nonblank_token        = ';';
17762         $last_nonblank_type         = ';';
17763         $last_nonblank_block_type   = ' ';
17764         $last_old_breakpoint_count  = 0;
17765         $minimum_depth = $current_depth + 1;    # forces update in check below
17766         $old_breakpoint_count      = 0;
17767         $starting_breakpoint_count = $forced_breakpoint_count;
17768         $token                     = ';';
17769         $type                      = ';';
17770         $type_sequence             = '';
17771
17772         my $total_depth_variation = 0;
17773         my $i_old_assignment_break;
17774         my $depth_last = $starting_depth;
17775
17776         check_for_new_minimum_depth($current_depth);
17777
17778         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
17779         my $want_previous_breakpoint = -1;
17780
17781         my $saw_good_breakpoint;
17782         my $i_line_end   = -1;
17783         my $i_line_start = -1;
17784
17785         # loop over all tokens in this batch
17786         while ( ++$i <= $max_index_to_go ) {
17787             if ( $type ne 'b' ) {
17788                 $i_last_nonblank_token    = $i - 1;
17789                 $last_nonblank_type       = $type;
17790                 $last_nonblank_token      = $token;
17791                 $last_nonblank_block_type = $block_type;
17792             } ## end if ( $type ne 'b' )
17793             $type          = $types_to_go[$i];
17794             $block_type    = $block_type_to_go[$i];
17795             $token         = $tokens_to_go[$i];
17796             $type_sequence = $type_sequence_to_go[$i];
17797             my $next_type       = $types_to_go[ $i + 1 ];
17798             my $next_token      = $tokens_to_go[ $i + 1 ];
17799             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
17800             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
17801             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
17802             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
17803
17804             # set break if flag was set
17805             if ( $want_previous_breakpoint >= 0 ) {
17806                 set_forced_breakpoint($want_previous_breakpoint);
17807                 $want_previous_breakpoint = -1;
17808             }
17809
17810             $last_old_breakpoint_count = $old_breakpoint_count;
17811             if ( $old_breakpoint_to_go[$i] ) {
17812                 $i_line_end   = $i;
17813                 $i_line_start = $i_next_nonblank;
17814
17815                 $old_breakpoint_count++;
17816
17817                 # Break before certain keywords if user broke there and
17818                 # this is a 'safe' break point. The idea is to retain
17819                 # any preferred breaks for sequential list operations,
17820                 # like a schwartzian transform.
17821                 if ($rOpts_break_at_old_keyword_breakpoints) {
17822                     if (
17823                            $next_nonblank_type eq 'k'
17824                         && $is_keyword_returning_list{$next_nonblank_token}
17825                         && (   $type =~ /^[=\)\]\}Riw]$/
17826                             || $type eq 'k'
17827                             && $is_keyword_returning_list{$token} )
17828                       )
17829                     {
17830
17831                         # we actually have to set this break next time through
17832                         # the loop because if we are at a closing token (such
17833                         # as '}') which forms a one-line block, this break might
17834                         # get undone.
17835                         $want_previous_breakpoint = $i;
17836                     } ## end if ( $next_nonblank_type...)
17837                 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
17838
17839                 # Break before attributes if user broke there
17840                 if ($rOpts_break_at_old_attribute_breakpoints) {
17841                     if ( $next_nonblank_type eq 'A' ) {
17842                         $want_previous_breakpoint = $i;
17843                     }
17844                 }
17845
17846                 # remember an = break as possible good break point
17847                 if ( $is_assignment{$type} ) {
17848                     $i_old_assignment_break = $i;
17849                 }
17850                 elsif ( $is_assignment{$next_nonblank_type} ) {
17851                     $i_old_assignment_break = $i_next_nonblank;
17852                 }
17853             } ## end if ( $old_breakpoint_to_go...)
17854
17855             next if ( $type eq 'b' );
17856             $depth = $nesting_depth_to_go[ $i + 1 ];
17857
17858             $total_depth_variation += abs( $depth - $depth_last );
17859             $depth_last = $depth;
17860
17861             # safety check - be sure we always break after a comment
17862             # Shouldn't happen .. an error here probably means that the
17863             # nobreak flag did not get turned off correctly during
17864             # formatting.
17865             if ( $type eq '#' ) {
17866                 if ( $i != $max_index_to_go ) {
17867                     warning(
17868 "Non-fatal program bug: backup logic needed to break after a comment\n"
17869                     );
17870                     report_definite_bug();
17871                     $nobreak_to_go[$i] = 0;
17872                     set_forced_breakpoint($i);
17873                 } ## end if ( $i != $max_index_to_go)
17874             } ## end if ( $type eq '#' )
17875
17876             # Force breakpoints at certain tokens in long lines.
17877             # Note that such breakpoints will be undone later if these tokens
17878             # are fully contained within parens on a line.
17879             if (
17880
17881                 # break before a keyword within a line
17882                 $type eq 'k'
17883                 && $i > 0
17884
17885                 # if one of these keywords:
17886                 && $token =~ /^(if|unless|while|until|for)$/
17887
17888                 # but do not break at something like '1 while'
17889                 && ( $last_nonblank_type ne 'n' || $i > 2 )
17890
17891                 # and let keywords follow a closing 'do' brace
17892                 && $last_nonblank_block_type ne 'do'
17893
17894                 && (
17895                     $is_long_line
17896
17897                     # or container is broken (by side-comment, etc)
17898                     || (   $next_nonblank_token eq '('
17899                         && $mate_index_to_go[$i_next_nonblank] < $i )
17900                 )
17901               )
17902             {
17903                 set_forced_breakpoint( $i - 1 );
17904             } ## end if ( $type eq 'k' && $i...)
17905
17906             # remember locations of '||'  and '&&' for possible breaks if we
17907             # decide this is a long logical expression.
17908             if ( $type eq '||' ) {
17909                 push @{ $rand_or_list[$depth][2] }, $i;
17910                 ++$has_old_logical_breakpoints[$depth]
17911                   if ( ( $i == $i_line_start || $i == $i_line_end )
17912                     && $rOpts_break_at_old_logical_breakpoints );
17913             } ## end if ( $type eq '||' )
17914             elsif ( $type eq '&&' ) {
17915                 push @{ $rand_or_list[$depth][3] }, $i;
17916                 ++$has_old_logical_breakpoints[$depth]
17917                   if ( ( $i == $i_line_start || $i == $i_line_end )
17918                     && $rOpts_break_at_old_logical_breakpoints );
17919             } ## end elsif ( $type eq '&&' )
17920             elsif ( $type eq 'f' ) {
17921                 push @{ $rfor_semicolon_list[$depth] }, $i;
17922             }
17923             elsif ( $type eq 'k' ) {
17924                 if ( $token eq 'and' ) {
17925                     push @{ $rand_or_list[$depth][1] }, $i;
17926                     ++$has_old_logical_breakpoints[$depth]
17927                       if ( ( $i == $i_line_start || $i == $i_line_end )
17928                         && $rOpts_break_at_old_logical_breakpoints );
17929                 } ## end if ( $token eq 'and' )
17930
17931                 # break immediately at 'or's which are probably not in a logical
17932                 # block -- but we will break in logical breaks below so that
17933                 # they do not add to the forced_breakpoint_count
17934                 elsif ( $token eq 'or' ) {
17935                     push @{ $rand_or_list[$depth][0] }, $i;
17936                     ++$has_old_logical_breakpoints[$depth]
17937                       if ( ( $i == $i_line_start || $i == $i_line_end )
17938                         && $rOpts_break_at_old_logical_breakpoints );
17939                     if ( $is_logical_container{ $container_type[$depth] } ) {
17940                     }
17941                     else {
17942                         if ($is_long_line) { set_forced_breakpoint($i) }
17943                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
17944                             && $rOpts_break_at_old_logical_breakpoints )
17945                         {
17946                             $saw_good_breakpoint = 1;
17947                         }
17948                     } ## end else [ if ( $is_logical_container...)]
17949                 } ## end elsif ( $token eq 'or' )
17950                 elsif ( $token eq 'if' || $token eq 'unless' ) {
17951                     push @{ $rand_or_list[$depth][4] }, $i;
17952                     if ( ( $i == $i_line_start || $i == $i_line_end )
17953                         && $rOpts_break_at_old_logical_breakpoints )
17954                     {
17955                         set_forced_breakpoint($i);
17956                     }
17957                 } ## end elsif ( $token eq 'if' ||...)
17958             } ## end elsif ( $type eq 'k' )
17959             elsif ( $is_assignment{$type} ) {
17960                 $i_equals[$depth] = $i;
17961             }
17962
17963             if ($type_sequence) {
17964
17965                 # handle any postponed closing breakpoints
17966                 if ( $token =~ /^[\)\]\}\:]$/ ) {
17967                     if ( $type eq ':' ) {
17968                         $last_colon_sequence_number = $type_sequence;
17969
17970                         # retain break at a ':' line break
17971                         if ( ( $i == $i_line_start || $i == $i_line_end )
17972                             && $rOpts_break_at_old_ternary_breakpoints )
17973                         {
17974
17975                             set_forced_breakpoint($i);
17976
17977                             # break at previous '='
17978                             if ( $i_equals[$depth] > 0 ) {
17979                                 set_forced_breakpoint( $i_equals[$depth] );
17980                                 $i_equals[$depth] = -1;
17981                             }
17982                         } ## end if ( ( $i == $i_line_start...))
17983                     } ## end if ( $type eq ':' )
17984                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
17985                         my $inc = ( $type eq ':' ) ? 0 : 1;
17986                         set_forced_breakpoint( $i - $inc );
17987                         delete $postponed_breakpoint{$type_sequence};
17988                     }
17989                 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
17990
17991                 # set breaks at ?/: if they will get separated (and are
17992                 # not a ?/: chain), or if the '?' is at the end of the
17993                 # line
17994                 elsif ( $token eq '?' ) {
17995                     my $i_colon = $mate_index_to_go[$i];
17996                     if (
17997                         $i_colon <= 0  # the ':' is not in this batch
17998                         || $i == 0     # this '?' is the first token of the line
17999                         || $i ==
18000                         $max_index_to_go    # or this '?' is the last token
18001                       )
18002                     {
18003
18004                         # don't break at a '?' if preceded by ':' on
18005                         # this line of previous ?/: pair on this line.
18006                         # This is an attempt to preserve a chain of ?/:
18007                         # expressions (elsif2.t).  And don't break if
18008                         # this has a side comment.
18009                         set_forced_breakpoint($i)
18010                           unless (
18011                             $type_sequence == (
18012                                 $last_colon_sequence_number +
18013                                   TYPE_SEQUENCE_INCREMENT
18014                             )
18015                             || $tokens_to_go[$max_index_to_go] eq '#'
18016                           );
18017                         set_closing_breakpoint($i);
18018                     } ## end if ( $i_colon <= 0  ||...)
18019                 } ## end elsif ( $token eq '?' )
18020             } ## end if ($type_sequence)
18021
18022 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
18023
18024             #------------------------------------------------------------
18025             # Handle Increasing Depth..
18026             #
18027             # prepare for a new list when depth increases
18028             # token $i is a '(','{', or '['
18029             #------------------------------------------------------------
18030             if ( $depth > $current_depth ) {
18031
18032                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
18033                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
18034                 $has_broken_sublist[$depth]     = 0;
18035                 $identifier_count_stack[$depth] = 0;
18036                 $index_before_arrow[$depth]     = -1;
18037                 $interrupted_list[$depth]       = 0;
18038                 $item_count_stack[$depth]       = 0;
18039                 $last_comma_index[$depth]       = undef;
18040                 $last_dot_index[$depth]         = undef;
18041                 $last_nonblank_type[$depth]     = $last_nonblank_type;
18042                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
18043                 $opening_structure_index_stack[$depth] = $i;
18044                 $rand_or_list[$depth]                  = [];
18045                 $rfor_semicolon_list[$depth]           = [];
18046                 $i_equals[$depth]                      = -1;
18047                 $want_comma_break[$depth]              = 0;
18048                 $container_type[$depth] =
18049                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
18050                   ? $last_nonblank_token
18051                   : "";
18052                 $has_old_logical_breakpoints[$depth] = 0;
18053
18054                 # if line ends here then signal closing token to break
18055                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
18056                 {
18057                     set_closing_breakpoint($i);
18058                 }
18059
18060                 # Not all lists of values should be vertically aligned..
18061                 $dont_align[$depth] =
18062
18063                   # code BLOCKS are handled at a higher level
18064                   ( $block_type ne "" )
18065
18066                   # certain paren lists
18067                   || ( $type eq '(' ) && (
18068
18069                     # it does not usually look good to align a list of
18070                     # identifiers in a parameter list, as in:
18071                     #    my($var1, $var2, ...)
18072                     # (This test should probably be refined, for now I'm just
18073                     # testing for any keyword)
18074                     ( $last_nonblank_type eq 'k' )
18075
18076                     # a trailing '(' usually indicates a non-list
18077                     || ( $next_nonblank_type eq '(' )
18078                   );
18079
18080                 # patch to outdent opening brace of long if/for/..
18081                 # statements (like this one).  See similar coding in
18082                 # set_continuation breaks.  We have also catch it here for
18083                 # short line fragments which otherwise will not go through
18084                 # set_continuation_breaks.
18085                 if (
18086                     $block_type
18087
18088                     # if we have the ')' but not its '(' in this batch..
18089                     && ( $last_nonblank_token eq ')' )
18090                     && $mate_index_to_go[$i_last_nonblank_token] < 0
18091
18092                     # and user wants brace to left
18093                     && !$rOpts->{'opening-brace-always-on-right'}
18094
18095                     && ( $type eq '{' )     # should be true
18096                     && ( $token eq '{' )    # should be true
18097                   )
18098                 {
18099                     set_forced_breakpoint( $i - 1 );
18100                 } ## end if ( $block_type && ( ...))
18101             } ## end if ( $depth > $current_depth)
18102
18103             #------------------------------------------------------------
18104             # Handle Decreasing Depth..
18105             #
18106             # finish off any old list when depth decreases
18107             # token $i is a ')','}', or ']'
18108             #------------------------------------------------------------
18109             elsif ( $depth < $current_depth ) {
18110
18111                 check_for_new_minimum_depth($depth);
18112
18113                 # force all outer logical containers to break after we see on
18114                 # old breakpoint
18115                 $has_old_logical_breakpoints[$depth] ||=
18116                   $has_old_logical_breakpoints[$current_depth];
18117
18118                 # Patch to break between ') {' if the paren list is broken.
18119                 # There is similar logic in set_continuation_breaks for
18120                 # non-broken lists.
18121                 if (   $token eq ')'
18122                     && $next_nonblank_block_type
18123                     && $interrupted_list[$current_depth]
18124                     && $next_nonblank_type eq '{'
18125                     && !$rOpts->{'opening-brace-always-on-right'} )
18126                 {
18127                     set_forced_breakpoint($i);
18128                 } ## end if ( $token eq ')' && ...
18129
18130 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
18131
18132                 # set breaks at commas if necessary
18133                 my ( $bp_count, $do_not_break_apart ) =
18134                   set_comma_breakpoints($current_depth);
18135
18136                 my $i_opening = $opening_structure_index_stack[$current_depth];
18137                 my $saw_opening_structure = ( $i_opening >= 0 );
18138
18139                 # this term is long if we had to break at interior commas..
18140                 my $is_long_term = $bp_count > 0;
18141
18142                 # If this is a short container with one or more comma arrows,
18143                 # then we will mark it as a long term to open it if requested.
18144                 # $rOpts_comma_arrow_breakpoints =
18145                 #    0 - open only if comma precedes closing brace
18146                 #    1 - stable: except for one line blocks
18147                 #    2 - try to form 1 line blocks
18148                 #    3 - ignore =>
18149                 #    4 - always open up if vt=0
18150                 #    5 - stable: even for one line blocks if vt=0
18151                 if (  !$is_long_term
18152                     && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
18153                     && $index_before_arrow[ $depth + 1 ] > 0
18154                     && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
18155                   )
18156                 {
18157                     $is_long_term = $rOpts_comma_arrow_breakpoints == 4
18158                       || ( $rOpts_comma_arrow_breakpoints == 0
18159                         && $last_nonblank_token eq ',' )
18160                       || ( $rOpts_comma_arrow_breakpoints == 5
18161                         && $old_breakpoint_to_go[$i_opening] );
18162                 } ## end if ( !$is_long_term &&...)
18163
18164                 # mark term as long if the length between opening and closing
18165                 # parens exceeds allowed line length
18166                 if ( !$is_long_term && $saw_opening_structure ) {
18167                     my $i_opening_minus = find_token_starting_list($i_opening);
18168
18169                     # Note: we have to allow for one extra space after a
18170                     # closing token so that we do not strand a comma or
18171                     # semicolon, hence the '>=' here (oneline.t)
18172                     # Note: we ignore left weld lengths here for best results
18173                     $is_long_term =
18174                       excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
18175                 } ## end if ( !$is_long_term &&...)
18176
18177                 # We've set breaks after all comma-arrows.  Now we have to
18178                 # undo them if this can be a one-line block
18179                 # (the only breakpoints set will be due to comma-arrows)
18180                 if (
18181
18182                     # user doesn't require breaking after all comma-arrows
18183                     ( $rOpts_comma_arrow_breakpoints != 0 )
18184                     && ( $rOpts_comma_arrow_breakpoints != 4 )
18185
18186                     # and if the opening structure is in this batch
18187                     && $saw_opening_structure
18188
18189                     # and either on the same old line
18190                     && (
18191                         $old_breakpoint_count_stack[$current_depth] ==
18192                         $last_old_breakpoint_count
18193
18194                         # or user wants to form long blocks with arrows
18195                         || $rOpts_comma_arrow_breakpoints == 2
18196                     )
18197
18198                   # and we made some breakpoints between the opening and closing
18199                     && ( $breakpoint_undo_stack[$current_depth] <
18200                         $forced_breakpoint_undo_count )
18201
18202                     # and this block is short enough to fit on one line
18203                     # Note: use < because need 1 more space for possible comma
18204                     && !$is_long_term
18205
18206                   )
18207                 {
18208                     undo_forced_breakpoint_stack(
18209                         $breakpoint_undo_stack[$current_depth] );
18210                 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
18211
18212                 # now see if we have any comma breakpoints left
18213                 my $has_comma_breakpoints =
18214                   ( $breakpoint_stack[$current_depth] !=
18215                       $forced_breakpoint_count );
18216
18217                 # update broken-sublist flag of the outer container
18218                 $has_broken_sublist[$depth] =
18219                      $has_broken_sublist[$depth]
18220                   || $has_broken_sublist[$current_depth]
18221                   || $is_long_term
18222                   || $has_comma_breakpoints;
18223
18224 # Having come to the closing ')', '}', or ']', now we have to decide if we
18225 # should 'open up' the structure by placing breaks at the opening and
18226 # closing containers.  This is a tricky decision.  Here are some of the
18227 # basic considerations:
18228 #
18229 # -If this is a BLOCK container, then any breakpoints will have already
18230 # been set (and according to user preferences), so we need do nothing here.
18231 #
18232 # -If we have a comma-separated list for which we can align the list items,
18233 # then we need to do so because otherwise the vertical aligner cannot
18234 # currently do the alignment.
18235 #
18236 # -If this container does itself contain a container which has been broken
18237 # open, then it should be broken open to properly show the structure.
18238 #
18239 # -If there is nothing to align, and no other reason to break apart,
18240 # then do not do it.
18241 #
18242 # We will not break open the parens of a long but 'simple' logical expression.
18243 # For example:
18244 #
18245 # This is an example of a simple logical expression and its formatting:
18246 #
18247 #     if ( $bigwasteofspace1 && $bigwasteofspace2
18248 #         || $bigwasteofspace3 && $bigwasteofspace4 )
18249 #
18250 # Most people would prefer this than the 'spacey' version:
18251 #
18252 #     if (
18253 #         $bigwasteofspace1 && $bigwasteofspace2
18254 #         || $bigwasteofspace3 && $bigwasteofspace4
18255 #     )
18256 #
18257 # To illustrate the rules for breaking logical expressions, consider:
18258 #
18259 #             FULLY DENSE:
18260 #             if ( $opt_excl
18261 #                 and ( exists $ids_excl_uc{$id_uc}
18262 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
18263 #
18264 # This is on the verge of being difficult to read.  The current default is to
18265 # open it up like this:
18266 #
18267 #             DEFAULT:
18268 #             if (
18269 #                 $opt_excl
18270 #                 and ( exists $ids_excl_uc{$id_uc}
18271 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
18272 #               )
18273 #
18274 # This is a compromise which tries to avoid being too dense and to spacey.
18275 # A more spaced version would be:
18276 #
18277 #             SPACEY:
18278 #             if (
18279 #                 $opt_excl
18280 #                 and (
18281 #                     exists $ids_excl_uc{$id_uc}
18282 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
18283 #                 )
18284 #               )
18285 #
18286 # Some people might prefer the spacey version -- an option could be added.  The
18287 # innermost expression contains a long block '( exists $ids_...  ')'.
18288 #
18289 # Here is how the logic goes: We will force a break at the 'or' that the
18290 # innermost expression contains, but we will not break apart its opening and
18291 # closing containers because (1) it contains no multi-line sub-containers itself,
18292 # and (2) there is no alignment to be gained by breaking it open like this
18293 #
18294 #             and (
18295 #                 exists $ids_excl_uc{$id_uc}
18296 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
18297 #             )
18298 #
18299 # (although this looks perfectly ok and might be good for long expressions).  The
18300 # outer 'if' container, though, contains a broken sub-container, so it will be
18301 # broken open to avoid too much density.  Also, since it contains no 'or's, there
18302 # will be a forced break at its 'and'.
18303
18304                 # set some flags telling something about this container..
18305                 my $is_simple_logical_expression = 0;
18306                 if (   $item_count_stack[$current_depth] == 0
18307                     && $saw_opening_structure
18308                     && $tokens_to_go[$i_opening] eq '('
18309                     && $is_logical_container{ $container_type[$current_depth] }
18310                   )
18311                 {
18312
18313                     # This seems to be a simple logical expression with
18314                     # no existing breakpoints.  Set a flag to prevent
18315                     # opening it up.
18316                     if ( !$has_comma_breakpoints ) {
18317                         $is_simple_logical_expression = 1;
18318                     }
18319
18320                     # This seems to be a simple logical expression with
18321                     # breakpoints (broken sublists, for example).  Break
18322                     # at all 'or's and '||'s.
18323                     else {
18324                         set_logical_breakpoints($current_depth);
18325                     }
18326                 } ## end if ( $item_count_stack...)
18327
18328                 if ( $is_long_term
18329                     && @{ $rfor_semicolon_list[$current_depth] } )
18330                 {
18331                     set_for_semicolon_breakpoints($current_depth);
18332
18333                     # open up a long 'for' or 'foreach' container to allow
18334                     # leading term alignment unless -lp is used.
18335                     $has_comma_breakpoints = 1
18336                       unless $rOpts_line_up_parentheses;
18337                 } ## end if ( $is_long_term && ...)
18338
18339                 if (
18340
18341                     # breaks for code BLOCKS are handled at a higher level
18342                     !$block_type
18343
18344                     # we do not need to break at the top level of an 'if'
18345                     # type expression
18346                     && !$is_simple_logical_expression
18347
18348                     ## modification to keep ': (' containers vertically tight;
18349                     ## but probably better to let user set -vt=1 to avoid
18350                     ## inconsistency with other paren types
18351                     ## && ($container_type[$current_depth] ne ':')
18352
18353                     # otherwise, we require one of these reasons for breaking:
18354                     && (
18355
18356                         # - this term has forced line breaks
18357                         $has_comma_breakpoints
18358
18359                        # - the opening container is separated from this batch
18360                        #   for some reason (comment, blank line, code block)
18361                        # - this is a non-paren container spanning multiple lines
18362                         || !$saw_opening_structure
18363
18364                         # - this is a long block contained in another breakable
18365                         #   container
18366                         || (   $is_long_term
18367                             && $container_environment_to_go[$i_opening] ne
18368                             'BLOCK' )
18369                     )
18370                   )
18371                 {
18372
18373                     # For -lp option, we must put a breakpoint before
18374                     # the token which has been identified as starting
18375                     # this indentation level.  This is necessary for
18376                     # proper alignment.
18377                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
18378                     {
18379                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
18380                         if (   $i_opening + 1 < $max_index_to_go
18381                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
18382                         {
18383                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
18384                         }
18385                         if ( defined($item) ) {
18386                             my $i_start_2 = $item->get_starting_index();
18387                             if (
18388                                 defined($i_start_2)
18389
18390                                 # we are breaking after an opening brace, paren,
18391                                 # so don't break before it too
18392                                 && $i_start_2 ne $i_opening
18393                               )
18394                             {
18395
18396                                 # Only break for breakpoints at the same
18397                                 # indentation level as the opening paren
18398                                 my $test1 = $nesting_depth_to_go[$i_opening];
18399                                 my $test2 = $nesting_depth_to_go[$i_start_2];
18400                                 if ( $test2 == $test1 ) {
18401                                     set_forced_breakpoint( $i_start_2 - 1 );
18402                                 }
18403                             } ## end if ( defined($i_start_2...))
18404                         } ## end if ( defined($item) )
18405                     } ## end if ( $rOpts_line_up_parentheses...)
18406
18407                     # break after opening structure.
18408                     # note: break before closing structure will be automatic
18409                     if ( $minimum_depth <= $current_depth ) {
18410
18411                         set_forced_breakpoint($i_opening)
18412                           unless ( $do_not_break_apart
18413                             || is_unbreakable_container($current_depth) );
18414
18415                         # break at ',' of lower depth level before opening token
18416                         if ( $last_comma_index[$depth] ) {
18417                             set_forced_breakpoint( $last_comma_index[$depth] );
18418                         }
18419
18420                         # break at '.' of lower depth level before opening token
18421                         if ( $last_dot_index[$depth] ) {
18422                             set_forced_breakpoint( $last_dot_index[$depth] );
18423                         }
18424
18425                         # break before opening structure if preceded by another
18426                         # closing structure and a comma.  This is normally
18427                         # done by the previous closing brace, but not
18428                         # if it was a one-line block.
18429                         if ( $i_opening > 2 ) {
18430                             my $i_prev =
18431                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
18432                               ? $i_opening - 2
18433                               : $i_opening - 1;
18434
18435                             if (   $types_to_go[$i_prev] eq ','
18436                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
18437                             {
18438                                 set_forced_breakpoint($i_prev);
18439                             }
18440
18441                             # also break before something like ':('  or '?('
18442                             # if appropriate.
18443                             elsif (
18444                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
18445                             {
18446                                 my $token_prev = $tokens_to_go[$i_prev];
18447                                 if ( $want_break_before{$token_prev} ) {
18448                                     set_forced_breakpoint($i_prev);
18449                                 }
18450                             } ## end elsif ( $types_to_go[$i_prev...])
18451                         } ## end if ( $i_opening > 2 )
18452                     } ## end if ( $minimum_depth <=...)
18453
18454                     # break after comma following closing structure
18455                     if ( $next_type eq ',' ) {
18456                         set_forced_breakpoint( $i + 1 );
18457                     }
18458
18459                     # break before an '=' following closing structure
18460                     if (
18461                         $is_assignment{$next_nonblank_type}
18462                         && ( $breakpoint_stack[$current_depth] !=
18463                             $forced_breakpoint_count )
18464                       )
18465                     {
18466                         set_forced_breakpoint($i);
18467                     } ## end if ( $is_assignment{$next_nonblank_type...})
18468
18469                     # break at any comma before the opening structure Added
18470                     # for -lp, but seems to be good in general.  It isn't
18471                     # obvious how far back to look; the '5' below seems to
18472                     # work well and will catch the comma in something like
18473                     #  push @list, myfunc( $param, $param, ..
18474
18475                     my $icomma = $last_comma_index[$depth];
18476                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
18477                         unless ( $forced_breakpoint_to_go[$icomma] ) {
18478                             set_forced_breakpoint($icomma);
18479                         }
18480                     }
18481                 }    # end logic to open up a container
18482
18483                 # Break open a logical container open if it was already open
18484                 elsif ($is_simple_logical_expression
18485                     && $has_old_logical_breakpoints[$current_depth] )
18486                 {
18487                     set_logical_breakpoints($current_depth);
18488                 }
18489
18490                 # Handle long container which does not get opened up
18491                 elsif ($is_long_term) {
18492
18493                     # must set fake breakpoint to alert outer containers that
18494                     # they are complex
18495                     set_fake_breakpoint();
18496                 } ## end elsif ($is_long_term)
18497
18498             } ## end elsif ( $depth < $current_depth)
18499
18500             #------------------------------------------------------------
18501             # Handle this token
18502             #------------------------------------------------------------
18503
18504             $current_depth = $depth;
18505
18506             # handle comma-arrow
18507             if ( $type eq '=>' ) {
18508                 next if ( $last_nonblank_type eq '=>' );
18509                 next if $rOpts_break_at_old_comma_breakpoints;
18510                 next if $rOpts_comma_arrow_breakpoints == 3;
18511                 $want_comma_break[$depth]   = 1;
18512                 $index_before_arrow[$depth] = $i_last_nonblank_token;
18513                 next;
18514             } ## end if ( $type eq '=>' )
18515
18516             elsif ( $type eq '.' ) {
18517                 $last_dot_index[$depth] = $i;
18518             }
18519
18520             # Turn off alignment if we are sure that this is not a list
18521             # environment.  To be safe, we will do this if we see certain
18522             # non-list tokens, such as ';', and also the environment is
18523             # not a list.  Note that '=' could be in any of the = operators
18524             # (lextest.t). We can't just use the reported environment
18525             # because it can be incorrect in some cases.
18526             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
18527                 && $container_environment_to_go[$i] ne 'LIST' )
18528             {
18529                 $dont_align[$depth]         = 1;
18530                 $want_comma_break[$depth]   = 0;
18531                 $index_before_arrow[$depth] = -1;
18532             } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
18533
18534             # now just handle any commas
18535             next unless ( $type eq ',' );
18536
18537             $last_dot_index[$depth]   = undef;
18538             $last_comma_index[$depth] = $i;
18539
18540             # break here if this comma follows a '=>'
18541             # but not if there is a side comment after the comma
18542             if ( $want_comma_break[$depth] ) {
18543
18544                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
18545                     if ($rOpts_comma_arrow_breakpoints) {
18546                         $want_comma_break[$depth] = 0;
18547                         ##$index_before_arrow[$depth] = -1;
18548                         next;
18549                     }
18550                 }
18551
18552                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
18553
18554                 # break before the previous token if it looks safe
18555                 # Example of something that we will not try to break before:
18556                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
18557                 # Also we don't want to break at a binary operator (like +):
18558                 # $c->createOval(
18559                 #    $x + $R, $y +
18560                 #    $R => $x - $R,
18561                 #    $y - $R, -fill   => 'black',
18562                 # );
18563                 my $ibreak = $index_before_arrow[$depth] - 1;
18564                 if (   $ibreak > 0
18565                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
18566                 {
18567                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
18568                     if ( $types_to_go[$ibreak] eq 'b' )  { $ibreak-- }
18569                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
18570
18571                         # don't break pointer calls, such as the following:
18572                         #  File::Spec->curdir  => 1,
18573                         # (This is tokenized as adjacent 'w' tokens)
18574                         ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
18575
18576                         # And don't break before a comma, as in the following:
18577                         # ( LONGER_THAN,=> 1,
18578                         #    EIGHTY_CHARACTERS,=> 2,
18579                         #    CAUSES_FORMATTING,=> 3,
18580                         #    LIKE_THIS,=> 4,
18581                         # );
18582                         # This example is for -tso but should be general rule
18583                         if (   $tokens_to_go[ $ibreak + 1 ] ne '->'
18584                             && $tokens_to_go[ $ibreak + 1 ] ne ',' )
18585                         {
18586                             set_forced_breakpoint($ibreak);
18587                         }
18588                     } ## end if ( $types_to_go[$ibreak...])
18589                 } ## end if ( $ibreak > 0 && $tokens_to_go...)
18590
18591                 $want_comma_break[$depth]   = 0;
18592                 $index_before_arrow[$depth] = -1;
18593
18594                 # handle list which mixes '=>'s and ','s:
18595                 # treat any list items so far as an interrupted list
18596                 $interrupted_list[$depth] = 1;
18597                 next;
18598             } ## end if ( $want_comma_break...)
18599
18600             # break after all commas above starting depth
18601             if ( $depth < $starting_depth && !$dont_align[$depth] ) {
18602                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
18603                 next;
18604             }
18605
18606             # add this comma to the list..
18607             my $item_count = $item_count_stack[$depth];
18608             if ( $item_count == 0 ) {
18609
18610                 # but do not form a list with no opening structure
18611                 # for example:
18612
18613                 #            open INFILE_COPY, ">$input_file_copy"
18614                 #              or die ("very long message");
18615
18616                 if ( ( $opening_structure_index_stack[$depth] < 0 )
18617                     && $container_environment_to_go[$i] eq 'BLOCK' )
18618                 {
18619                     $dont_align[$depth] = 1;
18620                 }
18621             } ## end if ( $item_count == 0 )
18622
18623             $comma_index[$depth][$item_count] = $i;
18624             ++$item_count_stack[$depth];
18625             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
18626                 $identifier_count_stack[$depth]++;
18627             }
18628         } ## end while ( ++$i <= $max_index_to_go)
18629
18630         #-------------------------------------------
18631         # end of loop over all tokens in this batch
18632         #-------------------------------------------
18633
18634         # set breaks for any unfinished lists ..
18635         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
18636
18637             $interrupted_list[$dd] = 1;
18638             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
18639             set_comma_breakpoints($dd);
18640             set_logical_breakpoints($dd)
18641               if ( $has_old_logical_breakpoints[$dd] );
18642             set_for_semicolon_breakpoints($dd);
18643
18644             # break open container...
18645             my $i_opening = $opening_structure_index_stack[$dd];
18646             set_forced_breakpoint($i_opening)
18647               unless (
18648                 is_unbreakable_container($dd)
18649
18650                 # Avoid a break which would place an isolated ' or "
18651                 # on a line
18652                 || (   $type eq 'Q'
18653                     && $i_opening >= $max_index_to_go - 2
18654                     && $token =~ /^['"]$/ )
18655               );
18656         } ## end for ( my $dd = $current_depth...)
18657
18658         # Return a flag indicating if the input file had some good breakpoints.
18659         # This flag will be used to force a break in a line shorter than the
18660         # allowed line length.
18661         if ( $has_old_logical_breakpoints[$current_depth] ) {
18662             $saw_good_breakpoint = 1;
18663         }
18664
18665         # A complex line with one break at an = has a good breakpoint.
18666         # This is not complex ($total_depth_variation=0):
18667         # $res1
18668         #   = 10;
18669         #
18670         # This is complex ($total_depth_variation=6):
18671         # $res2 =
18672         #  (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
18673         elsif ($i_old_assignment_break
18674             && $total_depth_variation > 4
18675             && $old_breakpoint_count == 1 )
18676         {
18677             $saw_good_breakpoint = 1;
18678         } ## end elsif ( $i_old_assignment_break...)
18679
18680         return $saw_good_breakpoint;
18681     } ## end sub scan_list
18682 }    # end scan_list
18683
18684 sub find_token_starting_list {
18685
18686     # When testing to see if a block will fit on one line, some
18687     # previous token(s) may also need to be on the line; particularly
18688     # if this is a sub call.  So we will look back at least one
18689     # token. NOTE: This isn't perfect, but not critical, because
18690     # if we mis-identify a block, it will be wrapped and therefore
18691     # fixed the next time it is formatted.
18692     my $i_opening_paren = shift;
18693     my $i_opening_minus = $i_opening_paren;
18694     my $im1             = $i_opening_paren - 1;
18695     my $im2             = $i_opening_paren - 2;
18696     my $im3             = $i_opening_paren - 3;
18697     my $typem1          = $types_to_go[$im1];
18698     my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
18699     if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
18700         $i_opening_minus = $i_opening_paren;
18701     }
18702     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
18703         $i_opening_minus = $im1 if $im1 >= 0;
18704
18705         # walk back to improve length estimate
18706         for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
18707             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
18708             $i_opening_minus = $j;
18709         }
18710         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
18711     }
18712     elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
18713     elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
18714         $i_opening_minus = $im2;
18715     }
18716     return $i_opening_minus;
18717 }
18718
18719 {    # begin set_comma_breakpoints_do
18720
18721     my %is_keyword_with_special_leading_term;
18722
18723     BEGIN {
18724
18725         # These keywords have prototypes which allow a special leading item
18726         # followed by a list
18727         my @q =
18728           qw(formline grep kill map printf sprintf push chmod join pack unshift);
18729         @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
18730     }
18731
18732     sub set_comma_breakpoints_do {
18733
18734         # Given a list with some commas, set breakpoints at some of the
18735         # commas, if necessary, to make it easy to read.  This list is
18736         # an example:
18737         my (
18738             $depth,               $i_opening_paren,  $i_closing_paren,
18739             $item_count,          $identifier_count, $rcomma_index,
18740             $next_nonblank_type,  $list_type,        $interrupted,
18741             $rdo_not_break_apart, $must_break_open,
18742         ) = @_;
18743
18744         # nothing to do if no commas seen
18745         return if ( $item_count < 1 );
18746         my $i_first_comma     = $rcomma_index->[0];
18747         my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
18748         my $i_last_comma      = $i_true_last_comma;
18749         if ( $i_last_comma >= $max_index_to_go ) {
18750             $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
18751             return if ( $item_count < 1 );
18752         }
18753
18754         #---------------------------------------------------------------
18755         # find lengths of all items in the list to calculate page layout
18756         #---------------------------------------------------------------
18757         my $comma_count = $item_count;
18758         my @item_lengths;
18759         my @i_term_begin;
18760         my @i_term_end;
18761         my @i_term_comma;
18762         my $i_prev_plus;
18763         my @max_length = ( 0, 0 );
18764         my $first_term_length;
18765         my $i      = $i_opening_paren;
18766         my $is_odd = 1;
18767
18768         foreach my $j ( 0 .. $comma_count - 1 ) {
18769             $is_odd      = 1 - $is_odd;
18770             $i_prev_plus = $i + 1;
18771             $i           = $rcomma_index->[$j];
18772
18773             my $i_term_end =
18774               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
18775             my $i_term_begin =
18776               ( $types_to_go[$i_prev_plus] eq 'b' )
18777               ? $i_prev_plus + 1
18778               : $i_prev_plus;
18779             push @i_term_begin, $i_term_begin;
18780             push @i_term_end,   $i_term_end;
18781             push @i_term_comma, $i;
18782
18783             # note: currently adding 2 to all lengths (for comma and space)
18784             my $length =
18785               2 + token_sequence_length( $i_term_begin, $i_term_end );
18786             push @item_lengths, $length;
18787
18788             if ( $j == 0 ) {
18789                 $first_term_length = $length;
18790             }
18791             else {
18792
18793                 if ( $length > $max_length[$is_odd] ) {
18794                     $max_length[$is_odd] = $length;
18795                 }
18796             }
18797         }
18798
18799         # now we have to make a distinction between the comma count and item
18800         # count, because the item count will be one greater than the comma
18801         # count if the last item is not terminated with a comma
18802         my $i_b =
18803           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
18804           ? $i_last_comma + 1
18805           : $i_last_comma;
18806         my $i_e =
18807           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
18808           ? $i_closing_paren - 2
18809           : $i_closing_paren - 1;
18810         my $i_effective_last_comma = $i_last_comma;
18811
18812         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
18813
18814         if ( $last_item_length > 0 ) {
18815
18816             # add 2 to length because other lengths include a comma and a blank
18817             $last_item_length += 2;
18818             push @item_lengths, $last_item_length;
18819             push @i_term_begin, $i_b + 1;
18820             push @i_term_end,   $i_e;
18821             push @i_term_comma, undef;
18822
18823             my $i_odd = $item_count % 2;
18824
18825             if ( $last_item_length > $max_length[$i_odd] ) {
18826                 $max_length[$i_odd] = $last_item_length;
18827             }
18828
18829             $item_count++;
18830             $i_effective_last_comma = $i_e + 1;
18831
18832             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
18833                 $identifier_count++;
18834             }
18835         }
18836
18837         #---------------------------------------------------------------
18838         # End of length calculations
18839         #---------------------------------------------------------------
18840
18841         #---------------------------------------------------------------
18842         # Compound List Rule 1:
18843         # Break at (almost) every comma for a list containing a broken
18844         # sublist.  This has higher priority than the Interrupted List
18845         # Rule.
18846         #---------------------------------------------------------------
18847         if ( $has_broken_sublist[$depth] ) {
18848
18849             # Break at every comma except for a comma between two
18850             # simple, small terms.  This prevents long vertical
18851             # columns of, say, just 0's.
18852             my $small_length = 10;    # 2 + actual maximum length wanted
18853
18854             # We'll insert a break in long runs of small terms to
18855             # allow alignment in uniform tables.
18856             my $skipped_count = 0;
18857             my $columns       = table_columns_available($i_first_comma);
18858             my $fields        = int( $columns / $small_length );
18859             if (   $rOpts_maximum_fields_per_table
18860                 && $fields > $rOpts_maximum_fields_per_table )
18861             {
18862                 $fields = $rOpts_maximum_fields_per_table;
18863             }
18864             my $max_skipped_count = $fields - 1;
18865
18866             my $is_simple_last_term = 0;
18867             my $is_simple_next_term = 0;
18868             foreach my $j ( 0 .. $item_count ) {
18869                 $is_simple_last_term = $is_simple_next_term;
18870                 $is_simple_next_term = 0;
18871                 if (   $j < $item_count
18872                     && $i_term_end[$j] == $i_term_begin[$j]
18873                     && $item_lengths[$j] <= $small_length )
18874                 {
18875                     $is_simple_next_term = 1;
18876                 }
18877                 next if $j == 0;
18878                 if (   $is_simple_last_term
18879                     && $is_simple_next_term
18880                     && $skipped_count < $max_skipped_count )
18881                 {
18882                     $skipped_count++;
18883                 }
18884                 else {
18885                     $skipped_count = 0;
18886                     my $i = $i_term_comma[ $j - 1 ];
18887                     last unless defined $i;
18888                     set_forced_breakpoint($i);
18889                 }
18890             }
18891
18892             # always break at the last comma if this list is
18893             # interrupted; we wouldn't want to leave a terminal '{', for
18894             # example.
18895             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
18896             return;
18897         }
18898
18899 #my ( $a, $b, $c ) = caller();
18900 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
18901 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
18902 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
18903
18904         #---------------------------------------------------------------
18905         # Interrupted List Rule:
18906         # A list is forced to use old breakpoints if it was interrupted
18907         # by side comments or blank lines, or requested by user.
18908         #---------------------------------------------------------------
18909         if (   $rOpts_break_at_old_comma_breakpoints
18910             || $interrupted
18911             || $i_opening_paren < 0 )
18912         {
18913             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
18914             return;
18915         }
18916
18917         #---------------------------------------------------------------
18918         # Looks like a list of items.  We have to look at it and size it up.
18919         #---------------------------------------------------------------
18920
18921         my $opening_token = $tokens_to_go[$i_opening_paren];
18922         my $opening_environment =
18923           $container_environment_to_go[$i_opening_paren];
18924
18925         #-------------------------------------------------------------------
18926         # Return if this will fit on one line
18927         #-------------------------------------------------------------------
18928
18929         my $i_opening_minus = find_token_starting_list($i_opening_paren);
18930         return
18931           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
18932
18933         #-------------------------------------------------------------------
18934         # Now we know that this block spans multiple lines; we have to set
18935         # at least one breakpoint -- real or fake -- as a signal to break
18936         # open any outer containers.
18937         #-------------------------------------------------------------------
18938         set_fake_breakpoint();
18939
18940         # be sure we do not extend beyond the current list length
18941         if ( $i_effective_last_comma >= $max_index_to_go ) {
18942             $i_effective_last_comma = $max_index_to_go - 1;
18943         }
18944
18945         # Set a flag indicating if we need to break open to keep -lp
18946         # items aligned.  This is necessary if any of the list terms
18947         # exceeds the available space after the '('.
18948         my $need_lp_break_open = $must_break_open;
18949         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
18950             my $columns_if_unbroken =
18951               maximum_line_length($i_opening_minus) -
18952               total_line_length( $i_opening_minus, $i_opening_paren );
18953             $need_lp_break_open =
18954                  ( $max_length[0] > $columns_if_unbroken )
18955               || ( $max_length[1] > $columns_if_unbroken )
18956               || ( $first_term_length > $columns_if_unbroken );
18957         }
18958
18959         # Specify if the list must have an even number of fields or not.
18960         # It is generally safest to assume an even number, because the
18961         # list items might be a hash list.  But if we can be sure that
18962         # it is not a hash, then we can allow an odd number for more
18963         # flexibility.
18964         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
18965
18966         if (   $identifier_count >= $item_count - 1
18967             || $is_assignment{$next_nonblank_type}
18968             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
18969           )
18970         {
18971             $odd_or_even = 1;
18972         }
18973
18974         # do we have a long first term which should be
18975         # left on a line by itself?
18976         my $use_separate_first_term = (
18977             $odd_or_even == 1       # only if we can use 1 field/line
18978               && $item_count > 3    # need several items
18979               && $first_term_length >
18980               2 * $max_length[0] - 2    # need long first term
18981               && $first_term_length >
18982               2 * $max_length[1] - 2    # need long first term
18983         );
18984
18985         # or do we know from the type of list that the first term should
18986         # be placed alone?
18987         if ( !$use_separate_first_term ) {
18988             if ( $is_keyword_with_special_leading_term{$list_type} ) {
18989                 $use_separate_first_term = 1;
18990
18991                 # should the container be broken open?
18992                 if ( $item_count < 3 ) {
18993                     if ( $i_first_comma - $i_opening_paren < 4 ) {
18994                         ${$rdo_not_break_apart} = 1;
18995                     }
18996                 }
18997                 elsif ($first_term_length < 20
18998                     && $i_first_comma - $i_opening_paren < 4 )
18999                 {
19000                     my $columns = table_columns_available($i_first_comma);
19001                     if ( $first_term_length < $columns ) {
19002                         ${$rdo_not_break_apart} = 1;
19003                     }
19004                 }
19005             }
19006         }
19007
19008         # if so,
19009         if ($use_separate_first_term) {
19010
19011             # ..set a break and update starting values
19012             $use_separate_first_term = 1;
19013             set_forced_breakpoint($i_first_comma);
19014             $i_opening_paren = $i_first_comma;
19015             $i_first_comma   = $rcomma_index->[1];
19016             $item_count--;
19017             return if $comma_count == 1;
19018             shift @item_lengths;
19019             shift @i_term_begin;
19020             shift @i_term_end;
19021             shift @i_term_comma;
19022         }
19023
19024         # if not, update the metrics to include the first term
19025         else {
19026             if ( $first_term_length > $max_length[0] ) {
19027                 $max_length[0] = $first_term_length;
19028             }
19029         }
19030
19031         # Field width parameters
19032         my $pair_width = ( $max_length[0] + $max_length[1] );
19033         my $max_width =
19034           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
19035
19036         # Number of free columns across the page width for laying out tables
19037         my $columns = table_columns_available($i_first_comma);
19038
19039         # Estimated maximum number of fields which fit this space
19040         # This will be our first guess
19041         my $number_of_fields_max =
19042           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
19043             $pair_width );
19044         my $number_of_fields = $number_of_fields_max;
19045
19046         # Find the best-looking number of fields
19047         # and make this our second guess if possible
19048         my ( $number_of_fields_best, $ri_ragged_break_list,
19049             $new_identifier_count )
19050           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
19051             $max_width );
19052
19053         if (   $number_of_fields_best != 0
19054             && $number_of_fields_best < $number_of_fields_max )
19055         {
19056             $number_of_fields = $number_of_fields_best;
19057         }
19058
19059         # ----------------------------------------------------------------------
19060         # If we are crowded and the -lp option is being used, try to
19061         # undo some indentation
19062         # ----------------------------------------------------------------------
19063         if (
19064             $rOpts_line_up_parentheses
19065             && (
19066                 $number_of_fields == 0
19067                 || (   $number_of_fields == 1
19068                     && $number_of_fields != $number_of_fields_best )
19069             )
19070           )
19071         {
19072             my $available_spaces = get_available_spaces_to_go($i_first_comma);
19073             if ( $available_spaces > 0 ) {
19074
19075                 my $spaces_wanted = $max_width - $columns;    # for 1 field
19076
19077                 if ( $number_of_fields_best == 0 ) {
19078                     $number_of_fields_best =
19079                       get_maximum_fields_wanted( \@item_lengths );
19080                 }
19081
19082                 if ( $number_of_fields_best != 1 ) {
19083                     my $spaces_wanted_2 =
19084                       1 + $pair_width - $columns;             # for 2 fields
19085                     if ( $available_spaces > $spaces_wanted_2 ) {
19086                         $spaces_wanted = $spaces_wanted_2;
19087                     }
19088                 }
19089
19090                 if ( $spaces_wanted > 0 ) {
19091                     my $deleted_spaces =
19092                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
19093
19094                     # redo the math
19095                     if ( $deleted_spaces > 0 ) {
19096                         $columns = table_columns_available($i_first_comma);
19097                         $number_of_fields_max =
19098                           maximum_number_of_fields( $columns, $odd_or_even,
19099                             $max_width, $pair_width );
19100                         $number_of_fields = $number_of_fields_max;
19101
19102                         if (   $number_of_fields_best == 1
19103                             && $number_of_fields >= 1 )
19104                         {
19105                             $number_of_fields = $number_of_fields_best;
19106                         }
19107                     }
19108                 }
19109             }
19110         }
19111
19112         # try for one column if two won't work
19113         if ( $number_of_fields <= 0 ) {
19114             $number_of_fields = int( $columns / $max_width );
19115         }
19116
19117         # The user can place an upper bound on the number of fields,
19118         # which can be useful for doing maintenance on tables
19119         if (   $rOpts_maximum_fields_per_table
19120             && $number_of_fields > $rOpts_maximum_fields_per_table )
19121         {
19122             $number_of_fields = $rOpts_maximum_fields_per_table;
19123         }
19124
19125         # How many columns (characters) and lines would this container take
19126         # if no additional whitespace were added?
19127         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
19128             $i_effective_last_comma + 1 );
19129         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
19130         my $packed_lines = 1 + int( $packed_columns / $columns );
19131
19132         # are we an item contained in an outer list?
19133         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
19134
19135         if ( $number_of_fields <= 0 ) {
19136
19137 #         #---------------------------------------------------------------
19138 #         # We're in trouble.  We can't find a single field width that works.
19139 #         # There is no simple answer here; we may have a single long list
19140 #         # item, or many.
19141 #         #---------------------------------------------------------------
19142 #
19143 #         In many cases, it may be best to not force a break if there is just one
19144 #         comma, because the standard continuation break logic will do a better
19145 #         job without it.
19146 #
19147 #         In the common case that all but one of the terms can fit
19148 #         on a single line, it may look better not to break open the
19149 #         containing parens.  Consider, for example
19150 #
19151 #             $color =
19152 #               join ( '/',
19153 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
19154 #                 keys %colors );
19155 #
19156 #         which will look like this with the container broken:
19157 #
19158 #             $color = join (
19159 #                 '/',
19160 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
19161 #             );
19162 #
19163 #         Here is an example of this rule for a long last term:
19164 #
19165 #             log_message( 0, 256, 128,
19166 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
19167 #
19168 #         And here is an example with a long first term:
19169 #
19170 #         $s = sprintf(
19171 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
19172 #             $r, $pu, $ps, $cu, $cs, $tt
19173 #           )
19174 #           if $style eq 'all';
19175
19176             my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
19177             my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
19178             my $long_first_term =
19179               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
19180
19181             # break at every comma ...
19182             if (
19183
19184                 # if requested by user or is best looking
19185                 $number_of_fields_best == 1
19186
19187                 # or if this is a sublist of a larger list
19188                 || $in_hierarchical_list
19189
19190                 # or if multiple commas and we don't have a long first or last
19191                 # term
19192                 || ( $comma_count > 1
19193                     && !( $long_last_term || $long_first_term ) )
19194               )
19195             {
19196                 foreach ( 0 .. $comma_count - 1 ) {
19197                     set_forced_breakpoint( $rcomma_index->[$_] );
19198                 }
19199             }
19200             elsif ($long_last_term) {
19201
19202                 set_forced_breakpoint($i_last_comma);
19203                 ${$rdo_not_break_apart} = 1 unless $must_break_open;
19204             }
19205             elsif ($long_first_term) {
19206
19207                 set_forced_breakpoint($i_first_comma);
19208             }
19209             else {
19210
19211                 # let breaks be defined by default bond strength logic
19212             }
19213             return;
19214         }
19215
19216         # --------------------------------------------------------
19217         # We have a tentative field count that seems to work.
19218         # How many lines will this require?
19219         # --------------------------------------------------------
19220         my $formatted_lines = $item_count / ($number_of_fields);
19221         if ( $formatted_lines != int $formatted_lines ) {
19222             $formatted_lines = 1 + int $formatted_lines;
19223         }
19224
19225         # So far we've been trying to fill out to the right margin.  But
19226         # compact tables are easier to read, so let's see if we can use fewer
19227         # fields without increasing the number of lines.
19228         $number_of_fields =
19229           compactify_table( $item_count, $number_of_fields, $formatted_lines,
19230             $odd_or_even );
19231
19232         # How many spaces across the page will we fill?
19233         my $columns_per_line =
19234           ( int $number_of_fields / 2 ) * $pair_width +
19235           ( $number_of_fields % 2 ) * $max_width;
19236
19237         my $formatted_columns;
19238
19239         if ( $number_of_fields > 1 ) {
19240             $formatted_columns =
19241               ( $pair_width * ( int( $item_count / 2 ) ) +
19242                   ( $item_count % 2 ) * $max_width );
19243         }
19244         else {
19245             $formatted_columns = $max_width * $item_count;
19246         }
19247         if ( $formatted_columns < $packed_columns ) {
19248             $formatted_columns = $packed_columns;
19249         }
19250
19251         my $unused_columns = $formatted_columns - $packed_columns;
19252
19253         # set some empirical parameters to help decide if we should try to
19254         # align; high sparsity does not look good, especially with few lines
19255         my $sparsity = ($unused_columns) / ($formatted_columns);
19256         my $max_allowed_sparsity =
19257             ( $item_count < 3 )    ? 0.1
19258           : ( $packed_lines == 1 ) ? 0.15
19259           : ( $packed_lines == 2 ) ? 0.4
19260           :                          0.7;
19261
19262         # Begin check for shortcut methods, which avoid treating a list
19263         # as a table for relatively small parenthesized lists.  These
19264         # are usually easier to read if not formatted as tables.
19265         if (
19266             $packed_lines <= 2                    # probably can fit in 2 lines
19267             && $item_count < 9                    # doesn't have too many items
19268             && $opening_environment eq 'BLOCK'    # not a sub-container
19269             && $opening_token eq '('              # is paren list
19270           )
19271         {
19272
19273             # Shortcut method 1: for -lp and just one comma:
19274             # This is a no-brainer, just break at the comma.
19275             if (
19276                 $rOpts_line_up_parentheses    # -lp
19277                 && $item_count == 2           # two items, one comma
19278                 && !$must_break_open
19279               )
19280             {
19281                 my $i_break = $rcomma_index->[0];
19282                 set_forced_breakpoint($i_break);
19283                 ${$rdo_not_break_apart} = 1;
19284                 set_non_alignment_flags( $comma_count, $rcomma_index );
19285                 return;
19286
19287             }
19288
19289             # method 2 is for most small ragged lists which might look
19290             # best if not displayed as a table.
19291             if (
19292                 ( $number_of_fields == 2 && $item_count == 3 )
19293                 || (
19294                     $new_identifier_count > 0    # isn't all quotes
19295                     && $sparsity > 0.15
19296                 )    # would be fairly spaced gaps if aligned
19297               )
19298             {
19299
19300                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
19301                     $ri_ragged_break_list );
19302                 ++$break_count if ($use_separate_first_term);
19303
19304                 # NOTE: we should really use the true break count here,
19305                 # which can be greater if there are large terms and
19306                 # little space, but usually this will work well enough.
19307                 unless ($must_break_open) {
19308
19309                     if ( $break_count <= 1 ) {
19310                         ${$rdo_not_break_apart} = 1;
19311                     }
19312                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
19313                     {
19314                         ${$rdo_not_break_apart} = 1;
19315                     }
19316                 }
19317                 set_non_alignment_flags( $comma_count, $rcomma_index );
19318                 return;
19319             }
19320
19321         }    # end shortcut methods
19322
19323         # debug stuff
19324
19325         FORMATTER_DEBUG_FLAG_SPARSE && do {
19326             print STDOUT
19327 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
19328
19329         };
19330
19331         #---------------------------------------------------------------
19332         # Compound List Rule 2:
19333         # If this list is too long for one line, and it is an item of a
19334         # larger list, then we must format it, regardless of sparsity
19335         # (ian.t).  One reason that we have to do this is to trigger
19336         # Compound List Rule 1, above, which causes breaks at all commas of
19337         # all outer lists.  In this way, the structure will be properly
19338         # displayed.
19339         #---------------------------------------------------------------
19340
19341         # Decide if this list is too long for one line unless broken
19342         my $total_columns = table_columns_available($i_opening_paren);
19343         my $too_long      = $packed_columns > $total_columns;
19344
19345         # For a paren list, include the length of the token just before the
19346         # '(' because this is likely a sub call, and we would have to
19347         # include the sub name on the same line as the list.  This is still
19348         # imprecise, but not too bad.  (steve.t)
19349         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
19350
19351             $too_long = excess_line_length( $i_opening_minus,
19352                 $i_effective_last_comma + 1 ) > 0;
19353         }
19354
19355         # FIXME: For an item after a '=>', try to include the length of the
19356         # thing before the '=>'.  This is crude and should be improved by
19357         # actually looking back token by token.
19358         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
19359             my $i_opening_minus = $i_opening_paren - 4;
19360             if ( $i_opening_minus >= 0 ) {
19361                 $too_long = excess_line_length( $i_opening_minus,
19362                     $i_effective_last_comma + 1 ) > 0;
19363             }
19364         }
19365
19366         # Always break lists contained in '[' and '{' if too long for 1 line,
19367         # and always break lists which are too long and part of a more complex
19368         # structure.
19369         my $must_break_open_container = $must_break_open
19370           || ( $too_long
19371             && ( $in_hierarchical_list || $opening_token ne '(' ) );
19372
19373 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
19374
19375         #---------------------------------------------------------------
19376         # The main decision:
19377         # Now decide if we will align the data into aligned columns.  Do not
19378         # attempt to align columns if this is a tiny table or it would be
19379         # too spaced.  It seems that the more packed lines we have, the
19380         # sparser the list that can be allowed and still look ok.
19381         #---------------------------------------------------------------
19382
19383         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
19384             || ( $formatted_lines < 2 )
19385             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
19386           )
19387         {
19388
19389             #---------------------------------------------------------------
19390             # too sparse: would look ugly if aligned in a table;
19391             #---------------------------------------------------------------
19392
19393             # use old breakpoints if this is a 'big' list
19394             # FIXME: goal is to improve set_ragged_breakpoints so that
19395             # this is not necessary.
19396             if ( $packed_lines > 2 && $item_count > 10 ) {
19397                 write_logfile_entry("List sparse: using old breakpoints\n");
19398                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
19399             }
19400
19401             # let the continuation logic handle it if 2 lines
19402             else {
19403
19404                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
19405                     $ri_ragged_break_list );
19406                 ++$break_count if ($use_separate_first_term);
19407
19408                 unless ($must_break_open_container) {
19409                     if ( $break_count <= 1 ) {
19410                         ${$rdo_not_break_apart} = 1;
19411                     }
19412                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
19413                     {
19414                         ${$rdo_not_break_apart} = 1;
19415                     }
19416                 }
19417                 set_non_alignment_flags( $comma_count, $rcomma_index );
19418             }
19419             return;
19420         }
19421
19422         #---------------------------------------------------------------
19423         # go ahead and format as a table
19424         #---------------------------------------------------------------
19425         write_logfile_entry(
19426             "List: auto formatting with $number_of_fields fields/row\n");
19427
19428         my $j_first_break =
19429           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
19430
19431         for (
19432             my $j = $j_first_break ;
19433             $j < $comma_count ;
19434             $j += $number_of_fields
19435           )
19436         {
19437             my $i = $rcomma_index->[$j];
19438             set_forced_breakpoint($i);
19439         }
19440         return;
19441     }
19442 }
19443
19444 sub set_non_alignment_flags {
19445
19446     # set flag which indicates that these commas should not be
19447     # aligned
19448     my ( $comma_count, $rcomma_index ) = @_;
19449     foreach ( 0 .. $comma_count - 1 ) {
19450         $matching_token_to_go[ $rcomma_index->[$_] ] = 1;
19451     }
19452     return;
19453 }
19454
19455 sub study_list_complexity {
19456
19457     # Look for complex tables which should be formatted with one term per line.
19458     # Returns the following:
19459     #
19460     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
19461     #    which are hard to read
19462     #  $number_of_fields_best = suggested number of fields based on
19463     #    complexity; = 0 if any number may be used.
19464     #
19465     my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
19466     my $item_count            = @{$ri_term_begin};
19467     my $complex_item_count    = 0;
19468     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
19469     my $i_max                 = @{$ritem_lengths} - 1;
19470     ##my @item_complexity;
19471
19472     my $i_last_last_break = -3;
19473     my $i_last_break      = -2;
19474     my @i_ragged_break_list;
19475
19476     my $definitely_complex = 30;
19477     my $definitely_simple  = 12;
19478     my $quote_count        = 0;
19479
19480     for my $i ( 0 .. $i_max ) {
19481         my $ib = $ri_term_begin->[$i];
19482         my $ie = $ri_term_end->[$i];
19483
19484         # define complexity: start with the actual term length
19485         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
19486
19487         ##TBD: join types here and check for variations
19488         ##my $str=join "", @tokens_to_go[$ib..$ie];
19489
19490         my $is_quote = 0;
19491         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
19492             $is_quote = 1;
19493             $quote_count++;
19494         }
19495         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
19496             $quote_count++;
19497         }
19498
19499         if ( $ib eq $ie ) {
19500             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
19501                 $complex_item_count++;
19502                 $weighted_length *= 2;
19503             }
19504             else {
19505             }
19506         }
19507         else {
19508             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
19509                 $complex_item_count++;
19510                 $weighted_length *= 2;
19511             }
19512             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
19513                 $weighted_length += 4;
19514             }
19515         }
19516
19517         # add weight for extra tokens.
19518         $weighted_length += 2 * ( $ie - $ib );
19519
19520 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
19521 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
19522
19523 ##push @item_complexity, $weighted_length;
19524
19525         # now mark a ragged break after this item it if it is 'long and
19526         # complex':
19527         if ( $weighted_length >= $definitely_complex ) {
19528
19529             # if we broke after the previous term
19530             # then break before it too
19531             if (   $i_last_break == $i - 1
19532                 && $i > 1
19533                 && $i_last_last_break != $i - 2 )
19534             {
19535
19536                 ## FIXME: don't strand a small term
19537                 pop @i_ragged_break_list;
19538                 push @i_ragged_break_list, $i - 2;
19539                 push @i_ragged_break_list, $i - 1;
19540             }
19541
19542             push @i_ragged_break_list, $i;
19543             $i_last_last_break = $i_last_break;
19544             $i_last_break      = $i;
19545         }
19546
19547         # don't break before a small last term -- it will
19548         # not look good on a line by itself.
19549         elsif ($i == $i_max
19550             && $i_last_break == $i - 1
19551             && $weighted_length <= $definitely_simple )
19552         {
19553             pop @i_ragged_break_list;
19554         }
19555     }
19556
19557     my $identifier_count = $i_max + 1 - $quote_count;
19558
19559     # Need more tuning here..
19560     if (   $max_width > 12
19561         && $complex_item_count > $item_count / 2
19562         && $number_of_fields_best != 2 )
19563     {
19564         $number_of_fields_best = 1;
19565     }
19566
19567     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
19568 }
19569
19570 sub get_maximum_fields_wanted {
19571
19572     # Not all tables look good with more than one field of items.
19573     # This routine looks at a table and decides if it should be
19574     # formatted with just one field or not.
19575     # This coding is still under development.
19576     my ($ritem_lengths) = @_;
19577
19578     my $number_of_fields_best = 0;
19579
19580     # For just a few items, we tentatively assume just 1 field.
19581     my $item_count = @{$ritem_lengths};
19582     if ( $item_count <= 5 ) {
19583         $number_of_fields_best = 1;
19584     }
19585
19586     # For larger tables, look at it both ways and see what looks best
19587     else {
19588
19589         my $is_odd            = 1;
19590         my @max_length        = ( 0, 0 );
19591         my @last_length_2     = ( undef, undef );
19592         my @first_length_2    = ( undef, undef );
19593         my $last_length       = undef;
19594         my $total_variation_1 = 0;
19595         my $total_variation_2 = 0;
19596         my @total_variation_2 = ( 0, 0 );
19597         foreach my $j ( 0 .. $item_count - 1 ) {
19598
19599             $is_odd = 1 - $is_odd;
19600             my $length = $ritem_lengths->[$j];
19601             if ( $length > $max_length[$is_odd] ) {
19602                 $max_length[$is_odd] = $length;
19603             }
19604
19605             if ( defined($last_length) ) {
19606                 my $dl = abs( $length - $last_length );
19607                 $total_variation_1 += $dl;
19608             }
19609             $last_length = $length;
19610
19611             my $ll = $last_length_2[$is_odd];
19612             if ( defined($ll) ) {
19613                 my $dl = abs( $length - $ll );
19614                 $total_variation_2[$is_odd] += $dl;
19615             }
19616             else {
19617                 $first_length_2[$is_odd] = $length;
19618             }
19619             $last_length_2[$is_odd] = $length;
19620         }
19621         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
19622
19623         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
19624         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
19625             $number_of_fields_best = 1;
19626         }
19627     }
19628     return ($number_of_fields_best);
19629 }
19630
19631 sub table_columns_available {
19632     my $i_first_comma = shift;
19633     my $columns =
19634       maximum_line_length($i_first_comma) -
19635       leading_spaces_to_go($i_first_comma);
19636
19637     # Patch: the vertical formatter does not line up lines whose lengths
19638     # exactly equal the available line length because of allowances
19639     # that must be made for side comments.  Therefore, the number of
19640     # available columns is reduced by 1 character.
19641     $columns -= 1;
19642     return $columns;
19643 }
19644
19645 sub maximum_number_of_fields {
19646
19647     # how many fields will fit in the available space?
19648     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
19649     my $max_pairs        = int( $columns / $pair_width );
19650     my $number_of_fields = $max_pairs * 2;
19651     if (   $odd_or_even == 1
19652         && $max_pairs * $pair_width + $max_width <= $columns )
19653     {
19654         $number_of_fields++;
19655     }
19656     return $number_of_fields;
19657 }
19658
19659 sub compactify_table {
19660
19661     # given a table with a certain number of fields and a certain number
19662     # of lines, see if reducing the number of fields will make it look
19663     # better.
19664     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
19665     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
19666         my $min_fields;
19667
19668         for (
19669             $min_fields = $number_of_fields ;
19670             $min_fields >= $odd_or_even
19671             && $min_fields * $formatted_lines >= $item_count ;
19672             $min_fields -= $odd_or_even
19673           )
19674         {
19675             $number_of_fields = $min_fields;
19676         }
19677     }
19678     return $number_of_fields;
19679 }
19680
19681 sub set_ragged_breakpoints {
19682
19683     # Set breakpoints in a list that cannot be formatted nicely as a
19684     # table.
19685     my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
19686
19687     my $break_count = 0;
19688     foreach ( @{$ri_ragged_break_list} ) {
19689         my $j = $ri_term_comma->[$_];
19690         if ($j) {
19691             set_forced_breakpoint($j);
19692             $break_count++;
19693         }
19694     }
19695     return $break_count;
19696 }
19697
19698 sub copy_old_breakpoints {
19699     my ( $i_first_comma, $i_last_comma ) = @_;
19700     for my $i ( $i_first_comma .. $i_last_comma ) {
19701         if ( $old_breakpoint_to_go[$i] ) {
19702             set_forced_breakpoint($i);
19703         }
19704     }
19705     return;
19706 }
19707
19708 sub set_nobreaks {
19709     my ( $i, $j ) = @_;
19710     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
19711
19712         FORMATTER_DEBUG_FLAG_NOBREAK && do {
19713             my ( $a, $b, $c ) = caller();
19714             print STDOUT
19715 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
19716         };
19717
19718         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
19719     }
19720
19721     # shouldn't happen; non-critical error
19722     else {
19723         FORMATTER_DEBUG_FLAG_NOBREAK && do {
19724             my ( $a, $b, $c ) = caller();
19725             print STDOUT
19726               "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
19727         };
19728     }
19729     return;
19730 }
19731
19732 sub set_fake_breakpoint {
19733
19734     # Just bump up the breakpoint count as a signal that there are breaks.
19735     # This is useful if we have breaks but may want to postpone deciding where
19736     # to make them.
19737     $forced_breakpoint_count++;
19738     return;
19739 }
19740
19741 sub set_forced_breakpoint {
19742     my $i = shift;
19743
19744     return unless defined $i && $i >= 0;
19745
19746     # no breaks between welded tokens
19747     return if ( weld_len_right_to_go($i) );
19748
19749     # when called with certain tokens, use bond strengths to decide
19750     # if we break before or after it
19751     my $token = $tokens_to_go[$i];
19752
19753     if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
19754         if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
19755     }
19756
19757     # breaks are forced before 'if' and 'unless'
19758     elsif ( $is_if_unless{$token} ) { $i-- }
19759
19760     if ( $i >= 0 && $i <= $max_index_to_go ) {
19761         my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
19762
19763         FORMATTER_DEBUG_FLAG_FORCE && do {
19764             my ( $a, $b, $c ) = caller();
19765             print STDOUT
19766 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
19767         };
19768
19769         if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
19770             $forced_breakpoint_to_go[$i_nonblank] = 1;
19771
19772             if ( $i_nonblank > $index_max_forced_break ) {
19773                 $index_max_forced_break = $i_nonblank;
19774             }
19775             $forced_breakpoint_count++;
19776             $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
19777               $i_nonblank;
19778
19779             # if we break at an opening container..break at the closing
19780             if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
19781                 set_closing_breakpoint($i_nonblank);
19782             }
19783         }
19784     }
19785     return;
19786 }
19787
19788 sub clear_breakpoint_undo_stack {
19789     $forced_breakpoint_undo_count = 0;
19790     return;
19791 }
19792
19793 sub undo_forced_breakpoint_stack {
19794
19795     my $i_start = shift;
19796     if ( $i_start < 0 ) {
19797         $i_start = 0;
19798         my ( $a, $b, $c ) = caller();
19799         warning(
19800 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
19801         );
19802     }
19803
19804     while ( $forced_breakpoint_undo_count > $i_start ) {
19805         my $i =
19806           $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
19807         if ( $i >= 0 && $i <= $max_index_to_go ) {
19808             $forced_breakpoint_to_go[$i] = 0;
19809             $forced_breakpoint_count--;
19810
19811             FORMATTER_DEBUG_FLAG_UNDOBP && do {
19812                 my ( $a, $b, $c ) = caller();
19813                 print STDOUT
19814 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
19815             };
19816         }
19817
19818         # shouldn't happen, but not a critical error
19819         else {
19820             FORMATTER_DEBUG_FLAG_UNDOBP && do {
19821                 my ( $a, $b, $c ) = caller();
19822                 print STDOUT
19823 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
19824             };
19825         }
19826     }
19827     return;
19828 }
19829
19830 {    # begin recombine_breakpoints
19831
19832     my %is_amp_amp;
19833     my %is_ternary;
19834     my %is_math_op;
19835     my %is_plus_minus;
19836     my %is_mult_div;
19837
19838     BEGIN {
19839
19840         my @q;
19841         @q = qw( && || );
19842         @is_amp_amp{@q} = (1) x scalar(@q);
19843
19844         @q = qw( ? : );
19845         @is_ternary{@q} = (1) x scalar(@q);
19846
19847         @q = qw( + - * / );
19848         @is_math_op{@q} = (1) x scalar(@q);
19849
19850         @q = qw( + - );
19851         @is_plus_minus{@q} = (1) x scalar(@q);
19852
19853         @q = qw( * / );
19854         @is_mult_div{@q} = (1) x scalar(@q);
19855     }
19856
19857     sub DUMP_BREAKPOINTS {
19858
19859         # Debug routine to dump current breakpoints...not normally called
19860         # We are given indexes to the current lines:
19861         # $ri_beg = ref to array of BEGinning indexes of each line
19862         # $ri_end = ref to array of ENDing indexes of each line
19863         my ( $ri_beg, $ri_end, $msg ) = @_;
19864         print STDERR "----Dumping breakpoints from: $msg----\n";
19865         for my $n ( 0 .. @{$ri_end} - 1 ) {
19866             my $ibeg = $ri_beg->[$n];
19867             my $iend = $ri_end->[$n];
19868             my $text = "";
19869             foreach my $i ( $ibeg .. $iend ) {
19870                 $text .= $tokens_to_go[$i];
19871             }
19872             print STDERR "$n ($ibeg:$iend) $text\n";
19873         }
19874         print STDERR "----\n";
19875         return;
19876     }
19877
19878     sub unmask_phantom_semicolons {
19879
19880         my ( $self, $ri_beg, $ri_end ) = @_;
19881
19882         # Walk down the lines of this batch and unmask any invisible line-ending
19883         # semicolons.  They were placed by sub respace_tokens but we only now
19884         # know if we actually need them.
19885
19886         my $nmax = @{$ri_end} - 1;
19887         foreach my $n ( 0 .. $nmax ) {
19888
19889             my $i = $ri_end->[$n];
19890             if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
19891
19892                 $tokens_to_go[$i] = $rtoken_vars_to_go[$i]->[_TOKEN_] =
19893                   $want_left_space{';'} == WS_NO ? ';' : ' ;';
19894                 my $line_number = $rtoken_vars_to_go[$i]->[_LINE_INDEX_] + 1;
19895                 note_added_semicolon($line_number);
19896             }
19897         }
19898         return;
19899     }
19900
19901     sub recombine_breakpoints {
19902
19903         # sub set_continuation_breaks is very liberal in setting line breaks
19904         # for long lines, always setting breaks at good breakpoints, even
19905         # when that creates small lines.  Sometimes small line fragments
19906         # are produced which would look better if they were combined.
19907         # That's the task of this routine.
19908         #
19909         # We are given indexes to the current lines:
19910         # $ri_beg = ref to array of BEGinning indexes of each line
19911         # $ri_end = ref to array of ENDing indexes of each line
19912         my ( $ri_beg, $ri_end ) = @_;
19913
19914         # Make a list of all good joining tokens between the lines
19915         # n-1 and n.
19916         my @joint;
19917         my $nmax = @{$ri_end} - 1;
19918         for my $n ( 1 .. $nmax ) {
19919             my $ibeg_1 = $ri_beg->[ $n - 1 ];
19920             my $iend_1 = $ri_end->[ $n - 1 ];
19921             my $iend_2 = $ri_end->[$n];
19922             my $ibeg_2 = $ri_beg->[$n];
19923
19924             my ( $itok, $itokp, $itokm );
19925
19926             foreach my $itest ( $iend_1, $ibeg_2 ) {
19927                 my $type = $types_to_go[$itest];
19928                 if (   $is_math_op{$type}
19929                     || $is_amp_amp{$type}
19930                     || $is_assignment{$type}
19931                     || $type eq ':' )
19932                 {
19933                     $itok = $itest;
19934                 }
19935             }
19936             $joint[$n] = [$itok];
19937         }
19938
19939         my $more_to_do = 1;
19940
19941         # We keep looping over all of the lines of this batch
19942         # until there are no more possible recombinations
19943         my $nmax_last = @{$ri_end};
19944         my $reverse   = 0;
19945         while ($more_to_do) {
19946             my $n_best = 0;
19947             my $bs_best;
19948             my $nmax = @{$ri_end} - 1;
19949
19950             # Safety check for infinite loop
19951             unless ( $nmax < $nmax_last ) {
19952
19953                 # Shouldn't happen because splice below decreases nmax on each
19954                 # pass.
19955                 Fault("Program bug-infinite loop in recombine breakpoints\n");
19956             }
19957             $nmax_last  = $nmax;
19958             $more_to_do = 0;
19959             my $skip_Section_3;
19960             my $leading_amp_count = 0;
19961             my $this_line_is_semicolon_terminated;
19962
19963             # loop over all remaining lines in this batch
19964             for my $iter ( 1 .. $nmax ) {
19965
19966                 # alternating sweep direction gives symmetric results
19967                 # for recombining lines which exceed the line length
19968                 # such as eval {{{{.... }}}}
19969                 my $n;
19970                 if   ($reverse) { $n = 1 + $nmax - $iter; }
19971                 else            { $n = $iter }
19972
19973                 #----------------------------------------------------------
19974                 # If we join the current pair of lines,
19975                 # line $n-1 will become the left part of the joined line
19976                 # line $n will become the right part of the joined line
19977                 #
19978                 # Here are Indexes of the endpoint tokens of the two lines:
19979                 #
19980                 #  -----line $n-1--- | -----line $n-----
19981                 #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
19982                 #                    ^
19983                 #                    |
19984                 # We want to decide if we should remove the line break
19985                 # between the tokens at $iend_1 and $ibeg_2
19986                 #
19987                 # We will apply a number of ad-hoc tests to see if joining
19988                 # here will look ok.  The code will just issue a 'next'
19989                 # command if the join doesn't look good.  If we get through
19990                 # the gauntlet of tests, the lines will be recombined.
19991                 #----------------------------------------------------------
19992                 #
19993                 # beginning and ending tokens of the lines we are working on
19994                 my $ibeg_1    = $ri_beg->[ $n - 1 ];
19995                 my $iend_1    = $ri_end->[ $n - 1 ];
19996                 my $iend_2    = $ri_end->[$n];
19997                 my $ibeg_2    = $ri_beg->[$n];
19998                 my $ibeg_nmax = $ri_beg->[$nmax];
19999
20000                 # combined line cannot be too long
20001                 my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
20002                 next if ( $excess > 0 );
20003
20004                 my $type_iend_1 = $types_to_go[$iend_1];
20005                 my $type_iend_2 = $types_to_go[$iend_2];
20006                 my $type_ibeg_1 = $types_to_go[$ibeg_1];
20007                 my $type_ibeg_2 = $types_to_go[$ibeg_2];
20008
20009                 # terminal token of line 2 if any side comment is ignored:
20010                 my $iend_2t      = $iend_2;
20011                 my $type_iend_2t = $type_iend_2;
20012
20013                 # some beginning indexes of other lines, which may not exist
20014                 my $ibeg_0 = $n > 1          ? $ri_beg->[ $n - 2 ] : -1;
20015                 my $ibeg_3 = $n < $nmax      ? $ri_beg->[ $n + 1 ] : -1;
20016                 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
20017
20018                 my $bs_tweak = 0;
20019
20020                 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
20021                 #        $nesting_depth_to_go[$ibeg_1] );
20022
20023                 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
20024                     print STDERR
20025 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
20026                 };
20027
20028                 # If line $n is the last line, we set some flags and
20029                 # do any special checks for it
20030                 if ( $n == $nmax ) {
20031
20032                     # a terminal '{' should stay where it is
20033                     next if $type_ibeg_2 eq '{';
20034
20035                     if (   $type_iend_2 eq '#'
20036                         && $iend_2 - $ibeg_2 >= 2
20037                         && $types_to_go[ $iend_2 - 1 ] eq 'b' )
20038                     {
20039                         $iend_2t      = $iend_2 - 2;
20040                         $type_iend_2t = $types_to_go[$iend_2t];
20041                     }
20042
20043                     $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
20044                 }
20045
20046                 #----------------------------------------------------------
20047                 # Recombine Section 0:
20048                 # Examine the special token joining this line pair, if any.
20049                 # Put as many tests in this section to avoid duplicate code and
20050                 # to make formatting independent of whether breaks are to the
20051                 # left or right of an operator.
20052                 #----------------------------------------------------------
20053
20054                 my ($itok) = @{ $joint[$n] };
20055                 if ($itok) {
20056
20057                     # FIXME: Patch - may not be necessary
20058                     my $iend_1 =
20059                         $type_iend_1 eq 'b'
20060                       ? $iend_1 - 1
20061                       : $iend_1;
20062
20063                     my $iend_2 =
20064                         $type_iend_2 eq 'b'
20065                       ? $iend_2 - 1
20066                       : $iend_2;
20067                     ## END PATCH
20068
20069                     my $type = $types_to_go[$itok];
20070
20071                     if ( $type eq ':' ) {
20072
20073                    # do not join at a colon unless it disobeys the break request
20074                         if ( $itok eq $iend_1 ) {
20075                             next unless $want_break_before{$type};
20076                         }
20077                         else {
20078                             $leading_amp_count++;
20079                             next if $want_break_before{$type};
20080                         }
20081                     } ## end if ':'
20082
20083                     # handle math operators + - * /
20084                     elsif ( $is_math_op{$type} ) {
20085
20086                         # Combine these lines if this line is a single
20087                         # number, or if it is a short term with same
20088                         # operator as the previous line.  For example, in
20089                         # the following code we will combine all of the
20090                         # short terms $A, $B, $C, $D, $E, $F, together
20091                         # instead of leaving them one per line:
20092                         #  my $time =
20093                         #    $A * $B * $C * $D * $E * $F *
20094                         #    ( 2. * $eps * $sigma * $area ) *
20095                         #    ( 1. / $tcold**3 - 1. / $thot**3 );
20096
20097                         # This can be important in math-intensive code.
20098
20099                         my $good_combo;
20100
20101                         my $itokp  = min( $inext_to_go[$itok],  $iend_2 );
20102                         my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
20103                         my $itokm  = max( $iprev_to_go[$itok],  $ibeg_1 );
20104                         my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
20105
20106                         # check for a number on the right
20107                         if ( $types_to_go[$itokp] eq 'n' ) {
20108
20109                             # ok if nothing else on right
20110                             if ( $itokp == $iend_2 ) {
20111                                 $good_combo = 1;
20112                             }
20113                             else {
20114
20115                                 # look one more token to right..
20116                                 # okay if math operator or some termination
20117                                 $good_combo =
20118                                   ( ( $itokpp == $iend_2 )
20119                                       && $is_math_op{ $types_to_go[$itokpp] } )
20120                                   || $types_to_go[$itokpp] =~ /^[#,;]$/;
20121                             }
20122                         }
20123
20124                         # check for a number on the left
20125                         if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
20126
20127                             # okay if nothing else to left
20128                             if ( $itokm == $ibeg_1 ) {
20129                                 $good_combo = 1;
20130                             }
20131
20132                             # otherwise look one more token to left
20133                             else {
20134
20135                                 # okay if math operator, comma, or assignment
20136                                 $good_combo = ( $itokmm == $ibeg_1 )
20137                                   && ( $is_math_op{ $types_to_go[$itokmm] }
20138                                     || $types_to_go[$itokmm] =~ /^[,]$/
20139                                     || $is_assignment{ $types_to_go[$itokmm] }
20140                                   );
20141                             }
20142                         }
20143
20144                         # look for a single short token either side of the
20145                         # operator
20146                         if ( !$good_combo ) {
20147
20148                             # Slight adjustment factor to make results
20149                             # independent of break before or after operator in
20150                             # long summed lists.  (An operator and a space make
20151                             # two spaces).
20152                             my $two = ( $itok eq $iend_1 ) ? 2 : 0;
20153
20154                             $good_combo =
20155
20156                               # numbers or id's on both sides of this joint
20157                               $types_to_go[$itokp] =~ /^[in]$/
20158                               && $types_to_go[$itokm] =~ /^[in]$/
20159
20160                               # one of the two lines must be short:
20161                               && (
20162                                 (
20163                                     # no more than 2 nonblank tokens right of
20164                                     # joint
20165                                     $itokpp == $iend_2
20166
20167                                     # short
20168                                     && token_sequence_length( $itokp, $iend_2 )
20169                                     < $two +
20170                                     $rOpts_short_concatenation_item_length
20171                                 )
20172                                 || (
20173                                     # no more than 2 nonblank tokens left of
20174                                     # joint
20175                                     $itokmm == $ibeg_1
20176
20177                                     # short
20178                                     && token_sequence_length( $ibeg_1, $itokm )
20179                                     < 2 - $two +
20180                                     $rOpts_short_concatenation_item_length
20181                                 )
20182
20183                               )
20184
20185                               # keep pure terms; don't mix +- with */
20186                               && !(
20187                                 $is_plus_minus{$type}
20188                                 && (   $is_mult_div{ $types_to_go[$itokmm] }
20189                                     || $is_mult_div{ $types_to_go[$itokpp] } )
20190                               )
20191                               && !(
20192                                 $is_mult_div{$type}
20193                                 && (   $is_plus_minus{ $types_to_go[$itokmm] }
20194                                     || $is_plus_minus{ $types_to_go[$itokpp] } )
20195                               )
20196
20197                               ;
20198                         }
20199
20200                         # it is also good to combine if we can reduce to 2 lines
20201                         if ( !$good_combo ) {
20202
20203                             # index on other line where same token would be in a
20204                             # long chain.
20205                             my $iother =
20206                               ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
20207
20208                             $good_combo =
20209                                  $n == 2
20210                               && $n == $nmax
20211                               && $types_to_go[$iother] ne $type;
20212                         }
20213
20214                         next unless ($good_combo);
20215
20216                     } ## end math
20217
20218                     elsif ( $is_amp_amp{$type} ) {
20219                         ##TBD
20220                     } ## end &&, ||
20221
20222                     elsif ( $is_assignment{$type} ) {
20223                         ##TBD
20224                     } ## end assignment
20225                 }
20226
20227                 #----------------------------------------------------------
20228                 # Recombine Section 1:
20229                 # Join welded nested containers immediately
20230                 # use alternating sweep direction until all are welds
20231                 # are done.  This produces more symmetric opening and
20232                 # closing joins when lines exceed line length.
20233                 #----------------------------------------------------------
20234                 if (   weld_len_right_to_go($iend_1)
20235                     || weld_len_left_to_go($ibeg_2) )
20236                 {
20237                     $n_best  = $n;
20238                     $reverse = 1 - $reverse;
20239                     last;
20240                 }
20241                 $reverse = 0;
20242
20243                 #----------------------------------------------------------
20244                 # Recombine Section 2:
20245                 # Examine token at $iend_1 (right end of first line of pair)
20246                 #----------------------------------------------------------
20247
20248                 # an isolated '}' may join with a ';' terminated segment
20249                 if ( $type_iend_1 eq '}' ) {
20250
20251                     # Check for cases where combining a semicolon terminated
20252                     # statement with a previous isolated closing paren will
20253                     # allow the combined line to be outdented.  This is
20254                     # generally a good move.  For example, we can join up
20255                     # the last two lines here:
20256                     #  (
20257                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
20258                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
20259                     #    )
20260                     #    = stat($file);
20261                     #
20262                     # to get:
20263                     #  (
20264                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
20265                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
20266                     #  ) = stat($file);
20267                     #
20268                     # which makes the parens line up.
20269                     #
20270                     # Another example, from Joe Matarazzo, probably looks best
20271                     # with the 'or' clause appended to the trailing paren:
20272                     #  $self->some_method(
20273                     #      PARAM1 => 'foo',
20274                     #      PARAM2 => 'bar'
20275                     #  ) or die "Some_method didn't work";
20276                     #
20277                     # But we do not want to do this for something like the -lp
20278                     # option where the paren is not outdentable because the
20279                     # trailing clause will be far to the right.
20280                     #
20281                     # The logic here is synchronized with the logic in sub
20282                     # sub set_adjusted_indentation, which actually does
20283                     # the outdenting.
20284                     #
20285                     $skip_Section_3 ||= $this_line_is_semicolon_terminated
20286
20287                       # only one token on last line
20288                       && $ibeg_1 == $iend_1
20289
20290                       # must be structural paren
20291                       && $tokens_to_go[$iend_1] eq ')'
20292
20293                       # style must allow outdenting,
20294                       && !$closing_token_indentation{')'}
20295
20296                       # only leading '&&', '||', and ':' if no others seen
20297                       # (but note: our count made below could be wrong
20298                       # due to intervening comments)
20299                       && ( $leading_amp_count == 0
20300                         || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
20301
20302                       # but leading colons probably line up with a
20303                       # previous colon or question (count could be wrong).
20304                       && $type_ibeg_2 ne ':'
20305
20306                       # only one step in depth allowed.  this line must not
20307                       # begin with a ')' itself.
20308                       && ( $nesting_depth_to_go[$iend_1] ==
20309                         $nesting_depth_to_go[$iend_2] + 1 );
20310
20311                     # YVES patch 2 of 2:
20312                     # Allow cuddled eval chains, like this:
20313                     #   eval {
20314                     #       #STUFF;
20315                     #       1; # return true
20316                     #   } or do {
20317                     #       #handle error
20318                     #   };
20319                     # This patch works together with a patch in
20320                     # setting adjusted indentation (where the closing eval
20321                     # brace is outdented if possible).
20322                     # The problem is that an 'eval' block has continuation
20323                     # indentation and it looks better to undo it in some
20324                     # cases.  If we do not use this patch we would get:
20325                     #   eval {
20326                     #       #STUFF;
20327                     #       1; # return true
20328                     #       }
20329                     #       or do {
20330                     #       #handle error
20331                     #     };
20332                     # The alternative, for uncuddled style, is to create
20333                     # a patch in set_adjusted_indentation which undoes
20334                     # the indentation of a leading line like 'or do {'.
20335                     # This doesn't work well with -icb through
20336                     if (
20337                            $block_type_to_go[$iend_1] eq 'eval'
20338                         && !$rOpts->{'line-up-parentheses'}
20339                         && !$rOpts->{'indent-closing-brace'}
20340                         && $tokens_to_go[$iend_2] eq '{'
20341                         && (
20342                             ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
20343                             || (   $type_ibeg_2 eq 'k'
20344                                 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
20345                             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
20346                         )
20347                       )
20348                     {
20349                         $skip_Section_3 ||= 1;
20350                     }
20351
20352                     next
20353                       unless (
20354                         $skip_Section_3
20355
20356                         # handle '.' and '?' specially below
20357                         || ( $type_ibeg_2 =~ /^[\.\?]$/ )
20358                       );
20359                 }
20360
20361                 elsif ( $type_iend_1 eq '{' ) {
20362
20363                     # YVES
20364                     # honor breaks at opening brace
20365                     # Added to prevent recombining something like this:
20366                     #  } || eval { package main;
20367                     next if $forced_breakpoint_to_go[$iend_1];
20368                 }
20369
20370                 # do not recombine lines with ending &&, ||,
20371                 elsif ( $is_amp_amp{$type_iend_1} ) {
20372                     next unless $want_break_before{$type_iend_1};
20373                 }
20374
20375                 # Identify and recombine a broken ?/: chain
20376                 elsif ( $type_iend_1 eq '?' ) {
20377
20378                     # Do not recombine different levels
20379                     next
20380                       if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
20381
20382                     # do not recombine unless next line ends in :
20383                     next unless $type_iend_2 eq ':';
20384                 }
20385
20386                 # for lines ending in a comma...
20387                 elsif ( $type_iend_1 eq ',' ) {
20388
20389                     # Do not recombine at comma which is following the
20390                     # input bias.
20391                     # TODO: might be best to make a special flag
20392                     next if ( $old_breakpoint_to_go[$iend_1] );
20393
20394                  # an isolated '},' may join with an identifier + ';'
20395                  # this is useful for the class of a 'bless' statement (bless.t)
20396                     if (   $type_ibeg_1 eq '}'
20397                         && $type_ibeg_2 eq 'i' )
20398                     {
20399                         next
20400                           unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
20401                             && ( $iend_2 == ( $ibeg_2 + 1 ) )
20402                             && $this_line_is_semicolon_terminated );
20403
20404                         # override breakpoint
20405                         $forced_breakpoint_to_go[$iend_1] = 0;
20406                     }
20407
20408                     # but otherwise ..
20409                     else {
20410
20411                         # do not recombine after a comma unless this will leave
20412                         # just 1 more line
20413                         next unless ( $n + 1 >= $nmax );
20414
20415                     # do not recombine if there is a change in indentation depth
20416                         next
20417                           if (
20418                             $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
20419
20420                         # do not recombine a "complex expression" after a
20421                         # comma.  "complex" means no parens.
20422                         my $saw_paren;
20423                         foreach my $ii ( $ibeg_2 .. $iend_2 ) {
20424                             if ( $tokens_to_go[$ii] eq '(' ) {
20425                                 $saw_paren = 1;
20426                                 last;
20427                             }
20428                         }
20429                         next if $saw_paren;
20430                     }
20431                 }
20432
20433                 # opening paren..
20434                 elsif ( $type_iend_1 eq '(' ) {
20435
20436                     # No longer doing this
20437                 }
20438
20439                 elsif ( $type_iend_1 eq ')' ) {
20440
20441                     # No longer doing this
20442                 }
20443
20444                 # keep a terminal for-semicolon
20445                 elsif ( $type_iend_1 eq 'f' ) {
20446                     next;
20447                 }
20448
20449                 # if '=' at end of line ...
20450                 elsif ( $is_assignment{$type_iend_1} ) {
20451
20452                     # keep break after = if it was in input stream
20453                     # this helps prevent 'blinkers'
20454                     next if $old_breakpoint_to_go[$iend_1]
20455
20456                       # don't strand an isolated '='
20457                       && $iend_1 != $ibeg_1;
20458
20459                     my $is_short_quote =
20460                       (      $type_ibeg_2 eq 'Q'
20461                           && $ibeg_2 == $iend_2
20462                           && token_sequence_length( $ibeg_2, $ibeg_2 ) <
20463                           $rOpts_short_concatenation_item_length );
20464                     my $is_ternary =
20465                       ( $type_ibeg_1 eq '?'
20466                           && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
20467
20468                     # always join an isolated '=', a short quote, or if this
20469                     # will put ?/: at start of adjacent lines
20470                     if (   $ibeg_1 != $iend_1
20471                         && !$is_short_quote
20472                         && !$is_ternary )
20473                     {
20474                         next
20475                           unless (
20476                             (
20477
20478                                 # unless we can reduce this to two lines
20479                                 $nmax < $n + 2
20480
20481                              # or three lines, the last with a leading semicolon
20482                                 || (   $nmax == $n + 2
20483                                     && $types_to_go[$ibeg_nmax] eq ';' )
20484
20485                                 # or the next line ends with a here doc
20486                                 || $type_iend_2 eq 'h'
20487
20488                                # or the next line ends in an open paren or brace
20489                                # and the break hasn't been forced [dima.t]
20490                                 || (  !$forced_breakpoint_to_go[$iend_1]
20491                                     && $type_iend_2 eq '{' )
20492                             )
20493
20494                             # do not recombine if the two lines might align well
20495                             # this is a very approximate test for this
20496                             && (   $ibeg_3 >= 0
20497                                 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
20498                           );
20499
20500                         if (
20501
20502                             # Recombine if we can make two lines
20503                             $nmax >= $n + 2
20504
20505                             # -lp users often prefer this:
20506                             #  my $title = function($env, $env, $sysarea,
20507                             #                       "bubba Borrower Entry");
20508                             #  so we will recombine if -lp is used we have
20509                             #  ending comma
20510                             && (  !$rOpts_line_up_parentheses
20511                                 || $type_iend_2 ne ',' )
20512                           )
20513                         {
20514
20515                            # otherwise, scan the rhs line up to last token for
20516                            # complexity.  Note that we are not counting the last
20517                            # token in case it is an opening paren.
20518                             my $tv    = 0;
20519                             my $depth = $nesting_depth_to_go[$ibeg_2];
20520                             foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
20521                                 if ( $nesting_depth_to_go[$i] != $depth ) {
20522                                     $tv++;
20523                                     last if ( $tv > 1 );
20524                                 }
20525                                 $depth = $nesting_depth_to_go[$i];
20526                             }
20527
20528                          # ok to recombine if no level changes before last token
20529                             if ( $tv > 0 ) {
20530
20531                                 # otherwise, do not recombine if more than two
20532                                 # level changes.
20533                                 next if ( $tv > 1 );
20534
20535                               # check total complexity of the two adjacent lines
20536                               # that will occur if we do this join
20537                                 my $istop =
20538                                   ( $n < $nmax )
20539                                   ? $ri_end->[ $n + 1 ]
20540                                   : $iend_2;
20541                                 foreach my $i ( $iend_2 .. $istop ) {
20542                                     if ( $nesting_depth_to_go[$i] != $depth ) {
20543                                         $tv++;
20544                                         last if ( $tv > 2 );
20545                                     }
20546                                     $depth = $nesting_depth_to_go[$i];
20547                                 }
20548
20549                         # do not recombine if total is more than 2 level changes
20550                                 next if ( $tv > 2 );
20551                             }
20552                         }
20553                     }
20554
20555                     unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
20556                         $forced_breakpoint_to_go[$iend_1] = 0;
20557                     }
20558                 }
20559
20560                 # for keywords..
20561                 elsif ( $type_iend_1 eq 'k' ) {
20562
20563                     # make major control keywords stand out
20564                     # (recombine.t)
20565                     next
20566                       if (
20567
20568                         #/^(last|next|redo|return)$/
20569                         $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
20570
20571                         # but only if followed by multiple lines
20572                         && $n < $nmax
20573                       );
20574
20575                     if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
20576                         next
20577                           unless $want_break_before{ $tokens_to_go[$iend_1] };
20578                     }
20579                 }
20580
20581                 #----------------------------------------------------------
20582                 # Recombine Section 3:
20583                 # Examine token at $ibeg_2 (left end of second line of pair)
20584                 #----------------------------------------------------------
20585
20586                 # join lines identified above as capable of
20587                 # causing an outdented line with leading closing paren
20588                 # Note that we are skipping the rest of this section
20589                 # and the rest of the loop to do the join
20590                 if ($skip_Section_3) {
20591                     $forced_breakpoint_to_go[$iend_1] = 0;
20592                     $n_best = $n;
20593                     last;
20594                 }
20595
20596                 # handle lines with leading &&, ||
20597                 elsif ( $is_amp_amp{$type_ibeg_2} ) {
20598
20599                     $leading_amp_count++;
20600
20601                     # ok to recombine if it follows a ? or :
20602                     # and is followed by an open paren..
20603                     my $ok =
20604                       (      $is_ternary{$type_ibeg_1}
20605                           && $tokens_to_go[$iend_2] eq '(' )
20606
20607                     # or is followed by a ? or : at same depth
20608                     #
20609                     # We are looking for something like this. We can
20610                     # recombine the && line with the line above to make the
20611                     # structure more clear:
20612                     #  return
20613                     #    exists $G->{Attr}->{V}
20614                     #    && exists $G->{Attr}->{V}->{$u}
20615                     #    ? %{ $G->{Attr}->{V}->{$u} }
20616                     #    : ();
20617                     #
20618                     # We should probably leave something like this alone:
20619                     #  return
20620                     #       exists $G->{Attr}->{E}
20621                     #    && exists $G->{Attr}->{E}->{$u}
20622                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
20623                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
20624                     #    : ();
20625                     # so that we either have all of the &&'s (or ||'s)
20626                     # on one line, as in the first example, or break at
20627                     # each one as in the second example.  However, it
20628                     # sometimes makes things worse to check for this because
20629                     # it prevents multiple recombinations.  So this is not done.
20630                       || ( $ibeg_3 >= 0
20631                         && $is_ternary{ $types_to_go[$ibeg_3] }
20632                         && $nesting_depth_to_go[$ibeg_3] ==
20633                         $nesting_depth_to_go[$ibeg_2] );
20634
20635                     next if !$ok && $want_break_before{$type_ibeg_2};
20636                     $forced_breakpoint_to_go[$iend_1] = 0;
20637
20638                     # tweak the bond strength to give this joint priority
20639                     # over ? and :
20640                     $bs_tweak = 0.25;
20641                 }
20642
20643                 # Identify and recombine a broken ?/: chain
20644                 elsif ( $type_ibeg_2 eq '?' ) {
20645
20646                     # Do not recombine different levels
20647                     my $lev = $levels_to_go[$ibeg_2];
20648                     next if ( $lev ne $levels_to_go[$ibeg_1] );
20649
20650                     # Do not recombine a '?' if either next line or
20651                     # previous line does not start with a ':'.  The reasons
20652                     # are that (1) no alignment of the ? will be possible
20653                     # and (2) the expression is somewhat complex, so the
20654                     # '?' is harder to see in the interior of the line.
20655                     my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
20656                     my $precedes_colon =
20657                       $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
20658                     next unless ( $follows_colon || $precedes_colon );
20659
20660                     # we will always combining a ? line following a : line
20661                     if ( !$follows_colon ) {
20662
20663                         # ...otherwise recombine only if it looks like a chain.
20664                         # we will just look at a few nearby lines to see if
20665                         # this looks like a chain.
20666                         my $local_count = 0;
20667                         foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
20668                             $local_count++
20669                               if $ii >= 0
20670                               && $types_to_go[$ii] eq ':'
20671                               && $levels_to_go[$ii] == $lev;
20672                         }
20673                         next unless ( $local_count > 1 );
20674                     }
20675                     $forced_breakpoint_to_go[$iend_1] = 0;
20676                 }
20677
20678                 # do not recombine lines with leading '.'
20679                 elsif ( $type_ibeg_2 eq '.' ) {
20680                     my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
20681                     next
20682                       unless (
20683
20684                    # ... unless there is just one and we can reduce
20685                    # this to two lines if we do.  For example, this
20686                    #
20687                    #
20688                    #  $bodyA .=
20689                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
20690                    #
20691                    #  looks better than this:
20692                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
20693                    #    . '$args .= $pat;'
20694
20695                         (
20696                                $n == 2
20697                             && $n == $nmax
20698                             && $type_ibeg_1 ne $type_ibeg_2
20699                         )
20700
20701                         #  ... or this would strand a short quote , like this
20702                         #                . "some long quote"
20703                         #                . "\n";
20704
20705                         || (   $types_to_go[$i_next_nonblank] eq 'Q'
20706                             && $i_next_nonblank >= $iend_2 - 1
20707                             && $token_lengths_to_go[$i_next_nonblank] <
20708                             $rOpts_short_concatenation_item_length )
20709                       );
20710                 }
20711
20712                 # handle leading keyword..
20713                 elsif ( $type_ibeg_2 eq 'k' ) {
20714
20715                     # handle leading "or"
20716                     if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
20717                         next
20718                           unless (
20719                             $this_line_is_semicolon_terminated
20720                             && (
20721
20722                                 # following 'if' or 'unless' or 'or'
20723                                 $type_ibeg_1 eq 'k'
20724                                 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
20725
20726                                 # important: only combine a very simple or
20727                                 # statement because the step below may have
20728                                 # combined a trailing 'and' with this or,
20729                                 # and we do not want to then combine
20730                                 # everything together
20731                                 && ( $iend_2 - $ibeg_2 <= 7 )
20732                             )
20733                           );
20734
20735                         #X: RT #81854
20736                         $forced_breakpoint_to_go[$iend_1] = 0
20737                           unless $old_breakpoint_to_go[$iend_1];
20738                     }
20739
20740                     # handle leading 'and'
20741                     elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
20742
20743                         # Decide if we will combine a single terminal 'and'
20744                         # after an 'if' or 'unless'.
20745
20746                         #     This looks best with the 'and' on the same
20747                         #     line as the 'if':
20748                         #
20749                         #         $a = 1
20750                         #           if $seconds and $nu < 2;
20751                         #
20752                         #     But this looks better as shown:
20753                         #
20754                         #         $a = 1
20755                         #           if !$this->{Parents}{$_}
20756                         #           or $this->{Parents}{$_} eq $_;
20757                         #
20758                         next
20759                           unless (
20760                             $this_line_is_semicolon_terminated
20761                             && (
20762
20763                                 # following 'if' or 'unless' or 'or'
20764                                 $type_ibeg_1 eq 'k'
20765                                 && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
20766                                     || $tokens_to_go[$ibeg_1] eq 'or' )
20767                             )
20768                           );
20769                     }
20770
20771                     # handle leading "if" and "unless"
20772                     elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
20773
20774                       # FIXME: This is still experimental..may not be too useful
20775                         next
20776                           unless (
20777                             $this_line_is_semicolon_terminated
20778
20779                             #  previous line begins with 'and' or 'or'
20780                             && $type_ibeg_1 eq 'k'
20781                             && $is_and_or{ $tokens_to_go[$ibeg_1] }
20782
20783                           );
20784                     }
20785
20786                     # handle all other leading keywords
20787                     else {
20788
20789                         # keywords look best at start of lines,
20790                         # but combine things like "1 while"
20791                         unless ( $is_assignment{$type_iend_1} ) {
20792                             next
20793                               if ( ( $type_iend_1 ne 'k' )
20794                                 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
20795                         }
20796                     }
20797                 }
20798
20799                 # similar treatment of && and || as above for 'and' and 'or':
20800                 # NOTE: This block of code is currently bypassed because
20801                 # of a previous block but is retained for possible future use.
20802                 elsif ( $is_amp_amp{$type_ibeg_2} ) {
20803
20804                     # maybe looking at something like:
20805                     # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
20806
20807                     next
20808                       unless (
20809                         $this_line_is_semicolon_terminated
20810
20811                         # previous line begins with an 'if' or 'unless' keyword
20812                         && $type_ibeg_1 eq 'k'
20813                         && $is_if_unless{ $tokens_to_go[$ibeg_1] }
20814
20815                       );
20816                 }
20817
20818                 # handle line with leading = or similar
20819                 elsif ( $is_assignment{$type_ibeg_2} ) {
20820                     next unless ( $n == 1 || $n == $nmax );
20821                     next if $old_breakpoint_to_go[$iend_1];
20822                     next
20823                       unless (
20824
20825                         # unless we can reduce this to two lines
20826                         $nmax == 2
20827
20828                         # or three lines, the last with a leading semicolon
20829                         || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
20830
20831                         # or the next line ends with a here doc
20832                         || $type_iend_2 eq 'h'
20833
20834                         # or this is a short line ending in ;
20835                         || ( $n == $nmax && $this_line_is_semicolon_terminated )
20836                       );
20837                     $forced_breakpoint_to_go[$iend_1] = 0;
20838                 }
20839
20840                 #----------------------------------------------------------
20841                 # Recombine Section 4:
20842                 # Combine the lines if we arrive here and it is possible
20843                 #----------------------------------------------------------
20844
20845                 # honor hard breakpoints
20846                 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
20847
20848                 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
20849
20850                 # Require a few extra spaces before recombining lines if we are
20851                 # at an old breakpoint unless this is a simple list or terminal
20852                 # line.  The goal is to avoid oscillating between two
20853                 # quasi-stable end states.  For example this snippet caused
20854                 # problems:
20855 ##    my $this =
20856 ##    bless {
20857 ##        TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
20858 ##      },
20859 ##      $type;
20860                 next
20861                   if ( $old_breakpoint_to_go[$iend_1]
20862                     && !$this_line_is_semicolon_terminated
20863                     && $n < $nmax
20864                     && $excess + 4 > 0
20865                     && $type_iend_2 ne ',' );
20866
20867                 # do not recombine if we would skip in indentation levels
20868                 if ( $n < $nmax ) {
20869                     my $if_next = $ri_beg->[ $n + 1 ];
20870                     next
20871                       if (
20872                            $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
20873                         && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
20874
20875                         # but an isolated 'if (' is undesirable
20876                         && !(
20877                                $n == 1
20878                             && $iend_1 - $ibeg_1 <= 2
20879                             && $type_ibeg_1 eq 'k'
20880                             && $tokens_to_go[$ibeg_1] eq 'if'
20881                             && $tokens_to_go[$iend_1] ne '('
20882                         )
20883                       );
20884                 }
20885
20886                 # honor no-break's
20887                 next if ( $bs >= NO_BREAK - 1 );
20888
20889                 # remember the pair with the greatest bond strength
20890                 if ( !$n_best ) {
20891                     $n_best  = $n;
20892                     $bs_best = $bs;
20893                 }
20894                 else {
20895
20896                     if ( $bs > $bs_best ) {
20897                         $n_best  = $n;
20898                         $bs_best = $bs;
20899                     }
20900                 }
20901             }
20902
20903             # recombine the pair with the greatest bond strength
20904             if ($n_best) {
20905                 splice @{$ri_beg}, $n_best, 1;
20906                 splice @{$ri_end}, $n_best - 1, 1;
20907                 splice @joint, $n_best, 1;
20908
20909                 # keep going if we are still making progress
20910                 $more_to_do++;
20911             }
20912         }
20913         return ( $ri_beg, $ri_end );
20914     }
20915 }    # end recombine_breakpoints
20916
20917 sub break_all_chain_tokens {
20918
20919     # scan the current breakpoints looking for breaks at certain "chain
20920     # operators" (. : && || + etc) which often occur repeatedly in a long
20921     # statement.  If we see a break at any one, break at all similar tokens
20922     # within the same container.
20923     #
20924     my ( $ri_left, $ri_right ) = @_;
20925
20926     my %saw_chain_type;
20927     my %left_chain_type;
20928     my %right_chain_type;
20929     my %interior_chain_type;
20930     my $nmax = @{$ri_right} - 1;
20931
20932     # scan the left and right end tokens of all lines
20933     my $count = 0;
20934     for my $n ( 0 .. $nmax ) {
20935         my $il    = $ri_left->[$n];
20936         my $ir    = $ri_right->[$n];
20937         my $typel = $types_to_go[$il];
20938         my $typer = $types_to_go[$ir];
20939         $typel = '+' if ( $typel eq '-' );    # treat + and - the same
20940         $typer = '+' if ( $typer eq '-' );
20941         $typel = '*' if ( $typel eq '/' );    # treat * and / the same
20942         $typer = '*' if ( $typer eq '/' );
20943         my $tokenl = $tokens_to_go[$il];
20944         my $tokenr = $tokens_to_go[$ir];
20945
20946         if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
20947             next if ( $typel eq '?' );
20948             push @{ $left_chain_type{$typel} }, $il;
20949             $saw_chain_type{$typel} = 1;
20950             $count++;
20951         }
20952         if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
20953             next if ( $typer eq '?' );
20954             push @{ $right_chain_type{$typer} }, $ir;
20955             $saw_chain_type{$typer} = 1;
20956             $count++;
20957         }
20958     }
20959     return unless $count;
20960
20961     # now look for any interior tokens of the same types
20962     $count = 0;
20963     for my $n ( 0 .. $nmax ) {
20964         my $il = $ri_left->[$n];
20965         my $ir = $ri_right->[$n];
20966         foreach my $i ( $il + 1 .. $ir - 1 ) {
20967             my $type = $types_to_go[$i];
20968             $type = '+' if ( $type eq '-' );
20969             $type = '*' if ( $type eq '/' );
20970             if ( $saw_chain_type{$type} ) {
20971                 push @{ $interior_chain_type{$type} }, $i;
20972                 $count++;
20973             }
20974         }
20975     }
20976     return unless $count;
20977
20978     # now make a list of all new break points
20979     my @insert_list;
20980
20981     # loop over all chain types
20982     foreach my $type ( keys %saw_chain_type ) {
20983
20984         # quit if just ONE continuation line with leading .  For example--
20985         # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
20986         #  . $contents;
20987         last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
20988
20989         # loop over all interior chain tokens
20990         foreach my $itest ( @{ $interior_chain_type{$type} } ) {
20991
20992             # loop over all left end tokens of same type
20993             if ( $left_chain_type{$type} ) {
20994                 next if $nobreak_to_go[ $itest - 1 ];
20995                 foreach my $i ( @{ $left_chain_type{$type} } ) {
20996                     next unless in_same_container( $i, $itest );
20997                     push @insert_list, $itest - 1;
20998
20999                     # Break at matching ? if this : is at a different level.
21000                     # For example, the ? before $THRf_DEAD in the following
21001                     # should get a break if its : gets a break.
21002                     #
21003                     # my $flags =
21004                     #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
21005                     #   : ( $_ & 4 ) ? $THRf_R_DETACHED
21006                     #   :              $THRf_R_JOINABLE;
21007                     if (   $type eq ':'
21008                         && $levels_to_go[$i] != $levels_to_go[$itest] )
21009                     {
21010                         my $i_question = $mate_index_to_go[$itest];
21011                         if ( $i_question > 0 ) {
21012                             push @insert_list, $i_question - 1;
21013                         }
21014                     }
21015                     last;
21016                 }
21017             }
21018
21019             # loop over all right end tokens of same type
21020             if ( $right_chain_type{$type} ) {
21021                 next if $nobreak_to_go[$itest];
21022                 foreach my $i ( @{ $right_chain_type{$type} } ) {
21023                     next unless in_same_container( $i, $itest );
21024                     push @insert_list, $itest;
21025
21026                     # break at matching ? if this : is at a different level
21027                     if (   $type eq ':'
21028                         && $levels_to_go[$i] != $levels_to_go[$itest] )
21029                     {
21030                         my $i_question = $mate_index_to_go[$itest];
21031                         if ( $i_question >= 0 ) {
21032                             push @insert_list, $i_question;
21033                         }
21034                     }
21035                     last;
21036                 }
21037             }
21038         }
21039     }
21040
21041     # insert any new break points
21042     if (@insert_list) {
21043         insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
21044     }
21045     return;
21046 }
21047
21048 sub break_equals {
21049
21050     # Look for assignment operators that could use a breakpoint.
21051     # For example, in the following snippet
21052     #
21053     #    $HOME = $ENV{HOME}
21054     #      || $ENV{LOGDIR}
21055     #      || $pw[7]
21056     #      || die "no home directory for user $<";
21057     #
21058     # we could break at the = to get this, which is a little nicer:
21059     #    $HOME =
21060     #         $ENV{HOME}
21061     #      || $ENV{LOGDIR}
21062     #      || $pw[7]
21063     #      || die "no home directory for user $<";
21064     #
21065     # The logic here follows the logic in set_logical_padding, which
21066     # will add the padding in the second line to improve alignment.
21067     #
21068     my ( $ri_left, $ri_right ) = @_;
21069     my $nmax = @{$ri_right} - 1;
21070     return unless ( $nmax >= 2 );
21071
21072     # scan the left ends of first two lines
21073     my $tokbeg = "";
21074     my $depth_beg;
21075     for my $n ( 1 .. 2 ) {
21076         my $il     = $ri_left->[$n];
21077         my $typel  = $types_to_go[$il];
21078         my $tokenl = $tokens_to_go[$il];
21079
21080         my $has_leading_op = ( $tokenl =~ /^\w/ )
21081           ? $is_chain_operator{$tokenl}    # + - * / : ? && ||
21082           : $is_chain_operator{$typel};    # and, or
21083         return unless ($has_leading_op);
21084         if ( $n > 1 ) {
21085             return
21086               unless ( $tokenl eq $tokbeg
21087                 && $nesting_depth_to_go[$il] eq $depth_beg );
21088         }
21089         $tokbeg    = $tokenl;
21090         $depth_beg = $nesting_depth_to_go[$il];
21091     }
21092
21093     # now look for any interior tokens of the same types
21094     my $il = $ri_left->[0];
21095     my $ir = $ri_right->[0];
21096
21097     # now make a list of all new break points
21098     my @insert_list;
21099     for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
21100         my $type = $types_to_go[$i];
21101         if (   $is_assignment{$type}
21102             && $nesting_depth_to_go[$i] eq $depth_beg )
21103         {
21104             if ( $want_break_before{$type} ) {
21105                 push @insert_list, $i - 1;
21106             }
21107             else {
21108                 push @insert_list, $i;
21109             }
21110         }
21111     }
21112
21113     # Break after a 'return' followed by a chain of operators
21114     #  return ( $^O !~ /win32|dos/i )
21115     #    && ( $^O ne 'VMS' )
21116     #    && ( $^O ne 'OS2' )
21117     #    && ( $^O ne 'MacOS' );
21118     # To give:
21119     #  return
21120     #       ( $^O !~ /win32|dos/i )
21121     #    && ( $^O ne 'VMS' )
21122     #    && ( $^O ne 'OS2' )
21123     #    && ( $^O ne 'MacOS' );
21124     my $i = 0;
21125     if (   $types_to_go[$i] eq 'k'
21126         && $tokens_to_go[$i] eq 'return'
21127         && $ir > $il
21128         && $nesting_depth_to_go[$i] eq $depth_beg )
21129     {
21130         push @insert_list, $i;
21131     }
21132
21133     return unless (@insert_list);
21134
21135     # One final check...
21136     # scan second and third lines and be sure there are no assignments
21137     # we want to avoid breaking at an = to make something like this:
21138     #    unless ( $icon =
21139     #           $html_icons{"$type-$state"}
21140     #        or $icon = $html_icons{$type}
21141     #        or $icon = $html_icons{$state} )
21142     for my $n ( 1 .. 2 ) {
21143         my $il = $ri_left->[$n];
21144         my $ir = $ri_right->[$n];
21145         foreach my $i ( $il + 1 .. $ir ) {
21146             my $type = $types_to_go[$i];
21147             return
21148               if ( $is_assignment{$type}
21149                 && $nesting_depth_to_go[$i] eq $depth_beg );
21150         }
21151     }
21152
21153     # ok, insert any new break point
21154     if (@insert_list) {
21155         insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
21156     }
21157     return;
21158 }
21159
21160 sub insert_final_breaks {
21161
21162     my ( $ri_left, $ri_right ) = @_;
21163
21164     my $nmax = @{$ri_right} - 1;
21165
21166     # scan the left and right end tokens of all lines
21167     my $count         = 0;
21168     my $i_first_colon = -1;
21169     for my $n ( 0 .. $nmax ) {
21170         my $il    = $ri_left->[$n];
21171         my $ir    = $ri_right->[$n];
21172         my $typel = $types_to_go[$il];
21173         my $typer = $types_to_go[$ir];
21174         return if ( $typel eq '?' );
21175         return if ( $typer eq '?' );
21176         if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
21177         elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
21178     }
21179
21180     # For long ternary chains,
21181     # if the first : we see has its # ? is in the interior
21182     # of a preceding line, then see if there are any good
21183     # breakpoints before the ?.
21184     if ( $i_first_colon > 0 ) {
21185         my $i_question = $mate_index_to_go[$i_first_colon];
21186         if ( $i_question > 0 ) {
21187             my @insert_list;
21188             for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
21189                 my $token = $tokens_to_go[$ii];
21190                 my $type  = $types_to_go[$ii];
21191
21192                 # For now, a good break is either a comma or a 'return'.
21193                 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
21194                     && in_same_container( $ii, $i_question ) )
21195                 {
21196                     push @insert_list, $ii;
21197                     last;
21198                 }
21199             }
21200
21201             # insert any new break points
21202             if (@insert_list) {
21203                 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
21204             }
21205         }
21206     }
21207     return;
21208 }
21209
21210 sub in_same_container {
21211
21212     # check to see if tokens at i1 and i2 are in the
21213     # same container, and not separated by a comma, ? or :
21214     my ( $i1, $i2 ) = @_;
21215     my $type  = $types_to_go[$i1];
21216     my $depth = $nesting_depth_to_go[$i1];
21217     return unless ( $nesting_depth_to_go[$i2] == $depth );
21218     if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
21219
21220     ###########################################################
21221     # This is potentially a very slow routine and not critical.
21222     # For safety just give up for large differences.
21223     # See test file 'infinite_loop.txt'
21224     # TODO: replace this loop with a data structure
21225     ###########################################################
21226     return if ( $i2 - $i1 > 200 );
21227
21228     foreach my $i ( $i1 + 1 .. $i2 - 1 ) {
21229         next   if ( $nesting_depth_to_go[$i] > $depth );
21230         return if ( $nesting_depth_to_go[$i] < $depth );
21231
21232         my $tok = $tokens_to_go[$i];
21233         $tok = ',' if $tok eq '=>';    # treat => same as ,
21234
21235         # Example: we would not want to break at any of these .'s
21236         #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
21237         if ( $type ne ':' ) {
21238             return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
21239         }
21240         else {
21241             return if ( $tok =~ /^[\,]$/ );
21242         }
21243     }
21244     return 1;
21245 }
21246
21247 sub set_continuation_breaks {
21248
21249     # Define an array of indexes for inserting newline characters to
21250     # keep the line lengths below the maximum desired length.  There is
21251     # an implied break after the last token, so it need not be included.
21252
21253     # Method:
21254     # This routine is part of series of routines which adjust line
21255     # lengths.  It is only called if a statement is longer than the
21256     # maximum line length, or if a preliminary scanning located
21257     # desirable break points.   Sub scan_list has already looked at
21258     # these tokens and set breakpoints (in array
21259     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
21260     # after commas, after opening parens, and before closing parens).
21261     # This routine will honor these breakpoints and also add additional
21262     # breakpoints as necessary to keep the line length below the maximum
21263     # requested.  It bases its decision on where the 'bond strength' is
21264     # lowest.
21265
21266     # Output: returns references to the arrays:
21267     #  @i_first
21268     #  @i_last
21269     # which contain the indexes $i of the first and last tokens on each
21270     # line.
21271
21272     # In addition, the array:
21273     #   $forced_breakpoint_to_go[$i]
21274     # may be updated to be =1 for any index $i after which there must be
21275     # a break.  This signals later routines not to undo the breakpoint.
21276
21277     my $saw_good_break = shift;
21278     my @i_first        = ();      # the first index to output
21279     my @i_last         = ();      # the last index to output
21280     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
21281     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
21282
21283     set_bond_strengths();
21284
21285     my $imin = 0;
21286     my $imax = $max_index_to_go;
21287     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
21288     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
21289     my $i_begin = $imin;          # index for starting next iteration
21290
21291     my $leading_spaces          = leading_spaces_to_go($imin);
21292     my $line_count              = 0;
21293     my $last_break_strength     = NO_BREAK;
21294     my $i_last_break            = -1;
21295     my $max_bias                = 0.001;
21296     my $tiny_bias               = 0.0001;
21297     my $leading_alignment_token = "";
21298     my $leading_alignment_type  = "";
21299
21300     # see if any ?/:'s are in order
21301     my $colons_in_order = 1;
21302     my $last_tok        = "";
21303     my @colon_list  = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
21304     my $colon_count = @colon_list;
21305     foreach (@colon_list) {
21306         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
21307         $last_tok = $_;
21308     }
21309
21310     # This is a sufficient but not necessary condition for colon chain
21311     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
21312
21313     #-------------------------------------------------------
21314     # BEGINNING of main loop to set continuation breakpoints
21315     # Keep iterating until we reach the end
21316     #-------------------------------------------------------
21317     while ( $i_begin <= $imax ) {
21318         my $lowest_strength        = NO_BREAK;
21319         my $starting_sum           = $summed_lengths_to_go[$i_begin];
21320         my $i_lowest               = -1;
21321         my $i_test                 = -1;
21322         my $lowest_next_token      = '';
21323         my $lowest_next_type       = 'b';
21324         my $i_lowest_next_nonblank = -1;
21325
21326         #-------------------------------------------------------
21327         # BEGINNING of inner loop to find the best next breakpoint
21328         #-------------------------------------------------------
21329         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
21330             my $type                     = $types_to_go[$i_test];
21331             my $token                    = $tokens_to_go[$i_test];
21332             my $next_type                = $types_to_go[ $i_test + 1 ];
21333             my $next_token               = $tokens_to_go[ $i_test + 1 ];
21334             my $i_next_nonblank          = $inext_to_go[$i_test];
21335             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
21336             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
21337             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
21338             my $strength                 = $bond_strength_to_go[$i_test];
21339             my $maximum_line_length      = maximum_line_length($i_begin);
21340
21341             # use old breaks as a tie-breaker.  For example to
21342             # prevent blinkers with -pbp in this code:
21343
21344 ##@keywords{
21345 ##    qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
21346 ##    = ();
21347
21348             # At the same time try to prevent a leading * in this code
21349             # with the default formatting:
21350             #
21351 ##                return
21352 ##                    factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
21353 ##                  * ( $x**( $a - 1 ) )
21354 ##                  * ( ( 1 - $x )**( $b - 1 ) );
21355
21356             # reduce strength a bit to break ties at an old breakpoint ...
21357             if (
21358                 $old_breakpoint_to_go[$i_test]
21359
21360                 # which is a 'good' breakpoint, meaning ...
21361                 # we don't want to break before it
21362                 && !$want_break_before{$type}
21363
21364                 # and either we want to break before the next token
21365                 # or the next token is not short (i.e. not a '*', '/' etc.)
21366                 && $i_next_nonblank <= $imax
21367                 && (   $want_break_before{$next_nonblank_type}
21368                     || $token_lengths_to_go[$i_next_nonblank] > 2
21369                     || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
21370               )
21371             {
21372                 $strength -= $tiny_bias;
21373             }
21374
21375             # otherwise increase strength a bit if this token would be at the
21376             # maximum line length.  This is necessary to avoid blinking
21377             # in the above example when the -iob flag is added.
21378             else {
21379                 my $len =
21380                   $leading_spaces +
21381                   $summed_lengths_to_go[ $i_test + 1 ] -
21382                   $starting_sum;
21383                 if ( $len >= $maximum_line_length ) {
21384                     $strength += $tiny_bias;
21385                 }
21386             }
21387
21388             my $must_break = 0;
21389
21390             # Force an immediate break at certain operators
21391             # with lower level than the start of the line,
21392             # unless we've already seen a better break.
21393             #
21394             ##############################################
21395             # Note on an issue with a preceding ?
21396             ##############################################
21397             # We don't include a ? in the above list, but there may
21398             # be a break at a previous ? if the line is long.
21399             # Because of this we do not want to force a break if
21400             # there is a previous ? on this line.  For now the best way
21401             # to do this is to not break if we have seen a lower strength
21402             # point, which is probably a ?.
21403             #
21404             # Example of unwanted breaks we are avoiding at a '.' following a ?
21405             # from pod2html using perltidy -gnu:
21406             # )
21407             # ? "\n&lt;A NAME=\""
21408             # . $value
21409             # . "\"&gt;\n$text&lt;/A&gt;\n"
21410             # : "\n$type$pod2.html\#" . $value . "\"&gt;$text&lt;\/A&gt;\n";
21411             if (
21412                 (
21413                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
21414                     || (   $next_nonblank_type eq 'k'
21415                         && $next_nonblank_token =~ /^(and|or)$/ )
21416                 )
21417                 && ( $nesting_depth_to_go[$i_begin] >
21418                     $nesting_depth_to_go[$i_next_nonblank] )
21419                 && ( $strength <= $lowest_strength )
21420               )
21421             {
21422                 set_forced_breakpoint($i_next_nonblank);
21423             }
21424
21425             if (
21426
21427                 # Try to put a break where requested by scan_list
21428                 $forced_breakpoint_to_go[$i_test]
21429
21430                 # break between ) { in a continued line so that the '{' can
21431                 # be outdented
21432                 # See similar logic in scan_list which catches instances
21433                 # where a line is just something like ') {'.  We have to
21434                 # be careful because the corresponding block keyword might
21435                 # not be on the first line, such as 'for' here:
21436                 #
21437                 # eval {
21438                 #     for ("a") {
21439                 #         for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
21440                 #     }
21441                 # };
21442                 #
21443                 || (
21444                        $line_count
21445                     && ( $token eq ')' )
21446                     && ( $next_nonblank_type eq '{' )
21447                     && ($next_nonblank_block_type)
21448                     && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
21449
21450                     # RT #104427: Dont break before opening sub brace because
21451                     # sub block breaks handled at higher level, unless
21452                     # it looks like the preceeding list is long and broken
21453                     && !(
21454                         $next_nonblank_block_type =~ /^sub\b/
21455                         && ( $nesting_depth_to_go[$i_begin] ==
21456                             $nesting_depth_to_go[$i_next_nonblank] )
21457                     )
21458
21459                     && !$rOpts->{'opening-brace-always-on-right'}
21460                 )
21461
21462                 # There is an implied forced break at a terminal opening brace
21463                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
21464               )
21465             {
21466
21467                 # Forced breakpoints must sometimes be overridden, for example
21468                 # because of a side comment causing a NO_BREAK.  It is easier
21469                 # to catch this here than when they are set.
21470                 if ( $strength < NO_BREAK - 1 ) {
21471                     $strength   = $lowest_strength - $tiny_bias;
21472                     $must_break = 1;
21473                 }
21474             }
21475
21476             # quit if a break here would put a good terminal token on
21477             # the next line and we already have a possible break
21478             if (
21479                    !$must_break
21480                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
21481                 && (
21482                     (
21483                         $leading_spaces +
21484                         $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
21485                         $starting_sum
21486                     ) > $maximum_line_length
21487                 )
21488               )
21489             {
21490                 last if ( $i_lowest >= 0 );
21491             }
21492
21493             # Avoid a break which would strand a single punctuation
21494             # token.  For example, we do not want to strand a leading
21495             # '.' which is followed by a long quoted string.
21496             # But note that we do want to do this with -extrude (l=1)
21497             # so please test any changes to this code on -extrude.
21498             if (
21499                    !$must_break
21500                 && ( $i_test == $i_begin )
21501                 && ( $i_test < $imax )
21502                 && ( $token eq $type )
21503                 && (
21504                     (
21505                         $leading_spaces +
21506                         $summed_lengths_to_go[ $i_test + 1 ] -
21507                         $starting_sum
21508                     ) < $maximum_line_length
21509                 )
21510               )
21511             {
21512                 $i_test = min( $imax, $inext_to_go[$i_test] );
21513                 redo;
21514             }
21515
21516             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
21517             {
21518
21519                 # break at previous best break if it would have produced
21520                 # a leading alignment of certain common tokens, and it
21521                 # is different from the latest candidate break
21522                 last
21523                   if ($leading_alignment_type);
21524
21525                 # Force at least one breakpoint if old code had good
21526                 # break It is only called if a breakpoint is required or
21527                 # desired.  This will probably need some adjustments
21528                 # over time.  A goal is to try to be sure that, if a new
21529                 # side comment is introduced into formatted text, then
21530                 # the same breakpoints will occur.  scbreak.t
21531                 last
21532                   if (
21533                     $i_test == $imax              # we are at the end
21534                     && !$forced_breakpoint_count  #
21535                     && $saw_good_break            # old line had good break
21536                     && $type =~ /^[#;\{]$/        # and this line ends in
21537                                                   # ';' or side comment
21538                     && $i_last_break < 0          # and we haven't made a break
21539                     && $i_lowest >= 0             # and we saw a possible break
21540                     && $i_lowest < $imax - 1      # (but not just before this ;)
21541                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
21542                   );
21543
21544                 # Do not skip past an important break point in a short final
21545                 # segment.  For example, without this check we would miss the
21546                 # break at the final / in the following code:
21547                 #
21548                 #  $depth_stop =
21549                 #    ( $tau * $mass_pellet * $q_0 *
21550                 #        ( 1. - exp( -$t_stop / $tau ) ) -
21551                 #        4. * $pi * $factor * $k_ice *
21552                 #        ( $t_melt - $t_ice ) *
21553                 #        $r_pellet *
21554                 #        $t_stop ) /
21555                 #    ( $rho_ice * $Qs * $pi * $r_pellet**2 );
21556                 #
21557                 if (   $line_count > 2
21558                     && $i_lowest < $i_test
21559                     && $i_test > $imax - 2
21560                     && $nesting_depth_to_go[$i_begin] >
21561                     $nesting_depth_to_go[$i_lowest]
21562                     && $lowest_strength < $last_break_strength - .5 * WEAK )
21563                 {
21564                     # Make this break for math operators for now
21565                     my $ir = $inext_to_go[$i_lowest];
21566                     my $il = $iprev_to_go[$ir];
21567                     last
21568                       if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
21569                         || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
21570                 }
21571
21572                 # Update the minimum bond strength location
21573                 $lowest_strength        = $strength;
21574                 $i_lowest               = $i_test;
21575                 $lowest_next_token      = $next_nonblank_token;
21576                 $lowest_next_type       = $next_nonblank_type;
21577                 $i_lowest_next_nonblank = $i_next_nonblank;
21578                 last if $must_break;
21579
21580                 # set flags to remember if a break here will produce a
21581                 # leading alignment of certain common tokens
21582                 if (   $line_count > 0
21583                     && $i_test < $imax
21584                     && ( $lowest_strength - $last_break_strength <= $max_bias )
21585                   )
21586                 {
21587                     my $i_last_end = $iprev_to_go[$i_begin];
21588                     my $tok_beg    = $tokens_to_go[$i_begin];
21589                     my $type_beg   = $types_to_go[$i_begin];
21590                     if (
21591
21592                         # check for leading alignment of certain tokens
21593                         (
21594                                $tok_beg eq $next_nonblank_token
21595                             && $is_chain_operator{$tok_beg}
21596                             && (   $type_beg eq 'k'
21597                                 || $type_beg eq $tok_beg )
21598                             && $nesting_depth_to_go[$i_begin] >=
21599                             $nesting_depth_to_go[$i_next_nonblank]
21600                         )
21601
21602                         || (   $tokens_to_go[$i_last_end] eq $token
21603                             && $is_chain_operator{$token}
21604                             && ( $type eq 'k' || $type eq $token )
21605                             && $nesting_depth_to_go[$i_last_end] >=
21606                             $nesting_depth_to_go[$i_test] )
21607                       )
21608                     {
21609                         $leading_alignment_token = $next_nonblank_token;
21610                         $leading_alignment_type  = $next_nonblank_type;
21611                     }
21612                 }
21613             }
21614
21615             my $too_long = ( $i_test >= $imax );
21616             if ( !$too_long ) {
21617                 my $next_length =
21618                   $leading_spaces +
21619                   $summed_lengths_to_go[ $i_test + 2 ] -
21620                   $starting_sum;
21621                 $too_long = $next_length > $maximum_line_length;
21622
21623                 # To prevent blinkers we will avoid leaving a token exactly at
21624                 # the line length limit unless it is the last token or one of
21625                 # several "good" types.
21626                 #
21627                 # The following code was a blinker with -pbp before this
21628                 # modification:
21629 ##                    $last_nonblank_token eq '('
21630 ##                        && $is_indirect_object_taker{ $paren_type
21631 ##                            [$paren_depth] }
21632                 # The issue causing the problem is that if the
21633                 # term [$paren_depth] gets broken across a line then
21634                 # the whitespace routine doesn't see both opening and closing
21635                 # brackets and will format like '[ $paren_depth ]'.  This
21636                 # leads to an oscillation in length depending if we break
21637                 # before the closing bracket or not.
21638                 if (  !$too_long
21639                     && $i_test + 1 < $imax
21640                     && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
21641                 {
21642                     $too_long = $next_length >= $maximum_line_length;
21643                 }
21644             }
21645
21646             FORMATTER_DEBUG_FLAG_BREAK
21647               && do {
21648                 my $ltok     = $token;
21649                 my $rtok     = $next_nonblank_token ? $next_nonblank_token : "";
21650                 my $i_testp2 = $i_test + 2;
21651                 if ( $i_testp2 > $max_index_to_go + 1 ) {
21652                     $i_testp2 = $max_index_to_go + 1;
21653                 }
21654                 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
21655                 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
21656                 print STDOUT
21657 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength    $ltok $rtok\n";
21658               };
21659
21660             # allow one extra terminal token after exceeding line length
21661             # if it would strand this token.
21662             if (   $rOpts_fuzzy_line_length
21663                 && $too_long
21664                 && $i_lowest == $i_test
21665                 && $token_lengths_to_go[$i_test] > 1
21666                 && $next_nonblank_type =~ /^[\;\,]$/ )
21667             {
21668                 $too_long = 0;
21669             }
21670
21671             last
21672               if (
21673                 ( $i_test == $imax )    # we're done if no more tokens,
21674                 || (
21675                     ( $i_lowest >= 0 )    # or no more space and we have a break
21676                     && $too_long
21677                 )
21678               );
21679         }
21680
21681         #-------------------------------------------------------
21682         # END of inner loop to find the best next breakpoint
21683         # Now decide exactly where to put the breakpoint
21684         #-------------------------------------------------------
21685
21686         # it's always ok to break at imax if no other break was found
21687         if ( $i_lowest < 0 ) { $i_lowest = $imax }
21688
21689         # semi-final index calculation
21690         my $i_next_nonblank     = $inext_to_go[$i_lowest];
21691         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
21692         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
21693
21694         #-------------------------------------------------------
21695         # ?/: rule 1 : if a break here will separate a '?' on this
21696         # line from its closing ':', then break at the '?' instead.
21697         #-------------------------------------------------------
21698         foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
21699             next unless ( $tokens_to_go[$i] eq '?' );
21700
21701             # do not break if probable sequence of ?/: statements
21702             next if ($is_colon_chain);
21703
21704             # do not break if statement is broken by side comment
21705             next
21706               if (
21707                 $tokens_to_go[$max_index_to_go] eq '#'
21708                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
21709                     $max_index_to_go ) !~ /^[\;\}]$/
21710               );
21711
21712             # no break needed if matching : is also on the line
21713             next
21714               if ( $mate_index_to_go[$i] >= 0
21715                 && $mate_index_to_go[$i] <= $i_next_nonblank );
21716
21717             $i_lowest = $i;
21718             if ( $want_break_before{'?'} ) { $i_lowest-- }
21719             last;
21720         }
21721
21722         #-------------------------------------------------------
21723         # END of inner loop to find the best next breakpoint:
21724         # Break the line after the token with index i=$i_lowest
21725         #-------------------------------------------------------
21726
21727         # final index calculation
21728         $i_next_nonblank     = $inext_to_go[$i_lowest];
21729         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
21730         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
21731
21732         FORMATTER_DEBUG_FLAG_BREAK
21733           && print STDOUT
21734           "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
21735
21736         #-------------------------------------------------------
21737         # ?/: rule 2 : if we break at a '?', then break at its ':'
21738         #
21739         # Note: this rule is also in sub scan_list to handle a break
21740         # at the start and end of a line (in case breaks are dictated
21741         # by side comments).
21742         #-------------------------------------------------------
21743         if ( $next_nonblank_type eq '?' ) {
21744             set_closing_breakpoint($i_next_nonblank);
21745         }
21746         elsif ( $types_to_go[$i_lowest] eq '?' ) {
21747             set_closing_breakpoint($i_lowest);
21748         }
21749
21750         #-------------------------------------------------------
21751         # ?/: rule 3 : if we break at a ':' then we save
21752         # its location for further work below.  We may need to go
21753         # back and break at its '?'.
21754         #-------------------------------------------------------
21755         if ( $next_nonblank_type eq ':' ) {
21756             push @i_colon_breaks, $i_next_nonblank;
21757         }
21758         elsif ( $types_to_go[$i_lowest] eq ':' ) {
21759             push @i_colon_breaks, $i_lowest;
21760         }
21761
21762         # here we should set breaks for all '?'/':' pairs which are
21763         # separated by this line
21764
21765         $line_count++;
21766
21767         # save this line segment, after trimming blanks at the ends
21768         push( @i_first,
21769             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
21770         push( @i_last,
21771             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
21772
21773         # set a forced breakpoint at a container opening, if necessary, to
21774         # signal a break at a closing container.  Excepting '(' for now.
21775         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
21776             && !$forced_breakpoint_to_go[$i_lowest] )
21777         {
21778             set_closing_breakpoint($i_lowest);
21779         }
21780
21781         # get ready to go again
21782         $i_begin                 = $i_lowest + 1;
21783         $last_break_strength     = $lowest_strength;
21784         $i_last_break            = $i_lowest;
21785         $leading_alignment_token = "";
21786         $leading_alignment_type  = "";
21787         $lowest_next_token       = '';
21788         $lowest_next_type        = 'b';
21789
21790         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
21791             $i_begin++;
21792         }
21793
21794         # update indentation size
21795         if ( $i_begin <= $imax ) {
21796             $leading_spaces = leading_spaces_to_go($i_begin);
21797         }
21798     }
21799
21800     #-------------------------------------------------------
21801     # END of main loop to set continuation breakpoints
21802     # Now go back and make any necessary corrections
21803     #-------------------------------------------------------
21804
21805     #-------------------------------------------------------
21806     # ?/: rule 4 -- if we broke at a ':', then break at
21807     # corresponding '?' unless this is a chain of ?: expressions
21808     #-------------------------------------------------------
21809     if (@i_colon_breaks) {
21810
21811         # using a simple method for deciding if we are in a ?/: chain --
21812         # this is a chain if it has multiple ?/: pairs all in order;
21813         # otherwise not.
21814         # Note that if line starts in a ':' we count that above as a break
21815         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
21816
21817         unless ($is_chain) {
21818             my @insert_list = ();
21819             foreach (@i_colon_breaks) {
21820                 my $i_question = $mate_index_to_go[$_];
21821                 if ( $i_question >= 0 ) {
21822                     if ( $want_break_before{'?'} ) {
21823                         $i_question = $iprev_to_go[$i_question];
21824                     }
21825
21826                     if ( $i_question >= 0 ) {
21827                         push @insert_list, $i_question;
21828                     }
21829                 }
21830                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
21831             }
21832         }
21833     }
21834     return ( \@i_first, \@i_last, $colon_count );
21835 }
21836
21837 sub insert_additional_breaks {
21838
21839     # this routine will add line breaks at requested locations after
21840     # sub set_continuation_breaks has made preliminary breaks.
21841
21842     my ( $ri_break_list, $ri_first, $ri_last ) = @_;
21843     my $i_f;
21844     my $i_l;
21845     my $line_number = 0;
21846     foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
21847
21848         $i_f = $ri_first->[$line_number];
21849         $i_l = $ri_last->[$line_number];
21850         while ( $i_break_left >= $i_l ) {
21851             $line_number++;
21852
21853             # shouldn't happen unless caller passes bad indexes
21854             if ( $line_number >= @{$ri_last} ) {
21855                 warning(
21856 "Non-fatal program bug: couldn't set break at $i_break_left\n"
21857                 );
21858                 report_definite_bug();
21859                 return;
21860             }
21861             $i_f = $ri_first->[$line_number];
21862             $i_l = $ri_last->[$line_number];
21863         }
21864
21865         # Do not leave a blank at the end of a line; back up if necessary
21866         if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
21867
21868         my $i_break_right = $inext_to_go[$i_break_left];
21869         if (   $i_break_left >= $i_f
21870             && $i_break_left < $i_l
21871             && $i_break_right > $i_f
21872             && $i_break_right <= $i_l )
21873         {
21874             splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
21875             splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
21876         }
21877     }
21878     return;
21879 }
21880
21881 sub set_closing_breakpoint {
21882
21883     # set a breakpoint at a matching closing token
21884     # at present, this is only used to break at a ':' which matches a '?'
21885     my $i_break = shift;
21886
21887     if ( $mate_index_to_go[$i_break] >= 0 ) {
21888
21889         # CAUTION: infinite recursion possible here:
21890         #   set_closing_breakpoint calls set_forced_breakpoint, and
21891         #   set_forced_breakpoint call set_closing_breakpoint
21892         #   ( test files attrib.t, BasicLyx.pm.html).
21893         # Don't reduce the '2' in the statement below
21894         if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
21895
21896             # break before } ] and ), but sub set_forced_breakpoint will decide
21897             # to break before or after a ? and :
21898             my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
21899             set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
21900         }
21901     }
21902     else {
21903         my $type_sequence = $type_sequence_to_go[$i_break];
21904         if ($type_sequence) {
21905             my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
21906             $postponed_breakpoint{$type_sequence} = 1;
21907         }
21908     }
21909     return;
21910 }
21911
21912 sub compare_indentation_levels {
21913
21914     # check to see if output line tabbing agrees with input line
21915     # this can be very useful for debugging a script which has an extra
21916     # or missing brace
21917     my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
21918     if ( $guessed_indentation_level ne $structural_indentation_level ) {
21919         $last_tabbing_disagreement = $input_line_number;
21920
21921         if ($in_tabbing_disagreement) {
21922         }
21923         else {
21924             $tabbing_disagreement_count++;
21925
21926             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
21927                 write_logfile_entry(
21928 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
21929                 );
21930             }
21931             $in_tabbing_disagreement    = $input_line_number;
21932             $first_tabbing_disagreement = $in_tabbing_disagreement
21933               unless ($first_tabbing_disagreement);
21934         }
21935     }
21936     else {
21937
21938         if ($in_tabbing_disagreement) {
21939
21940             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
21941                 write_logfile_entry(
21942 "End indentation disagreement from input line $in_tabbing_disagreement\n"
21943                 );
21944
21945                 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
21946                     write_logfile_entry(
21947                         "No further tabbing disagreements will be noted\n");
21948                 }
21949             }
21950             $in_tabbing_disagreement = 0;
21951         }
21952     }
21953     return;
21954 }
21955
21956 #####################################################################
21957 #
21958 # the Perl::Tidy::IndentationItem class supplies items which contain
21959 # how much whitespace should be used at the start of a line
21960 #
21961 #####################################################################
21962
21963 package Perl::Tidy::IndentationItem;
21964
21965 sub new {
21966
21967     # Create an 'indentation_item' which describes one level of leading
21968     # whitespace when the '-lp' indentation is used.
21969     my (
21970         $class,               $spaces,           $level,
21971         $ci_level,            $available_spaces, $index,
21972         $gnu_sequence_number, $align_paren,      $stack_depth,
21973         $starting_index,
21974     ) = @_;
21975
21976     my $closed            = -1;
21977     my $arrow_count       = 0;
21978     my $comma_count       = 0;
21979     my $have_child        = 0;
21980     my $want_right_spaces = 0;
21981     my $marked            = 0;
21982
21983     # DEFINITIONS:
21984     # spaces             =>  # total leading white spaces
21985     # level              =>  # the indentation 'level'
21986     # ci_level           =>  # the 'continuation level'
21987     # available_spaces   =>  # how many left spaces available
21988     #                        # for this level
21989     # closed             =>  # index where we saw closing '}'
21990     # comma_count        =>  # how many commas at this level?
21991     # sequence_number    =>  # output batch number
21992     # index              =>  # index in output batch list
21993     # have_child         =>  # any dependents?
21994     # recoverable_spaces =>  # how many spaces to the right
21995     #                        # we would like to move to get
21996     #                        # alignment (negative if left)
21997     # align_paren        =>  # do we want to try to align
21998     #                        # with an opening structure?
21999     # marked             =>  # if visited by corrector logic
22000     # stack_depth        =>  # indentation nesting depth
22001     # starting_index     =>  # first token index of this level
22002     # arrow_count        =>  # how many =>'s
22003
22004     return bless {
22005         _spaces             => $spaces,
22006         _level              => $level,
22007         _ci_level           => $ci_level,
22008         _available_spaces   => $available_spaces,
22009         _closed             => $closed,
22010         _comma_count        => $comma_count,
22011         _sequence_number    => $gnu_sequence_number,
22012         _index              => $index,
22013         _have_child         => $have_child,
22014         _recoverable_spaces => $want_right_spaces,
22015         _align_paren        => $align_paren,
22016         _marked             => $marked,
22017         _stack_depth        => $stack_depth,
22018         _starting_index     => $starting_index,
22019         _arrow_count        => $arrow_count,
22020     }, $class;
22021 }
22022
22023 sub permanently_decrease_available_spaces {
22024
22025     # make a permanent reduction in the available indentation spaces
22026     # at one indentation item.  NOTE: if there are child nodes, their
22027     # total SPACES must be reduced by the caller.
22028
22029     my ( $item, $spaces_needed ) = @_;
22030     my $available_spaces = $item->get_available_spaces();
22031     my $deleted_spaces =
22032       ( $available_spaces > $spaces_needed )
22033       ? $spaces_needed
22034       : $available_spaces;
22035     $item->decrease_available_spaces($deleted_spaces);
22036     $item->decrease_SPACES($deleted_spaces);
22037     $item->set_recoverable_spaces(0);
22038
22039     return $deleted_spaces;
22040 }
22041
22042 sub tentatively_decrease_available_spaces {
22043
22044     # We are asked to tentatively delete $spaces_needed of indentation
22045     # for a indentation item.  We may want to undo this later.  NOTE: if
22046     # there are child nodes, their total SPACES must be reduced by the
22047     # caller.
22048     my ( $item, $spaces_needed ) = @_;
22049     my $available_spaces = $item->get_available_spaces();
22050     my $deleted_spaces =
22051       ( $available_spaces > $spaces_needed )
22052       ? $spaces_needed
22053       : $available_spaces;
22054     $item->decrease_available_spaces($deleted_spaces);
22055     $item->decrease_SPACES($deleted_spaces);
22056     $item->increase_recoverable_spaces($deleted_spaces);
22057     return $deleted_spaces;
22058 }
22059
22060 sub get_stack_depth {
22061     my $self = shift;
22062     return $self->{_stack_depth};
22063 }
22064
22065 sub get_spaces {
22066     my $self = shift;
22067     return $self->{_spaces};
22068 }
22069
22070 sub get_marked {
22071     my $self = shift;
22072     return $self->{_marked};
22073 }
22074
22075 sub set_marked {
22076     my ( $self, $value ) = @_;
22077     if ( defined($value) ) {
22078         $self->{_marked} = $value;
22079     }
22080     return $self->{_marked};
22081 }
22082
22083 sub get_available_spaces {
22084     my $self = shift;
22085     return $self->{_available_spaces};
22086 }
22087
22088 sub decrease_SPACES {
22089     my ( $self, $value ) = @_;
22090     if ( defined($value) ) {
22091         $self->{_spaces} -= $value;
22092     }
22093     return $self->{_spaces};
22094 }
22095
22096 sub decrease_available_spaces {
22097     my ( $self, $value ) = @_;
22098     if ( defined($value) ) {
22099         $self->{_available_spaces} -= $value;
22100     }
22101     return $self->{_available_spaces};
22102 }
22103
22104 sub get_align_paren {
22105     my $self = shift;
22106     return $self->{_align_paren};
22107 }
22108
22109 sub get_recoverable_spaces {
22110     my $self = shift;
22111     return $self->{_recoverable_spaces};
22112 }
22113
22114 sub set_recoverable_spaces {
22115     my ( $self, $value ) = @_;
22116     if ( defined($value) ) {
22117         $self->{_recoverable_spaces} = $value;
22118     }
22119     return $self->{_recoverable_spaces};
22120 }
22121
22122 sub increase_recoverable_spaces {
22123     my ( $self, $value ) = @_;
22124     if ( defined($value) ) {
22125         $self->{_recoverable_spaces} += $value;
22126     }
22127     return $self->{_recoverable_spaces};
22128 }
22129
22130 sub get_ci_level {
22131     my $self = shift;
22132     return $self->{_ci_level};
22133 }
22134
22135 sub get_level {
22136     my $self = shift;
22137     return $self->{_level};
22138 }
22139
22140 sub get_sequence_number {
22141     my $self = shift;
22142     return $self->{_sequence_number};
22143 }
22144
22145 sub get_index {
22146     my $self = shift;
22147     return $self->{_index};
22148 }
22149
22150 sub get_starting_index {
22151     my $self = shift;
22152     return $self->{_starting_index};
22153 }
22154
22155 sub set_have_child {
22156     my ( $self, $value ) = @_;
22157     if ( defined($value) ) {
22158         $self->{_have_child} = $value;
22159     }
22160     return $self->{_have_child};
22161 }
22162
22163 sub get_have_child {
22164     my $self = shift;
22165     return $self->{_have_child};
22166 }
22167
22168 sub set_arrow_count {
22169     my ( $self, $value ) = @_;
22170     if ( defined($value) ) {
22171         $self->{_arrow_count} = $value;
22172     }
22173     return $self->{_arrow_count};
22174 }
22175
22176 sub get_arrow_count {
22177     my $self = shift;
22178     return $self->{_arrow_count};
22179 }
22180
22181 sub set_comma_count {
22182     my ( $self, $value ) = @_;
22183     if ( defined($value) ) {
22184         $self->{_comma_count} = $value;
22185     }
22186     return $self->{_comma_count};
22187 }
22188
22189 sub get_comma_count {
22190     my $self = shift;
22191     return $self->{_comma_count};
22192 }
22193
22194 sub set_closed {
22195     my ( $self, $value ) = @_;
22196     if ( defined($value) ) {
22197         $self->{_closed} = $value;
22198     }
22199     return $self->{_closed};
22200 }
22201
22202 sub get_closed {
22203     my $self = shift;
22204     return $self->{_closed};
22205 }
22206
22207 #####################################################################
22208 #
22209 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
22210 # contain a single output line
22211 #
22212 #####################################################################
22213
22214 package Perl::Tidy::VerticalAligner::Line;
22215
22216 {
22217
22218     use strict;
22219     ##use Carp;
22220
22221     my %default_data = (
22222         jmax                      => undef,
22223         jmax_original_line        => undef,
22224         rtokens                   => undef,
22225         rfields                   => undef,
22226         rpatterns                 => undef,
22227         indentation               => undef,
22228         leading_space_count       => undef,
22229         outdent_long_lines        => undef,
22230         list_type                 => undef,
22231         is_hanging_side_comment   => undef,
22232         ralignments               => [],
22233         maximum_line_length       => undef,
22234         rvertical_tightness_flags => undef
22235     );
22236     {
22237
22238         # methods to count object population
22239         my $_count = 0;
22240         sub get_count        { return $_count; }
22241         sub _increment_count { return ++$_count }
22242         sub _decrement_count { return --$_count }
22243     }
22244
22245     # Constructor may be called as a class method
22246     sub new {
22247         my ( $caller, %arg ) = @_;
22248         my $caller_is_obj = ref($caller);
22249         my $class = $caller_is_obj || $caller;
22250         ##no strict "refs";
22251         my $self = bless {}, $class;
22252
22253         $self->{_ralignments} = [];
22254
22255         foreach my $key ( keys %default_data ) {
22256             my $_key = '_' . $key;
22257
22258             # Caller keys do not have an underscore
22259             if    ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
22260             elsif ($caller_is_obj)      { $self->{$_key} = $caller->{$_key} }
22261             else { $self->{$_key} = $default_data{$_key} }
22262         }
22263
22264         $self->_increment_count();
22265         return $self;
22266     }
22267
22268     sub DESTROY {
22269         my $self = shift;
22270         $self->_decrement_count();
22271         return;
22272     }
22273
22274     sub get_jmax { my $self = shift; return $self->{_jmax} }
22275
22276     sub get_jmax_original_line {
22277         my $self = shift;
22278         return $self->{_jmax_original_line};
22279     }
22280     sub get_rtokens     { my $self = shift; return $self->{_rtokens} }
22281     sub get_rfields     { my $self = shift; return $self->{_rfields} }
22282     sub get_rpatterns   { my $self = shift; return $self->{_rpatterns} }
22283     sub get_indentation { my $self = shift; return $self->{_indentation} }
22284
22285     sub get_leading_space_count {
22286         my $self = shift;
22287         return $self->{_leading_space_count};
22288     }
22289
22290     sub get_outdent_long_lines {
22291         my $self = shift;
22292         return $self->{_outdent_long_lines};
22293     }
22294     sub get_list_type { my $self = shift; return $self->{_list_type} }
22295
22296     sub get_is_hanging_side_comment {
22297         my $self = shift;
22298         return $self->{_is_hanging_side_comment};
22299     }
22300
22301     sub get_rvertical_tightness_flags {
22302         my $self = shift;
22303         return $self->{_rvertical_tightness_flags};
22304     }
22305
22306     sub set_column {
22307         ## FIXME: does caller ever supply $val??
22308         my ( $self, $j, $val ) = @_;
22309         return $self->{_ralignments}->[$j]->set_column($val);
22310     }
22311
22312     sub get_alignment {
22313         my ( $self, $j ) = @_;
22314         return $self->{_ralignments}->[$j];
22315     }
22316     sub get_alignments { my $self = shift; return @{ $self->{_ralignments} } }
22317
22318     sub get_column {
22319         my ( $self, $j ) = @_;
22320         return $self->{_ralignments}->[$j]->get_column();
22321     }
22322
22323     sub get_starting_column {
22324         my ( $self, $j ) = @_;
22325         return $self->{_ralignments}->[$j]->get_starting_column();
22326     }
22327
22328     sub increment_column {
22329         my ( $self, $k, $pad ) = @_;
22330         $self->{_ralignments}->[$k]->increment_column($pad);
22331         return;
22332     }
22333
22334     sub set_alignments {
22335         my $self = shift;
22336         @{ $self->{_ralignments} } = @_;
22337         return;
22338     }
22339
22340     sub current_field_width {
22341         my ( $self, $j ) = @_;
22342         if ( $j == 0 ) {
22343             return $self->get_column($j);
22344         }
22345         else {
22346             return $self->get_column($j) - $self->get_column( $j - 1 );
22347         }
22348     }
22349
22350     sub field_width_growth {
22351         my ( $self, $j ) = @_;
22352         return $self->get_column($j) - $self->get_starting_column($j);
22353     }
22354
22355     sub starting_field_width {
22356         my ( $self, $j ) = @_;
22357         if ( $j == 0 ) {
22358             return $self->get_starting_column($j);
22359         }
22360         else {
22361             return $self->get_starting_column($j) -
22362               $self->get_starting_column( $j - 1 );
22363         }
22364     }
22365
22366     sub increase_field_width {
22367
22368         my ( $self, $j, $pad ) = @_;
22369         my $jmax = $self->get_jmax();
22370         for my $k ( $j .. $jmax ) {
22371             $self->increment_column( $k, $pad );
22372         }
22373         return;
22374     }
22375
22376     sub get_available_space_on_right {
22377         my $self = shift;
22378         my $jmax = $self->get_jmax();
22379         return $self->{_maximum_line_length} - $self->get_column($jmax);
22380     }
22381
22382     sub set_jmax { my ( $self, $val ) = @_; $self->{_jmax} = $val; return }
22383
22384     sub set_jmax_original_line {
22385         my ( $self, $val ) = @_;
22386         $self->{_jmax_original_line} = $val;
22387         return;
22388     }
22389
22390     sub set_rtokens {
22391         my ( $self, $val ) = @_;
22392         $self->{_rtokens} = $val;
22393         return;
22394     }
22395
22396     sub set_rfields {
22397         my ( $self, $val ) = @_;
22398         $self->{_rfields} = $val;
22399         return;
22400     }
22401
22402     sub set_rpatterns {
22403         my ( $self, $val ) = @_;
22404         $self->{_rpatterns} = $val;
22405         return;
22406     }
22407
22408     sub set_indentation {
22409         my ( $self, $val ) = @_;
22410         $self->{_indentation} = $val;
22411         return;
22412     }
22413
22414     sub set_leading_space_count {
22415         my ( $self, $val ) = @_;
22416         $self->{_leading_space_count} = $val;
22417         return;
22418     }
22419
22420     sub set_outdent_long_lines {
22421         my ( $self, $val ) = @_;
22422         $self->{_outdent_long_lines} = $val;
22423         return;
22424     }
22425
22426     sub set_list_type {
22427         my ( $self, $val ) = @_;
22428         $self->{_list_type} = $val;
22429         return;
22430     }
22431
22432     sub set_is_hanging_side_comment {
22433         my ( $self, $val ) = @_;
22434         $self->{_is_hanging_side_comment} = $val;
22435         return;
22436     }
22437
22438     sub set_alignment {
22439         my ( $self, $j, $val ) = @_;
22440         $self->{_ralignments}->[$j] = $val;
22441         return;
22442     }
22443
22444 }
22445
22446 #####################################################################
22447 #
22448 # the Perl::Tidy::VerticalAligner::Alignment class holds information
22449 # on a single column being aligned
22450 #
22451 #####################################################################
22452 package Perl::Tidy::VerticalAligner::Alignment;
22453
22454 {
22455
22456     use strict;
22457
22458     #use Carp;
22459
22460     #    _column          # the current column number
22461     #    _starting_column # column number when created
22462     #    _matching_token  # what token we are matching
22463     #    _starting_line   # the line index of creation
22464     #    _ending_line
22465     # the most recent line to use it
22466     #    _saved_column
22467     #    _serial_number   # unique number for this alignment
22468
22469     my %default_data = (
22470         column          => undef,
22471         starting_column => undef,
22472         matching_token  => undef,
22473         starting_line   => undef,
22474         ending_line     => undef,
22475         saved_column    => undef,
22476         serial_number   => undef,
22477     );
22478
22479     # class population count
22480     {
22481         my $_count = 0;
22482         sub get_count        { return $_count }
22483         sub _increment_count { return ++$_count }
22484         sub _decrement_count { return --$_count }
22485     }
22486
22487     # constructor
22488     sub new {
22489         my ( $caller, %arg ) = @_;
22490         my $caller_is_obj = ref($caller);
22491         my $class = $caller_is_obj || $caller;
22492         ##no strict "refs";
22493         my $self = bless {}, $class;
22494
22495         foreach my $key ( keys %default_data ) {
22496             my $_key = '_' . $key;
22497             if    ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
22498             elsif ($caller_is_obj)      { $self->{$_key} = $caller->{$_key} }
22499             else { $self->{$_key} = $default_data{$_key} }
22500         }
22501         $self->_increment_count();
22502         return $self;
22503     }
22504
22505     sub DESTROY {
22506         my $self = shift;
22507         $self->_decrement_count();
22508         return;
22509     }
22510
22511     sub get_column { my $self = shift; return $self->{_column} }
22512
22513     sub get_starting_column {
22514         my $self = shift;
22515         return $self->{_starting_column};
22516     }
22517     sub get_matching_token { my $self = shift; return $self->{_matching_token} }
22518     sub get_starting_line  { my $self = shift; return $self->{_starting_line} }
22519     sub get_ending_line    { my $self = shift; return $self->{_ending_line} }
22520     sub get_serial_number  { my $self = shift; return $self->{_serial_number} }
22521
22522     sub set_column { my ( $self, $val ) = @_; $self->{_column} = $val; return }
22523
22524     sub set_starting_column {
22525         my ( $self, $val ) = @_;
22526         $self->{_starting_column} = $val;
22527         return;
22528     }
22529
22530     sub set_matching_token {
22531         my ( $self, $val ) = @_;
22532         $self->{_matching_token} = $val;
22533         return;
22534     }
22535
22536     sub set_starting_line {
22537         my ( $self, $val ) = @_;
22538         $self->{_starting_line} = $val;
22539         return;
22540     }
22541
22542     sub set_ending_line {
22543         my ( $self, $val ) = @_;
22544         $self->{_ending_line} = $val;
22545         return;
22546     }
22547
22548     sub increment_column {
22549         my ( $self, $val ) = @_;
22550         $self->{_column} += $val;
22551         return;
22552     }
22553
22554     sub save_column {
22555         my $self = shift;
22556         $self->{_saved_column} = $self->{_column};
22557         return;
22558     }
22559
22560     sub restore_column {
22561         my $self = shift;
22562         $self->{_column} = $self->{_saved_column};
22563         return;
22564     }
22565 }
22566
22567 package Perl::Tidy::VerticalAligner;
22568
22569 # The Perl::Tidy::VerticalAligner package collects output lines and
22570 # attempts to line up certain common tokens, such as => and #, which are
22571 # identified by the calling routine.
22572 #
22573 # There are two main routines: valign_input and flush.  Append acts as a
22574 # storage buffer, collecting lines into a group which can be vertically
22575 # aligned.  When alignment is no longer possible or desirable, it dumps
22576 # the group to flush.
22577 #
22578 #     valign_input -----> flush
22579 #
22580 #     collects          writes
22581 #     vertical          one
22582 #     groups            group
22583
22584 BEGIN {
22585
22586     # Caution: these debug flags produce a lot of output
22587     # They should all be 0 except when debugging small scripts
22588
22589     use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
22590     use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
22591     use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
22592     use constant VALIGN_DEBUG_FLAG_TABS    => 0;
22593
22594     my $debug_warning = sub {
22595         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
22596         return;
22597     };
22598
22599     VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
22600     VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
22601     VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
22602     VALIGN_DEBUG_FLAG_TABS    && $debug_warning->('TABS');
22603
22604 }
22605
22606 use vars qw(
22607   $vertical_aligner_self
22608   $current_line
22609   $maximum_alignment_index
22610   $ralignment_list
22611   $maximum_jmax_seen
22612   $minimum_jmax_seen
22613   $previous_minimum_jmax_seen
22614   $previous_maximum_jmax_seen
22615   $maximum_line_index
22616   $group_level
22617   $group_type
22618   $group_maximum_gap
22619   $marginal_match
22620   $last_level_written
22621   $last_leading_space_count
22622   $extra_indent_ok
22623   $zero_count
22624   @group_lines
22625   $last_comment_column
22626   $last_side_comment_line_number
22627   $last_side_comment_length
22628   $last_side_comment_level
22629   $outdented_line_count
22630   $first_outdented_line_at
22631   $last_outdented_line_at
22632   $diagnostics_object
22633   $logger_object
22634   $file_writer_object
22635   @side_comment_history
22636   $comment_leading_space_count
22637   $is_matching_terminal_line
22638   $consecutive_block_comments
22639
22640   $cached_line_text
22641   $cached_line_type
22642   $cached_line_flag
22643   $cached_seqno
22644   $cached_line_valid
22645   $cached_line_leading_space_count
22646   $cached_seqno_string
22647
22648   $valign_buffer_filling
22649   @valign_buffer
22650
22651   $seqno_string
22652   $last_nonblank_seqno_string
22653
22654   $rOpts
22655
22656   $rOpts_maximum_line_length
22657   $rOpts_variable_maximum_line_length
22658   $rOpts_continuation_indentation
22659   $rOpts_indent_columns
22660   $rOpts_tabs
22661   $rOpts_entab_leading_whitespace
22662   $rOpts_valign
22663
22664   $rOpts_fixed_position_side_comment
22665   $rOpts_minimum_space_to_comment
22666
22667 );
22668
22669 sub initialize {
22670
22671     (
22672         my $class, $rOpts, $file_writer_object, $logger_object,
22673         $diagnostics_object
22674     ) = @_;
22675
22676     # variables describing the entire space group:
22677     $ralignment_list            = [];
22678     $group_level                = 0;
22679     $last_level_written         = -1;
22680     $extra_indent_ok            = 0;    # can we move all lines to the right?
22681     $last_side_comment_length   = 0;
22682     $maximum_jmax_seen          = 0;
22683     $minimum_jmax_seen          = 0;
22684     $previous_minimum_jmax_seen = 0;
22685     $previous_maximum_jmax_seen = 0;
22686
22687     # variables describing each line of the group
22688     @group_lines = ();                  # list of all lines in group
22689
22690     $outdented_line_count          = 0;
22691     $first_outdented_line_at       = 0;
22692     $last_outdented_line_at        = 0;
22693     $last_side_comment_line_number = 0;
22694     $last_side_comment_level       = -1;
22695     $is_matching_terminal_line     = 0;
22696
22697     # most recent 3 side comments; [ line number, column ]
22698     $side_comment_history[0] = [ -300, 0 ];
22699     $side_comment_history[1] = [ -200, 0 ];
22700     $side_comment_history[2] = [ -100, 0 ];
22701
22702     # valign_output_step_B cache:
22703     $cached_line_text                = "";
22704     $cached_line_type                = 0;
22705     $cached_line_flag                = 0;
22706     $cached_seqno                    = 0;
22707     $cached_line_valid               = 0;
22708     $cached_line_leading_space_count = 0;
22709     $cached_seqno_string             = "";
22710
22711     # string of sequence numbers joined together
22712     $seqno_string               = "";
22713     $last_nonblank_seqno_string = "";
22714
22715     # frequently used parameters
22716     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
22717     $rOpts_tabs                     = $rOpts->{'tabs'};
22718     $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
22719     $rOpts_fixed_position_side_comment =
22720       $rOpts->{'fixed-position-side-comment'};
22721     $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
22722     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
22723     $rOpts_variable_maximum_line_length =
22724       $rOpts->{'variable-maximum-line-length'};
22725     $rOpts_valign = $rOpts->{'valign'};
22726
22727     $consecutive_block_comments = 0;
22728     forget_side_comment();
22729
22730     initialize_for_new_group();
22731
22732     $vertical_aligner_self = {};
22733     bless $vertical_aligner_self, $class;
22734     return $vertical_aligner_self;
22735 }
22736
22737 sub initialize_for_new_group {
22738     $maximum_line_index      = -1;      # lines in the current group
22739     $maximum_alignment_index = -1;      # alignments in current group
22740     $zero_count              = 0;       # count consecutive lines without tokens
22741     $current_line            = undef;   # line being matched for alignment
22742     $group_maximum_gap       = 0;       # largest gap introduced
22743     $group_type              = "";
22744     $marginal_match          = 0;
22745     $comment_leading_space_count = 0;
22746     $last_leading_space_count    = 0;
22747     return;
22748 }
22749
22750 # interface to Perl::Tidy::Diagnostics routines
22751 sub write_diagnostics {
22752     my $msg = shift;
22753     if ($diagnostics_object) {
22754         $diagnostics_object->write_diagnostics($msg);
22755     }
22756     return;
22757 }
22758
22759 # interface to Perl::Tidy::Logger routines
22760 sub warning {
22761     my ($msg) = @_;
22762     if ($logger_object) {
22763         $logger_object->warning($msg);
22764     }
22765     return;
22766 }
22767
22768 sub write_logfile_entry {
22769     my ($msg) = @_;
22770     if ($logger_object) {
22771         $logger_object->write_logfile_entry($msg);
22772     }
22773     return;
22774 }
22775
22776 sub report_definite_bug {
22777     if ($logger_object) {
22778         $logger_object->report_definite_bug();
22779     }
22780     return;
22781 }
22782
22783 sub get_spaces {
22784
22785     # return the number of leading spaces associated with an indentation
22786     # variable $indentation is either a constant number of spaces or an
22787     # object with a get_spaces method.
22788     my $indentation = shift;
22789     return ref($indentation) ? $indentation->get_spaces() : $indentation;
22790 }
22791
22792 sub get_recoverable_spaces {
22793
22794     # return the number of spaces (+ means shift right, - means shift left)
22795     # that we would like to shift a group of lines with the same indentation
22796     # to get them to line up with their opening parens
22797     my $indentation = shift;
22798     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
22799 }
22800
22801 sub get_stack_depth {
22802
22803     my $indentation = shift;
22804     return ref($indentation) ? $indentation->get_stack_depth() : 0;
22805 }
22806
22807 sub make_alignment {
22808     my ( $col, $token ) = @_;
22809
22810     # make one new alignment at column $col which aligns token $token
22811     ++$maximum_alignment_index;
22812     my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
22813         column          => $col,
22814         starting_column => $col,
22815         matching_token  => $token,
22816         starting_line   => $maximum_line_index,
22817         ending_line     => $maximum_line_index,
22818         serial_number   => $maximum_alignment_index,
22819     );
22820     $ralignment_list->[$maximum_alignment_index] = $alignment;
22821     return $alignment;
22822 }
22823
22824 sub dump_alignments {
22825     print STDOUT
22826 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
22827     for my $i ( 0 .. $maximum_alignment_index ) {
22828         my $column          = $ralignment_list->[$i]->get_column();
22829         my $starting_column = $ralignment_list->[$i]->get_starting_column();
22830         my $matching_token  = $ralignment_list->[$i]->get_matching_token();
22831         my $starting_line   = $ralignment_list->[$i]->get_starting_line();
22832         my $ending_line     = $ralignment_list->[$i]->get_ending_line();
22833         print STDOUT
22834 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
22835     }
22836     return;
22837 }
22838
22839 sub save_alignment_columns {
22840     for my $i ( 0 .. $maximum_alignment_index ) {
22841         $ralignment_list->[$i]->save_column();
22842     }
22843     return;
22844 }
22845
22846 sub restore_alignment_columns {
22847     for my $i ( 0 .. $maximum_alignment_index ) {
22848         $ralignment_list->[$i]->restore_column();
22849     }
22850     return;
22851 }
22852
22853 sub forget_side_comment {
22854     $last_comment_column = 0;
22855     return;
22856 }
22857
22858 sub maximum_line_length_for_level {
22859
22860     # return maximum line length for line starting with a given level
22861     my $maximum_line_length = $rOpts_maximum_line_length;
22862     if ($rOpts_variable_maximum_line_length) {
22863         my $level = shift;
22864         if ( $level < 0 ) { $level = 0 }
22865         $maximum_line_length += $level * $rOpts_indent_columns;
22866     }
22867     return $maximum_line_length;
22868 }
22869
22870 sub valign_input {
22871
22872     # Place one line in the current vertical group.
22873     #
22874     # The input parameters are:
22875     #     $level = indentation level of this line
22876     #     $rfields = reference to array of fields
22877     #     $rpatterns = reference to array of patterns, one per field
22878     #     $rtokens   = reference to array of tokens starting fields 1,2,..
22879     #
22880     # Here is an example of what this package does.  In this example,
22881     # we are trying to line up both the '=>' and the '#'.
22882     #
22883     #         '18' => 'grave',    #   \`
22884     #         '19' => 'acute',    #   `'
22885     #         '20' => 'caron',    #   \v
22886     # <-tabs-><f1-><--field 2 ---><-f3->
22887     # |            |              |    |
22888     # |            |              |    |
22889     # col1        col2         col3 col4
22890     #
22891     # The calling routine has already broken the entire line into 3 fields as
22892     # indicated.  (So the work of identifying promising common tokens has
22893     # already been done).
22894     #
22895     # In this example, there will be 2 tokens being matched: '=>' and '#'.
22896     # They are the leading parts of fields 2 and 3, but we do need to know
22897     # what they are so that we can dump a group of lines when these tokens
22898     # change.
22899     #
22900     # The fields contain the actual characters of each field.  The patterns
22901     # are like the fields, but they contain mainly token types instead
22902     # of tokens, so they have fewer characters.  They are used to be
22903     # sure we are matching fields of similar type.
22904     #
22905     # In this example, there will be 4 column indexes being adjusted.  The
22906     # first one is always at zero.  The interior columns are at the start of
22907     # the matching tokens, and the last one tracks the maximum line length.
22908     #
22909     # Each time a new line comes in, it joins the current vertical
22910     # group if possible.  Otherwise it causes the current group to be dumped
22911     # and a new group is started.
22912     #
22913     # For each new group member, the column locations are increased, as
22914     # necessary, to make room for the new fields.  When the group is finally
22915     # output, these column numbers are used to compute the amount of spaces of
22916     # padding needed for each field.
22917     #
22918     # Programming note: the fields are assumed not to have any tab characters.
22919     # Tabs have been previously removed except for tabs in quoted strings and
22920     # side comments.  Tabs in these fields can mess up the column counting.
22921     # The log file warns the user if there are any such tabs.
22922
22923     my (
22924         $level,               $level_end,
22925         $indentation,         $rfields,
22926         $rtokens,             $rpatterns,
22927         $is_forced_break,     $outdent_long_lines,
22928         $is_terminal_ternary, $is_terminal_statement,
22929         $do_not_pad,          $rvertical_tightness_flags,
22930         $level_jump,
22931     ) = @_;
22932
22933     # number of fields is $jmax
22934     # number of tokens between fields is $jmax-1
22935     my $jmax = $#{$rfields};
22936
22937     my $leading_space_count = get_spaces($indentation);
22938
22939     # set outdented flag to be sure we either align within statements or
22940     # across statement boundaries, but not both.
22941     my $is_outdented = $last_leading_space_count > $leading_space_count;
22942     $last_leading_space_count = $leading_space_count;
22943
22944     # Patch: undo for hanging side comment
22945     my $is_hanging_side_comment =
22946       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
22947     $is_outdented = 0 if $is_hanging_side_comment;
22948
22949     # Forget side comment alignment after seeing 2 or more block comments
22950     my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
22951     if ($is_block_comment) {
22952         $consecutive_block_comments++;
22953     }
22954     else {
22955         if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
22956         $consecutive_block_comments = 0;
22957     }
22958
22959     VALIGN_DEBUG_FLAG_APPEND0 && do {
22960         print STDOUT
22961 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
22962     };
22963
22964     # Validate cached line if necessary: If we can produce a container
22965     # with just 2 lines total by combining an existing cached opening
22966     # token with the closing token to follow, then we will mark both
22967     # cached flags as valid.
22968     if ($rvertical_tightness_flags) {
22969         if (   $maximum_line_index <= 0
22970             && $cached_line_type
22971             && $cached_seqno
22972             && $rvertical_tightness_flags->[2]
22973             && $rvertical_tightness_flags->[2] == $cached_seqno )
22974         {
22975             $rvertical_tightness_flags->[3] ||= 1;
22976             $cached_line_valid ||= 1;
22977         }
22978     }
22979
22980     # do not join an opening block brace with an unbalanced line
22981     # unless requested with a flag value of 2
22982     if (   $cached_line_type == 3
22983         && $maximum_line_index < 0
22984         && $cached_line_flag < 2
22985         && $level_jump != 0 )
22986     {
22987         $cached_line_valid = 0;
22988     }
22989
22990     # patch until new aligner is finished
22991     if ($do_not_pad) { my_flush() }
22992
22993     # shouldn't happen:
22994     if ( $level < 0 ) { $level = 0 }
22995
22996     # do not align code across indentation level changes
22997     # or if vertical alignment is turned off for debugging
22998     if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
22999
23000         # we are allowed to shift a group of lines to the right if its
23001         # level is greater than the previous and next group
23002         $extra_indent_ok =
23003           ( $level < $group_level && $last_level_written < $group_level );
23004
23005         my_flush();
23006
23007         # If we know that this line will get flushed out by itself because
23008         # of level changes, we can leave the extra_indent_ok flag set.
23009         # That way, if we get an external flush call, we will still be
23010         # able to do some -lp alignment if necessary.
23011         $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
23012
23013         $group_level = $level;
23014
23015         # wait until after the above flush to get the leading space
23016         # count because it may have been changed if the -icp flag is in
23017         # effect
23018         $leading_space_count = get_spaces($indentation);
23019
23020     }
23021
23022     # --------------------------------------------------------------------
23023     # Patch to collect outdentable block COMMENTS
23024     # --------------------------------------------------------------------
23025     my $is_blank_line = "";
23026     if ( $group_type eq 'COMMENT' ) {
23027         if (
23028             (
23029                    $is_block_comment
23030                 && $outdent_long_lines
23031                 && $leading_space_count == $comment_leading_space_count
23032             )
23033             || $is_blank_line
23034           )
23035         {
23036             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
23037             return;
23038         }
23039         else {
23040             my_flush();
23041         }
23042     }
23043
23044     # --------------------------------------------------------------------
23045     # add dummy fields for terminal ternary
23046     # --------------------------------------------------------------------
23047     my $j_terminal_match;
23048     if ( $is_terminal_ternary && $current_line ) {
23049         $j_terminal_match =
23050           fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
23051         $jmax = @{$rfields} - 1;
23052     }
23053
23054     # --------------------------------------------------------------------
23055     # add dummy fields for else statement
23056     # --------------------------------------------------------------------
23057     if (   $rfields->[0] =~ /^else\s*$/
23058         && $current_line
23059         && $level_jump == 0 )
23060     {
23061         $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
23062         $jmax = @{$rfields} - 1;
23063     }
23064
23065     # --------------------------------------------------------------------
23066     # Step 1. Handle simple line of code with no fields to match.
23067     # --------------------------------------------------------------------
23068     if ( $jmax <= 0 ) {
23069         $zero_count++;
23070
23071         if ( $maximum_line_index >= 0
23072             && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) )
23073         {
23074
23075             # flush the current group if it has some aligned columns..
23076             if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
23077
23078             # flush current group if we are just collecting side comments..
23079             elsif (
23080
23081                 # ...and we haven't seen a comment lately
23082                 ( $zero_count > 3 )
23083
23084                 # ..or if this new line doesn't fit to the left of the comments
23085                 || ( ( $leading_space_count + length( $rfields->[0] ) ) >
23086                     $group_lines[0]->get_column(0) )
23087               )
23088             {
23089                 my_flush();
23090             }
23091         }
23092
23093         # patch to start new COMMENT group if this comment may be outdented
23094         if (   $is_block_comment
23095             && $outdent_long_lines
23096             && $maximum_line_index < 0 )
23097         {
23098             $group_type                           = 'COMMENT';
23099             $comment_leading_space_count          = $leading_space_count;
23100             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
23101             return;
23102         }
23103
23104         # just write this line directly if no current group, no side comment,
23105         # and no space recovery is needed.
23106         if ( $maximum_line_index < 0 && !get_recoverable_spaces($indentation) )
23107         {
23108             valign_output_step_B( $leading_space_count, $rfields->[0], 0,
23109                 $outdent_long_lines, $rvertical_tightness_flags, $level );
23110             return;
23111         }
23112     }
23113     else {
23114         $zero_count = 0;
23115     }
23116
23117     # programming check: (shouldn't happen)
23118     # an error here implies an incorrect call was made
23119     if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
23120         warning(
23121 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
23122         );
23123         report_definite_bug();
23124     }
23125
23126     # --------------------------------------------------------------------
23127     # create an object to hold this line
23128     # --------------------------------------------------------------------
23129     my $new_line = new Perl::Tidy::VerticalAligner::Line(
23130         jmax                      => $jmax,
23131         jmax_original_line        => $jmax,
23132         rtokens                   => $rtokens,
23133         rfields                   => $rfields,
23134         rpatterns                 => $rpatterns,
23135         indentation               => $indentation,
23136         leading_space_count       => $leading_space_count,
23137         outdent_long_lines        => $outdent_long_lines,
23138         list_type                 => "",
23139         is_hanging_side_comment   => $is_hanging_side_comment,
23140         maximum_line_length       => maximum_line_length_for_level($level),
23141         rvertical_tightness_flags => $rvertical_tightness_flags,
23142     );
23143
23144     # Initialize a global flag saying if the last line of the group should
23145     # match end of group and also terminate the group.  There should be no
23146     # returns between here and where the flag is handled at the bottom.
23147     my $col_matching_terminal = 0;
23148     if ( defined($j_terminal_match) ) {
23149
23150         # remember the column of the terminal ? or { to match with
23151         $col_matching_terminal = $current_line->get_column($j_terminal_match);
23152
23153         # set global flag for sub decide_if_aligned
23154         $is_matching_terminal_line = 1;
23155     }
23156
23157     # --------------------------------------------------------------------
23158     # It simplifies things to create a zero length side comment
23159     # if none exists.
23160     # --------------------------------------------------------------------
23161     make_side_comment( $new_line, $level_end );
23162
23163     # --------------------------------------------------------------------
23164     # Decide if this is a simple list of items.
23165     # There are 3 list types: none, comma, comma-arrow.
23166     # We use this below to be less restrictive in deciding what to align.
23167     # --------------------------------------------------------------------
23168     if ($is_forced_break) {
23169         decide_if_list($new_line);
23170     }
23171
23172     if ($current_line) {
23173
23174         # --------------------------------------------------------------------
23175         # Allow hanging side comment to join current group, if any
23176         # This will help keep side comments aligned, because otherwise we
23177         # will have to start a new group, making alignment less likely.
23178         # --------------------------------------------------------------------
23179         join_hanging_comment( $new_line, $current_line )
23180           if $is_hanging_side_comment;
23181
23182         # --------------------------------------------------------------------
23183         # If there is just one previous line, and it has more fields
23184         # than the new line, try to join fields together to get a match with
23185         # the new line.  At the present time, only a single leading '=' is
23186         # allowed to be compressed out.  This is useful in rare cases where
23187         # a table is forced to use old breakpoints because of side comments,
23188         # and the table starts out something like this:
23189         #   my %MonthChars = ('0', 'Jan',   # side comment
23190         #                     '1', 'Feb',
23191         #                     '2', 'Mar',
23192         # Eliminating the '=' field will allow the remaining fields to line up.
23193         # This situation does not occur if there are no side comments
23194         # because scan_list would put a break after the opening '('.
23195         # --------------------------------------------------------------------
23196         eliminate_old_fields( $new_line, $current_line );
23197
23198         # --------------------------------------------------------------------
23199         # If the new line has more fields than the current group,
23200         # see if we can match the first fields and combine the remaining
23201         # fields of the new line.
23202         # --------------------------------------------------------------------
23203         eliminate_new_fields( $new_line, $current_line );
23204
23205         # --------------------------------------------------------------------
23206         # Flush previous group unless all common tokens and patterns match..
23207         # --------------------------------------------------------------------
23208         check_match( $new_line, $current_line );
23209
23210         # --------------------------------------------------------------------
23211         # See if there is space for this line in the current group (if any)
23212         # --------------------------------------------------------------------
23213         if ($current_line) {
23214             check_fit( $new_line, $current_line );
23215         }
23216     }
23217
23218     # --------------------------------------------------------------------
23219     # Append this line to the current group (or start new group)
23220     # --------------------------------------------------------------------
23221     add_to_group($new_line);
23222
23223     # Future update to allow this to vary:
23224     $current_line = $new_line if ( $maximum_line_index == 0 );
23225
23226     # output this group if it ends in a terminal else or ternary line
23227     if ( defined($j_terminal_match) ) {
23228
23229         # if there is only one line in the group (maybe due to failure to match
23230         # perfectly with previous lines), then align the ? or { of this
23231         # terminal line with the previous one unless that would make the line
23232         # too long
23233         if ( $maximum_line_index == 0 ) {
23234             my $col_now = $current_line->get_column($j_terminal_match);
23235             my $pad     = $col_matching_terminal - $col_now;
23236             my $padding_available =
23237               $current_line->get_available_space_on_right();
23238             if ( $pad > 0 && $pad <= $padding_available ) {
23239                 $current_line->increase_field_width( $j_terminal_match, $pad );
23240             }
23241         }
23242         my_flush();
23243         $is_matching_terminal_line = 0;
23244     }
23245
23246     # --------------------------------------------------------------------
23247     # Step 8. Some old debugging stuff
23248     # --------------------------------------------------------------------
23249     VALIGN_DEBUG_FLAG_APPEND && do {
23250         print STDOUT "APPEND fields:";
23251         dump_array( @{$rfields} );
23252         print STDOUT "APPEND tokens:";
23253         dump_array( @{$rtokens} );
23254         print STDOUT "APPEND patterns:";
23255         dump_array( @{$rpatterns} );
23256         dump_alignments();
23257     };
23258
23259     return;
23260 }
23261
23262 sub join_hanging_comment {
23263
23264     my $line = shift;
23265     my $jmax = $line->get_jmax();
23266     return 0 unless $jmax == 1;    # must be 2 fields
23267     my $rtokens = $line->get_rtokens();
23268     return 0 unless $rtokens->[0] eq '#';    # the second field is a comment..
23269     my $rfields = $line->get_rfields();
23270     return 0 unless $rfields->[0] =~ /^\s*$/;    # the first field is empty...
23271     my $old_line            = shift;
23272     my $maximum_field_index = $old_line->get_jmax();
23273     return 0
23274       unless $maximum_field_index > $jmax;    # the current line has more fields
23275     my $rpatterns = $line->get_rpatterns();
23276
23277     $line->set_is_hanging_side_comment(1);
23278     $jmax = $maximum_field_index;
23279     $line->set_jmax($jmax);
23280     $rfields->[$jmax]         = $rfields->[1];
23281     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
23282     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
23283     foreach my $j ( 1 .. $jmax - 1 ) {
23284         $rfields->[$j]         = " "; # NOTE: caused glitch unless 1 blank, why?
23285         $rtokens->[ $j - 1 ]   = "";
23286         $rpatterns->[ $j - 1 ] = "";
23287     }
23288     return 1;
23289 }
23290
23291 sub eliminate_old_fields {
23292
23293     my $new_line = shift;
23294     my $jmax     = $new_line->get_jmax();
23295     if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
23296     if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
23297
23298     # there must be one previous line
23299     return unless ( $maximum_line_index == 0 );
23300
23301     my $old_line            = shift;
23302     my $maximum_field_index = $old_line->get_jmax();
23303
23304     ###############################################
23305     # Moved below to allow new coding for => matches
23306     # return unless $maximum_field_index > $jmax;
23307     ###############################################
23308
23309     # Identify specific cases where field elimination is allowed:
23310     # case=1: both lines have comma-separated lists, and the first
23311     #         line has an equals
23312     # case=2: both lines have leading equals
23313
23314     # case 1 is the default
23315     my $case = 1;
23316
23317     # See if case 2: both lines have leading '='
23318     # We'll require similar leading patterns in this case
23319     my $old_rtokens   = $old_line->get_rtokens();
23320     my $rtokens       = $new_line->get_rtokens();
23321     my $rpatterns     = $new_line->get_rpatterns();
23322     my $old_rpatterns = $old_line->get_rpatterns();
23323     if (   $rtokens->[0] =~ /^=>?\d*$/
23324         && $old_rtokens->[0] eq $rtokens->[0]
23325         && $old_rpatterns->[0] eq $rpatterns->[0] )
23326     {
23327         $case = 2;
23328     }
23329
23330     # not too many fewer fields in new line for case 1
23331     return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
23332
23333     # case 1 must have side comment
23334     my $old_rfields = $old_line->get_rfields();
23335     return
23336       if ( $case == 1
23337         && length( $old_rfields->[$maximum_field_index] ) == 0 );
23338
23339     my $rfields = $new_line->get_rfields();
23340
23341     my $hid_equals = 0;
23342
23343     my @new_alignments        = ();
23344     my @new_fields            = ();
23345     my @new_matching_patterns = ();
23346     my @new_matching_tokens   = ();
23347
23348     my $j               = 0;
23349     my $current_field   = '';
23350     my $current_pattern = '';
23351
23352     # loop over all old tokens
23353     my $in_match = 0;
23354     foreach my $k ( 0 .. $maximum_field_index - 1 ) {
23355         $current_field   .= $old_rfields->[$k];
23356         $current_pattern .= $old_rpatterns->[$k];
23357         last if ( $j > $jmax - 1 );
23358
23359         if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {
23360             $in_match                  = 1;
23361             $new_fields[$j]            = $current_field;
23362             $new_matching_patterns[$j] = $current_pattern;
23363             $current_field             = '';
23364             $current_pattern           = '';
23365             $new_matching_tokens[$j]   = $old_rtokens->[$k];
23366             $new_alignments[$j]        = $old_line->get_alignment($k);
23367             $j++;
23368         }
23369         else {
23370
23371             if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
23372                 last if ( $case == 2 );    # avoid problems with stuff
23373                                            # like:   $a=$b=$c=$d;
23374                 $hid_equals = 1;
23375             }
23376             last
23377               if ( $in_match && $case == 1 )
23378               ;    # disallow gaps in matching field types in case 1
23379         }
23380     }
23381
23382     # Modify the current state if we are successful.
23383     # We must exactly reach the ends of the new list for success, and the old
23384     # pattern must have more fields. Here is an example where the first and
23385     # second lines have the same number, and we should not align:
23386     #  my @a = map chr, 0 .. 255;
23387     #  my @b = grep /\W/,    @a;
23388     #  my @c = grep /[^\w]/, @a;
23389
23390     # Otherwise, we would get all of the commas aligned, which doesn't work as
23391     # well:
23392     #  my @a = map chr,      0 .. 255;
23393     #  my @b = grep /\W/,    @a;
23394     #  my @c = grep /[^\w]/, @a;
23395
23396     if (   ( $j == $jmax )
23397         && ( $current_field eq '' )
23398         && ( $case != 1 || $hid_equals )
23399         && ( $maximum_field_index > $jmax ) )
23400     {
23401         my $k = $maximum_field_index;
23402         $current_field   .= $old_rfields->[$k];
23403         $current_pattern .= $old_rpatterns->[$k];
23404         $new_fields[$j]            = $current_field;
23405         $new_matching_patterns[$j] = $current_pattern;
23406
23407         $new_alignments[$j] = $old_line->get_alignment($k);
23408         $maximum_field_index = $j;
23409
23410         $old_line->set_alignments(@new_alignments);
23411         $old_line->set_jmax($jmax);
23412         $old_line->set_rtokens( \@new_matching_tokens );
23413         $old_line->set_rfields( \@new_fields );
23414         $old_line->set_rpatterns( \@{$rpatterns} );
23415     }
23416
23417     # Dumb Down starting match if necessary:
23418     #
23419     # Consider the following two lines:
23420     #
23421     #  {
23422     #   $a => 20 > 3 ? 1 : 0,
23423     #   $xyz => 5,
23424     #  }
23425
23426 # We would like to get alignment regardless of the order of the two lines.
23427 # If the lines come in in this order, then we will simplify the patterns of the first line
23428 # in sub eliminate_new_fields.
23429 # If the lines come in reverse order, then we achieve this with eliminate_new_fields.
23430
23431     # This update is currently restricted to leading '=>' matches. Although we
23432     # could do this for both '=' and '=>', overall the results for '=' come out
23433     # better without this step because this step can eliminate some other good
23434     # matches.  For example, with the '=' we get:
23435
23436 #  my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
23437 #  my @dsf     = map "$_\x{FFFE}Fred", @disilva;
23438 #  my @dsj     = map "$_\x{FFFE}John", @disilva;
23439 #  my @dsJ     = map "$_ John", @disilva;
23440
23441     # without including '=' we get:
23442
23443 #  my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
23444 #  my @dsf = map "$_\x{FFFE}Fred", @disilva;
23445 #  my @dsj = map "$_\x{FFFE}John", @disilva;
23446 #  my @dsJ = map "$_ John",        @disilva;
23447     elsif (
23448         $case == 2
23449
23450         && @new_matching_tokens == 1
23451         ##&& $new_matching_tokens[0] =~ /^=/   # see note above
23452         && $new_matching_tokens[0] =~ /^=>/
23453         && $maximum_field_index > 2
23454       )
23455     {
23456         my $jmaxm             = $jmax - 1;
23457         my $kmaxm             = $maximum_field_index - 1;
23458         my $have_side_comment = $old_rtokens->[$kmaxm] eq '#';
23459
23460         # We need to reduce the group pattern to be just two tokens,
23461         # the leading equality or => and the final side comment
23462
23463         my $mid_field = join "",
23464           @{$old_rfields}[ 1 .. $maximum_field_index - 1 ];
23465         my $mid_patterns = join "",
23466           @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ];
23467         my @new_alignments = (
23468             $old_line->get_alignment(0),
23469             $old_line->get_alignment( $maximum_field_index - 1 )
23470         );
23471         my @new_tokens =
23472           ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] );
23473         my @new_fields = (
23474             $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index]
23475         );
23476         my @new_patterns = (
23477             $old_rpatterns->[0], $mid_patterns,
23478             $old_rpatterns->[$maximum_field_index]
23479         );
23480
23481         $maximum_field_index = 2;
23482         $old_line->set_jmax($maximum_field_index);
23483         $old_line->set_rtokens( \@new_tokens );
23484         $old_line->set_rfields( \@new_fields );
23485         $old_line->set_rpatterns( \@new_patterns );
23486
23487         initialize_for_new_group();
23488         add_to_group($old_line);
23489         $current_line = $old_line;
23490     }
23491     return;
23492 }
23493
23494 # create an empty side comment if none exists
23495 sub make_side_comment {
23496     my ( $new_line, $level_end ) = @_;
23497     my $jmax    = $new_line->get_jmax();
23498     my $rtokens = $new_line->get_rtokens();
23499
23500     # if line does not have a side comment...
23501     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
23502         my $rfields   = $new_line->get_rfields();
23503         my $rpatterns = $new_line->get_rpatterns();
23504         $rtokens->[$jmax]     = '#';
23505         $rfields->[ ++$jmax ] = '';
23506         $rpatterns->[$jmax]   = '#';
23507         $new_line->set_jmax($jmax);
23508         $new_line->set_jmax_original_line($jmax);
23509     }
23510
23511     # line has a side comment..
23512     else {
23513
23514         # don't remember old side comment location for very long
23515         my $line_number = $vertical_aligner_self->get_output_line_number();
23516         my $rfields     = $new_line->get_rfields();
23517         if (
23518             $line_number - $last_side_comment_line_number > 12
23519
23520             # and don't remember comment location across block level changes
23521             || (   $level_end < $last_side_comment_level
23522                 && $rfields->[0] =~ /^}/ )
23523           )
23524         {
23525             forget_side_comment();
23526         }
23527         $last_side_comment_line_number = $line_number;
23528         $last_side_comment_level       = $level_end;
23529     }
23530     return;
23531 }
23532
23533 sub decide_if_list {
23534
23535     my $line = shift;
23536
23537     # A list will be taken to be a line with a forced break in which all
23538     # of the field separators are commas or comma-arrows (except for the
23539     # trailing #)
23540
23541     # List separator tokens are things like ',3'   or '=>2',
23542     # where the trailing digit is the nesting depth.  Allow braces
23543     # to allow nested list items.
23544     my $rtokens    = $line->get_rtokens();
23545     my $test_token = $rtokens->[0];
23546     if ( $test_token =~ /^(\,|=>)/ ) {
23547         my $list_type = $test_token;
23548         my $jmax      = $line->get_jmax();
23549
23550         foreach ( 1 .. $jmax - 2 ) {
23551             if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) {
23552                 $list_type = "";
23553                 last;
23554             }
23555         }
23556         $line->set_list_type($list_type);
23557     }
23558     return;
23559 }
23560
23561 sub eliminate_new_fields {
23562
23563     my ( $new_line, $old_line ) = @_;
23564     return unless ( $maximum_line_index >= 0 );
23565     my $jmax = $new_line->get_jmax();
23566
23567     my $old_rtokens = $old_line->get_rtokens();
23568     my $rtokens     = $new_line->get_rtokens();
23569     my $is_assignment =
23570       ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
23571
23572     # must be monotonic variation
23573     return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
23574
23575     # must be more fields in the new line
23576     my $maximum_field_index = $old_line->get_jmax();
23577     return unless ( $maximum_field_index < $jmax );
23578
23579     unless ($is_assignment) {
23580         return
23581           unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
23582           ;    # only if monotonic
23583
23584         # never combine fields of a comma list
23585         return
23586           unless ( $maximum_field_index > 1 )
23587           && ( $new_line->get_list_type() !~ /^,/ );
23588     }
23589
23590     my $rfields       = $new_line->get_rfields();
23591     my $rpatterns     = $new_line->get_rpatterns();
23592     my $old_rpatterns = $old_line->get_rpatterns();
23593
23594     # loop over all OLD tokens except comment and check match
23595     my $match = 1;
23596     foreach my $k ( 0 .. $maximum_field_index - 2 ) {
23597         if (   ( $old_rtokens->[$k] ne $rtokens->[$k] )
23598             || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) )
23599         {
23600             $match = 0;
23601             last;
23602         }
23603     }
23604
23605     # first tokens agree, so combine extra new tokens
23606     if ($match) {
23607         ##for my $k ( $maximum_field_index .. $jmax - 1 ) {
23608         foreach my $k ( $maximum_field_index .. $jmax - 1 ) {
23609
23610             $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k];
23611             $rfields->[$k] = "";
23612             $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k];
23613             $rpatterns->[$k] = "";
23614         }
23615
23616         $rtokens->[ $maximum_field_index - 1 ] = '#';
23617         $rfields->[$maximum_field_index]       = $rfields->[$jmax];
23618         $rpatterns->[$maximum_field_index]     = $rpatterns->[$jmax];
23619         $jmax                                  = $maximum_field_index;
23620     }
23621     $new_line->set_jmax($jmax);
23622     return;
23623 }
23624
23625 sub fix_terminal_ternary {
23626
23627     # Add empty fields as necessary to align a ternary term
23628     # like this:
23629     #
23630     #  my $leapyear =
23631     #      $year % 4   ? 0
23632     #    : $year % 100 ? 1
23633     #    : $year % 400 ? 0
23634     #    :               1;
23635     #
23636     # returns 1 if the terminal item should be indented
23637
23638     my ( $rfields, $rtokens, $rpatterns ) = @_;
23639
23640     my $jmax        = @{$rfields} - 1;
23641     my $old_line    = $group_lines[$maximum_line_index];
23642     my $rfields_old = $old_line->get_rfields();
23643
23644     my $rpatterns_old       = $old_line->get_rpatterns();
23645     my $rtokens_old         = $old_line->get_rtokens();
23646     my $maximum_field_index = $old_line->get_jmax();
23647
23648     # look for the question mark after the :
23649     my ($jquestion);
23650     my $depth_question;
23651     my $pad = "";
23652     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
23653         my $tok = $rtokens_old->[$j];
23654         if ( $tok =~ /^\?(\d+)$/ ) {
23655             $depth_question = $1;
23656
23657             # depth must be correct
23658             next unless ( $depth_question eq $group_level );
23659
23660             $jquestion = $j;
23661             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
23662                 $pad = " " x length($1);
23663             }
23664             else {
23665                 return;    # shouldn't happen
23666             }
23667             last;
23668         }
23669     }
23670     return unless ( defined($jquestion) );    # shouldn't happen
23671
23672     # Now splice the tokens and patterns of the previous line
23673     # into the else line to insure a match.  Add empty fields
23674     # as necessary.
23675     my $jadd = $jquestion;
23676
23677     # Work on copies of the actual arrays in case we have
23678     # to return due to an error
23679     my @fields   = @{$rfields};
23680     my @patterns = @{$rpatterns};
23681     my @tokens   = @{$rtokens};
23682
23683     VALIGN_DEBUG_FLAG_TERNARY && do {
23684         local $" = '><';
23685         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
23686         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
23687         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
23688         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
23689         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
23690         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
23691     };
23692
23693     # handle cases of leading colon on this line
23694     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
23695
23696         my ( $colon, $therest ) = ( $1, $2 );
23697
23698         # Handle sub-case of first field with leading colon plus additional code
23699         # This is the usual situation as at the '1' below:
23700         #  ...
23701         #  : $year % 400 ? 0
23702         #  :               1;
23703         if ($therest) {
23704
23705             # Split the first field after the leading colon and insert padding.
23706             # Note that this padding will remain even if the terminal value goes
23707             # out on a separate line.  This does not seem to look to bad, so no
23708             # mechanism has been included to undo it.
23709             my $field1 = shift @fields;
23710             unshift @fields, ( $colon, $pad . $therest );
23711
23712             # change the leading pattern from : to ?
23713             return unless ( $patterns[0] =~ s/^\:/?/ );
23714
23715             # install leading tokens and patterns of existing line
23716             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
23717             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
23718
23719             # insert appropriate number of empty fields
23720             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
23721         }
23722
23723         # handle sub-case of first field just equal to leading colon.
23724         # This can happen for example in the example below where
23725         # the leading '(' would create a new alignment token
23726         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
23727         # :                        ( $mname = $name . '->' );
23728         else {
23729
23730             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
23731
23732             # prepend a leading ? onto the second pattern
23733             $patterns[1] = "?b" . $patterns[1];
23734
23735             # pad the second field
23736             $fields[1] = $pad . $fields[1];
23737
23738             # install leading tokens and patterns of existing line, replacing
23739             # leading token and inserting appropriate number of empty fields
23740             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
23741             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
23742             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
23743         }
23744     }
23745
23746     # Handle case of no leading colon on this line.  This will
23747     # be the case when -wba=':' is used.  For example,
23748     #  $year % 400 ? 0 :
23749     #                1;
23750     else {
23751
23752         # install leading tokens and patterns of existing line
23753         $patterns[0] = '?' . 'b' . $patterns[0];
23754         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
23755         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
23756
23757         # insert appropriate number of empty fields
23758         $jadd = $jquestion + 1;
23759         $fields[0] = $pad . $fields[0];
23760         splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
23761     }
23762
23763     VALIGN_DEBUG_FLAG_TERNARY && do {
23764         local $" = '><';
23765         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
23766         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
23767         print STDOUT "MODIFIED FIELDS=<@fields>\n";
23768     };
23769
23770     # all ok .. update the arrays
23771     @{$rfields}   = @fields;
23772     @{$rtokens}   = @tokens;
23773     @{$rpatterns} = @patterns;
23774
23775     # force a flush after this line
23776     return $jquestion;
23777 }
23778
23779 sub fix_terminal_else {
23780
23781     # Add empty fields as necessary to align a balanced terminal
23782     # else block to a previous if/elsif/unless block,
23783     # like this:
23784     #
23785     #  if   ( 1 || $x ) { print "ok 13\n"; }
23786     #  else             { print "not ok 13\n"; }
23787     #
23788     # returns 1 if the else block should be indented
23789     #
23790     my ( $rfields, $rtokens, $rpatterns ) = @_;
23791     my $jmax = @{$rfields} - 1;
23792     return unless ( $jmax > 0 );
23793
23794     # check for balanced else block following if/elsif/unless
23795     my $rfields_old = $current_line->get_rfields();
23796
23797     # TBD: add handling for 'case'
23798     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
23799
23800     # look for the opening brace after the else, and extract the depth
23801     my $tok_brace = $rtokens->[0];
23802     my $depth_brace;
23803     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
23804
23805     # probably:  "else # side_comment"
23806     else { return }
23807
23808     my $rpatterns_old       = $current_line->get_rpatterns();
23809     my $rtokens_old         = $current_line->get_rtokens();
23810     my $maximum_field_index = $current_line->get_jmax();
23811
23812     # be sure the previous if/elsif is followed by an opening paren
23813     my $jparen    = 0;
23814     my $tok_paren = '(' . $depth_brace;
23815     my $tok_test  = $rtokens_old->[$jparen];
23816     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
23817
23818     # Now find the opening block brace
23819     my ($jbrace);
23820     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
23821         my $tok = $rtokens_old->[$j];
23822         if ( $tok eq $tok_brace ) {
23823             $jbrace = $j;
23824             last;
23825         }
23826     }
23827     return unless ( defined($jbrace) );           # shouldn't happen
23828
23829     # Now splice the tokens and patterns of the previous line
23830     # into the else line to insure a match.  Add empty fields
23831     # as necessary.
23832     my $jadd = $jbrace - $jparen;
23833     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
23834     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
23835     splice( @{$rfields}, 1, 0, ('') x $jadd );
23836
23837     # force a flush after this line if it does not follow a case
23838     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
23839     else                                      { return $jbrace }
23840 }
23841
23842 {    # sub check_match
23843     my %is_good_alignment;
23844
23845     BEGIN {
23846
23847         # Vertically aligning on certain "good" tokens is usually okay
23848         # so we can be less restrictive in marginal cases.
23849         my @q = qw( { ? => = );
23850         push @q, (',');
23851         @is_good_alignment{@q} = (1) x scalar(@q);
23852     }
23853
23854     sub check_match {
23855
23856         # See if the current line matches the current vertical alignment group.
23857         # If not, flush the current group.
23858         my ( $new_line, $old_line ) = @_;
23859
23860         # uses global variables:
23861         #  $previous_minimum_jmax_seen
23862         #  $maximum_jmax_seen
23863         #  $maximum_line_index
23864         #  $marginal_match
23865         my $jmax                = $new_line->get_jmax();
23866         my $maximum_field_index = $old_line->get_jmax();
23867
23868         # flush if this line has too many fields
23869         if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
23870
23871         # flush if adding this line would make a non-monotonic field count
23872         if (
23873             ( $maximum_field_index > $jmax )    # this has too few fields
23874             && (
23875                 ( $previous_minimum_jmax_seen <
23876                     $jmax )                     # and wouldn't be monotonic
23877                 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
23878             )
23879           )
23880         {
23881             goto NO_MATCH;
23882         }
23883
23884         # otherwise see if this line matches the current group
23885         my $jmax_original_line      = $new_line->get_jmax_original_line();
23886         my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
23887         my $rtokens                 = $new_line->get_rtokens();
23888         my $rfields                 = $new_line->get_rfields();
23889         my $rpatterns               = $new_line->get_rpatterns();
23890         my $list_type               = $new_line->get_list_type();
23891
23892         my $group_list_type = $old_line->get_list_type();
23893         my $old_rpatterns   = $old_line->get_rpatterns();
23894         my $old_rtokens     = $old_line->get_rtokens();
23895
23896         my $jlimit = $jmax - 1;
23897         if ( $maximum_field_index > $jmax ) {
23898             $jlimit = $jmax_original_line;
23899             --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
23900         }
23901
23902         # handle comma-separated lists ..
23903         if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
23904             for my $j ( 0 .. $jlimit ) {
23905                 my $old_tok = $old_rtokens->[$j];
23906                 next unless $old_tok;
23907                 my $new_tok = $rtokens->[$j];
23908                 next unless $new_tok;
23909
23910                 # lists always match ...
23911                 # unless they would align any '=>'s with ','s
23912                 goto NO_MATCH
23913                   if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
23914                     || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
23915             }
23916         }
23917
23918         # do detailed check for everything else except hanging side comments
23919         elsif ( !$is_hanging_side_comment ) {
23920
23921             my $leading_space_count = $new_line->get_leading_space_count();
23922
23923             my $max_pad = 0;
23924             my $min_pad = 0;
23925             my $saw_good_alignment;
23926
23927             for my $j ( 0 .. $jlimit ) {
23928
23929                 my $old_tok = $old_rtokens->[$j];
23930                 my $new_tok = $rtokens->[$j];
23931
23932                 # Note on encoding used for alignment tokens:
23933                 # -------------------------------------------
23934                 # Tokens are "decorated" with information which can help
23935                 # prevent unwanted alignments.  Consider for example the
23936                 # following two lines:
23937                 #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
23938                 #   local ( $i, $f ) = &'bdiv( $xn, $xd );
23939                 # There are three alignment tokens in each line, a comma,
23940                 # an =, and a comma.  In the first line these three tokens
23941                 # are encoded as:
23942                 #    ,4+local-18     =3      ,4+split-7
23943                 # and in the second line they are encoded as
23944                 #    ,4+local-18     =3      ,4+&'bdiv-8
23945                 # Tokens always at least have token name and nesting
23946                 # depth.  So in this example the ='s are at depth 3 and
23947                 # the ,'s are at depth 4.  This prevents aligning tokens
23948                 # of different depths.  Commas contain additional
23949                 # information, as follows:
23950                 # ,  {depth} + {container name} - {spaces to opening paren}
23951                 # This allows us to reject matching the rightmost commas
23952                 # in the above two lines, since they are for different
23953                 # function calls.  This encoding is done in
23954                 # 'sub send_lines_to_vertical_aligner'.
23955
23956                 # Pick off actual token.
23957                 # Everything up to the first digit is the actual token.
23958                 my $alignment_token = $new_tok;
23959                 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
23960
23961                 # see if the decorated tokens match
23962                 my $tokens_match = $new_tok eq $old_tok
23963
23964                   # Exception for matching terminal : of ternary statement..
23965                   # consider containers prefixed by ? and : a match
23966                   || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
23967
23968                 # No match if the alignment tokens differ...
23969                 if ( !$tokens_match ) {
23970
23971                     # ...Unless this is a side comment
23972                     if (
23973                         $j == $jlimit
23974
23975                         # and there is either at least one alignment token
23976                         # or this is a single item following a list.  This
23977                         # latter rule is required for 'December' to join
23978                         # the following list:
23979                         # my (@months) = (
23980                         #     '',       'January',   'February', 'March',
23981                         #     'April',  'May',       'June',     'July',
23982                         #     'August', 'September', 'October',  'November',
23983                         #     'December'
23984                         # );
23985                         # If it doesn't then the -lp formatting will fail.
23986                         && ( $j > 0 || $old_tok =~ /^,/ )
23987                       )
23988                     {
23989                         $marginal_match = 1
23990                           if ( $marginal_match == 0
23991                             && $maximum_line_index == 0 );
23992                         last;
23993                     }
23994
23995                     goto NO_MATCH;
23996                 }
23997
23998                 # Calculate amount of padding required to fit this in.
23999                 # $pad is the number of spaces by which we must increase
24000                 # the current field to squeeze in this field.
24001                 my $pad =
24002                   length( $rfields->[$j] ) - $old_line->current_field_width($j);
24003                 if ( $j == 0 ) { $pad += $leading_space_count; }
24004
24005                 # remember max pads to limit marginal cases
24006                 if ( $alignment_token ne '#' ) {
24007                     if ( $pad > $max_pad ) { $max_pad = $pad }
24008                     if ( $pad < $min_pad ) { $min_pad = $pad }
24009                 }
24010                 if ( $is_good_alignment{$alignment_token} ) {
24011                     $saw_good_alignment = 1;
24012                 }
24013
24014                 # If patterns don't match, we have to be careful...
24015                 if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {
24016
24017                     # flag this as a marginal match since patterns differ
24018                     $marginal_match = 1
24019                       if ( $marginal_match == 0 && $maximum_line_index == 0 );
24020
24021                     # We have to be very careful about aligning commas
24022                     # when the pattern's don't match, because it can be
24023                     # worse to create an alignment where none is needed
24024                     # than to omit one.  Here's an example where the ','s
24025                     # are not in named containers.  The first line below
24026                     # should not match the next two:
24027                     #   ( $a, $b ) = ( $b, $r );
24028                     #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
24029                     #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
24030                     if ( $alignment_token eq ',' ) {
24031
24032                        # do not align commas unless they are in named containers
24033                         goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
24034                     }
24035
24036                     # do not align parens unless patterns match;
24037                     # large ugly spaces can occur in math expressions.
24038                     elsif ( $alignment_token eq '(' ) {
24039
24040                         # But we can allow a match if the parens don't
24041                         # require any padding.
24042                         if ( $pad != 0 ) { goto NO_MATCH }
24043                     }
24044
24045                     # Handle an '=' alignment with different patterns to
24046                     # the left.
24047                     elsif ( $alignment_token eq '=' ) {
24048
24049                         # It is best to be a little restrictive when
24050                         # aligning '=' tokens.  Here is an example of
24051                         # two lines that we will not align:
24052                         #       my $variable=6;
24053                         #       $bb=4;
24054                         # The problem is that one is a 'my' declaration,
24055                         # and the other isn't, so they're not very similar.
24056                         # We will filter these out by comparing the first
24057                         # letter of the pattern.  This is crude, but works
24058                         # well enough.
24059                         if (
24060                             substr( $old_rpatterns->[$j], 0, 1 ) ne
24061                             substr( $rpatterns->[$j],     0, 1 ) )
24062                         {
24063                             goto NO_MATCH;
24064                         }
24065
24066                         # If we pass that test, we'll call it a marginal match.
24067                         # Here is an example of a marginal match:
24068                         #       $done{$$op} = 1;
24069                         #       $op         = compile_bblock($op);
24070                         # The left tokens are both identifiers, but
24071                         # one accesses a hash and the other doesn't.
24072                         # We'll let this be a tentative match and undo
24073                         # it later if we don't find more than 2 lines
24074                         # in the group.
24075                         elsif ( $maximum_line_index == 0 ) {
24076                             $marginal_match =
24077                               2;    # =2 prevents being undone below
24078                         }
24079                     }
24080                 }
24081
24082                 # Don't let line with fewer fields increase column widths
24083                 # ( align3.t )
24084                 if ( $maximum_field_index > $jmax ) {
24085
24086                     # Exception: suspend this rule to allow last lines to join
24087                     if ( $pad > 0 ) { goto NO_MATCH; }
24088                 }
24089             } ## end for my $j ( 0 .. $jlimit)
24090
24091             # Turn off the "marginal match" flag in some cases...
24092             # A "marginal match" occurs when the alignment tokens agree
24093             # but there are differences in the other tokens (patterns).
24094             # If we leave the marginal match flag set, then the rule is that we
24095             # will align only if there are more than two lines in the group.
24096             # We will turn of the flag if we almost have a match
24097             # and either we have seen a good alignment token or we
24098             # just need a small pad (2 spaces) to fit.  These rules are
24099             # the result of experimentation.  Tokens which misaligned by just
24100             # one or two characters are annoying.  On the other hand,
24101             # large gaps to less important alignment tokens are also annoying.
24102             if (   $marginal_match == 1
24103                 && $jmax == $maximum_field_index
24104                 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
24105               )
24106             {
24107                 $marginal_match = 0;
24108             }
24109             ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
24110         }
24111
24112         # We have a match (even if marginal).
24113         # If the current line has fewer fields than the current group
24114         # but otherwise matches, copy the remaining group fields to
24115         # make it a perfect match.
24116         if ( $maximum_field_index > $jmax ) {
24117
24118             ##########################################################
24119             # FIXME: The previous version had a bug which made side comments
24120             # become regular fields, so for now the program does not allow a
24121             # line with side comment to match.  This should eventually be done.
24122             # The best test file for experimenting is 'lista.t'
24123             ##########################################################
24124
24125             my $comment = $rfields->[$jmax];
24126             goto NO_MATCH if ($comment);
24127
24128             # Corrected loop
24129             for my $jj ( $jlimit .. $maximum_field_index ) {
24130                 $rtokens->[$jj]         = $old_rtokens->[$jj];
24131                 $rfields->[ $jj + 1 ]   = '';
24132                 $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ];
24133             }
24134
24135 ##          THESE DO NOT GIVE CORRECT RESULTS
24136 ##          $rfields->[$jmax] = $comment;
24137 ##          $new_line->set_jmax($jmax);
24138
24139         }
24140         return;
24141
24142       NO_MATCH:
24143         ##print "no match jmax=$jmax  max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$old_rtokens->[0]\n";
24144         my_flush();
24145         return;
24146     }
24147 }
24148
24149 sub check_fit {
24150
24151     my ( $new_line, $old_line ) = @_;
24152     return unless ( $maximum_line_index >= 0 );
24153
24154     my $jmax                    = $new_line->get_jmax();
24155     my $leading_space_count     = $new_line->get_leading_space_count();
24156     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
24157     my $rtokens                 = $new_line->get_rtokens();
24158     my $rfields                 = $new_line->get_rfields();
24159     my $rpatterns               = $new_line->get_rpatterns();
24160
24161     my $group_list_type = $group_lines[0]->get_list_type();
24162
24163     my $padding_so_far    = 0;
24164     my $padding_available = $old_line->get_available_space_on_right();
24165
24166     # save current columns in case this doesn't work
24167     save_alignment_columns();
24168
24169     my $maximum_field_index = $old_line->get_jmax();
24170     for my $j ( 0 .. $jmax ) {
24171
24172         my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
24173
24174         if ( $j == 0 ) {
24175             $pad += $leading_space_count;
24176         }
24177
24178         # remember largest gap of the group, excluding gap to side comment
24179         if (   $pad < 0
24180             && $group_maximum_gap < -$pad
24181             && $j > 0
24182             && $j < $jmax - 1 )
24183         {
24184             $group_maximum_gap = -$pad;
24185         }
24186
24187         next if $pad < 0;
24188
24189         ## OLD NOTES:
24190         ## This patch helps sometimes, but it doesn't check to see if
24191         ## the line is too long even without the side comment.  It needs
24192         ## to be reworked.
24193         ##don't let a long token with no trailing side comment push
24194         ##side comments out, or end a group.  (sidecmt1.t)
24195         ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0);
24196
24197         # BEGIN PATCH for keith1.txt.
24198         # If the group began matching multiple tokens but later this got
24199         # reduced to a fewer number of matching tokens, then the fields
24200         # of the later lines will still have to fit into their corresponding
24201         # fields.  So a large later field will "push" the other fields to
24202         # the right, including previous side comments, and if there is no room
24203         # then there is no match.
24204         # For example, look at the last line in the following snippet:
24205
24206  # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true    : false;
24207  # my $env       = ($b_prod_db)                               ? "prd"   : "val";
24208  # my $plant     = ( $OPT{p} )                                ? $OPT{p} : "STL";
24209  # my $task      = $OPT{t};
24210  # my $fnam      = "longggggggggggggggg.$record_created.$env.$plant.idash";
24211
24212         # The long term will push the '?' to the right to fit in, and in this
24213         # case there is not enough room so it will not match the equals unless
24214         # we do something special.
24215
24216         # Usually it looks good to keep an initial alignment of '=' going, and
24217         # we can do this if the long term can fit in the space taken up by the
24218         # remaining fields (the ? : fields here).
24219
24220         # Allowing any matching token for now, but it could be restricted
24221         # to an '='-like token if necessary.
24222
24223         if (
24224                $pad > $padding_available
24225             && $jmax == 2                        # matching one thing (plus #)
24226             && $j == $jmax - 1                   # at last field
24227             && $maximum_line_index > 0           # more than 1 line in group now
24228             && $jmax < $maximum_field_index      # other lines have more fields
24229             && length( $rfields->[$jmax] ) == 0  # no side comment
24230
24231             # Uncomment to match only equals (but this does not seem necessary)
24232             # && $rtokens->[0] =~ /^=\d/           # matching an equals
24233           )
24234         {
24235             my $extra_padding = 0;
24236             foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) {
24237                 $extra_padding += $old_line->current_field_width($jj);
24238             }
24239
24240             next if ( $pad <= $padding_available + $extra_padding );
24241         }
24242
24243         # END PATCH for keith1.pl
24244
24245         # This line will need space; lets see if we want to accept it..
24246         if (
24247
24248             # not if this won't fit
24249             ( $pad > $padding_available )
24250
24251             # previously, there were upper bounds placed on padding here
24252             # (maximum_whitespace_columns), but they were not really helpful
24253
24254           )
24255         {
24256
24257             # revert to starting state then flush; things didn't work out
24258             restore_alignment_columns();
24259             my_flush();
24260             last;
24261         }
24262
24263         # patch to avoid excessive gaps in previous lines,
24264         # due to a line of fewer fields.
24265         #   return join( ".",
24266         #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
24267         #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
24268         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
24269
24270         # looks ok, squeeze this field in
24271         $old_line->increase_field_width( $j, $pad );
24272         $padding_available -= $pad;
24273
24274         # remember largest gap of the group, excluding gap to side comment
24275         if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
24276             $group_maximum_gap = $pad;
24277         }
24278     }
24279     return;
24280 }
24281
24282 sub add_to_group {
24283
24284     # The current line either starts a new alignment group or is
24285     # accepted into the current alignment group.
24286     my $new_line = shift;
24287     $group_lines[ ++$maximum_line_index ] = $new_line;
24288
24289     # initialize field lengths if starting new group
24290     if ( $maximum_line_index == 0 ) {
24291
24292         my $jmax    = $new_line->get_jmax();
24293         my $rfields = $new_line->get_rfields();
24294         my $rtokens = $new_line->get_rtokens();
24295         my $col     = $new_line->get_leading_space_count();
24296
24297         for my $j ( 0 .. $jmax ) {
24298             $col += length( $rfields->[$j] );
24299
24300             # create initial alignments for the new group
24301             my $token = "";
24302             if ( $j < $jmax ) { $token = $rtokens->[$j] }
24303             my $alignment = make_alignment( $col, $token );
24304             $new_line->set_alignment( $j, $alignment );
24305         }
24306
24307         $maximum_jmax_seen = $jmax;
24308         $minimum_jmax_seen = $jmax;
24309     }
24310
24311     # use previous alignments otherwise
24312     else {
24313         my @new_alignments =
24314           $group_lines[ $maximum_line_index - 1 ]->get_alignments();
24315         $new_line->set_alignments(@new_alignments);
24316     }
24317
24318     # remember group jmax extremes for next call to valign_input
24319     $previous_minimum_jmax_seen = $minimum_jmax_seen;
24320     $previous_maximum_jmax_seen = $maximum_jmax_seen;
24321     return;
24322 }
24323
24324 sub dump_array {
24325
24326     # debug routine to dump array contents
24327     local $" = ')(';
24328     print STDOUT "(@_)\n";
24329     return;
24330 }
24331
24332 # flush() sends the current Perl::Tidy::VerticalAligner group down the
24333 # pipeline to Perl::Tidy::FileWriter.
24334
24335 # This is the external flush, which also empties the buffer and cache
24336 sub flush {
24337
24338     # the buffer must be emptied first, then any cached text
24339     dump_valign_buffer();
24340
24341     if ( $maximum_line_index < 0 ) {
24342         if ($cached_line_type) {
24343             $seqno_string = $cached_seqno_string;
24344             valign_output_step_C( $cached_line_text,
24345                 $cached_line_leading_space_count,
24346                 $last_level_written );
24347             $cached_line_type    = 0;
24348             $cached_line_text    = "";
24349             $cached_seqno_string = "";
24350         }
24351     }
24352     else {
24353         my_flush();
24354     }
24355     return;
24356 }
24357
24358 sub reduce_valign_buffer_indentation {
24359
24360     my ($diff) = @_;
24361     if ( $valign_buffer_filling && $diff ) {
24362         my $max_valign_buffer = @valign_buffer;
24363         foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
24364             my ( $line, $leading_space_count, $level ) =
24365               @{ $valign_buffer[$i] };
24366             my $ws = substr( $line, 0, $diff );
24367             if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
24368                 $line = substr( $line, $diff );
24369             }
24370             if ( $leading_space_count >= $diff ) {
24371                 $leading_space_count -= $diff;
24372                 $level = level_change( $leading_space_count, $diff, $level );
24373             }
24374             $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
24375         }
24376     }
24377     return;
24378 }
24379
24380 sub level_change {
24381
24382     # compute decrease in level when we remove $diff spaces from the
24383     # leading spaces
24384     my ( $leading_space_count, $diff, $level ) = @_;
24385     if ($rOpts_indent_columns) {
24386         my $olev =
24387           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
24388         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
24389         $level -= ( $olev - $nlev );
24390         if ( $level < 0 ) { $level = 0 }
24391     }
24392     return $level;
24393 }
24394
24395 sub dump_valign_buffer {
24396     if (@valign_buffer) {
24397         foreach (@valign_buffer) {
24398             valign_output_step_D( @{$_} );
24399         }
24400         @valign_buffer = ();
24401     }
24402     $valign_buffer_filling = "";
24403     return;
24404 }
24405
24406 # This is the internal flush, which leaves the cache intact
24407 sub my_flush {
24408
24409     return if ( $maximum_line_index < 0 );
24410
24411     # handle a group of comment lines
24412     if ( $group_type eq 'COMMENT' ) {
24413
24414         VALIGN_DEBUG_FLAG_APPEND0 && do {
24415             my ( $a, $b, $c ) = caller();
24416             print STDOUT
24417 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
24418
24419         };
24420         my $leading_space_count = $comment_leading_space_count;
24421         my $leading_string      = get_leading_string($leading_space_count);
24422
24423         # zero leading space count if any lines are too long
24424         my $max_excess = 0;
24425         for my $i ( 0 .. $maximum_line_index ) {
24426             my $str = $group_lines[$i];
24427             my $excess =
24428               length($str) +
24429               $leading_space_count -
24430               maximum_line_length_for_level($group_level);
24431             if ( $excess > $max_excess ) {
24432                 $max_excess = $excess;
24433             }
24434         }
24435
24436         if ( $max_excess > 0 ) {
24437             $leading_space_count -= $max_excess;
24438             if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
24439             $last_outdented_line_at =
24440               $file_writer_object->get_output_line_number();
24441             unless ($outdented_line_count) {
24442                 $first_outdented_line_at = $last_outdented_line_at;
24443             }
24444             $outdented_line_count += ( $maximum_line_index + 1 );
24445         }
24446
24447         # write the group of lines
24448         my $outdent_long_lines = 0;
24449         for my $i ( 0 .. $maximum_line_index ) {
24450             valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
24451                 $outdent_long_lines, "", $group_level );
24452         }
24453     }
24454
24455     # handle a group of code lines
24456     else {
24457
24458         VALIGN_DEBUG_FLAG_APPEND0 && do {
24459             my $group_list_type = $group_lines[0]->get_list_type();
24460             my ( $a, $b, $c ) = caller();
24461             my $maximum_field_index = $group_lines[0]->get_jmax();
24462             print STDOUT
24463 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
24464
24465         };
24466
24467         # some small groups are best left unaligned
24468         my $do_not_align = decide_if_aligned();
24469
24470         # optimize side comment location
24471         $do_not_align = adjust_side_comment($do_not_align);
24472
24473         # recover spaces for -lp option if possible
24474         my $extra_leading_spaces = get_extra_leading_spaces();
24475
24476         # all lines of this group have the same basic leading spacing
24477         my $group_leader_length = $group_lines[0]->get_leading_space_count();
24478
24479         # add extra leading spaces if helpful
24480         # NOTE: Use zero; this did not work well
24481         my $min_ci_gap = 0;
24482
24483         # loop to output all lines
24484         for my $i ( 0 .. $maximum_line_index ) {
24485             my $line = $group_lines[$i];
24486             valign_output_step_A( $line, $min_ci_gap, $do_not_align,
24487                 $group_leader_length, $extra_leading_spaces );
24488         }
24489     }
24490     initialize_for_new_group();
24491     return;
24492 }
24493
24494 sub decide_if_aligned {
24495
24496     # Do not try to align two lines which are not really similar
24497     return unless $maximum_line_index == 1;
24498     return if ($is_matching_terminal_line);
24499
24500     my $group_list_type = $group_lines[0]->get_list_type();
24501
24502     my $do_not_align = (
24503
24504         # always align lists
24505         !$group_list_type
24506
24507           && (
24508
24509             # don't align if it was just a marginal match
24510             $marginal_match
24511
24512             # don't align two lines with big gap
24513             || $group_maximum_gap > 12
24514
24515             # or lines with differing number of alignment tokens
24516             # TODO: this could be improved.  It occasionally rejects
24517             # good matches.
24518             || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
24519           )
24520     );
24521
24522     # But try to convert them into a simple comment group if the first line
24523     # a has side comment
24524     my $rfields             = $group_lines[0]->get_rfields();
24525     my $maximum_field_index = $group_lines[0]->get_jmax();
24526     if (   $do_not_align
24527         && ( $maximum_line_index > 0 )
24528         && ( length( $rfields->[$maximum_field_index] ) > 0 ) )
24529     {
24530         combine_fields();
24531         $do_not_align = 0;
24532     }
24533     return $do_not_align;
24534 }
24535
24536 sub adjust_side_comment {
24537
24538     my $do_not_align = shift;
24539
24540     # let's see if we can move the side comment field out a little
24541     # to improve readability (the last field is always a side comment field)
24542     my $have_side_comment       = 0;
24543     my $first_side_comment_line = -1;
24544     my $maximum_field_index     = $group_lines[0]->get_jmax();
24545     for my $i ( 0 .. $maximum_line_index ) {
24546         my $line = $group_lines[$i];
24547
24548         if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
24549             $have_side_comment       = 1;
24550             $first_side_comment_line = $i;
24551             last;
24552         }
24553     }
24554
24555     my $kmax = $maximum_field_index + 1;
24556
24557     if ($have_side_comment) {
24558
24559         my $line = $group_lines[0];
24560
24561         # the maximum space without exceeding the line length:
24562         my $avail = $line->get_available_space_on_right();
24563
24564         # try to use the previous comment column
24565         my $side_comment_column = $line->get_column( $kmax - 2 );
24566         my $move                = $last_comment_column - $side_comment_column;
24567
24568 ##        my $sc_line0 = $side_comment_history[0]->[0];
24569 ##        my $sc_col0  = $side_comment_history[0]->[1];
24570 ##        my $sc_line1 = $side_comment_history[1]->[0];
24571 ##        my $sc_col1  = $side_comment_history[1]->[1];
24572 ##        my $sc_line2 = $side_comment_history[2]->[0];
24573 ##        my $sc_col2  = $side_comment_history[2]->[1];
24574 ##
24575 ##        # FUTURE UPDATES:
24576 ##        # Be sure to ignore 'do not align' and  '} # end comments'
24577 ##        # Find first $move > 0 and $move <= $avail as follows:
24578 ##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
24579 ##        # 2. try sc_col2 if (line-sc_line2) < 12
24580 ##        # 3. try min possible space, plus up to 8,
24581 ##        # 4. try min possible space
24582
24583         if ( $kmax > 0 && !$do_not_align ) {
24584
24585             # but if this doesn't work, give up and use the minimum space
24586             if ( $move > $avail ) {
24587                 $move = $rOpts_minimum_space_to_comment - 1;
24588             }
24589
24590             # but we want some minimum space to the comment
24591             my $min_move = $rOpts_minimum_space_to_comment - 1;
24592             if (   $move >= 0
24593                 && $last_side_comment_length > 0
24594                 && ( $first_side_comment_line == 0 )
24595                 && $group_level == $last_level_written )
24596             {
24597                 $min_move = 0;
24598             }
24599
24600             if ( $move < $min_move ) {
24601                 $move = $min_move;
24602             }
24603
24604             # previously, an upper bound was placed on $move here,
24605             # (maximum_space_to_comment), but it was not helpful
24606
24607             # don't exceed the available space
24608             if ( $move > $avail ) { $move = $avail }
24609
24610             # we can only increase space, never decrease
24611             if ( $move > 0 ) {
24612                 $line->increase_field_width( $maximum_field_index - 1, $move );
24613             }
24614
24615             # remember this column for the next group
24616             $last_comment_column = $line->get_column( $kmax - 2 );
24617         }
24618         else {
24619
24620             # try to at least line up the existing side comment location
24621             if ( $kmax > 0 && $move > 0 && $move < $avail ) {
24622                 $line->increase_field_width( $maximum_field_index - 1, $move );
24623                 $do_not_align = 0;
24624             }
24625
24626             # reset side comment column if we can't align
24627             else {
24628                 forget_side_comment();
24629             }
24630         }
24631     }
24632     return $do_not_align;
24633 }
24634
24635 sub valign_output_step_A {
24636
24637     ###############################################################
24638     # This is Step A in writing vertically aligned lines.
24639     # The line is prepared according to the alignments which have
24640     # been found and shipped to the next step.
24641     ###############################################################
24642
24643     my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
24644         $extra_leading_spaces )
24645       = @_;
24646     my $rfields                   = $line->get_rfields();
24647     my $leading_space_count       = $line->get_leading_space_count();
24648     my $outdent_long_lines        = $line->get_outdent_long_lines();
24649     my $maximum_field_index       = $line->get_jmax();
24650     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
24651
24652     # add any extra spaces
24653     if ( $leading_space_count > $group_leader_length ) {
24654         $leading_space_count += $min_ci_gap;
24655     }
24656
24657     my $str = $rfields->[0];
24658
24659     # loop to concatenate all fields of this line and needed padding
24660     my $total_pad_count = 0;
24661     for my $j ( 1 .. $maximum_field_index ) {
24662
24663         # skip zero-length side comments
24664         last
24665           if (
24666             ( $j == $maximum_field_index )
24667             && ( !defined( $rfields->[$j] )
24668                 || ( length( $rfields->[$j] ) == 0 ) )
24669           );
24670
24671         # compute spaces of padding before this field
24672         my $col = $line->get_column( $j - 1 );
24673         my $pad = $col - ( length($str) + $leading_space_count );
24674
24675         if ($do_not_align) {
24676             $pad =
24677               ( $j < $maximum_field_index )
24678               ? 0
24679               : $rOpts_minimum_space_to_comment - 1;
24680         }
24681
24682         # if the -fpsc flag is set, move the side comment to the selected
24683         # column if and only if it is possible, ignoring constraints on
24684         # line length and minimum space to comment
24685         if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
24686         {
24687             my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
24688             if ( $newpad >= 0 ) { $pad = $newpad; }
24689         }
24690
24691         # accumulate the padding
24692         if ( $pad > 0 ) { $total_pad_count += $pad; }
24693
24694         # add this field
24695         if ( !defined $rfields->[$j] ) {
24696             write_diagnostics("UNDEFined field at j=$j\n");
24697         }
24698
24699         # only add padding when we have a finite field;
24700         # this avoids extra terminal spaces if we have empty fields
24701         if ( length( $rfields->[$j] ) > 0 ) {
24702             $str .= ' ' x $total_pad_count;
24703             $total_pad_count = 0;
24704             $str .= $rfields->[$j];
24705         }
24706         else {
24707             $total_pad_count = 0;
24708         }
24709
24710         # update side comment history buffer
24711         if ( $j == $maximum_field_index ) {
24712             my $lineno = $file_writer_object->get_output_line_number();
24713             shift @side_comment_history;
24714             push @side_comment_history, [ $lineno, $col ];
24715         }
24716     }
24717
24718     my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) );
24719
24720     # ship this line off
24721     valign_output_step_B( $leading_space_count + $extra_leading_spaces,
24722         $str, $side_comment_length, $outdent_long_lines,
24723         $rvertical_tightness_flags, $group_level );
24724     return;
24725 }
24726
24727 sub get_extra_leading_spaces {
24728
24729     #----------------------------------------------------------
24730     # Define any extra indentation space (for the -lp option).
24731     # Here is why:
24732     # If a list has side comments, sub scan_list must dump the
24733     # list before it sees everything.  When this happens, it sets
24734     # the indentation to the standard scheme, but notes how
24735     # many spaces it would have liked to use.  We may be able
24736     # to recover that space here in the event that all of the
24737     # lines of a list are back together again.
24738     #----------------------------------------------------------
24739
24740     my $extra_leading_spaces = 0;
24741     if ($extra_indent_ok) {
24742         my $object = $group_lines[0]->get_indentation();
24743         if ( ref($object) ) {
24744             my $extra_indentation_spaces_wanted =
24745               get_recoverable_spaces($object);
24746
24747             # all indentation objects must be the same
24748             for my $i ( 1 .. $maximum_line_index ) {
24749                 if ( $object != $group_lines[$i]->get_indentation() ) {
24750                     $extra_indentation_spaces_wanted = 0;
24751                     last;
24752                 }
24753             }
24754
24755             if ($extra_indentation_spaces_wanted) {
24756
24757                 # the maximum space without exceeding the line length:
24758                 my $avail = $group_lines[0]->get_available_space_on_right();
24759                 $extra_leading_spaces =
24760                   ( $avail > $extra_indentation_spaces_wanted )
24761                   ? $extra_indentation_spaces_wanted
24762                   : $avail;
24763
24764                 # update the indentation object because with -icp the terminal
24765                 # ');' will use the same adjustment.
24766                 $object->permanently_decrease_available_spaces(
24767                     -$extra_leading_spaces );
24768             }
24769         }
24770     }
24771     return $extra_leading_spaces;
24772 }
24773
24774 sub combine_fields {
24775
24776     # combine all fields except for the comment field  ( sidecmt.t )
24777     # Uses global variables:
24778     #  @group_lines
24779     #  $maximum_line_index
24780     my $maximum_field_index = $group_lines[0]->get_jmax();
24781     foreach my $j ( 0 .. $maximum_line_index ) {
24782         my $line    = $group_lines[$j];
24783         my $rfields = $line->get_rfields();
24784         foreach ( 1 .. $maximum_field_index - 1 ) {
24785             $rfields->[0] .= $rfields->[$_];
24786         }
24787         $rfields->[1] = $rfields->[$maximum_field_index];
24788
24789         $line->set_jmax(1);
24790         $line->set_column( 0, 0 );
24791         $line->set_column( 1, 0 );
24792
24793     }
24794     $maximum_field_index = 1;
24795
24796     for my $j ( 0 .. $maximum_line_index ) {
24797         my $line    = $group_lines[$j];
24798         my $rfields = $line->get_rfields();
24799         for my $k ( 0 .. $maximum_field_index ) {
24800             my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);
24801             if ( $k == 0 ) {
24802                 $pad += $group_lines[$j]->get_leading_space_count();
24803             }
24804
24805             if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
24806
24807         }
24808     }
24809     return;
24810 }
24811
24812 sub get_output_line_number {
24813
24814     # the output line number reported to a caller is the number of items
24815     # written plus the number of items in the buffer
24816     my $self = shift;
24817     return 1 + $maximum_line_index +
24818       $file_writer_object->get_output_line_number();
24819 }
24820
24821 sub valign_output_step_B {
24822
24823     ###############################################################
24824     # This is Step B in writing vertically aligned lines.
24825     # Vertical tightness is applied according to preset flags.
24826     # In particular this routine handles stacking of opening
24827     # and closing tokens.
24828     ###############################################################
24829
24830     my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
24831         $rvertical_tightness_flags, $level )
24832       = @_;
24833
24834     # handle outdenting of long lines:
24835     if ($outdent_long_lines) {
24836         my $excess =
24837           length($str) -
24838           $side_comment_length +
24839           $leading_space_count -
24840           maximum_line_length_for_level($level);
24841         if ( $excess > 0 ) {
24842             $leading_space_count = 0;
24843             $last_outdented_line_at =
24844               $file_writer_object->get_output_line_number();
24845
24846             unless ($outdented_line_count) {
24847                 $first_outdented_line_at = $last_outdented_line_at;
24848             }
24849             $outdented_line_count++;
24850         }
24851     }
24852
24853     # Make preliminary leading whitespace.  It could get changed
24854     # later by entabbing, so we have to keep track of any changes
24855     # to the leading_space_count from here on.
24856     my $leading_string =
24857       $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
24858
24859     # Unpack any recombination data; it was packed by
24860     # sub send_lines_to_vertical_aligner. Contents:
24861     #
24862     #   [0] type: 1=opening non-block    2=closing non-block
24863     #             3=opening block brace  4=closing block brace
24864     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
24865     #             if closing: spaces of padding to use
24866     #   [2] sequence number of container
24867     #   [3] valid flag: do not append if this flag is false
24868     #
24869     my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
24870         $seqno_end );
24871     if ($rvertical_tightness_flags) {
24872         (
24873             $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
24874             $seqno_end
24875         ) = @{$rvertical_tightness_flags};
24876     }
24877
24878     $seqno_string = $seqno_end;
24879
24880     # handle any cached line ..
24881     # either append this line to it or write it out
24882     if ( length($cached_line_text) ) {
24883
24884         # Dump an invalid cached line
24885         if ( !$cached_line_valid ) {
24886             valign_output_step_C( $cached_line_text,
24887                 $cached_line_leading_space_count,
24888                 $last_level_written );
24889         }
24890
24891         # Handle cached line ending in OPENING tokens
24892         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
24893
24894             my $gap = $leading_space_count - length($cached_line_text);
24895
24896             # handle option of just one tight opening per line:
24897             if ( $cached_line_flag == 1 ) {
24898                 if ( defined($open_or_close) && $open_or_close == 1 ) {
24899                     $gap = -1;
24900                 }
24901             }
24902
24903             if ( $gap >= 0 && defined($seqno_beg) ) {
24904                 $leading_string      = $cached_line_text . ' ' x $gap;
24905                 $leading_space_count = $cached_line_leading_space_count;
24906                 $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
24907                 $level               = $last_level_written;
24908             }
24909             else {
24910                 valign_output_step_C( $cached_line_text,
24911                     $cached_line_leading_space_count,
24912                     $last_level_written );
24913             }
24914         }
24915
24916         # Handle cached line ending in CLOSING tokens
24917         else {
24918             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
24919             if (
24920
24921                 # The new line must start with container
24922                 $seqno_beg
24923
24924                 # The container combination must be okay..
24925                 && (
24926
24927                     # okay to combine like types
24928                     ( $open_or_close == $cached_line_type )
24929
24930                     # closing block brace may append to non-block
24931                     || ( $cached_line_type == 2 && $open_or_close == 4 )
24932
24933                     # something like ');'
24934                     || ( !$open_or_close && $cached_line_type == 2 )
24935
24936                 )
24937
24938                 # The combined line must fit
24939                 && (
24940                     length($test_line) <=
24941                     maximum_line_length_for_level($last_level_written) )
24942               )
24943             {
24944
24945                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
24946
24947                 # Patch to outdent closing tokens ending # in ');'
24948                 # If we are joining a line like ');' to a previous stacked
24949                 # set of closing tokens, then decide if we may outdent the
24950                 # combined stack to the indentation of the ');'.  Since we
24951                 # should not normally outdent any of the other tokens more than
24952                 # the indentation of the lines that contained them, we will
24953                 # only do this if all of the corresponding opening
24954                 # tokens were on the same line.  This can happen with
24955                 # -sot and -sct.  For example, it is ok here:
24956                 #   __PACKAGE__->load_components( qw(
24957                 #         PK::Auto
24958                 #         Core
24959                 #   ));
24960                 #
24961                 #   But, for example, we do not outdent in this example because
24962                 #   that would put the closing sub brace out farther than the
24963                 #   opening sub brace:
24964                 #
24965                 #   perltidy -sot -sct
24966                 #   $c->Tk::bind(
24967                 #       '<Control-f>' => sub {
24968                 #           my ($c) = @_;
24969                 #           my $e = $c->XEvent;
24970                 #           itemsUnderArea $c;
24971                 #       } );
24972                 #
24973                 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
24974
24975                     # The way to tell this is if the stacked sequence numbers
24976                     # of this output line are the reverse of the stacked
24977                     # sequence numbers of the previous non-blank line of
24978                     # sequence numbers.  So we can join if the previous
24979                     # nonblank string of tokens is the mirror image.  For
24980                     # example if stack )}] is 13:8:6 then we are looking for a
24981                     # leading stack like [{( which is 6:8:13 We only need to
24982                     # check the two ends, because the intermediate tokens must
24983                     # fall in order.  Note on speed: having to split on colons
24984                     # and eliminate multiple colons might appear to be slow,
24985                     # but it's not an issue because we almost never come
24986                     # through here.  In a typical file we don't.
24987                     $seqno_string =~ s/^:+//;
24988                     $last_nonblank_seqno_string =~ s/^:+//;
24989                     $seqno_string =~ s/:+/:/g;
24990                     $last_nonblank_seqno_string =~ s/:+/:/g;
24991
24992                     # how many spaces can we outdent?
24993                     my $diff =
24994                       $cached_line_leading_space_count - $leading_space_count;
24995                     if (   $diff > 0
24996                         && length($seqno_string)
24997                         && length($last_nonblank_seqno_string) ==
24998                         length($seqno_string) )
24999                     {
25000                         my @seqno_last =
25001                           ( split ':', $last_nonblank_seqno_string );
25002                         my @seqno_now = ( split ':', $seqno_string );
25003                         if (   $seqno_now[-1] == $seqno_last[0]
25004                             && $seqno_now[0] == $seqno_last[-1] )
25005                         {
25006
25007                             # OK to outdent ..
25008                             # for absolute safety, be sure we only remove
25009                             # whitespace
25010                             my $ws = substr( $test_line, 0, $diff );
25011                             if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
25012
25013                                 $test_line = substr( $test_line, $diff );
25014                                 $cached_line_leading_space_count -= $diff;
25015                                 $last_level_written =
25016                                   level_change(
25017                                     $cached_line_leading_space_count,
25018                                     $diff, $last_level_written );
25019                                 reduce_valign_buffer_indentation($diff);
25020                             }
25021
25022                             # shouldn't happen, but not critical:
25023                             ##else {
25024                             ## ERROR transferring indentation here
25025                             ##}
25026                         }
25027                     }
25028                 }
25029
25030                 $str                 = $test_line;
25031                 $leading_string      = "";
25032                 $leading_space_count = $cached_line_leading_space_count;
25033                 $level               = $last_level_written;
25034             }
25035             else {
25036                 valign_output_step_C( $cached_line_text,
25037                     $cached_line_leading_space_count,
25038                     $last_level_written );
25039             }
25040         }
25041     }
25042     $cached_line_type = 0;
25043     $cached_line_text = "";
25044
25045     # make the line to be written
25046     my $line = $leading_string . $str;
25047
25048     # write or cache this line
25049     if ( !$open_or_close || $side_comment_length > 0 ) {
25050         valign_output_step_C( $line, $leading_space_count, $level );
25051     }
25052     else {
25053         $cached_line_text                = $line;
25054         $cached_line_type                = $open_or_close;
25055         $cached_line_flag                = $tightness_flag;
25056         $cached_seqno                    = $seqno;
25057         $cached_line_valid               = $valid;
25058         $cached_line_leading_space_count = $leading_space_count;
25059         $cached_seqno_string             = $seqno_string;
25060     }
25061
25062     $last_level_written       = $level;
25063     $last_side_comment_length = $side_comment_length;
25064     $extra_indent_ok          = 0;
25065     return;
25066 }
25067
25068 sub valign_output_step_C {
25069
25070     ###############################################################
25071     # This is Step C in writing vertically aligned lines.
25072     # Lines are either stored in a buffer or passed along to the next step.
25073     # The reason for storing lines is that we may later want to reduce their
25074     # indentation when -sot and -sct are both used.
25075     ###############################################################
25076     my @args = @_;
25077
25078     # Dump any saved lines if we see a line with an unbalanced opening or
25079     # closing token.
25080     dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
25081
25082     # Either store or write this line
25083     if ($valign_buffer_filling) {
25084         push @valign_buffer, [@args];
25085     }
25086     else {
25087         valign_output_step_D(@args);
25088     }
25089
25090     # For lines starting or ending with opening or closing tokens..
25091     if ($seqno_string) {
25092         $last_nonblank_seqno_string = $seqno_string;
25093
25094         # Start storing lines when we see a line with multiple stacked opening
25095         # tokens.
25096         # patch for RT #94354, requested by Colin Williams
25097         if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
25098         {
25099
25100             # This test is efficient but a little subtle: The first test says
25101             # that we have multiple sequence numbers and hence multiple opening
25102             # or closing tokens in this line.  The second part of the test
25103             # rejects stacked closing and ternary tokens.  So if we get here
25104             # then we should have stacked unbalanced opening tokens.
25105
25106             # Here is a complex example:
25107
25108             # Foo($Bar[0], {  # (side comment)
25109             #   baz => 1,
25110             # });
25111
25112             # The first line has sequence 6::4.  It does not begin with
25113             # a closing token or ternary, so it passes the test and must be
25114             # stacked opening tokens.
25115
25116             # The last line has sequence 4:6 but is a stack of closing tokens,
25117             # so it gets rejected.
25118
25119             # Note that the sequence number of an opening token for a qw quote
25120             # is a negative number and will be rejected.
25121             # For example, for the following line:
25122             #    skip_symbols([qw(
25123             # $seqno_string='10:5:-1'.  It would be okay to accept it but
25124             # I decided not to do this after testing.
25125
25126             $valign_buffer_filling = $seqno_string;
25127
25128         }
25129     }
25130     return;
25131 }
25132
25133 sub valign_output_step_D {
25134
25135     ###############################################################
25136     # This is Step D in writing vertically aligned lines.
25137     # Write one vertically aligned line of code to the output object.
25138     ###############################################################
25139
25140     my ( $line, $leading_space_count, $level ) = @_;
25141
25142     # The line is currently correct if there is no tabbing (recommended!)
25143     # We may have to lop off some leading spaces and replace with tabs.
25144     if ( $leading_space_count > 0 ) {
25145
25146         # Nothing to do if no tabs
25147         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
25148             || $rOpts_indent_columns <= 0 )
25149         {
25150
25151             # nothing to do
25152         }
25153
25154         # Handle entab option
25155         elsif ($rOpts_entab_leading_whitespace) {
25156             my $space_count =
25157               $leading_space_count % $rOpts_entab_leading_whitespace;
25158             my $tab_count =
25159               int( $leading_space_count / $rOpts_entab_leading_whitespace );
25160             my $leading_string = "\t" x $tab_count . ' ' x $space_count;
25161             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
25162                 substr( $line, 0, $leading_space_count ) = $leading_string;
25163             }
25164             else {
25165
25166                 # shouldn't happen - program error counting whitespace
25167                 # - skip entabbing
25168                 VALIGN_DEBUG_FLAG_TABS
25169                   && warning(
25170 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
25171                   );
25172             }
25173         }
25174
25175         # Handle option of one tab per level
25176         else {
25177             my $leading_string = ( "\t" x $level );
25178             my $space_count =
25179               $leading_space_count - $level * $rOpts_indent_columns;
25180
25181             # shouldn't happen:
25182             if ( $space_count < 0 ) {
25183
25184                 # But it could be an outdented comment
25185                 if ( $line !~ /^\s*#/ ) {
25186                     VALIGN_DEBUG_FLAG_TABS
25187                       && warning(
25188 "Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
25189                       );
25190                 }
25191                 $leading_string = ( ' ' x $leading_space_count );
25192             }
25193             else {
25194                 $leading_string .= ( ' ' x $space_count );
25195             }
25196             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
25197                 substr( $line, 0, $leading_space_count ) = $leading_string;
25198             }
25199             else {
25200
25201                 # shouldn't happen - program error counting whitespace
25202                 # we'll skip entabbing
25203                 VALIGN_DEBUG_FLAG_TABS
25204                   && warning(
25205 "Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
25206                   );
25207             }
25208         }
25209     }
25210     $file_writer_object->write_code_line( $line . "\n" );
25211     return;
25212 }
25213
25214 {    # begin get_leading_string
25215
25216     my @leading_string_cache;
25217
25218     sub get_leading_string {
25219
25220         # define the leading whitespace string for this line..
25221         my $leading_whitespace_count = shift;
25222
25223         # Handle case of zero whitespace, which includes multi-line quotes
25224         # (which may have a finite level; this prevents tab problems)
25225         if ( $leading_whitespace_count <= 0 ) {
25226             return "";
25227         }
25228
25229         # look for previous result
25230         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
25231             return $leading_string_cache[$leading_whitespace_count];
25232         }
25233
25234         # must compute a string for this number of spaces
25235         my $leading_string;
25236
25237         # Handle simple case of no tabs
25238         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
25239             || $rOpts_indent_columns <= 0 )
25240         {
25241             $leading_string = ( ' ' x $leading_whitespace_count );
25242         }
25243
25244         # Handle entab option
25245         elsif ($rOpts_entab_leading_whitespace) {
25246             my $space_count =
25247               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
25248             my $tab_count = int(
25249                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
25250             $leading_string = "\t" x $tab_count . ' ' x $space_count;
25251         }
25252
25253         # Handle option of one tab per level
25254         else {
25255             $leading_string = ( "\t" x $group_level );
25256             my $space_count =
25257               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
25258
25259             # shouldn't happen:
25260             if ( $space_count < 0 ) {
25261                 VALIGN_DEBUG_FLAG_TABS
25262                   && warning(
25263 "Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
25264                   );
25265
25266                 # -- skip entabbing
25267                 $leading_string = ( ' ' x $leading_whitespace_count );
25268             }
25269             else {
25270                 $leading_string .= ( ' ' x $space_count );
25271             }
25272         }
25273         $leading_string_cache[$leading_whitespace_count] = $leading_string;
25274         return $leading_string;
25275     }
25276 }    # end get_leading_string
25277
25278 sub report_anything_unusual {
25279     my $self = shift;
25280     if ( $outdented_line_count > 0 ) {
25281         write_logfile_entry(
25282             "$outdented_line_count long lines were outdented:\n");
25283         write_logfile_entry(
25284             "  First at output line $first_outdented_line_at\n");
25285
25286         if ( $outdented_line_count > 1 ) {
25287             write_logfile_entry(
25288                 "   Last at output line $last_outdented_line_at\n");
25289         }
25290         write_logfile_entry(
25291             "  use -noll to prevent outdenting, -l=n to increase line length\n"
25292         );
25293         write_logfile_entry("\n");
25294     }
25295     return;
25296 }
25297
25298 #####################################################################
25299 #
25300 # the Perl::Tidy::FileWriter class writes the output file
25301 #
25302 #####################################################################
25303
25304 package Perl::Tidy::FileWriter;
25305
25306 # Maximum number of little messages; probably need not be changed.
25307 use constant MAX_NAG_MESSAGES => 6;
25308
25309 sub write_logfile_entry {
25310     my ( $self, $msg ) = @_;
25311     my $logger_object = $self->{_logger_object};
25312     if ($logger_object) {
25313         $logger_object->write_logfile_entry($msg);
25314     }
25315     return;
25316 }
25317
25318 sub new {
25319     my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;
25320
25321     return bless {
25322         _line_sink_object           => $line_sink_object,
25323         _logger_object              => $logger_object,
25324         _rOpts                      => $rOpts,
25325         _output_line_number         => 1,
25326         _consecutive_blank_lines    => 0,
25327         _consecutive_nonblank_lines => 0,
25328         _first_line_length_error    => 0,
25329         _max_line_length_error      => 0,
25330         _last_line_length_error     => 0,
25331         _first_line_length_error_at => 0,
25332         _max_line_length_error_at   => 0,
25333         _last_line_length_error_at  => 0,
25334         _line_length_error_count    => 0,
25335         _max_output_line_length     => 0,
25336         _max_output_line_length_at  => 0,
25337     }, $class;
25338 }
25339
25340 sub tee_on {
25341     my $self = shift;
25342     $self->{_line_sink_object}->tee_on();
25343     return;
25344 }
25345
25346 sub tee_off {
25347     my $self = shift;
25348     $self->{_line_sink_object}->tee_off();
25349     return;
25350 }
25351
25352 sub get_output_line_number {
25353     my $self = shift;
25354     return $self->{_output_line_number};
25355 }
25356
25357 sub decrement_output_line_number {
25358     my $self = shift;
25359     $self->{_output_line_number}--;
25360     return;
25361 }
25362
25363 sub get_consecutive_nonblank_lines {
25364     my $self = shift;
25365     return $self->{_consecutive_nonblank_lines};
25366 }
25367
25368 sub reset_consecutive_blank_lines {
25369     my $self = shift;
25370     $self->{_consecutive_blank_lines} = 0;
25371     return;
25372 }
25373
25374 sub want_blank_line {
25375     my $self = shift;
25376     unless ( $self->{_consecutive_blank_lines} ) {
25377         $self->write_blank_code_line();
25378     }
25379     return;
25380 }
25381
25382 sub require_blank_code_lines {
25383
25384     # write out the requested number of blanks regardless of the value of -mbl
25385     # unless -mbl=0.  This allows extra blank lines to be written for subs and
25386     # packages even with the default -mbl=1
25387     my ( $self, $count ) = @_;
25388     my $need   = $count - $self->{_consecutive_blank_lines};
25389     my $rOpts  = $self->{_rOpts};
25390     my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
25391     foreach my $i ( 0 .. $need - 1 ) {
25392         $self->write_blank_code_line($forced);
25393     }
25394     return;
25395 }
25396
25397 sub write_blank_code_line {
25398     my $self   = shift;
25399     my $forced = shift;
25400     my $rOpts  = $self->{_rOpts};
25401     return
25402       if (!$forced
25403         && $self->{_consecutive_blank_lines} >=
25404         $rOpts->{'maximum-consecutive-blank-lines'} );
25405     $self->{_consecutive_blank_lines}++;
25406     $self->{_consecutive_nonblank_lines} = 0;
25407     $self->write_line("\n");
25408     return;
25409 }
25410
25411 sub write_code_line {
25412     my $self = shift;
25413     my $a    = shift;
25414
25415     if ( $a =~ /^\s*$/ ) {
25416         my $rOpts = $self->{_rOpts};
25417         return
25418           if ( $self->{_consecutive_blank_lines} >=
25419             $rOpts->{'maximum-consecutive-blank-lines'} );
25420         $self->{_consecutive_blank_lines}++;
25421         $self->{_consecutive_nonblank_lines} = 0;
25422     }
25423     else {
25424         $self->{_consecutive_blank_lines} = 0;
25425         $self->{_consecutive_nonblank_lines}++;
25426     }
25427     $self->write_line($a);
25428     return;
25429 }
25430
25431 sub write_line {
25432     my ( $self, $a ) = @_;
25433
25434     # TODO: go through and see if the test is necessary here
25435     if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
25436
25437     $self->{_line_sink_object}->write_line($a);
25438
25439     # This calculation of excess line length ignores any internal tabs
25440     my $rOpts  = $self->{_rOpts};
25441     my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
25442     if ( $a =~ /^\t+/g ) {
25443         $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
25444     }
25445
25446     # Note that we just incremented output line number to future value
25447     # so we must subtract 1 for current line number
25448     if ( length($a) > 1 + $self->{_max_output_line_length} ) {
25449         $self->{_max_output_line_length}    = length($a) - 1;
25450         $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
25451     }
25452
25453     if ( $exceed > 0 ) {
25454         my $output_line_number = $self->{_output_line_number};
25455         $self->{_last_line_length_error}    = $exceed;
25456         $self->{_last_line_length_error_at} = $output_line_number - 1;
25457         if ( $self->{_line_length_error_count} == 0 ) {
25458             $self->{_first_line_length_error}    = $exceed;
25459             $self->{_first_line_length_error_at} = $output_line_number - 1;
25460         }
25461
25462         if (
25463             $self->{_last_line_length_error} > $self->{_max_line_length_error} )
25464         {
25465             $self->{_max_line_length_error}    = $exceed;
25466             $self->{_max_line_length_error_at} = $output_line_number - 1;
25467         }
25468
25469         if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
25470             $self->write_logfile_entry(
25471                 "Line length exceeded by $exceed characters\n");
25472         }
25473         $self->{_line_length_error_count}++;
25474     }
25475     return;
25476 }
25477
25478 sub report_line_length_errors {
25479     my $self                    = shift;
25480     my $rOpts                   = $self->{_rOpts};
25481     my $line_length_error_count = $self->{_line_length_error_count};
25482     if ( $line_length_error_count == 0 ) {
25483         $self->write_logfile_entry(
25484             "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
25485         my $max_output_line_length    = $self->{_max_output_line_length};
25486         my $max_output_line_length_at = $self->{_max_output_line_length_at};
25487         $self->write_logfile_entry(
25488 "  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
25489         );
25490
25491     }
25492     else {
25493
25494         my $word = ( $line_length_error_count > 1 ) ? "s" : "";
25495         $self->write_logfile_entry(
25496 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
25497         );
25498
25499         $word = ( $line_length_error_count > 1 ) ? "First" : "";
25500         my $first_line_length_error    = $self->{_first_line_length_error};
25501         my $first_line_length_error_at = $self->{_first_line_length_error_at};
25502         $self->write_logfile_entry(
25503 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
25504         );
25505
25506         if ( $line_length_error_count > 1 ) {
25507             my $max_line_length_error     = $self->{_max_line_length_error};
25508             my $max_line_length_error_at  = $self->{_max_line_length_error_at};
25509             my $last_line_length_error    = $self->{_last_line_length_error};
25510             my $last_line_length_error_at = $self->{_last_line_length_error_at};
25511             $self->write_logfile_entry(
25512 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
25513             );
25514             $self->write_logfile_entry(
25515 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
25516             );
25517         }
25518     }
25519     return;
25520 }
25521
25522 #####################################################################
25523 #
25524 # The Perl::Tidy::Debugger class shows line tokenization
25525 #
25526 #####################################################################
25527
25528 package Perl::Tidy::Debugger;
25529
25530 sub new {
25531
25532     my ( $class, $filename ) = @_;
25533
25534     return bless {
25535         _debug_file        => $filename,
25536         _debug_file_opened => 0,
25537         _fh                => undef,
25538     }, $class;
25539 }
25540
25541 sub really_open_debug_file {
25542
25543     my $self       = shift;
25544     my $debug_file = $self->{_debug_file};
25545     my $fh;
25546     unless ( $fh = IO::File->new("> $debug_file") ) {
25547         Perl::Tidy::Warn("can't open $debug_file: $!\n");
25548     }
25549     $self->{_debug_file_opened} = 1;
25550     $self->{_fh}                = $fh;
25551     print $fh
25552       "Use -dump-token-types (-dtt) to get a list of token type codes\n";
25553     return;
25554 }
25555
25556 sub close_debug_file {
25557
25558     my $self = shift;
25559     my $fh   = $self->{_fh};
25560     if ( $self->{_debug_file_opened} ) {
25561         eval { $self->{_fh}->close() };
25562     }
25563     return;
25564 }
25565
25566 sub write_debug_entry {
25567
25568     # This is a debug dump routine which may be modified as necessary
25569     # to dump tokens on a line-by-line basis.  The output will be written
25570     # to the .DEBUG file when the -D flag is entered.
25571     my ( $self, $line_of_tokens ) = @_;
25572
25573     my $input_line = $line_of_tokens->{_line_text};
25574
25575     my $rtoken_type = $line_of_tokens->{_rtoken_type};
25576     my $rtokens     = $line_of_tokens->{_rtokens};
25577     my $rlevels     = $line_of_tokens->{_rlevels};
25578     my $rslevels    = $line_of_tokens->{_rslevels};
25579     my $rblock_type = $line_of_tokens->{_rblock_type};
25580
25581     my $input_line_number = $line_of_tokens->{_line_number};
25582     my $line_type         = $line_of_tokens->{_line_type};
25583     ##my $rtoken_array      = $line_of_tokens->{_token_array};
25584
25585     my ( $j, $num );
25586
25587     my $token_str              = "$input_line_number: ";
25588     my $reconstructed_original = "$input_line_number: ";
25589     my $block_str              = "$input_line_number: ";
25590
25591     #$token_str .= "$line_type: ";
25592     #$reconstructed_original .= "$line_type: ";
25593
25594     my $pattern   = "";
25595     my @next_char = ( '"', '"' );
25596     my $i_next    = 0;
25597     unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
25598     my $fh = $self->{_fh};
25599
25600     # FIXME: could convert to use of token_array instead
25601     foreach my $j ( 0 .. @{$rtoken_type} - 1 ) {
25602
25603         # testing patterns
25604         if ( $rtoken_type->[$j] eq 'k' ) {
25605             $pattern .= $rtokens->[$j];
25606         }
25607         else {
25608             $pattern .= $rtoken_type->[$j];
25609         }
25610         $reconstructed_original .= $rtokens->[$j];
25611         $block_str .= "($rblock_type->[$j])";
25612         $num = length( $rtokens->[$j] );
25613         my $type_str = $rtoken_type->[$j];
25614
25615         # be sure there are no blank tokens (shouldn't happen)
25616         # This can only happen if a programming error has been made
25617         # because all valid tokens are non-blank
25618         if ( $type_str eq ' ' ) {
25619             print $fh "BLANK TOKEN on the next line\n";
25620             $type_str = $next_char[$i_next];
25621             $i_next   = 1 - $i_next;
25622         }
25623
25624         if ( length($type_str) == 1 ) {
25625             $type_str = $type_str x $num;
25626         }
25627         $token_str .= $type_str;
25628     }
25629
25630     # Write what you want here ...
25631     # print $fh "$input_line\n";
25632     # print $fh "$pattern\n";
25633     print $fh "$reconstructed_original\n";
25634     print $fh "$token_str\n";
25635
25636     #print $fh "$block_str\n";
25637     return;
25638 }
25639
25640 #####################################################################
25641 #
25642 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
25643 # method for returning the next line to be parsed, as well as a
25644 # 'peek_ahead()' method
25645 #
25646 # The input parameter is an object with a 'get_line()' method
25647 # which returns the next line to be parsed
25648 #
25649 #####################################################################
25650
25651 package Perl::Tidy::LineBuffer;
25652
25653 sub new {
25654
25655     my ( $class, $line_source_object ) = @_;
25656
25657     return bless {
25658         _line_source_object => $line_source_object,
25659         _rlookahead_buffer  => [],
25660     }, $class;
25661 }
25662
25663 sub peek_ahead {
25664     my ( $self, $buffer_index ) = @_;
25665     my $line               = undef;
25666     my $line_source_object = $self->{_line_source_object};
25667     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
25668     if ( $buffer_index < scalar( @{$rlookahead_buffer} ) ) {
25669         $line = $rlookahead_buffer->[$buffer_index];
25670     }
25671     else {
25672         $line = $line_source_object->get_line();
25673         push( @{$rlookahead_buffer}, $line );
25674     }
25675     return $line;
25676 }
25677
25678 sub get_line {
25679     my $self               = shift;
25680     my $line               = undef;
25681     my $line_source_object = $self->{_line_source_object};
25682     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
25683
25684     if ( scalar( @{$rlookahead_buffer} ) ) {
25685         $line = shift @{$rlookahead_buffer};
25686     }
25687     else {
25688         $line = $line_source_object->get_line();
25689     }
25690     return $line;
25691 }
25692
25693 ########################################################################
25694 #
25695 # the Perl::Tidy::Tokenizer package is essentially a filter which
25696 # reads lines of perl source code from a source object and provides
25697 # corresponding tokenized lines through its get_line() method.  Lines
25698 # flow from the source_object to the caller like this:
25699 #
25700 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
25701 #   get_line()         get_line()           get_line()     line_of_tokens
25702 #
25703 # The source object can be any object with a get_line() method which
25704 # supplies one line (a character string) perl call.
25705 # The LineBuffer object is created by the Tokenizer.
25706 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
25707 # containing one tokenized line for each call to its get_line() method.
25708 #
25709 # WARNING: This is not a real class yet.  Only one tokenizer my be used.
25710 #
25711 ########################################################################
25712
25713 package Perl::Tidy::Tokenizer;
25714
25715 BEGIN {
25716
25717     # Caution: these debug flags produce a lot of output
25718     # They should all be 0 except when debugging small scripts
25719
25720     use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
25721     use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
25722     use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
25723     use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
25724     use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
25725
25726     my $debug_warning = sub {
25727         print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
25728     };
25729
25730     TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
25731     TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
25732     TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
25733     TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
25734     TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
25735
25736 }
25737
25738 use Carp;
25739
25740 # PACKAGE VARIABLES for processing an entire FILE.
25741 use vars qw{
25742   $tokenizer_self
25743
25744   $last_nonblank_token
25745   $last_nonblank_type
25746   $last_nonblank_block_type
25747   $statement_type
25748   $in_attribute_list
25749   $current_package
25750   $context
25751
25752   %is_constant
25753   %is_user_function
25754   %user_function_prototype
25755   %is_block_function
25756   %is_block_list_function
25757   %saw_function_definition
25758
25759   $brace_depth
25760   $paren_depth
25761   $square_bracket_depth
25762
25763   @current_depth
25764   @total_depth
25765   $total_depth
25766   @nesting_sequence_number
25767   @current_sequence_number
25768   @paren_type
25769   @paren_semicolon_count
25770   @paren_structural_type
25771   @brace_type
25772   @brace_structural_type
25773   @brace_context
25774   @brace_package
25775   @square_bracket_type
25776   @square_bracket_structural_type
25777   @depth_array
25778   @nested_ternary_flag
25779   @nested_statement_type
25780   @starting_line_of_current_depth
25781 };
25782
25783 # GLOBAL CONSTANTS for routines in this package
25784 use vars qw{
25785   %is_indirect_object_taker
25786   %is_block_operator
25787   %expecting_operator_token
25788   %expecting_operator_types
25789   %expecting_term_types
25790   %expecting_term_token
25791   %is_digraph
25792   %is_file_test_operator
25793   %is_trigraph
25794   %is_tetragraph
25795   %is_valid_token_type
25796   %is_keyword
25797   %is_code_block_token
25798   %really_want_term
25799   @opening_brace_names
25800   @closing_brace_names
25801   %is_keyword_taking_list
25802   %is_q_qq_qw_qx_qr_s_y_tr_m
25803 };
25804
25805 # possible values of operator_expected()
25806 use constant TERM     => -1;
25807 use constant UNKNOWN  => 0;
25808 use constant OPERATOR => 1;
25809
25810 # possible values of context
25811 use constant SCALAR_CONTEXT  => -1;
25812 use constant UNKNOWN_CONTEXT => 0;
25813 use constant LIST_CONTEXT    => 1;
25814
25815 # Maximum number of little messages; probably need not be changed.
25816 use constant MAX_NAG_MESSAGES => 6;
25817
25818 {
25819
25820     # methods to count instances
25821     my $_count = 0;
25822     sub get_count        { return $_count; }
25823     sub _increment_count { return ++$_count }
25824     sub _decrement_count { return --$_count }
25825 }
25826
25827 sub DESTROY {
25828     my $self = shift;
25829     $self->_decrement_count();
25830     return;
25831 }
25832
25833 sub new {
25834
25835     my $class = shift;
25836
25837     # Note: 'tabs' and 'indent_columns' are temporary and should be
25838     # removed asap
25839     my %defaults = (
25840         source_object        => undef,
25841         debugger_object      => undef,
25842         diagnostics_object   => undef,
25843         logger_object        => undef,
25844         starting_level       => undef,
25845         indent_columns       => 4,
25846         tabsize              => 8,
25847         look_for_hash_bang   => 0,
25848         trim_qw              => 1,
25849         look_for_autoloader  => 1,
25850         look_for_selfloader  => 1,
25851         starting_line_number => 1,
25852         extended_syntax      => 0,
25853     );
25854     my %args = ( %defaults, @_ );
25855
25856     # we are given an object with a get_line() method to supply source lines
25857     my $source_object = $args{source_object};
25858
25859     # we create another object with a get_line() and peek_ahead() method
25860     my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
25861
25862     # Tokenizer state data is as follows:
25863     # _rhere_target_list    reference to list of here-doc targets
25864     # _here_doc_target      the target string for a here document
25865     # _here_quote_character the type of here-doc quoting (" ' ` or none)
25866     #                       to determine if interpolation is done
25867     # _quote_target         character we seek if chasing a quote
25868     # _line_start_quote     line where we started looking for a long quote
25869     # _in_here_doc          flag indicating if we are in a here-doc
25870     # _in_pod               flag set if we are in pod documentation
25871     # _in_error             flag set if we saw severe error (binary in script)
25872     # _in_data              flag set if we are in __DATA__ section
25873     # _in_end               flag set if we are in __END__ section
25874     # _in_format            flag set if we are in a format description
25875     # _in_attribute_list    flag telling if we are looking for attributes
25876     # _in_quote             flag telling if we are chasing a quote
25877     # _starting_level       indentation level of first line
25878     # _line_buffer_object   object with get_line() method to supply source code
25879     # _diagnostics_object   place to write debugging information
25880     # _unexpected_error_count  error count used to limit output
25881     # _lower_case_labels_at  line numbers where lower case labels seen
25882     # _hit_bug               program bug detected
25883     $tokenizer_self = {
25884         _rhere_target_list                  => [],
25885         _in_here_doc                        => 0,
25886         _here_doc_target                    => "",
25887         _here_quote_character               => "",
25888         _in_data                            => 0,
25889         _in_end                             => 0,
25890         _in_format                          => 0,
25891         _in_error                           => 0,
25892         _in_pod                             => 0,
25893         _in_attribute_list                  => 0,
25894         _in_quote                           => 0,
25895         _quote_target                       => "",
25896         _line_start_quote                   => -1,
25897         _starting_level                     => $args{starting_level},
25898         _know_starting_level                => defined( $args{starting_level} ),
25899         _tabsize                            => $args{tabsize},
25900         _indent_columns                     => $args{indent_columns},
25901         _look_for_hash_bang                 => $args{look_for_hash_bang},
25902         _trim_qw                            => $args{trim_qw},
25903         _continuation_indentation           => $args{continuation_indentation},
25904         _outdent_labels                     => $args{outdent_labels},
25905         _last_line_number                   => $args{starting_line_number} - 1,
25906         _saw_perl_dash_P                    => 0,
25907         _saw_perl_dash_w                    => 0,
25908         _saw_use_strict                     => 0,
25909         _saw_v_string                       => 0,
25910         _hit_bug                            => 0,
25911         _look_for_autoloader                => $args{look_for_autoloader},
25912         _look_for_selfloader                => $args{look_for_selfloader},
25913         _saw_autoloader                     => 0,
25914         _saw_selfloader                     => 0,
25915         _saw_hash_bang                      => 0,
25916         _saw_end                            => 0,
25917         _saw_data                           => 0,
25918         _saw_negative_indentation           => 0,
25919         _started_tokenizing                 => 0,
25920         _line_buffer_object                 => $line_buffer_object,
25921         _debugger_object                    => $args{debugger_object},
25922         _diagnostics_object                 => $args{diagnostics_object},
25923         _logger_object                      => $args{logger_object},
25924         _unexpected_error_count             => 0,
25925         _started_looking_for_here_target_at => 0,
25926         _nearly_matched_here_target_at      => undef,
25927         _line_text                          => "",
25928         _rlower_case_labels_at              => undef,
25929         _extended_syntax                    => $args{extended_syntax},
25930     };
25931
25932     prepare_for_a_new_file();
25933     find_starting_indentation_level();
25934
25935     bless $tokenizer_self, $class;
25936
25937     # This is not a full class yet, so die if an attempt is made to
25938     # create more than one object.
25939
25940     if ( _increment_count() > 1 ) {
25941         confess
25942 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
25943     }
25944
25945     return $tokenizer_self;
25946
25947 }
25948
25949 # interface to Perl::Tidy::Logger routines
25950 sub warning {
25951     my $msg           = shift;
25952     my $logger_object = $tokenizer_self->{_logger_object};
25953     if ($logger_object) {
25954         $logger_object->warning($msg);
25955     }
25956     return;
25957 }
25958
25959 sub complain {
25960     my $msg           = shift;
25961     my $logger_object = $tokenizer_self->{_logger_object};
25962     if ($logger_object) {
25963         $logger_object->complain($msg);
25964     }
25965     return;
25966 }
25967
25968 sub write_logfile_entry {
25969     my $msg           = shift;
25970     my $logger_object = $tokenizer_self->{_logger_object};
25971     if ($logger_object) {
25972         $logger_object->write_logfile_entry($msg);
25973     }
25974     return;
25975 }
25976
25977 sub interrupt_logfile {
25978     my $logger_object = $tokenizer_self->{_logger_object};
25979     if ($logger_object) {
25980         $logger_object->interrupt_logfile();
25981     }
25982     return;
25983 }
25984
25985 sub resume_logfile {
25986     my $logger_object = $tokenizer_self->{_logger_object};
25987     if ($logger_object) {
25988         $logger_object->resume_logfile();
25989     }
25990     return;
25991 }
25992
25993 sub increment_brace_error {
25994     my $logger_object = $tokenizer_self->{_logger_object};
25995     if ($logger_object) {
25996         $logger_object->increment_brace_error();
25997     }
25998     return;
25999 }
26000
26001 sub report_definite_bug {
26002     $tokenizer_self->{_hit_bug} = 1;
26003     my $logger_object = $tokenizer_self->{_logger_object};
26004     if ($logger_object) {
26005         $logger_object->report_definite_bug();
26006     }
26007     return;
26008 }
26009
26010 sub brace_warning {
26011     my $msg           = shift;
26012     my $logger_object = $tokenizer_self->{_logger_object};
26013     if ($logger_object) {
26014         $logger_object->brace_warning($msg);
26015     }
26016     return;
26017 }
26018
26019 sub get_saw_brace_error {
26020     my $logger_object = $tokenizer_self->{_logger_object};
26021     if ($logger_object) {
26022         return $logger_object->get_saw_brace_error();
26023     }
26024     else {
26025         return 0;
26026     }
26027 }
26028
26029 # interface to Perl::Tidy::Diagnostics routines
26030 sub write_diagnostics {
26031     my $msg = shift;
26032     if ( $tokenizer_self->{_diagnostics_object} ) {
26033         $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg);
26034     }
26035     return;
26036 }
26037
26038 sub report_tokenization_errors {
26039
26040     my $self         = shift;
26041     my $severe_error = $self->{_in_error};
26042
26043     my $level = get_indentation_level();
26044     if ( $level != $tokenizer_self->{_starting_level} ) {
26045         warning("final indentation level: $level\n");
26046     }
26047
26048     check_final_nesting_depths();
26049
26050     if ( $tokenizer_self->{_look_for_hash_bang}
26051         && !$tokenizer_self->{_saw_hash_bang} )
26052     {
26053         warning(
26054             "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
26055     }
26056
26057     if ( $tokenizer_self->{_in_format} ) {
26058         warning("hit EOF while in format description\n");
26059     }
26060
26061     if ( $tokenizer_self->{_in_pod} ) {
26062
26063         # Just write log entry if this is after __END__ or __DATA__
26064         # because this happens to often, and it is not likely to be
26065         # a parsing error.
26066         if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
26067             write_logfile_entry(
26068 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
26069             );
26070         }
26071
26072         else {
26073             complain(
26074 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
26075             );
26076         }
26077
26078     }
26079
26080     if ( $tokenizer_self->{_in_here_doc} ) {
26081         $severe_error = 1;
26082         my $here_doc_target = $tokenizer_self->{_here_doc_target};
26083         my $started_looking_for_here_target_at =
26084           $tokenizer_self->{_started_looking_for_here_target_at};
26085         if ($here_doc_target) {
26086             warning(
26087 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
26088             );
26089         }
26090         else {
26091             warning(
26092 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
26093             );
26094         }
26095         my $nearly_matched_here_target_at =
26096           $tokenizer_self->{_nearly_matched_here_target_at};
26097         if ($nearly_matched_here_target_at) {
26098             warning(
26099 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
26100             );
26101         }
26102     }
26103
26104     if ( $tokenizer_self->{_in_quote} ) {
26105         $severe_error = 1;
26106         my $line_start_quote = $tokenizer_self->{_line_start_quote};
26107         my $quote_target     = $tokenizer_self->{_quote_target};
26108         my $what =
26109           ( $tokenizer_self->{_in_attribute_list} )
26110           ? "attribute list"
26111           : "quote/pattern";
26112         warning(
26113 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
26114         );
26115     }
26116
26117     if ( $tokenizer_self->{_hit_bug} ) {
26118         $severe_error = 1;
26119     }
26120
26121     my $logger_object = $tokenizer_self->{_logger_object};
26122
26123 # TODO: eventually may want to activate this to cause file to be output verbatim
26124     if (0) {
26125
26126         # Set the severe error for a fairly high warning count because
26127         # some of the warnings do not harm formatting, such as duplicate
26128         # sub names.
26129         my $warning_count = $logger_object->{_warning_count};
26130         if ( $warning_count > 50 ) {
26131             $severe_error = 1;
26132         }
26133
26134         # Brace errors are significant, so set the severe error flag at
26135         # a low number.
26136         my $saw_brace_error = $logger_object->{_saw_brace_error};
26137         if ( $saw_brace_error > 2 ) {
26138             $severe_error = 1;
26139         }
26140     }
26141
26142     unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
26143         if ( $] < 5.006 ) {
26144             write_logfile_entry("Suggest including '-w parameter'\n");
26145         }
26146         else {
26147             write_logfile_entry("Suggest including 'use warnings;'\n");
26148         }
26149     }
26150
26151     if ( $tokenizer_self->{_saw_perl_dash_P} ) {
26152         write_logfile_entry("Use of -P parameter for defines is discouraged\n");
26153     }
26154
26155     unless ( $tokenizer_self->{_saw_use_strict} ) {
26156         write_logfile_entry("Suggest including 'use strict;'\n");
26157     }
26158
26159     # it is suggested that labels have at least one upper case character
26160     # for legibility and to avoid code breakage as new keywords are introduced
26161     if ( $tokenizer_self->{_rlower_case_labels_at} ) {
26162         my @lower_case_labels_at =
26163           @{ $tokenizer_self->{_rlower_case_labels_at} };
26164         write_logfile_entry(
26165             "Suggest using upper case characters in label(s)\n");
26166         local $" = ')(';
26167         write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
26168     }
26169     return $severe_error;
26170 }
26171
26172 sub report_v_string {
26173
26174     # warn if this version can't handle v-strings
26175     my $tok = shift;
26176     unless ( $tokenizer_self->{_saw_v_string} ) {
26177         $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
26178     }
26179     if ( $] < 5.006 ) {
26180         warning(
26181 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
26182         );
26183     }
26184     return;
26185 }
26186
26187 sub get_input_line_number {
26188     return $tokenizer_self->{_last_line_number};
26189 }
26190
26191 # returns the next tokenized line
26192 sub get_line {
26193
26194     my $self = shift;
26195
26196     # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
26197     # $square_bracket_depth, $paren_depth
26198
26199     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
26200     $tokenizer_self->{_line_text} = $input_line;
26201
26202     return unless ($input_line);
26203
26204     my $input_line_number = ++$tokenizer_self->{_last_line_number};
26205
26206     # Find and remove what characters terminate this line, including any
26207     # control r
26208     my $input_line_separator = "";
26209     if ( chomp($input_line) ) { $input_line_separator = $/ }
26210
26211     # TODO: what other characters should be included here?
26212     if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
26213         $input_line_separator = $2 . $input_line_separator;
26214     }
26215
26216     # for backwards compatibility we keep the line text terminated with
26217     # a newline character
26218     $input_line .= "\n";
26219     $tokenizer_self->{_line_text} = $input_line;    # update
26220
26221     # create a data structure describing this line which will be
26222     # returned to the caller.
26223
26224     # _line_type codes are:
26225     #   SYSTEM         - system-specific code before hash-bang line
26226     #   CODE           - line of perl code (including comments)
26227     #   POD_START      - line starting pod, such as '=head'
26228     #   POD            - pod documentation text
26229     #   POD_END        - last line of pod section, '=cut'
26230     #   HERE           - text of here-document
26231     #   HERE_END       - last line of here-doc (target word)
26232     #   FORMAT         - format section
26233     #   FORMAT_END     - last line of format section, '.'
26234     #   DATA_START     - __DATA__ line
26235     #   DATA           - unidentified text following __DATA__
26236     #   END_START      - __END__ line
26237     #   END            - unidentified text following __END__
26238     #   ERROR          - we are in big trouble, probably not a perl script
26239
26240     # Other variables:
26241     #   _curly_brace_depth     - depth of curly braces at start of line
26242     #   _square_bracket_depth  - depth of square brackets at start of line
26243     #   _paren_depth           - depth of parens at start of line
26244     #   _starting_in_quote     - this line continues a multi-line quote
26245     #                            (so don't trim leading blanks!)
26246     #   _ending_in_quote       - this line ends in a multi-line quote
26247     #                            (so don't trim trailing blanks!)
26248     my $line_of_tokens = {
26249         _line_type                 => 'EOF',
26250         _line_text                 => $input_line,
26251         _line_number               => $input_line_number,
26252         _rtoken_type               => undef,
26253         _rtokens                   => undef,
26254         _rlevels                   => undef,
26255         _rslevels                  => undef,
26256         _rblock_type               => undef,
26257         _rcontainer_type           => undef,
26258         _rcontainer_environment    => undef,
26259         _rtype_sequence            => undef,
26260         _rnesting_tokens           => undef,
26261         _rci_levels                => undef,
26262         _rnesting_blocks           => undef,
26263         _guessed_indentation_level => 0,
26264         _starting_in_quote    => 0,                    # to be set by subroutine
26265         _ending_in_quote      => 0,
26266         _curly_brace_depth    => $brace_depth,
26267         _square_bracket_depth => $square_bracket_depth,
26268         _paren_depth          => $paren_depth,
26269         _quote_character      => '',
26270     };
26271
26272     # must print line unchanged if we are in a here document
26273     if ( $tokenizer_self->{_in_here_doc} ) {
26274
26275         $line_of_tokens->{_line_type} = 'HERE';
26276         my $here_doc_target      = $tokenizer_self->{_here_doc_target};
26277         my $here_quote_character = $tokenizer_self->{_here_quote_character};
26278         my $candidate_target     = $input_line;
26279         chomp $candidate_target;
26280
26281         # Handle <<~ targets, which are indicated here by a leading space on
26282         # the here quote character
26283         if ( $here_quote_character =~ /^\s/ ) {
26284             $candidate_target =~ s/^\s*//;
26285         }
26286         if ( $candidate_target eq $here_doc_target ) {
26287             $tokenizer_self->{_nearly_matched_here_target_at} = undef;
26288             $line_of_tokens->{_line_type}                     = 'HERE_END';
26289             write_logfile_entry("Exiting HERE document $here_doc_target\n");
26290
26291             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
26292             if ( @{$rhere_target_list} ) {  # there can be multiple here targets
26293                 ( $here_doc_target, $here_quote_character ) =
26294                   @{ shift @{$rhere_target_list} };
26295                 $tokenizer_self->{_here_doc_target} = $here_doc_target;
26296                 $tokenizer_self->{_here_quote_character} =
26297                   $here_quote_character;
26298                 write_logfile_entry(
26299                     "Entering HERE document $here_doc_target\n");
26300                 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
26301                 $tokenizer_self->{_started_looking_for_here_target_at} =
26302                   $input_line_number;
26303             }
26304             else {
26305                 $tokenizer_self->{_in_here_doc}          = 0;
26306                 $tokenizer_self->{_here_doc_target}      = "";
26307                 $tokenizer_self->{_here_quote_character} = "";
26308             }
26309         }
26310
26311         # check for error of extra whitespace
26312         # note for PERL6: leading whitespace is allowed
26313         else {
26314             $candidate_target =~ s/\s*$//;
26315             $candidate_target =~ s/^\s*//;
26316             if ( $candidate_target eq $here_doc_target ) {
26317                 $tokenizer_self->{_nearly_matched_here_target_at} =
26318                   $input_line_number;
26319             }
26320         }
26321         return $line_of_tokens;
26322     }
26323
26324     # must print line unchanged if we are in a format section
26325     elsif ( $tokenizer_self->{_in_format} ) {
26326
26327         if ( $input_line =~ /^\.[\s#]*$/ ) {
26328             write_logfile_entry("Exiting format section\n");
26329             $tokenizer_self->{_in_format} = 0;
26330             $line_of_tokens->{_line_type} = 'FORMAT_END';
26331         }
26332         else {
26333             $line_of_tokens->{_line_type} = 'FORMAT';
26334         }
26335         return $line_of_tokens;
26336     }
26337
26338     # must print line unchanged if we are in pod documentation
26339     elsif ( $tokenizer_self->{_in_pod} ) {
26340
26341         $line_of_tokens->{_line_type} = 'POD';
26342         if ( $input_line =~ /^=cut/ ) {
26343             $line_of_tokens->{_line_type} = 'POD_END';
26344             write_logfile_entry("Exiting POD section\n");
26345             $tokenizer_self->{_in_pod} = 0;
26346         }
26347         if ( $input_line =~ /^\#\!.*perl\b/ ) {
26348             warning(
26349                 "Hash-bang in pod can cause older versions of perl to fail! \n"
26350             );
26351         }
26352
26353         return $line_of_tokens;
26354     }
26355
26356     # must print line unchanged if we have seen a severe error (i.e., we
26357     # are seeing illegal tokens and cannot continue.  Syntax errors do
26358     # not pass this route).  Calling routine can decide what to do, but
26359     # the default can be to just pass all lines as if they were after __END__
26360     elsif ( $tokenizer_self->{_in_error} ) {
26361         $line_of_tokens->{_line_type} = 'ERROR';
26362         return $line_of_tokens;
26363     }
26364
26365     # print line unchanged if we are __DATA__ section
26366     elsif ( $tokenizer_self->{_in_data} ) {
26367
26368         # ...but look for POD
26369         # Note that the _in_data and _in_end flags remain set
26370         # so that we return to that state after seeing the
26371         # end of a pod section
26372         if ( $input_line =~ /^=(?!cut)/ ) {
26373             $line_of_tokens->{_line_type} = 'POD_START';
26374             write_logfile_entry("Entering POD section\n");
26375             $tokenizer_self->{_in_pod} = 1;
26376             return $line_of_tokens;
26377         }
26378         else {
26379             $line_of_tokens->{_line_type} = 'DATA';
26380             return $line_of_tokens;
26381         }
26382     }
26383
26384     # print line unchanged if we are in __END__ section
26385     elsif ( $tokenizer_self->{_in_end} ) {
26386
26387         # ...but look for POD
26388         # Note that the _in_data and _in_end flags remain set
26389         # so that we return to that state after seeing the
26390         # end of a pod section
26391         if ( $input_line =~ /^=(?!cut)/ ) {
26392             $line_of_tokens->{_line_type} = 'POD_START';
26393             write_logfile_entry("Entering POD section\n");
26394             $tokenizer_self->{_in_pod} = 1;
26395             return $line_of_tokens;
26396         }
26397         else {
26398             $line_of_tokens->{_line_type} = 'END';
26399             return $line_of_tokens;
26400         }
26401     }
26402
26403     # check for a hash-bang line if we haven't seen one
26404     if ( !$tokenizer_self->{_saw_hash_bang} ) {
26405         if ( $input_line =~ /^\#\!.*perl\b/ ) {
26406             $tokenizer_self->{_saw_hash_bang} = $input_line_number;
26407
26408             # check for -w and -P flags
26409             if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
26410                 $tokenizer_self->{_saw_perl_dash_P} = 1;
26411             }
26412
26413             if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
26414                 $tokenizer_self->{_saw_perl_dash_w} = 1;
26415             }
26416
26417             if (
26418                 ( $input_line_number > 1 )
26419
26420                 # leave any hash bang in a BEGIN block alone
26421                 # i.e. see 'debugger-duck_type.t'
26422                 && !(
26423                        $last_nonblank_block_type
26424                     && $last_nonblank_block_type eq 'BEGIN'
26425                 )
26426                 && ( !$tokenizer_self->{_look_for_hash_bang} )
26427               )
26428             {
26429
26430                 # this is helpful for VMS systems; we may have accidentally
26431                 # tokenized some DCL commands
26432                 if ( $tokenizer_self->{_started_tokenizing} ) {
26433                     warning(
26434 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
26435                     );
26436                 }
26437                 else {
26438                     complain("Useless hash-bang after line 1\n");
26439                 }
26440             }
26441
26442             # Report the leading hash-bang as a system line
26443             # This will prevent -dac from deleting it
26444             else {
26445                 $line_of_tokens->{_line_type} = 'SYSTEM';
26446                 return $line_of_tokens;
26447             }
26448         }
26449     }
26450
26451     # wait for a hash-bang before parsing if the user invoked us with -x
26452     if ( $tokenizer_self->{_look_for_hash_bang}
26453         && !$tokenizer_self->{_saw_hash_bang} )
26454     {
26455         $line_of_tokens->{_line_type} = 'SYSTEM';
26456         return $line_of_tokens;
26457     }
26458
26459     # a first line of the form ': #' will be marked as SYSTEM
26460     # since lines of this form may be used by tcsh
26461     if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
26462         $line_of_tokens->{_line_type} = 'SYSTEM';
26463         return $line_of_tokens;
26464     }
26465
26466     # now we know that it is ok to tokenize the line...
26467     # the line tokenizer will modify any of these private variables:
26468     #        _rhere_target_list
26469     #        _in_data
26470     #        _in_end
26471     #        _in_format
26472     #        _in_error
26473     #        _in_pod
26474     #        _in_quote
26475     my $ending_in_quote_last = $tokenizer_self->{_in_quote};
26476     tokenize_this_line($line_of_tokens);
26477
26478     # Now finish defining the return structure and return it
26479     $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
26480
26481     # handle severe error (binary data in script)
26482     if ( $tokenizer_self->{_in_error} ) {
26483         $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
26484         warning("Giving up after error\n");
26485         $line_of_tokens->{_line_type} = 'ERROR';
26486         reset_indentation_level(0);          # avoid error messages
26487         return $line_of_tokens;
26488     }
26489
26490     # handle start of pod documentation
26491     if ( $tokenizer_self->{_in_pod} ) {
26492
26493         # This gets tricky..above a __DATA__ or __END__ section, perl
26494         # accepts '=cut' as the start of pod section. But afterwards,
26495         # only pod utilities see it and they may ignore an =cut without
26496         # leading =head.  In any case, this isn't good.
26497         if ( $input_line =~ /^=cut\b/ ) {
26498             if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
26499                 complain("=cut while not in pod ignored\n");
26500                 $tokenizer_self->{_in_pod}    = 0;
26501                 $line_of_tokens->{_line_type} = 'POD_END';
26502             }
26503             else {
26504                 $line_of_tokens->{_line_type} = 'POD_START';
26505                 complain(
26506 "=cut starts a pod section .. this can fool pod utilities.\n"
26507                 );
26508                 write_logfile_entry("Entering POD section\n");
26509             }
26510         }
26511
26512         else {
26513             $line_of_tokens->{_line_type} = 'POD_START';
26514             write_logfile_entry("Entering POD section\n");
26515         }
26516
26517         return $line_of_tokens;
26518     }
26519
26520     # update indentation levels for log messages
26521     if ( $input_line !~ /^\s*$/ ) {
26522         my $rlevels = $line_of_tokens->{_rlevels};
26523         $line_of_tokens->{_guessed_indentation_level} =
26524           guess_old_indentation_level($input_line);
26525     }
26526
26527     # see if this line contains here doc targets
26528     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
26529     if ( @{$rhere_target_list} ) {
26530
26531         my ( $here_doc_target, $here_quote_character ) =
26532           @{ shift @{$rhere_target_list} };
26533         $tokenizer_self->{_in_here_doc}          = 1;
26534         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
26535         $tokenizer_self->{_here_quote_character} = $here_quote_character;
26536         write_logfile_entry("Entering HERE document $here_doc_target\n");
26537         $tokenizer_self->{_started_looking_for_here_target_at} =
26538           $input_line_number;
26539     }
26540
26541     # NOTE: __END__ and __DATA__ statements are written unformatted
26542     # because they can theoretically contain additional characters
26543     # which are not tokenized (and cannot be read with <DATA> either!).
26544     if ( $tokenizer_self->{_in_data} ) {
26545         $line_of_tokens->{_line_type} = 'DATA_START';
26546         write_logfile_entry("Starting __DATA__ section\n");
26547         $tokenizer_self->{_saw_data} = 1;
26548
26549         # keep parsing after __DATA__ if use SelfLoader was seen
26550         if ( $tokenizer_self->{_saw_selfloader} ) {
26551             $tokenizer_self->{_in_data} = 0;
26552             write_logfile_entry(
26553                 "SelfLoader seen, continuing; -nlsl deactivates\n");
26554         }
26555
26556         return $line_of_tokens;
26557     }
26558
26559     elsif ( $tokenizer_self->{_in_end} ) {
26560         $line_of_tokens->{_line_type} = 'END_START';
26561         write_logfile_entry("Starting __END__ section\n");
26562         $tokenizer_self->{_saw_end} = 1;
26563
26564         # keep parsing after __END__ if use AutoLoader was seen
26565         if ( $tokenizer_self->{_saw_autoloader} ) {
26566             $tokenizer_self->{_in_end} = 0;
26567             write_logfile_entry(
26568                 "AutoLoader seen, continuing; -nlal deactivates\n");
26569         }
26570         return $line_of_tokens;
26571     }
26572
26573     # now, finally, we know that this line is type 'CODE'
26574     $line_of_tokens->{_line_type} = 'CODE';
26575
26576     # remember if we have seen any real code
26577     if (  !$tokenizer_self->{_started_tokenizing}
26578         && $input_line !~ /^\s*$/
26579         && $input_line !~ /^\s*#/ )
26580     {
26581         $tokenizer_self->{_started_tokenizing} = 1;
26582     }
26583
26584     if ( $tokenizer_self->{_debugger_object} ) {
26585         $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
26586     }
26587
26588     # Note: if keyword 'format' occurs in this line code, it is still CODE
26589     # (keyword 'format' need not start a line)
26590     if ( $tokenizer_self->{_in_format} ) {
26591         write_logfile_entry("Entering format section\n");
26592     }
26593
26594     if ( $tokenizer_self->{_in_quote}
26595         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
26596     {
26597
26598         #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
26599         if (
26600             ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
26601         {
26602             $tokenizer_self->{_line_start_quote} = $input_line_number;
26603             write_logfile_entry(
26604                 "Start multi-line quote or pattern ending in $quote_target\n");
26605         }
26606     }
26607     elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
26608         && !$tokenizer_self->{_in_quote} )
26609     {
26610         $tokenizer_self->{_line_start_quote} = -1;
26611         write_logfile_entry("End of multi-line quote or pattern\n");
26612     }
26613
26614     # we are returning a line of CODE
26615     return $line_of_tokens;
26616 }
26617
26618 sub find_starting_indentation_level {
26619
26620     # We need to find the indentation level of the first line of the
26621     # script being formatted.  Often it will be zero for an entire file,
26622     # but if we are formatting a local block of code (within an editor for
26623     # example) it may not be zero.  The user may specify this with the
26624     # -sil=n parameter but normally doesn't so we have to guess.
26625     #
26626     # USES GLOBAL VARIABLES: $tokenizer_self
26627     my $starting_level = 0;
26628
26629     # use value if given as parameter
26630     if ( $tokenizer_self->{_know_starting_level} ) {
26631         $starting_level = $tokenizer_self->{_starting_level};
26632     }
26633
26634     # if we know there is a hash_bang line, the level must be zero
26635     elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
26636         $tokenizer_self->{_know_starting_level} = 1;
26637     }
26638
26639     # otherwise figure it out from the input file
26640     else {
26641         my $line;
26642         my $i = 0;
26643
26644         # keep looking at lines until we find a hash bang or piece of code
26645         my $msg = "";
26646         while ( $line =
26647             $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
26648         {
26649
26650             # if first line is #! then assume starting level is zero
26651             if ( $i == 1 && $line =~ /^\#\!/ ) {
26652                 $starting_level = 0;
26653                 last;
26654             }
26655             next if ( $line =~ /^\s*#/ );    # skip past comments
26656             next if ( $line =~ /^\s*$/ );    # skip past blank lines
26657             $starting_level = guess_old_indentation_level($line);
26658             last;
26659         }
26660         $msg = "Line $i implies starting-indentation-level = $starting_level\n";
26661         write_logfile_entry("$msg");
26662     }
26663     $tokenizer_self->{_starting_level} = $starting_level;
26664     reset_indentation_level($starting_level);
26665     return;
26666 }
26667
26668 sub guess_old_indentation_level {
26669     my ($line) = @_;
26670
26671     # Guess the indentation level of an input line.
26672     #
26673     # For the first line of code this result will define the starting
26674     # indentation level.  It will mainly be non-zero when perltidy is applied
26675     # within an editor to a local block of code.
26676     #
26677     # This is an impossible task in general because we can't know what tabs
26678     # meant for the old script and how many spaces were used for one
26679     # indentation level in the given input script.  For example it may have
26680     # been previously formatted with -i=7 -et=3.  But we can at least try to
26681     # make sure that perltidy guesses correctly if it is applied repeatedly to
26682     # a block of code within an editor, so that the block stays at the same
26683     # level when perltidy is applied repeatedly.
26684     #
26685     # USES GLOBAL VARIABLES: $tokenizer_self
26686     my $level = 0;
26687
26688     # find leading tabs, spaces, and any statement label
26689     my $spaces = 0;
26690     if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
26691
26692         # If there are leading tabs, we use the tab scheme for this run, if
26693         # any, so that the code will remain stable when editing.
26694         if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
26695
26696         if ($2) { $spaces += length($2) }
26697
26698         # correct for outdented labels
26699         if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
26700             $spaces += $tokenizer_self->{_continuation_indentation};
26701         }
26702     }
26703
26704     # compute indentation using the value of -i for this run.
26705     # If -i=0 is used for this run (which is possible) it doesn't matter
26706     # what we do here but we'll guess that the old run used 4 spaces per level.
26707     my $indent_columns = $tokenizer_self->{_indent_columns};
26708     $indent_columns = 4 if ( !$indent_columns );
26709     $level = int( $spaces / $indent_columns );
26710     return ($level);
26711 }
26712
26713 # This is a currently unused debug routine
26714 sub dump_functions {
26715
26716     my $fh = *STDOUT;
26717     foreach my $pkg ( keys %is_user_function ) {
26718         print $fh "\nnon-constant subs in package $pkg\n";
26719
26720         foreach my $sub ( keys %{ $is_user_function{$pkg} } ) {
26721             my $msg = "";
26722             if ( $is_block_list_function{$pkg}{$sub} ) {
26723                 $msg = 'block_list';
26724             }
26725
26726             if ( $is_block_function{$pkg}{$sub} ) {
26727                 $msg = 'block';
26728             }
26729             print $fh "$sub $msg\n";
26730         }
26731     }
26732
26733     foreach my $pkg ( keys %is_constant ) {
26734         print $fh "\nconstants and constant subs in package $pkg\n";
26735
26736         foreach my $sub ( keys %{ $is_constant{$pkg} } ) {
26737             print $fh "$sub\n";
26738         }
26739     }
26740     return;
26741 }
26742
26743 sub ones_count {
26744
26745     # count number of 1's in a string of 1's and 0's
26746     # example: ones_count("010101010101") gives 6
26747     my $str = shift;
26748     return $str =~ tr/1/0/;
26749 }
26750
26751 sub prepare_for_a_new_file {
26752
26753     # previous tokens needed to determine what to expect next
26754     $last_nonblank_token      = ';';    # the only possible starting state which
26755     $last_nonblank_type       = ';';    # will make a leading brace a code block
26756     $last_nonblank_block_type = '';
26757
26758     # scalars for remembering statement types across multiple lines
26759     $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
26760     $in_attribute_list = 0;
26761
26762     # scalars for remembering where we are in the file
26763     $current_package = "main";
26764     $context         = UNKNOWN_CONTEXT;
26765
26766     # hashes used to remember function information
26767     %is_constant             = ();      # user-defined constants
26768     %is_user_function        = ();      # user-defined functions
26769     %user_function_prototype = ();      # their prototypes
26770     %is_block_function       = ();
26771     %is_block_list_function  = ();
26772     %saw_function_definition = ();
26773
26774     # variables used to track depths of various containers
26775     # and report nesting errors
26776     $paren_depth          = 0;
26777     $brace_depth          = 0;
26778     $square_bracket_depth = 0;
26779     @current_depth[ 0 .. $#closing_brace_names ] =
26780       (0) x scalar @closing_brace_names;
26781     $total_depth = 0;
26782     @total_depth = ();
26783     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
26784       ( 0 .. $#closing_brace_names );
26785     @current_sequence_number             = ();
26786     $paren_type[$paren_depth]            = '';
26787     $paren_semicolon_count[$paren_depth] = 0;
26788     $paren_structural_type[$brace_depth] = '';
26789     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
26790     $brace_structural_type[$brace_depth]                   = '';
26791     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
26792     $brace_package[$paren_depth]                           = $current_package;
26793     $square_bracket_type[$square_bracket_depth]            = '';
26794     $square_bracket_structural_type[$square_bracket_depth] = '';
26795
26796     initialize_tokenizer_state();
26797     return;
26798 }
26799
26800 {                                       # begin tokenize_this_line
26801
26802     use constant BRACE          => 0;
26803     use constant SQUARE_BRACKET => 1;
26804     use constant PAREN          => 2;
26805     use constant QUESTION_COLON => 3;
26806
26807     # TV1: scalars for processing one LINE.
26808     # Re-initialized on each entry to sub tokenize_this_line.
26809     my (
26810         $block_type,        $container_type,    $expecting,
26811         $i,                 $i_tok,             $input_line,
26812         $input_line_number, $last_nonblank_i,   $max_token_index,
26813         $next_tok,          $next_type,         $peeked_ahead,
26814         $prototype,         $rhere_target_list, $rtoken_map,
26815         $rtoken_type,       $rtokens,           $tok,
26816         $type,              $type_sequence,     $indent_flag,
26817     );
26818
26819     # TV2: refs to ARRAYS for processing one LINE
26820     # Re-initialized on each call.
26821     my $routput_token_list     = [];    # stack of output token indexes
26822     my $routput_token_type     = [];    # token types
26823     my $routput_block_type     = [];    # types of code block
26824     my $routput_container_type = [];    # paren types, such as if, elsif, ..
26825     my $routput_type_sequence  = [];    # nesting sequential number
26826     my $routput_indent_flag    = [];    #
26827
26828     # TV3: SCALARS for quote variables.  These are initialized with a
26829     # subroutine call and continually updated as lines are processed.
26830     my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
26831         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
26832
26833     # TV4: SCALARS for multi-line identifiers and
26834     # statements. These are initialized with a subroutine call
26835     # and continually updated as lines are processed.
26836     my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
26837
26838     # TV5: SCALARS for tracking indentation level.
26839     # Initialized once and continually updated as lines are
26840     # processed.
26841     my (
26842         $nesting_token_string,      $nesting_type_string,
26843         $nesting_block_string,      $nesting_block_flag,
26844         $nesting_list_string,       $nesting_list_flag,
26845         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
26846         $in_statement_continuation, $level_in_tokenizer,
26847         $slevel_in_tokenizer,       $rslevel_stack,
26848     );
26849
26850     # TV6: SCALARS for remembering several previous
26851     # tokens. Initialized once and continually updated as
26852     # lines are processed.
26853     my (
26854         $last_nonblank_container_type,     $last_nonblank_type_sequence,
26855         $last_last_nonblank_token,         $last_last_nonblank_type,
26856         $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
26857         $last_last_nonblank_type_sequence, $last_nonblank_prototype,
26858     );
26859
26860     # ----------------------------------------------------------------
26861     # beginning of tokenizer variable access and manipulation routines
26862     # ----------------------------------------------------------------
26863
26864     sub initialize_tokenizer_state {
26865
26866         # TV1: initialized on each call
26867         # TV2: initialized on each call
26868         # TV3:
26869         $in_quote                = 0;
26870         $quote_type              = 'Q';
26871         $quote_character         = "";
26872         $quote_pos               = 0;
26873         $quote_depth             = 0;
26874         $quoted_string_1         = "";
26875         $quoted_string_2         = "";
26876         $allowed_quote_modifiers = "";
26877
26878         # TV4:
26879         $id_scan_state     = '';
26880         $identifier        = '';
26881         $want_paren        = "";
26882         $indented_if_level = 0;
26883
26884         # TV5:
26885         $nesting_token_string             = "";
26886         $nesting_type_string              = "";
26887         $nesting_block_string             = '1';    # initially in a block
26888         $nesting_block_flag               = 1;
26889         $nesting_list_string              = '0';    # initially not in a list
26890         $nesting_list_flag                = 0;      # initially not in a list
26891         $ci_string_in_tokenizer           = "";
26892         $continuation_string_in_tokenizer = "0";
26893         $in_statement_continuation        = 0;
26894         $level_in_tokenizer               = 0;
26895         $slevel_in_tokenizer              = 0;
26896         $rslevel_stack                    = [];
26897
26898         # TV6:
26899         $last_nonblank_container_type      = '';
26900         $last_nonblank_type_sequence       = '';
26901         $last_last_nonblank_token          = ';';
26902         $last_last_nonblank_type           = ';';
26903         $last_last_nonblank_block_type     = '';
26904         $last_last_nonblank_container_type = '';
26905         $last_last_nonblank_type_sequence  = '';
26906         $last_nonblank_prototype           = "";
26907         return;
26908     }
26909
26910     sub save_tokenizer_state {
26911
26912         my $rTV1 = [
26913             $block_type,        $container_type,    $expecting,
26914             $i,                 $i_tok,             $input_line,
26915             $input_line_number, $last_nonblank_i,   $max_token_index,
26916             $next_tok,          $next_type,         $peeked_ahead,
26917             $prototype,         $rhere_target_list, $rtoken_map,
26918             $rtoken_type,       $rtokens,           $tok,
26919             $type,              $type_sequence,     $indent_flag,
26920         ];
26921
26922         my $rTV2 = [
26923             $routput_token_list,    $routput_token_type,
26924             $routput_block_type,    $routput_container_type,
26925             $routput_type_sequence, $routput_indent_flag,
26926         ];
26927
26928         my $rTV3 = [
26929             $in_quote,        $quote_type,
26930             $quote_character, $quote_pos,
26931             $quote_depth,     $quoted_string_1,
26932             $quoted_string_2, $allowed_quote_modifiers,
26933         ];
26934
26935         my $rTV4 =
26936           [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
26937
26938         my $rTV5 = [
26939             $nesting_token_string,      $nesting_type_string,
26940             $nesting_block_string,      $nesting_block_flag,
26941             $nesting_list_string,       $nesting_list_flag,
26942             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
26943             $in_statement_continuation, $level_in_tokenizer,
26944             $slevel_in_tokenizer,       $rslevel_stack,
26945         ];
26946
26947         my $rTV6 = [
26948             $last_nonblank_container_type,
26949             $last_nonblank_type_sequence,
26950             $last_last_nonblank_token,
26951             $last_last_nonblank_type,
26952             $last_last_nonblank_block_type,
26953             $last_last_nonblank_container_type,
26954             $last_last_nonblank_type_sequence,
26955             $last_nonblank_prototype,
26956         ];
26957         return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
26958     }
26959
26960     sub restore_tokenizer_state {
26961         my ($rstate) = @_;
26962         my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
26963         (
26964             $block_type,        $container_type,    $expecting,
26965             $i,                 $i_tok,             $input_line,
26966             $input_line_number, $last_nonblank_i,   $max_token_index,
26967             $next_tok,          $next_type,         $peeked_ahead,
26968             $prototype,         $rhere_target_list, $rtoken_map,
26969             $rtoken_type,       $rtokens,           $tok,
26970             $type,              $type_sequence,     $indent_flag,
26971         ) = @{$rTV1};
26972
26973         (
26974             $routput_token_list,    $routput_token_type,
26975             $routput_block_type,    $routput_container_type,
26976             $routput_type_sequence, $routput_type_sequence,
26977         ) = @{$rTV2};
26978
26979         (
26980             $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
26981             $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
26982         ) = @{$rTV3};
26983
26984         ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
26985           @{$rTV4};
26986
26987         (
26988             $nesting_token_string,      $nesting_type_string,
26989             $nesting_block_string,      $nesting_block_flag,
26990             $nesting_list_string,       $nesting_list_flag,
26991             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
26992             $in_statement_continuation, $level_in_tokenizer,
26993             $slevel_in_tokenizer,       $rslevel_stack,
26994         ) = @{$rTV5};
26995
26996         (
26997             $last_nonblank_container_type,
26998             $last_nonblank_type_sequence,
26999             $last_last_nonblank_token,
27000             $last_last_nonblank_type,
27001             $last_last_nonblank_block_type,
27002             $last_last_nonblank_container_type,
27003             $last_last_nonblank_type_sequence,
27004             $last_nonblank_prototype,
27005         ) = @{$rTV6};
27006         return;
27007     }
27008
27009     sub get_indentation_level {
27010
27011         # patch to avoid reporting error if indented if is not terminated
27012         if ($indented_if_level) { return $level_in_tokenizer - 1 }
27013         return $level_in_tokenizer;
27014     }
27015
27016     sub reset_indentation_level {
27017         $level_in_tokenizer = $slevel_in_tokenizer = shift;
27018         push @{$rslevel_stack}, $slevel_in_tokenizer;
27019         return;
27020     }
27021
27022     sub peeked_ahead {
27023         my $flag = shift;
27024         $peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
27025         return $peeked_ahead;
27026     }
27027
27028     # ------------------------------------------------------------
27029     # end of tokenizer variable access and manipulation routines
27030     # ------------------------------------------------------------
27031
27032     # ------------------------------------------------------------
27033     # beginning of various scanner interface routines
27034     # ------------------------------------------------------------
27035     sub scan_replacement_text {
27036
27037         # check for here-docs in replacement text invoked by
27038         # a substitution operator with executable modifier 'e'.
27039         #
27040         # given:
27041         #  $replacement_text
27042         # return:
27043         #  $rht = reference to any here-doc targets
27044         my ($replacement_text) = @_;
27045
27046         # quick check
27047         return unless ( $replacement_text =~ /<</ );
27048
27049         write_logfile_entry("scanning replacement text for here-doc targets\n");
27050
27051         # save the logger object for error messages
27052         my $logger_object = $tokenizer_self->{_logger_object};
27053
27054         # localize all package variables
27055         local (
27056             $tokenizer_self,                 $last_nonblank_token,
27057             $last_nonblank_type,             $last_nonblank_block_type,
27058             $statement_type,                 $in_attribute_list,
27059             $current_package,                $context,
27060             %is_constant,                    %is_user_function,
27061             %user_function_prototype,        %is_block_function,
27062             %is_block_list_function,         %saw_function_definition,
27063             $brace_depth,                    $paren_depth,
27064             $square_bracket_depth,           @current_depth,
27065             @total_depth,                    $total_depth,
27066             @nesting_sequence_number,        @current_sequence_number,
27067             @paren_type,                     @paren_semicolon_count,
27068             @paren_structural_type,          @brace_type,
27069             @brace_structural_type,          @brace_context,
27070             @brace_package,                  @square_bracket_type,
27071             @square_bracket_structural_type, @depth_array,
27072             @starting_line_of_current_depth, @nested_ternary_flag,
27073             @nested_statement_type,
27074         );
27075
27076         # save all lexical variables
27077         my $rstate = save_tokenizer_state();
27078         _decrement_count();    # avoid error check for multiple tokenizers
27079
27080         # make a new tokenizer
27081         my $rOpts = {};
27082         my $rpending_logfile_message;
27083         my $source_object =
27084           Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
27085             $rpending_logfile_message );
27086         my $tokenizer = Perl::Tidy::Tokenizer->new(
27087             source_object        => $source_object,
27088             logger_object        => $logger_object,
27089             starting_line_number => $input_line_number,
27090         );
27091
27092         # scan the replacement text
27093         1 while ( $tokenizer->get_line() );
27094
27095         # remove any here doc targets
27096         my $rht = undef;
27097         if ( $tokenizer_self->{_in_here_doc} ) {
27098             $rht = [];
27099             push @{$rht},
27100               [
27101                 $tokenizer_self->{_here_doc_target},
27102                 $tokenizer_self->{_here_quote_character}
27103               ];
27104             if ( $tokenizer_self->{_rhere_target_list} ) {
27105                 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
27106                 $tokenizer_self->{_rhere_target_list} = undef;
27107             }
27108             $tokenizer_self->{_in_here_doc} = undef;
27109         }
27110
27111         # now its safe to report errors
27112         my $severe_error = $tokenizer->report_tokenization_errors();
27113
27114         # TODO: Could propagate a severe error up
27115
27116         # restore all tokenizer lexical variables
27117         restore_tokenizer_state($rstate);
27118
27119         # return the here doc targets
27120         return $rht;
27121     }
27122
27123     sub scan_bare_identifier {
27124         ( $i, $tok, $type, $prototype ) =
27125           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
27126             $rtoken_map, $max_token_index );
27127         return;
27128     }
27129
27130     sub scan_identifier {
27131         ( $i, $tok, $type, $id_scan_state, $identifier ) =
27132           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
27133             $max_token_index, $expecting, $paren_type[$paren_depth] );
27134         return;
27135     }
27136
27137     sub scan_id {
27138         ( $i, $tok, $type, $id_scan_state ) =
27139           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
27140             $id_scan_state, $max_token_index );
27141         return;
27142     }
27143
27144     sub scan_number {
27145         my $number;
27146         ( $i, $type, $number ) =
27147           scan_number_do( $input_line, $i, $rtoken_map, $type,
27148             $max_token_index );
27149         return $number;
27150     }
27151
27152     # a sub to warn if token found where term expected
27153     sub error_if_expecting_TERM {
27154         if ( $expecting == TERM ) {
27155             if ( $really_want_term{$last_nonblank_type} ) {
27156                 report_unexpected( $tok, "term", $i_tok, $last_nonblank_i,
27157                     $rtoken_map, $rtoken_type, $input_line );
27158                 return 1;
27159             }
27160         }
27161         return;
27162     }
27163
27164     # a sub to warn if token found where operator expected
27165     sub error_if_expecting_OPERATOR {
27166         my $thing = shift;
27167         if ( $expecting == OPERATOR ) {
27168             if ( !defined($thing) ) { $thing = $tok }
27169             report_unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
27170                 $rtoken_map, $rtoken_type, $input_line );
27171             if ( $i_tok == 0 ) {
27172                 interrupt_logfile();
27173                 warning("Missing ';' above?\n");
27174                 resume_logfile();
27175             }
27176             return 1;
27177         }
27178         return;
27179     }
27180
27181     # ------------------------------------------------------------
27182     # end scanner interfaces
27183     # ------------------------------------------------------------
27184
27185     my %is_for_foreach;
27186     @_ = qw(for foreach);
27187     @is_for_foreach{@_} = (1) x scalar(@_);
27188
27189     my %is_my_our;
27190     @_ = qw(my our);
27191     @is_my_our{@_} = (1) x scalar(@_);
27192
27193     # These keywords may introduce blocks after parenthesized expressions,
27194     # in the form:
27195     # keyword ( .... ) { BLOCK }
27196     # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
27197     my %is_blocktype_with_paren;
27198     @_ =
27199       qw(if elsif unless while until for foreach switch case given when catch);
27200     @is_blocktype_with_paren{@_} = (1) x scalar(@_);
27201
27202     # ------------------------------------------------------------
27203     # begin hash of code for handling most token types
27204     # ------------------------------------------------------------
27205     my $tokenization_code = {
27206
27207         # no special code for these types yet, but syntax checks
27208         # could be added
27209
27210 ##      '!'   => undef,
27211 ##      '!='  => undef,
27212 ##      '!~'  => undef,
27213 ##      '%='  => undef,
27214 ##      '&&=' => undef,
27215 ##      '&='  => undef,
27216 ##      '+='  => undef,
27217 ##      '-='  => undef,
27218 ##      '..'  => undef,
27219 ##      '..'  => undef,
27220 ##      '...' => undef,
27221 ##      '.='  => undef,
27222 ##      '<<=' => undef,
27223 ##      '<='  => undef,
27224 ##      '<=>' => undef,
27225 ##      '<>'  => undef,
27226 ##      '='   => undef,
27227 ##      '=='  => undef,
27228 ##      '=~'  => undef,
27229 ##      '>='  => undef,
27230 ##      '>>'  => undef,
27231 ##      '>>=' => undef,
27232 ##      '\\'  => undef,
27233 ##      '^='  => undef,
27234 ##      '|='  => undef,
27235 ##      '||=' => undef,
27236 ##      '//=' => undef,
27237 ##      '~'   => undef,
27238 ##      '~~'  => undef,
27239 ##      '!~~'  => undef,
27240
27241         '>' => sub {
27242             error_if_expecting_TERM()
27243               if ( $expecting == TERM );
27244         },
27245         '|' => sub {
27246             error_if_expecting_TERM()
27247               if ( $expecting == TERM );
27248         },
27249         '$' => sub {
27250
27251             # start looking for a scalar
27252             error_if_expecting_OPERATOR("Scalar")
27253               if ( $expecting == OPERATOR );
27254             scan_identifier();
27255
27256             if ( $identifier eq '$^W' ) {
27257                 $tokenizer_self->{_saw_perl_dash_w} = 1;
27258             }
27259
27260             # Check for identifier in indirect object slot
27261             # (vorboard.pl, sort.t).  Something like:
27262             #   /^(print|printf|sort|exec|system)$/
27263             if (
27264                 $is_indirect_object_taker{$last_nonblank_token}
27265
27266                 || ( ( $last_nonblank_token eq '(' )
27267                     && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
27268                 || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
27269               )
27270             {
27271                 $type = 'Z';
27272             }
27273         },
27274         '(' => sub {
27275
27276             ++$paren_depth;
27277             $paren_semicolon_count[$paren_depth] = 0;
27278             if ($want_paren) {
27279                 $container_type = $want_paren;
27280                 $want_paren     = "";
27281             }
27282             elsif ( $statement_type =~ /^sub\b/ ) {
27283                 $container_type = $statement_type;
27284             }
27285             else {
27286                 $container_type = $last_nonblank_token;
27287
27288                 # We can check for a syntax error here of unexpected '(',
27289                 # but this is going to get messy...
27290                 if (
27291                     $expecting == OPERATOR
27292
27293                     # be sure this is not a method call of the form
27294                     # &method(...), $method->(..), &{method}(...),
27295                     # $ref[2](list) is ok & short for $ref[2]->(list)
27296                     # NOTE: at present, braces in something like &{ xxx }
27297                     # are not marked as a block, we might have a method call
27298                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
27299
27300                   )
27301                 {
27302
27303                     # ref: camel 3 p 703.
27304                     if ( $last_last_nonblank_token eq 'do' ) {
27305                         complain(
27306 "do SUBROUTINE is deprecated; consider & or -> notation\n"
27307                         );
27308                     }
27309                     else {
27310
27311                         # if this is an empty list, (), then it is not an
27312                         # error; for example, we might have a constant pi and
27313                         # invoke it with pi() or just pi;
27314                         my ( $next_nonblank_token, $i_next ) =
27315                           find_next_nonblank_token( $i, $rtokens,
27316                             $max_token_index );
27317                         if ( $next_nonblank_token ne ')' ) {
27318                             my $hint;
27319                             error_if_expecting_OPERATOR('(');
27320
27321                             if ( $last_nonblank_type eq 'C' ) {
27322                                 $hint =
27323                                   "$last_nonblank_token has a void prototype\n";
27324                             }
27325                             elsif ( $last_nonblank_type eq 'i' ) {
27326                                 if (   $i_tok > 0
27327                                     && $last_nonblank_token =~ /^\$/ )
27328                                 {
27329                                     $hint =
27330 "Do you mean '$last_nonblank_token->(' ?\n";
27331                                 }
27332                             }
27333                             if ($hint) {
27334                                 interrupt_logfile();
27335                                 warning($hint);
27336                                 resume_logfile();
27337                             }
27338                         } ## end if ( $next_nonblank_token...
27339                     } ## end else [ if ( $last_last_nonblank_token...
27340                 } ## end if ( $expecting == OPERATOR...
27341             }
27342             $paren_type[$paren_depth] = $container_type;
27343             ( $type_sequence, $indent_flag ) =
27344               increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
27345
27346             # propagate types down through nested parens
27347             # for example: the second paren in 'if ((' would be structural
27348             # since the first is.
27349
27350             if ( $last_nonblank_token eq '(' ) {
27351                 $type = $last_nonblank_type;
27352             }
27353
27354             #     We exclude parens as structural after a ',' because it
27355             #     causes subtle problems with continuation indentation for
27356             #     something like this, where the first 'or' will not get
27357             #     indented.
27358             #
27359             #         assert(
27360             #             __LINE__,
27361             #             ( not defined $check )
27362             #               or ref $check
27363             #               or $check eq "new"
27364             #               or $check eq "old",
27365             #         );
27366             #
27367             #     Likewise, we exclude parens where a statement can start
27368             #     because of problems with continuation indentation, like
27369             #     these:
27370             #
27371             #         ($firstline =~ /^#\!.*perl/)
27372             #         and (print $File::Find::name, "\n")
27373             #           and (return 1);
27374             #
27375             #         (ref($usage_fref) =~ /CODE/)
27376             #         ? &$usage_fref
27377             #           : (&blast_usage, &blast_params, &blast_general_params);
27378
27379             else {
27380                 $type = '{';
27381             }
27382
27383             if ( $last_nonblank_type eq ')' ) {
27384                 warning(
27385                     "Syntax error? found token '$last_nonblank_type' then '('\n"
27386                 );
27387             }
27388             $paren_structural_type[$paren_depth] = $type;
27389
27390         },
27391         ')' => sub {
27392             ( $type_sequence, $indent_flag ) =
27393               decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
27394
27395             if ( $paren_structural_type[$paren_depth] eq '{' ) {
27396                 $type = '}';
27397             }
27398
27399             $container_type = $paren_type[$paren_depth];
27400
27401             # restore statement type as 'sub' at closing paren of a signature
27402             # so that a subsequent ':' is identified as an attribute
27403             if ( $container_type =~ /^sub\b/ ) {
27404                 $statement_type = $container_type;
27405             }
27406
27407             #    /^(for|foreach)$/
27408             if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
27409                 my $num_sc = $paren_semicolon_count[$paren_depth];
27410                 if ( $num_sc > 0 && $num_sc != 2 ) {
27411                     warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
27412                 }
27413             }
27414
27415             if ( $paren_depth > 0 ) { $paren_depth-- }
27416         },
27417         ',' => sub {
27418             if ( $last_nonblank_type eq ',' ) {
27419                 complain("Repeated ','s \n");
27420             }
27421
27422             # patch for operator_expected: note if we are in the list (use.t)
27423             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
27424 ##                FIXME: need to move this elsewhere, perhaps check after a '('
27425 ##                elsif ($last_nonblank_token eq '(') {
27426 ##                    warning("Leading ','s illegal in some versions of perl\n");
27427 ##                }
27428         },
27429         ';' => sub {
27430             $context        = UNKNOWN_CONTEXT;
27431             $statement_type = '';
27432             $want_paren     = "";
27433
27434             #    /^(for|foreach)$/
27435             if ( $is_for_foreach{ $paren_type[$paren_depth] } )
27436             {    # mark ; in for loop
27437
27438                 # Be careful: we do not want a semicolon such as the
27439                 # following to be included:
27440                 #
27441                 #    for (sort {strcoll($a,$b);} keys %investments) {
27442
27443                 if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
27444                     && $square_bracket_depth ==
27445                     $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
27446                 {
27447
27448                     $type = 'f';
27449                     $paren_semicolon_count[$paren_depth]++;
27450                 }
27451             }
27452
27453         },
27454         '"' => sub {
27455             error_if_expecting_OPERATOR("String")
27456               if ( $expecting == OPERATOR );
27457             $in_quote                = 1;
27458             $type                    = 'Q';
27459             $allowed_quote_modifiers = "";
27460         },
27461         "'" => sub {
27462             error_if_expecting_OPERATOR("String")
27463               if ( $expecting == OPERATOR );
27464             $in_quote                = 1;
27465             $type                    = 'Q';
27466             $allowed_quote_modifiers = "";
27467         },
27468         '`' => sub {
27469             error_if_expecting_OPERATOR("String")
27470               if ( $expecting == OPERATOR );
27471             $in_quote                = 1;
27472             $type                    = 'Q';
27473             $allowed_quote_modifiers = "";
27474         },
27475         '/' => sub {
27476             my $is_pattern;
27477
27478             if ( $expecting == UNKNOWN ) {    # indeterminate, must guess..
27479                 my $msg;
27480                 ( $is_pattern, $msg ) =
27481                   guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
27482                     $max_token_index );
27483
27484                 if ($msg) {
27485                     write_diagnostics("DIVIDE:$msg\n");
27486                     write_logfile_entry($msg);
27487                 }
27488             }
27489             else { $is_pattern = ( $expecting == TERM ) }
27490
27491             if ($is_pattern) {
27492                 $in_quote                = 1;
27493                 $type                    = 'Q';
27494                 $allowed_quote_modifiers = '[msixpodualngc]';
27495             }
27496             else {    # not a pattern; check for a /= token
27497
27498                 if ( $rtokens->[ $i + 1 ] eq '=' ) {    # form token /=
27499                     $i++;
27500                     $tok  = '/=';
27501                     $type = $tok;
27502                 }
27503
27504               #DEBUG - collecting info on what tokens follow a divide
27505               # for development of guessing algorithm
27506               #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
27507               #    #write_diagnostics( "DIVIDE? $input_line\n" );
27508               #}
27509             }
27510         },
27511         '{' => sub {
27512
27513             # if we just saw a ')', we will label this block with
27514             # its type.  We need to do this to allow sub
27515             # code_block_type to determine if this brace starts a
27516             # code block or anonymous hash.  (The type of a paren
27517             # pair is the preceding token, such as 'if', 'else',
27518             # etc).
27519             $container_type = "";
27520
27521             # ATTRS: for a '{' following an attribute list, reset
27522             # things to look like we just saw the sub name
27523             if ( $statement_type =~ /^sub/ ) {
27524                 $last_nonblank_token = $statement_type;
27525                 $last_nonblank_type  = 'i';
27526                 $statement_type      = "";
27527             }
27528
27529             # patch for SWITCH/CASE: hide these keywords from an immediately
27530             # following opening brace
27531             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
27532                 && $statement_type eq $last_nonblank_token )
27533             {
27534                 $last_nonblank_token = ";";
27535             }
27536
27537             elsif ( $last_nonblank_token eq ')' ) {
27538                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
27539
27540                 # defensive move in case of a nesting error (pbug.t)
27541                 # in which this ')' had no previous '('
27542                 # this nesting error will have been caught
27543                 if ( !defined($last_nonblank_token) ) {
27544                     $last_nonblank_token = 'if';
27545                 }
27546
27547                 # check for syntax error here;
27548                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
27549                     if ( $tokenizer_self->{'_extended_syntax'} ) {
27550
27551                         # we append a trailing () to mark this as an unknown
27552                         # block type.  This allows perltidy to format some
27553                         # common extensions of perl syntax.
27554                         # This is used by sub code_block_type
27555                         $last_nonblank_token .= '()';
27556                     }
27557                     else {
27558                         my $list =
27559                           join( ' ', sort keys %is_blocktype_with_paren );
27560                         warning(
27561 "syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
27562                         );
27563                     }
27564                 }
27565             }
27566
27567             # patch for paren-less for/foreach glitch, part 2.
27568             # see note below under 'qw'
27569             elsif ($last_nonblank_token eq 'qw'
27570                 && $is_for_foreach{$want_paren} )
27571             {
27572                 $last_nonblank_token = $want_paren;
27573                 if ( $last_last_nonblank_token eq $want_paren ) {
27574                     warning(
27575 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
27576                     );
27577
27578                 }
27579                 $want_paren = "";
27580             }
27581
27582             # now identify which of the three possible types of
27583             # curly braces we have: hash index container, anonymous
27584             # hash reference, or code block.
27585
27586             # non-structural (hash index) curly brace pair
27587             # get marked 'L' and 'R'
27588             if ( is_non_structural_brace() ) {
27589                 $type = 'L';
27590
27591                 # patch for SWITCH/CASE:
27592                 # allow paren-less identifier after 'when'
27593                 # if the brace is preceded by a space
27594                 if (   $statement_type eq 'when'
27595                     && $last_nonblank_type eq 'i'
27596                     && $last_last_nonblank_type eq 'k'
27597                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
27598                 {
27599                     $type       = '{';
27600                     $block_type = $statement_type;
27601                 }
27602             }
27603
27604             # code and anonymous hash have the same type, '{', but are
27605             # distinguished by 'block_type',
27606             # which will be blank for an anonymous hash
27607             else {
27608
27609                 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
27610                     $max_token_index );
27611
27612                 # patch to promote bareword type to function taking block
27613                 if (   $block_type
27614                     && $last_nonblank_type eq 'w'
27615                     && $last_nonblank_i >= 0 )
27616                 {
27617                     if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
27618                         $routput_token_type->[$last_nonblank_i] = 'G';
27619                     }
27620                 }
27621
27622                 # patch for SWITCH/CASE: if we find a stray opening block brace
27623                 # where we might accept a 'case' or 'when' block, then take it
27624                 if (   $statement_type eq 'case'
27625                     || $statement_type eq 'when' )
27626                 {
27627                     if ( !$block_type || $block_type eq '}' ) {
27628                         $block_type = $statement_type;
27629                     }
27630                 }
27631             }
27632
27633             $brace_type[ ++$brace_depth ]        = $block_type;
27634             $brace_package[$brace_depth]         = $current_package;
27635             $brace_structural_type[$brace_depth] = $type;
27636             $brace_context[$brace_depth]         = $context;
27637             ( $type_sequence, $indent_flag ) =
27638               increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
27639         },
27640         '}' => sub {
27641             $block_type = $brace_type[$brace_depth];
27642             if ($block_type) { $statement_type = '' }
27643             if ( defined( $brace_package[$brace_depth] ) ) {
27644                 $current_package = $brace_package[$brace_depth];
27645             }
27646
27647             # can happen on brace error (caught elsewhere)
27648             else {
27649             }
27650             ( $type_sequence, $indent_flag ) =
27651               decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
27652
27653             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
27654                 $type = 'R';
27655             }
27656
27657             # propagate type information for 'do' and 'eval' blocks, and also
27658             # for smartmatch operator.  This is necessary to enable us to know
27659             # if an operator or term is expected next.
27660             if ( $is_block_operator{$block_type} ) {
27661                 $tok = $block_type;
27662             }
27663
27664             $context = $brace_context[$brace_depth];
27665             if ( $brace_depth > 0 ) { $brace_depth--; }
27666         },
27667         '&' => sub {    # maybe sub call? start looking
27668
27669             # We have to check for sub call unless we are sure we
27670             # are expecting an operator.  This example from s2p
27671             # got mistaken as a q operator in an early version:
27672             #   print BODY &q(<<'EOT');
27673             if ( $expecting != OPERATOR ) {
27674
27675                 # But only look for a sub call if we are expecting a term or
27676                 # if there is no existing space after the &.
27677                 # For example we probably don't want & as sub call here:
27678                 #    Fcntl::S_IRUSR & $mode;
27679                 if ( $expecting == TERM || $next_type ne 'b' ) {
27680                     scan_identifier();
27681                 }
27682             }
27683             else {
27684             }
27685         },
27686         '<' => sub {    # angle operator or less than?
27687
27688             if ( $expecting != OPERATOR ) {
27689                 ( $i, $type ) =
27690                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
27691                     $expecting, $max_token_index );
27692
27693                 if ( $type eq '<' && $expecting == TERM ) {
27694                     error_if_expecting_TERM();
27695                     interrupt_logfile();
27696                     warning("Unterminated <> operator?\n");
27697                     resume_logfile();
27698                 }
27699             }
27700             else {
27701             }
27702         },
27703         '?' => sub {    # ?: conditional or starting pattern?
27704
27705             my $is_pattern;
27706
27707             if ( $expecting == UNKNOWN ) {
27708
27709                 my $msg;
27710                 ( $is_pattern, $msg ) =
27711                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
27712                     $max_token_index );
27713
27714                 if ($msg) { write_logfile_entry($msg) }
27715             }
27716             else { $is_pattern = ( $expecting == TERM ) }
27717
27718             if ($is_pattern) {
27719                 $in_quote                = 1;
27720                 $type                    = 'Q';
27721                 $allowed_quote_modifiers = '[msixpodualngc]';
27722             }
27723             else {
27724                 ( $type_sequence, $indent_flag ) =
27725                   increase_nesting_depth( QUESTION_COLON,
27726                     $rtoken_map->[$i_tok] );
27727             }
27728         },
27729         '*' => sub {    # typeglob, or multiply?
27730
27731             if ( $expecting == TERM ) {
27732                 scan_identifier();
27733             }
27734             else {
27735
27736                 if ( $rtokens->[ $i + 1 ] eq '=' ) {
27737                     $tok  = '*=';
27738                     $type = $tok;
27739                     $i++;
27740                 }
27741                 elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
27742                     $tok  = '**';
27743                     $type = $tok;
27744                     $i++;
27745                     if ( $rtokens->[ $i + 1 ] eq '=' ) {
27746                         $tok  = '**=';
27747                         $type = $tok;
27748                         $i++;
27749                     }
27750                 }
27751             }
27752         },
27753         '.' => sub {    # what kind of . ?
27754
27755             if ( $expecting != OPERATOR ) {
27756                 scan_number();
27757                 if ( $type eq '.' ) {
27758                     error_if_expecting_TERM()
27759                       if ( $expecting == TERM );
27760                 }
27761             }
27762             else {
27763             }
27764         },
27765         ':' => sub {
27766
27767             # if this is the first nonblank character, call it a label
27768             # since perl seems to just swallow it
27769             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
27770                 $type = 'J';
27771             }
27772
27773             # ATTRS: check for a ':' which introduces an attribute list
27774             # (this might eventually get its own token type)
27775             elsif ( $statement_type =~ /^sub\b/ ) {
27776                 $type              = 'A';
27777                 $in_attribute_list = 1;
27778             }
27779
27780             # check for scalar attribute, such as
27781             # my $foo : shared = 1;
27782             elsif ($is_my_our{$statement_type}
27783                 && $current_depth[QUESTION_COLON] == 0 )
27784             {
27785                 $type              = 'A';
27786                 $in_attribute_list = 1;
27787             }
27788
27789             # otherwise, it should be part of a ?/: operator
27790             else {
27791                 ( $type_sequence, $indent_flag ) =
27792                   decrease_nesting_depth( QUESTION_COLON,
27793                     $rtoken_map->[$i_tok] );
27794                 if ( $last_nonblank_token eq '?' ) {
27795                     warning("Syntax error near ? :\n");
27796                 }
27797             }
27798         },
27799         '+' => sub {    # what kind of plus?
27800
27801             if ( $expecting == TERM ) {
27802                 my $number = scan_number();
27803
27804                 # unary plus is safest assumption if not a number
27805                 if ( !defined($number) ) { $type = 'p'; }
27806             }
27807             elsif ( $expecting == OPERATOR ) {
27808             }
27809             else {
27810                 if ( $next_type eq 'w' ) { $type = 'p' }
27811             }
27812         },
27813         '@' => sub {
27814
27815             error_if_expecting_OPERATOR("Array")
27816               if ( $expecting == OPERATOR );
27817             scan_identifier();
27818         },
27819         '%' => sub {    # hash or modulo?
27820
27821             # first guess is hash if no following blank
27822             if ( $expecting == UNKNOWN ) {
27823                 if ( $next_type ne 'b' ) { $expecting = TERM }
27824             }
27825             if ( $expecting == TERM ) {
27826                 scan_identifier();
27827             }
27828         },
27829         '[' => sub {
27830             $square_bracket_type[ ++$square_bracket_depth ] =
27831               $last_nonblank_token;
27832             ( $type_sequence, $indent_flag ) =
27833               increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
27834
27835             # It may seem odd, but structural square brackets have
27836             # type '{' and '}'.  This simplifies the indentation logic.
27837             if ( !is_non_structural_brace() ) {
27838                 $type = '{';
27839             }
27840             $square_bracket_structural_type[$square_bracket_depth] = $type;
27841         },
27842         ']' => sub {
27843             ( $type_sequence, $indent_flag ) =
27844               decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
27845
27846             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
27847             {
27848                 $type = '}';
27849             }
27850
27851             # propagate type information for smartmatch operator.  This is
27852             # necessary to enable us to know if an operator or term is expected
27853             # next.
27854             if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
27855                 $tok = $square_bracket_type[$square_bracket_depth];
27856             }
27857
27858             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
27859         },
27860         '-' => sub {    # what kind of minus?
27861
27862             if ( ( $expecting != OPERATOR )
27863                 && $is_file_test_operator{$next_tok} )
27864             {
27865                 my ( $next_nonblank_token, $i_next ) =
27866                   find_next_nonblank_token( $i + 1, $rtokens,
27867                     $max_token_index );
27868
27869                 # check for a quoted word like "-w=>xx";
27870                 # it is sufficient to just check for a following '='
27871                 if ( $next_nonblank_token eq '=' ) {
27872                     $type = 'm';
27873                 }
27874                 else {
27875                     $i++;
27876                     $tok .= $next_tok;
27877                     $type = 'F';
27878                 }
27879             }
27880             elsif ( $expecting == TERM ) {
27881                 my $number = scan_number();
27882
27883                 # maybe part of bareword token? unary is safest
27884                 if ( !defined($number) ) { $type = 'm'; }
27885
27886             }
27887             elsif ( $expecting == OPERATOR ) {
27888             }
27889             else {
27890
27891                 if ( $next_type eq 'w' ) {
27892                     $type = 'm';
27893                 }
27894             }
27895         },
27896
27897         '^' => sub {
27898
27899             # check for special variables like ${^WARNING_BITS}
27900             if ( $expecting == TERM ) {
27901
27902                 # FIXME: this should work but will not catch errors
27903                 # because we also have to be sure that previous token is
27904                 # a type character ($,@,%).
27905                 if ( $last_nonblank_token eq '{'
27906                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
27907                 {
27908
27909                     if ( $next_tok eq 'W' ) {
27910                         $tokenizer_self->{_saw_perl_dash_w} = 1;
27911                     }
27912                     $tok  = $tok . $next_tok;
27913                     $i    = $i + 1;
27914                     $type = 'w';
27915                 }
27916
27917                 else {
27918                     unless ( error_if_expecting_TERM() ) {
27919
27920                         # Something like this is valid but strange:
27921                         # undef ^I;
27922                         complain("The '^' seems unusual here\n");
27923                     }
27924                 }
27925             }
27926         },
27927
27928         '::' => sub {    # probably a sub call
27929             scan_bare_identifier();
27930         },
27931         '<<' => sub {    # maybe a here-doc?
27932             return
27933               unless ( $i < $max_token_index )
27934               ;          # here-doc not possible if end of line
27935
27936             if ( $expecting != OPERATOR ) {
27937                 my ( $found_target, $here_doc_target, $here_quote_character,
27938                     $saw_error );
27939                 (
27940                     $found_target, $here_doc_target, $here_quote_character, $i,
27941                     $saw_error
27942                   )
27943                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
27944                     $max_token_index );
27945
27946                 if ($found_target) {
27947                     push @{$rhere_target_list},
27948                       [ $here_doc_target, $here_quote_character ];
27949                     $type = 'h';
27950                     if ( length($here_doc_target) > 80 ) {
27951                         my $truncated = substr( $here_doc_target, 0, 80 );
27952                         complain("Long here-target: '$truncated' ...\n");
27953                     }
27954                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
27955                         complain(
27956                             "Unconventional here-target: '$here_doc_target'\n");
27957                     }
27958                 }
27959                 elsif ( $expecting == TERM ) {
27960                     unless ($saw_error) {
27961
27962                         # shouldn't happen..
27963                         warning("Program bug; didn't find here doc target\n");
27964                         report_definite_bug();
27965                     }
27966                 }
27967             }
27968             else {
27969             }
27970         },
27971         '<<~' => sub {    # a here-doc, new type added in v26
27972             return
27973               unless ( $i < $max_token_index )
27974               ;           # here-doc not possible if end of line
27975             if ( $expecting != OPERATOR ) {
27976                 my ( $found_target, $here_doc_target, $here_quote_character,
27977                     $saw_error );
27978                 (
27979                     $found_target, $here_doc_target, $here_quote_character, $i,
27980                     $saw_error
27981                   )
27982                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
27983                     $max_token_index );
27984
27985                 if ($found_target) {
27986
27987                     if ( length($here_doc_target) > 80 ) {
27988                         my $truncated = substr( $here_doc_target, 0, 80 );
27989                         complain("Long here-target: '$truncated' ...\n");
27990                     }
27991                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
27992                         complain(
27993                             "Unconventional here-target: '$here_doc_target'\n");
27994                     }
27995
27996                     # Note that we put a leading space on the here quote
27997                     # character indicate that it may be preceded by spaces
27998                     $here_quote_character = " " . $here_quote_character;
27999                     push @{$rhere_target_list},
28000                       [ $here_doc_target, $here_quote_character ];
28001                     $type = 'h';
28002                 }
28003                 elsif ( $expecting == TERM ) {
28004                     unless ($saw_error) {
28005
28006                         # shouldn't happen..
28007                         warning("Program bug; didn't find here doc target\n");
28008                         report_definite_bug();
28009                     }
28010                 }
28011             }
28012             else {
28013             }
28014         },
28015         '->' => sub {
28016
28017             # if -> points to a bare word, we must scan for an identifier,
28018             # otherwise something like ->y would look like the y operator
28019             scan_identifier();
28020         },
28021
28022         # type = 'pp' for pre-increment, '++' for post-increment
28023         '++' => sub {
28024             if ( $expecting == TERM ) { $type = 'pp' }
28025             elsif ( $expecting == UNKNOWN ) {
28026                 my ( $next_nonblank_token, $i_next ) =
28027                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
28028                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
28029             }
28030         },
28031
28032         '=>' => sub {
28033             if ( $last_nonblank_type eq $tok ) {
28034                 complain("Repeated '=>'s \n");
28035             }
28036
28037             # patch for operator_expected: note if we are in the list (use.t)
28038             # TODO: make version numbers a new token type
28039             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
28040         },
28041
28042         # type = 'mm' for pre-decrement, '--' for post-decrement
28043         '--' => sub {
28044
28045             if ( $expecting == TERM ) { $type = 'mm' }
28046             elsif ( $expecting == UNKNOWN ) {
28047                 my ( $next_nonblank_token, $i_next ) =
28048                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
28049                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
28050             }
28051         },
28052
28053         '&&' => sub {
28054             error_if_expecting_TERM()
28055               if ( $expecting == TERM );
28056         },
28057
28058         '||' => sub {
28059             error_if_expecting_TERM()
28060               if ( $expecting == TERM );
28061         },
28062
28063         '//' => sub {
28064             error_if_expecting_TERM()
28065               if ( $expecting == TERM );
28066         },
28067     };
28068
28069     # ------------------------------------------------------------
28070     # end hash of code for handling individual token types
28071     # ------------------------------------------------------------
28072
28073     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
28074
28075     # These block types terminate statements and do not need a trailing
28076     # semicolon
28077     # patched for SWITCH/CASE/
28078     my %is_zero_continuation_block_type;
28079     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
28080       if elsif else unless while until for foreach switch case given when);
28081     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
28082
28083     my %is_not_zero_continuation_block_type;
28084     @_ = qw(sort grep map do eval);
28085     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
28086
28087     my %is_logical_container;
28088     @_ = qw(if elsif unless while and or err not && !  || for foreach);
28089     @is_logical_container{@_} = (1) x scalar(@_);
28090
28091     my %is_binary_type;
28092     @_ = qw(|| &&);
28093     @is_binary_type{@_} = (1) x scalar(@_);
28094
28095     my %is_binary_keyword;
28096     @_ = qw(and or err eq ne cmp);
28097     @is_binary_keyword{@_} = (1) x scalar(@_);
28098
28099     # 'L' is token for opening { at hash key
28100     my %is_opening_type;
28101     @_ = qw" L { ( [ ";
28102     @is_opening_type{@_} = (1) x scalar(@_);
28103
28104     # 'R' is token for closing } at hash key
28105     my %is_closing_type;
28106     @_ = qw" R } ) ] ";
28107     @is_closing_type{@_} = (1) x scalar(@_);
28108
28109     my %is_redo_last_next_goto;
28110     @_ = qw(redo last next goto);
28111     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
28112
28113     my %is_use_require;
28114     @_ = qw(use require);
28115     @is_use_require{@_} = (1) x scalar(@_);
28116
28117     my %is_sub_package;
28118     @_ = qw(sub package);
28119     @is_sub_package{@_} = (1) x scalar(@_);
28120
28121     # This hash holds the hash key in $tokenizer_self for these keywords:
28122     my %is_format_END_DATA = (
28123         'format'   => '_in_format',
28124         '__END__'  => '_in_end',
28125         '__DATA__' => '_in_data',
28126     );
28127
28128     # original ref: camel 3 p 147,
28129     # but perl may accept undocumented flags
28130     # perl 5.10 adds 'p' (preserve)
28131     # Perl version 5.22 added 'n'
28132     # From http://perldoc.perl.org/perlop.html we have
28133     # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
28134     # s/PATTERN/REPLACEMENT/msixpodualngcer
28135     # y/SEARCHLIST/REPLACEMENTLIST/cdsr
28136     # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
28137     # qr/STRING/msixpodualn
28138     my %quote_modifiers = (
28139         's'  => '[msixpodualngcer]',
28140         'y'  => '[cdsr]',
28141         'tr' => '[cdsr]',
28142         'm'  => '[msixpodualngc]',
28143         'qr' => '[msixpodualn]',
28144         'q'  => "",
28145         'qq' => "",
28146         'qw' => "",
28147         'qx' => "",
28148     );
28149
28150     # table showing how many quoted things to look for after quote operator..
28151     # s, y, tr have 2 (pattern and replacement)
28152     # others have 1 (pattern only)
28153     my %quote_items = (
28154         's'  => 2,
28155         'y'  => 2,
28156         'tr' => 2,
28157         'm'  => 1,
28158         'qr' => 1,
28159         'q'  => 1,
28160         'qq' => 1,
28161         'qw' => 1,
28162         'qx' => 1,
28163     );
28164
28165     sub tokenize_this_line {
28166
28167   # This routine breaks a line of perl code into tokens which are of use in
28168   # indentation and reformatting.  One of my goals has been to define tokens
28169   # such that a newline may be inserted between any pair of tokens without
28170   # changing or invalidating the program. This version comes close to this,
28171   # although there are necessarily a few exceptions which must be caught by
28172   # the formatter.  Many of these involve the treatment of bare words.
28173   #
28174   # The tokens and their types are returned in arrays.  See previous
28175   # routine for their names.
28176   #
28177   # See also the array "valid_token_types" in the BEGIN section for an
28178   # up-to-date list.
28179   #
28180   # To simplify things, token types are either a single character, or they
28181   # are identical to the tokens themselves.
28182   #
28183   # As a debugging aid, the -D flag creates a file containing a side-by-side
28184   # comparison of the input string and its tokenization for each line of a file.
28185   # This is an invaluable debugging aid.
28186   #
28187   # In addition to tokens, and some associated quantities, the tokenizer
28188   # also returns flags indication any special line types.  These include
28189   # quotes, here_docs, formats.
28190   #
28191   # -----------------------------------------------------------------------
28192   #
28193   # How to add NEW_TOKENS:
28194   #
28195   # New token types will undoubtedly be needed in the future both to keep up
28196   # with changes in perl and to help adapt the tokenizer to other applications.
28197   #
28198   # Here are some notes on the minimal steps.  I wrote these notes while
28199   # adding the 'v' token type for v-strings, which are things like version
28200   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
28201   # can use your editor to search for the string "NEW_TOKENS" to find the
28202   # appropriate sections to change):
28203   #
28204   # *. Try to talk somebody else into doing it!  If not, ..
28205   #
28206   # *. Make a backup of your current version in case things don't work out!
28207   #
28208   # *. Think of a new, unused character for the token type, and add to
28209   # the array @valid_token_types in the BEGIN section of this package.
28210   # For example, I used 'v' for v-strings.
28211   #
28212   # *. Implement coding to recognize the $type of the token in this routine.
28213   # This is the hardest part, and is best done by imitating or modifying
28214   # some of the existing coding.  For example, to recognize v-strings, I
28215   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
28216   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
28217   #
28218   # *. Update sub operator_expected.  This update is critically important but
28219   # the coding is trivial.  Look at the comments in that routine for help.
28220   # For v-strings, which should behave like numbers, I just added 'v' to the
28221   # regex used to handle numbers and strings (types 'n' and 'Q').
28222   #
28223   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
28224   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
28225   # skip this step and take the default at first, then adjust later to get
28226   # desired results.  For adding type 'v', I looked at sub bond_strength and
28227   # saw that number type 'n' was using default strengths, so I didn't do
28228   # anything.  I may tune it up someday if I don't like the way line
28229   # breaks with v-strings look.
28230   #
28231   # *. Implement a 'whitespace' rule in sub set_whitespace_flags in
28232   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
28233   # and saw that type 'n' used spaces on both sides, so I just added 'v'
28234   # to the array @spaces_both_sides.
28235   #
28236   # *. Update HtmlWriter package so that users can colorize the token as
28237   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
28238   # that package.  For v-strings, I initially chose to use a default color
28239   # equal to the default for numbers, but it might be nice to change that
28240   # eventually.
28241   #
28242   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
28243   #
28244   # *. Run lots and lots of debug tests.  Start with special files designed
28245   # to test the new token type.  Run with the -D flag to create a .DEBUG
28246   # file which shows the tokenization.  When these work ok, test as many old
28247   # scripts as possible.  Start with all of the '.t' files in the 'test'
28248   # directory of the distribution file.  Compare .tdy output with previous
28249   # version and updated version to see the differences.  Then include as
28250   # many more files as possible. My own technique has been to collect a huge
28251   # number of perl scripts (thousands!) into one directory and run perltidy
28252   # *, then run diff between the output of the previous version and the
28253   # current version.
28254   #
28255   # *. For another example, search for the smartmatch operator '~~'
28256   # with your editor to see where updates were made for it.
28257   #
28258   # -----------------------------------------------------------------------
28259
28260         my $line_of_tokens = shift;
28261         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
28262
28263         # patch while coding change is underway
28264         # make callers private data to allow access
28265         # $tokenizer_self = $caller_tokenizer_self;
28266
28267         # extract line number for use in error messages
28268         $input_line_number = $line_of_tokens->{_line_number};
28269
28270         # reinitialize for multi-line quote
28271         $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
28272
28273         # check for pod documentation
28274         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
28275
28276             # must not be in multi-line quote
28277             # and must not be in an equation
28278             if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) )
28279             {
28280                 $tokenizer_self->{_in_pod} = 1;
28281                 return;
28282             }
28283         }
28284
28285         $input_line = $untrimmed_input_line;
28286
28287         chomp $input_line;
28288
28289         # trim start of this line unless we are continuing a quoted line
28290         # do not trim end because we might end in a quote (test: deken4.pl)
28291         # Perl::Tidy::Formatter will delete needless trailing blanks
28292         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
28293             $input_line =~ s/^\s*//;    # trim left end
28294         }
28295
28296         # Set a flag to indicate if we might be at an __END__ or __DATA__ line
28297         # This will be used below to avoid quoting a bare word followed by
28298         # a fat comma.
28299         my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
28300
28301         # update the copy of the line for use in error messages
28302         # This must be exactly what we give the pre_tokenizer
28303         $tokenizer_self->{_line_text} = $input_line;
28304
28305         # re-initialize for the main loop
28306         $routput_token_list     = [];    # stack of output token indexes
28307         $routput_token_type     = [];    # token types
28308         $routput_block_type     = [];    # types of code block
28309         $routput_container_type = [];    # paren types, such as if, elsif, ..
28310         $routput_type_sequence  = [];    # nesting sequential number
28311
28312         $rhere_target_list = [];
28313
28314         $tok             = $last_nonblank_token;
28315         $type            = $last_nonblank_type;
28316         $prototype       = $last_nonblank_prototype;
28317         $last_nonblank_i = -1;
28318         $block_type      = $last_nonblank_block_type;
28319         $container_type  = $last_nonblank_container_type;
28320         $type_sequence   = $last_nonblank_type_sequence;
28321         $indent_flag     = 0;
28322         $peeked_ahead    = 0;
28323
28324         # tokenization is done in two stages..
28325         # stage 1 is a very simple pre-tokenization
28326         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
28327
28328         # a little optimization for a full-line comment
28329         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
28330             $max_tokens_wanted = 1    # no use tokenizing a comment
28331         }
28332
28333         # start by breaking the line into pre-tokens
28334         ( $rtokens, $rtoken_map, $rtoken_type ) =
28335           pre_tokenize( $input_line, $max_tokens_wanted );
28336
28337         $max_token_index = scalar( @{$rtokens} ) - 1;
28338         push( @{$rtokens}, ' ', ' ', ' ' );  # extra whitespace simplifies logic
28339         push( @{$rtoken_map},  0,   0,   0 );     # shouldn't be referenced
28340         push( @{$rtoken_type}, 'b', 'b', 'b' );
28341
28342         # initialize for main loop
28343         foreach my $ii ( 0 .. $max_token_index + 3 ) {
28344             $routput_token_type->[$ii]     = "";
28345             $routput_block_type->[$ii]     = "";
28346             $routput_container_type->[$ii] = "";
28347             $routput_type_sequence->[$ii]  = "";
28348             $routput_indent_flag->[$ii]    = 0;
28349         }
28350         $i     = -1;
28351         $i_tok = -1;
28352
28353         # ------------------------------------------------------------
28354         # begin main tokenization loop
28355         # ------------------------------------------------------------
28356
28357         # we are looking at each pre-token of one line and combining them
28358         # into tokens
28359         while ( ++$i <= $max_token_index ) {
28360
28361             if ($in_quote) {    # continue looking for end of a quote
28362                 $type = $quote_type;
28363
28364                 unless ( @{$routput_token_list} )
28365                 {               # initialize if continuation line
28366                     push( @{$routput_token_list}, $i );
28367                     $routput_token_type->[$i] = $type;
28368
28369                 }
28370                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
28371
28372                 # scan for the end of the quote or pattern
28373                 (
28374                     $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
28375                     $quoted_string_1, $quoted_string_2
28376                   )
28377                   = do_quote(
28378                     $i,               $in_quote,    $quote_character,
28379                     $quote_pos,       $quote_depth, $quoted_string_1,
28380                     $quoted_string_2, $rtokens,     $rtoken_map,
28381                     $max_token_index
28382                   );
28383
28384                 # all done if we didn't find it
28385                 last if ($in_quote);
28386
28387                 # save pattern and replacement text for rescanning
28388                 my $qs1 = $quoted_string_1;
28389                 my $qs2 = $quoted_string_2;
28390
28391                 # re-initialize for next search
28392                 $quote_character = '';
28393                 $quote_pos       = 0;
28394                 $quote_type      = 'Q';
28395                 $quoted_string_1 = "";
28396                 $quoted_string_2 = "";
28397                 last if ( ++$i > $max_token_index );
28398
28399                 # look for any modifiers
28400                 if ($allowed_quote_modifiers) {
28401
28402                     # check for exact quote modifiers
28403                     if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
28404                         my $str = $rtokens->[$i];
28405                         my $saw_modifier_e;
28406                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
28407                             my $pos = pos($str);
28408                             my $char = substr( $str, $pos - 1, 1 );
28409                             $saw_modifier_e ||= ( $char eq 'e' );
28410                         }
28411
28412                         # For an 'e' quote modifier we must scan the replacement
28413                         # text for here-doc targets.
28414                         if ($saw_modifier_e) {
28415
28416                             my $rht = scan_replacement_text($qs1);
28417
28418                             # Change type from 'Q' to 'h' for quotes with
28419                             # here-doc targets so that the formatter (see sub
28420                             # print_line_of_tokens) will not make any line
28421                             # breaks after this point.
28422                             if ($rht) {
28423                                 push @{$rhere_target_list}, @{$rht};
28424                                 $type = 'h';
28425                                 if ( $i_tok < 0 ) {
28426                                     my $ilast = $routput_token_list->[-1];
28427                                     $routput_token_type->[$ilast] = $type;
28428                                 }
28429                             }
28430                         }
28431
28432                         if ( defined( pos($str) ) ) {
28433
28434                             # matched
28435                             if ( pos($str) == length($str) ) {
28436                                 last if ( ++$i > $max_token_index );
28437                             }
28438
28439                             # Looks like a joined quote modifier
28440                             # and keyword, maybe something like
28441                             # s/xxx/yyy/gefor @k=...
28442                             # Example is "galgen.pl".  Would have to split
28443                             # the word and insert a new token in the
28444                             # pre-token list.  This is so rare that I haven't
28445                             # done it.  Will just issue a warning citation.
28446
28447                             # This error might also be triggered if my quote
28448                             # modifier characters are incomplete
28449                             else {
28450                                 warning(<<EOM);
28451
28452 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
28453 Please put a space between quote modifiers and trailing keywords.
28454 EOM
28455
28456                          # print "token $rtokens->[$i]\n";
28457                          # my $num = length($str) - pos($str);
28458                          # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
28459                          # print "continuing with new token $rtokens->[$i]\n";
28460
28461                                 # skipping past this token does least damage
28462                                 last if ( ++$i > $max_token_index );
28463                             }
28464                         }
28465                         else {
28466
28467                             # example file: rokicki4.pl
28468                             # This error might also be triggered if my quote
28469                             # modifier characters are incomplete
28470                             write_logfile_entry(
28471 "Note: found word $str at quote modifier location\n"
28472                             );
28473                         }
28474                     }
28475
28476                     # re-initialize
28477                     $allowed_quote_modifiers = "";
28478                 }
28479             }
28480
28481             unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
28482
28483                 # try to catch some common errors
28484                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
28485
28486                     if ( $last_nonblank_token eq 'eq' ) {
28487                         complain("Should 'eq' be '==' here ?\n");
28488                     }
28489                     elsif ( $last_nonblank_token eq 'ne' ) {
28490                         complain("Should 'ne' be '!=' here ?\n");
28491                     }
28492                 }
28493
28494                 $last_last_nonblank_token      = $last_nonblank_token;
28495                 $last_last_nonblank_type       = $last_nonblank_type;
28496                 $last_last_nonblank_block_type = $last_nonblank_block_type;
28497                 $last_last_nonblank_container_type =
28498                   $last_nonblank_container_type;
28499                 $last_last_nonblank_type_sequence =
28500                   $last_nonblank_type_sequence;
28501                 $last_nonblank_token          = $tok;
28502                 $last_nonblank_type           = $type;
28503                 $last_nonblank_prototype      = $prototype;
28504                 $last_nonblank_block_type     = $block_type;
28505                 $last_nonblank_container_type = $container_type;
28506                 $last_nonblank_type_sequence  = $type_sequence;
28507                 $last_nonblank_i              = $i_tok;
28508             }
28509
28510             # store previous token type
28511             if ( $i_tok >= 0 ) {
28512                 $routput_token_type->[$i_tok]     = $type;
28513                 $routput_block_type->[$i_tok]     = $block_type;
28514                 $routput_container_type->[$i_tok] = $container_type;
28515                 $routput_type_sequence->[$i_tok]  = $type_sequence;
28516                 $routput_indent_flag->[$i_tok]    = $indent_flag;
28517             }
28518             my $pre_tok  = $rtokens->[$i];        # get the next pre-token
28519             my $pre_type = $rtoken_type->[$i];    # and type
28520             $tok  = $pre_tok;
28521             $type = $pre_type;                    # to be modified as necessary
28522             $block_type = "";    # blank for all tokens except code block braces
28523             $container_type = "";    # blank for all tokens except some parens
28524             $type_sequence  = "";    # blank for all tokens except ?/:
28525             $indent_flag    = 0;
28526             $prototype = "";    # blank for all tokens except user defined subs
28527             $i_tok     = $i;
28528
28529             # this pre-token will start an output token
28530             push( @{$routput_token_list}, $i_tok );
28531
28532             # continue gathering identifier if necessary
28533             # but do not start on blanks and comments
28534             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
28535
28536                 if ( $id_scan_state =~ /^(sub|package)/ ) {
28537                     scan_id();
28538                 }
28539                 else {
28540                     scan_identifier();
28541                 }
28542
28543                 last if ($id_scan_state);
28544                 next if ( ( $i > 0 ) || $type );
28545
28546                 # didn't find any token; start over
28547                 $type = $pre_type;
28548                 $tok  = $pre_tok;
28549             }
28550
28551             # handle whitespace tokens..
28552             next if ( $type eq 'b' );
28553             my $prev_tok  = $i > 0 ? $rtokens->[ $i - 1 ]     : ' ';
28554             my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b';
28555
28556             # Build larger tokens where possible, since we are not in a quote.
28557             #
28558             # First try to assemble digraphs.  The following tokens are
28559             # excluded and handled specially:
28560             # '/=' is excluded because the / might start a pattern.
28561             # 'x=' is excluded since it might be $x=, with $ on previous line
28562             # '**' and *= might be typeglobs of punctuation variables
28563             # I have allowed tokens starting with <, such as <=,
28564             # because I don't think these could be valid angle operators.
28565             # test file: storrs4.pl
28566             my $test_tok   = $tok . $rtokens->[ $i + 1 ];
28567             my $combine_ok = $is_digraph{$test_tok};
28568
28569             # check for special cases which cannot be combined
28570             if ($combine_ok) {
28571
28572                 # '//' must be defined_or operator if an operator is expected.
28573                 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
28574                 # could be migrated here for clarity
28575
28576               # Patch for RT#102371, misparsing a // in the following snippet:
28577               #     state $b //= ccc();
28578               # The solution is to always accept the digraph (or trigraph) after
28579               # token type 'Z' (possible file handle).  The reason is that
28580               # sub operator_expected gives TERM expected here, which is
28581               # wrong in this case.
28582                 if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
28583                     my $next_type = $rtokens->[ $i + 1 ];
28584                     my $expecting =
28585                       operator_expected( $prev_type, $tok, $next_type );
28586
28587                     # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
28588                     $combine_ok = 0 if ( $expecting == TERM );
28589                 }
28590
28591                 # Patch for RT #114359: Missparsing of "print $x ** 0.5;
28592                 # Accept the digraphs '**' only after type 'Z'
28593                 # Otherwise postpone the decision.
28594                 if ( $test_tok eq '**' ) {
28595                     if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
28596                 }
28597             }
28598
28599             if (
28600                 $combine_ok
28601
28602                 && ( $test_tok ne '/=' )    # might be pattern
28603                 && ( $test_tok ne 'x=' )    # might be $x
28604                 && ( $test_tok ne '*=' )    # typeglob?
28605
28606                 # Moved above as part of fix for
28607                 # RT #114359: Missparsing of "print $x ** 0.5;
28608                 # && ( $test_tok ne '**' )    # typeglob?
28609               )
28610             {
28611                 $tok = $test_tok;
28612                 $i++;
28613
28614                 # Now try to assemble trigraphs.  Note that all possible
28615                 # perl trigraphs can be constructed by appending a character
28616                 # to a digraph.
28617                 $test_tok = $tok . $rtokens->[ $i + 1 ];
28618
28619                 if ( $is_trigraph{$test_tok} ) {
28620                     $tok = $test_tok;
28621                     $i++;
28622                 }
28623
28624                 # The only current tetragraph is the double diamond operator
28625                 # and its first three characters are not a trigraph, so
28626                 # we do can do a special test for it
28627                 elsif ( $test_tok eq '<<>' ) {
28628                     $test_tok .= $rtokens->[ $i + 2 ];
28629                     if ( $is_tetragraph{$test_tok} ) {
28630                         $tok = $test_tok;
28631                         $i += 2;
28632                     }
28633                 }
28634             }
28635
28636             $type      = $tok;
28637             $next_tok  = $rtokens->[ $i + 1 ];
28638             $next_type = $rtoken_type->[ $i + 1 ];
28639
28640             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
28641                 local $" = ')(';
28642                 my @debug_list = (
28643                     $last_nonblank_token,      $tok,
28644                     $next_tok,                 $brace_depth,
28645                     $brace_type[$brace_depth], $paren_depth,
28646                     $paren_type[$paren_depth]
28647                 );
28648                 print STDOUT "TOKENIZE:(@debug_list)\n";
28649             };
28650
28651             # turn off attribute list on first non-blank, non-bareword
28652             if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
28653
28654             ###############################################################
28655             # We have the next token, $tok.
28656             # Now we have to examine this token and decide what it is
28657             # and define its $type
28658             #
28659             # section 1: bare words
28660             ###############################################################
28661
28662             if ( $pre_type eq 'w' ) {
28663                 $expecting = operator_expected( $prev_type, $tok, $next_type );
28664                 my ( $next_nonblank_token, $i_next ) =
28665                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
28666
28667                 # ATTRS: handle sub and variable attributes
28668                 if ($in_attribute_list) {
28669
28670                     # treat bare word followed by open paren like qw(
28671                     if ( $next_nonblank_token eq '(' ) {
28672                         $in_quote                = $quote_items{'q'};
28673                         $allowed_quote_modifiers = $quote_modifiers{'q'};
28674                         $type                    = 'q';
28675                         $quote_type              = 'q';
28676                         next;
28677                     }
28678
28679                     # handle bareword not followed by open paren
28680                     else {
28681                         $type = 'w';
28682                         next;
28683                     }
28684                 }
28685
28686                 # quote a word followed by => operator
28687                 # unless the word __END__ or __DATA__ and the only word on
28688                 # the line.
28689                 if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
28690
28691                     if ( $rtokens->[ $i_next + 1 ] eq '>' ) {
28692                         if ( $is_constant{$current_package}{$tok} ) {
28693                             $type = 'C';
28694                         }
28695                         elsif ( $is_user_function{$current_package}{$tok} ) {
28696                             $type = 'U';
28697                             $prototype =
28698                               $user_function_prototype{$current_package}{$tok};
28699                         }
28700                         elsif ( $tok =~ /^v\d+$/ ) {
28701                             $type = 'v';
28702                             report_v_string($tok);
28703                         }
28704                         else { $type = 'w' }
28705
28706                         next;
28707                     }
28708                 }
28709
28710      # quote a bare word within braces..like xxx->{s}; note that we
28711      # must be sure this is not a structural brace, to avoid
28712      # mistaking {s} in the following for a quoted bare word:
28713      #     for(@[){s}bla}BLA}
28714      # Also treat q in something like var{-q} as a bare word, not qoute operator
28715                 if (
28716                     $next_nonblank_token eq '}'
28717                     && (
28718                         $last_nonblank_type eq 'L'
28719                         || (   $last_nonblank_type eq 'm'
28720                             && $last_last_nonblank_type eq 'L' )
28721                     )
28722                   )
28723                 {
28724                     $type = 'w';
28725                     next;
28726                 }
28727
28728                 # a bare word immediately followed by :: is not a keyword;
28729                 # use $tok_kw when testing for keywords to avoid a mistake
28730                 my $tok_kw = $tok;
28731                 if (   $rtokens->[ $i + 1 ] eq ':'
28732                     && $rtokens->[ $i + 2 ] eq ':' )
28733                 {
28734                     $tok_kw .= '::';
28735                 }
28736
28737                 # handle operator x (now we know it isn't $x=)
28738                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
28739                     if ( $tok eq 'x' ) {
28740
28741                         if ( $rtokens->[ $i + 1 ] eq '=' ) {    # x=
28742                             $tok  = 'x=';
28743                             $type = $tok;
28744                             $i++;
28745                         }
28746                         else {
28747                             $type = 'x';
28748                         }
28749                     }
28750
28751                     # FIXME: Patch: mark something like x4 as an integer for now
28752                     # It gets fixed downstream.  This is easier than
28753                     # splitting the pretoken.
28754                     else {
28755                         $type = 'n';
28756                     }
28757                 }
28758                 elsif ( $tok_kw eq 'CORE::' ) {
28759                     $type = $tok = $tok_kw;
28760                     $i += 2;
28761                 }
28762                 elsif ( ( $tok eq 'strict' )
28763                     and ( $last_nonblank_token eq 'use' ) )
28764                 {
28765                     $tokenizer_self->{_saw_use_strict} = 1;
28766                     scan_bare_identifier();
28767                 }
28768
28769                 elsif ( ( $tok eq 'warnings' )
28770                     and ( $last_nonblank_token eq 'use' ) )
28771                 {
28772                     $tokenizer_self->{_saw_perl_dash_w} = 1;
28773
28774                     # scan as identifier, so that we pick up something like:
28775                     # use warnings::register
28776                     scan_bare_identifier();
28777                 }
28778
28779                 elsif (
28780                        $tok eq 'AutoLoader'
28781                     && $tokenizer_self->{_look_for_autoloader}
28782                     && (
28783                         $last_nonblank_token eq 'use'
28784
28785                         # these regexes are from AutoSplit.pm, which we want
28786                         # to mimic
28787                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
28788                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
28789                     )
28790                   )
28791                 {
28792                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
28793                     $tokenizer_self->{_saw_autoloader}      = 1;
28794                     $tokenizer_self->{_look_for_autoloader} = 0;
28795                     scan_bare_identifier();
28796                 }
28797
28798                 elsif (
28799                        $tok eq 'SelfLoader'
28800                     && $tokenizer_self->{_look_for_selfloader}
28801                     && (   $last_nonblank_token eq 'use'
28802                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
28803                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
28804                   )
28805                 {
28806                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
28807                     $tokenizer_self->{_saw_selfloader}      = 1;
28808                     $tokenizer_self->{_look_for_selfloader} = 0;
28809                     scan_bare_identifier();
28810                 }
28811
28812                 elsif ( ( $tok eq 'constant' )
28813                     and ( $last_nonblank_token eq 'use' ) )
28814                 {
28815                     scan_bare_identifier();
28816                     my ( $next_nonblank_token, $i_next ) =
28817                       find_next_nonblank_token( $i, $rtokens,
28818                         $max_token_index );
28819
28820                     if ($next_nonblank_token) {
28821
28822                         if ( $is_keyword{$next_nonblank_token} ) {
28823
28824                             # Assume qw is used as a quote and okay, as in:
28825                             #  use constant qw{ DEBUG 0 };
28826                             # Not worth trying to parse for just a warning
28827
28828                             # NOTE: This warning is deactivated because recent
28829                             # versions of perl do not complain here, but
28830                             # the coding is retained for reference.
28831                             if ( 0 && $next_nonblank_token ne 'qw' ) {
28832                                 warning(
28833 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
28834                                 );
28835                             }
28836                         }
28837
28838                         # FIXME: could check for error in which next token is
28839                         # not a word (number, punctuation, ..)
28840                         else {
28841                             $is_constant{$current_package}{$next_nonblank_token}
28842                               = 1;
28843                         }
28844                     }
28845                 }
28846
28847                 # various quote operators
28848                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
28849 ##NICOL PATCH
28850                     if ( $expecting == OPERATOR ) {
28851
28852                         # Be careful not to call an error for a qw quote
28853                         # where a parenthesized list is allowed.  For example,
28854                         # it could also be a for/foreach construct such as
28855                         #
28856                         #    foreach my $key qw\Uno Due Tres Quadro\ {
28857                         #        print "Set $key\n";
28858                         #    }
28859                         #
28860
28861                         # Or it could be a function call.
28862                         # NOTE: Braces in something like &{ xxx } are not
28863                         # marked as a block, we might have a method call.
28864                         # &method(...), $method->(..), &{method}(...),
28865                         # $ref[2](list) is ok & short for $ref[2]->(list)
28866                         #
28867                         # See notes in 'sub code_block_type' and
28868                         # 'sub is_non_structural_brace'
28869
28870                         unless (
28871                             $tok eq 'qw'
28872                             && (   $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
28873                                 || $is_for_foreach{$want_paren} )
28874                           )
28875                         {
28876                             error_if_expecting_OPERATOR();
28877                         }
28878                     }
28879                     $in_quote                = $quote_items{$tok};
28880                     $allowed_quote_modifiers = $quote_modifiers{$tok};
28881
28882                    # All quote types are 'Q' except possibly qw quotes.
28883                    # qw quotes are special in that they may generally be trimmed
28884                    # of leading and trailing whitespace.  So they are given a
28885                    # separate type, 'q', unless requested otherwise.
28886                     $type =
28887                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
28888                       ? 'q'
28889                       : 'Q';
28890                     $quote_type = $type;
28891                 }
28892
28893                 # check for a statement label
28894                 elsif (
28895                        ( $next_nonblank_token eq ':' )
28896                     && ( $rtokens->[ $i_next + 1 ] ne ':' )
28897                     && ( $i_next <= $max_token_index )      # colon on same line
28898                     && label_ok()
28899                   )
28900                 {
28901                     if ( $tok !~ /[A-Z]/ ) {
28902                         push @{ $tokenizer_self->{_rlower_case_labels_at} },
28903                           $input_line_number;
28904                     }
28905                     $type = 'J';
28906                     $tok .= ':';
28907                     $i = $i_next;
28908                     next;
28909                 }
28910
28911                 #      'sub' || 'package'
28912                 elsif ( $is_sub_package{$tok_kw} ) {
28913                     error_if_expecting_OPERATOR()
28914                       if ( $expecting == OPERATOR );
28915                     scan_id();
28916                 }
28917
28918                 # Note on token types for format, __DATA__, __END__:
28919                 # It simplifies things to give these type ';', so that when we
28920                 # start rescanning we will be expecting a token of type TERM.
28921                 # We will switch to type 'k' before outputting the tokens.
28922                 elsif ( $is_format_END_DATA{$tok_kw} ) {
28923                     $type = ';';    # make tokenizer look for TERM next
28924                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
28925                     last;
28926                 }
28927
28928                 elsif ( $is_keyword{$tok_kw} ) {
28929                     $type = 'k';
28930
28931                     # Since for and foreach may not be followed immediately
28932                     # by an opening paren, we have to remember which keyword
28933                     # is associated with the next '('
28934                     if ( $is_for_foreach{$tok} ) {
28935                         if ( new_statement_ok() ) {
28936                             $want_paren = $tok;
28937                         }
28938                     }
28939
28940                     # recognize 'use' statements, which are special
28941                     elsif ( $is_use_require{$tok} ) {
28942                         $statement_type = $tok;
28943                         error_if_expecting_OPERATOR()
28944                           if ( $expecting == OPERATOR );
28945                     }
28946
28947                     # remember my and our to check for trailing ": shared"
28948                     elsif ( $is_my_our{$tok} ) {
28949                         $statement_type = $tok;
28950                     }
28951
28952                     # Check for misplaced 'elsif' and 'else', but allow isolated
28953                     # else or elsif blocks to be formatted.  This is indicated
28954                     # by a last noblank token of ';'
28955                     elsif ( $tok eq 'elsif' ) {
28956                         if (   $last_nonblank_token ne ';'
28957                             && $last_nonblank_block_type !~
28958                             /^(if|elsif|unless)$/ )
28959                         {
28960                             warning(
28961 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
28962                             );
28963                         }
28964                     }
28965                     elsif ( $tok eq 'else' ) {
28966
28967                         # patched for SWITCH/CASE
28968                         if (
28969                                $last_nonblank_token ne ';'
28970                             && $last_nonblank_block_type !~
28971                             /^(if|elsif|unless|case|when)$/
28972
28973                             # patch to avoid an unwanted error message for
28974                             # the case of a parenless 'case' (RT 105484):
28975                             # switch ( 1 ) { case x { 2 } else { } }
28976                             && $statement_type !~
28977                             /^(if|elsif|unless|case|when)$/
28978                           )
28979                         {
28980                             warning(
28981 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
28982                             );
28983                         }
28984                     }
28985                     elsif ( $tok eq 'continue' ) {
28986                         if (   $last_nonblank_token ne ';'
28987                             && $last_nonblank_block_type !~
28988                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
28989                         {
28990
28991                             # note: ';' '{' and '}' in list above
28992                             # because continues can follow bare blocks;
28993                             # ':' is labeled block
28994                             #
28995                             ############################################
28996                             # NOTE: This check has been deactivated because
28997                             # continue has an alternative usage for given/when
28998                             # blocks in perl 5.10
28999                             ## warning("'$tok' should follow a block\n");
29000                             ############################################
29001                         }
29002                     }
29003
29004                     # patch for SWITCH/CASE if 'case' and 'when are
29005                     # treated as keywords.
29006                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
29007                         $statement_type = $tok;    # next '{' is block
29008                     }
29009
29010                     #
29011                     # indent trailing if/unless/while/until
29012                     # outdenting will be handled by later indentation loop
29013 ## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
29014 ##$opt_o = 1
29015 ##  if !(
29016 ##             $opt_b
29017 ##          || $opt_c
29018 ##          || $opt_d
29019 ##          || $opt_f
29020 ##          || $opt_i
29021 ##          || $opt_l
29022 ##          || $opt_o
29023 ##          || $opt_x
29024 ##  );
29025 ##                    if (   $tok =~ /^(if|unless|while|until)$/
29026 ##                        && $next_nonblank_token ne '(' )
29027 ##                    {
29028 ##                        $indent_flag = 1;
29029 ##                    }
29030                 }
29031
29032                 # check for inline label following
29033                 #         /^(redo|last|next|goto)$/
29034                 elsif (( $last_nonblank_type eq 'k' )
29035                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
29036                 {
29037                     $type = 'j';
29038                     next;
29039                 }
29040
29041                 # something else --
29042                 else {
29043
29044                     scan_bare_identifier();
29045                     if ( $type eq 'w' ) {
29046
29047                         if ( $expecting == OPERATOR ) {
29048
29049                             # don't complain about possible indirect object
29050                             # notation.
29051                             # For example:
29052                             #   package main;
29053                             #   sub new($) { ... }
29054                             #   $b = new A::;  # calls A::new
29055                             #   $c = new A;    # same thing but suspicious
29056                             # This will call A::new but we have a 'new' in
29057                             # main:: which looks like a constant.
29058                             #
29059                             if ( $last_nonblank_type eq 'C' ) {
29060                                 if ( $tok !~ /::$/ ) {
29061                                     complain(<<EOM);
29062 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
29063        Maybe indirectet object notation?
29064 EOM
29065                                 }
29066                             }
29067                             else {
29068                                 error_if_expecting_OPERATOR("bareword");
29069                             }
29070                         }
29071
29072                         # mark bare words immediately followed by a paren as
29073                         # functions
29074                         $next_tok = $rtokens->[ $i + 1 ];
29075                         if ( $next_tok eq '(' ) {
29076                             $type = 'U';
29077                         }
29078
29079                         # underscore after file test operator is file handle
29080                         if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
29081                             $type = 'Z';
29082                         }
29083
29084                         # patch for SWITCH/CASE if 'case' and 'when are
29085                         # not treated as keywords:
29086                         if (
29087                             (
29088                                    $tok eq 'case'
29089                                 && $brace_type[$brace_depth] eq 'switch'
29090                             )
29091                             || (   $tok eq 'when'
29092                                 && $brace_type[$brace_depth] eq 'given' )
29093                           )
29094                         {
29095                             $statement_type = $tok;    # next '{' is block
29096                             $type = 'k';    # for keyword syntax coloring
29097                         }
29098
29099                         # patch for SWITCH/CASE if switch and given not keywords
29100                         # Switch is not a perl 5 keyword, but we will gamble
29101                         # and mark switch followed by paren as a keyword.  This
29102                         # is only necessary to get html syntax coloring nice,
29103                         # and does not commit this as being a switch/case.
29104                         if ( $next_nonblank_token eq '('
29105                             && ( $tok eq 'switch' || $tok eq 'given' ) )
29106                         {
29107                             $type = 'k';    # for keyword syntax coloring
29108                         }
29109                     }
29110                 }
29111             }
29112
29113             ###############################################################
29114             # section 2: strings of digits
29115             ###############################################################
29116             elsif ( $pre_type eq 'd' ) {
29117                 $expecting = operator_expected( $prev_type, $tok, $next_type );
29118                 error_if_expecting_OPERATOR("Number")
29119                   if ( $expecting == OPERATOR );
29120                 my $number = scan_number();
29121                 if ( !defined($number) ) {
29122
29123                     # shouldn't happen - we should always get a number
29124                     warning("non-number beginning with digit--program bug\n");
29125                     report_definite_bug();
29126                 }
29127             }
29128
29129             ###############################################################
29130             # section 3: all other tokens
29131             ###############################################################
29132
29133             else {
29134                 last if ( $tok eq '#' );
29135                 my $code = $tokenization_code->{$tok};
29136                 if ($code) {
29137                     $expecting =
29138                       operator_expected( $prev_type, $tok, $next_type );
29139                     $code->();
29140                     redo if $in_quote;
29141                 }
29142             }
29143         }
29144
29145         # -----------------------------
29146         # end of main tokenization loop
29147         # -----------------------------
29148
29149         if ( $i_tok >= 0 ) {
29150             $routput_token_type->[$i_tok]     = $type;
29151             $routput_block_type->[$i_tok]     = $block_type;
29152             $routput_container_type->[$i_tok] = $container_type;
29153             $routput_type_sequence->[$i_tok]  = $type_sequence;
29154             $routput_indent_flag->[$i_tok]    = $indent_flag;
29155         }
29156
29157         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
29158             $last_last_nonblank_token          = $last_nonblank_token;
29159             $last_last_nonblank_type           = $last_nonblank_type;
29160             $last_last_nonblank_block_type     = $last_nonblank_block_type;
29161             $last_last_nonblank_container_type = $last_nonblank_container_type;
29162             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
29163             $last_nonblank_token               = $tok;
29164             $last_nonblank_type                = $type;
29165             $last_nonblank_block_type          = $block_type;
29166             $last_nonblank_container_type      = $container_type;
29167             $last_nonblank_type_sequence       = $type_sequence;
29168             $last_nonblank_prototype           = $prototype;
29169         }
29170
29171         # reset indentation level if necessary at a sub or package
29172         # in an attempt to recover from a nesting error
29173         if ( $level_in_tokenizer < 0 ) {
29174             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
29175                 reset_indentation_level(0);
29176                 brace_warning("resetting level to 0 at $1 $2\n");
29177             }
29178         }
29179
29180         # all done tokenizing this line ...
29181         # now prepare the final list of tokens and types
29182
29183         my @token_type     = ();   # stack of output token types
29184         my @block_type     = ();   # stack of output code block types
29185         my @container_type = ();   # stack of output code container types
29186         my @type_sequence  = ();   # stack of output type sequence numbers
29187         my @tokens         = ();   # output tokens
29188         my @levels         = ();   # structural brace levels of output tokens
29189         my @slevels        = ();   # secondary nesting levels of output tokens
29190         my @nesting_tokens = ();   # string of tokens leading to this depth
29191         my @nesting_types  = ();   # string of token types leading to this depth
29192         my @nesting_blocks = ();   # string of block types leading to this depth
29193         my @nesting_lists  = ();   # string of list types leading to this depth
29194         my @ci_string = ();  # string needed to compute continuation indentation
29195         my @container_environment = ();    # BLOCK or LIST
29196         my $container_environment = '';
29197         my $im                    = -1;    # previous $i value
29198         my $num;
29199         my $ci_string_sum = ones_count($ci_string_in_tokenizer);
29200
29201 # Computing Token Indentation
29202 #
29203 #     The final section of the tokenizer forms tokens and also computes
29204 #     parameters needed to find indentation.  It is much easier to do it
29205 #     in the tokenizer than elsewhere.  Here is a brief description of how
29206 #     indentation is computed.  Perl::Tidy computes indentation as the sum
29207 #     of 2 terms:
29208 #
29209 #     (1) structural indentation, such as if/else/elsif blocks
29210 #     (2) continuation indentation, such as long parameter call lists.
29211 #
29212 #     These are occasionally called primary and secondary indentation.
29213 #
29214 #     Structural indentation is introduced by tokens of type '{', although
29215 #     the actual tokens might be '{', '(', or '['.  Structural indentation
29216 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
29217 #     is 4 characters if the standard indentation scheme is used.
29218 #
29219 #     Continuation indentation is introduced whenever a line at BLOCK level
29220 #     is broken before its termination.  Default continuation indentation
29221 #     is 2 characters in the standard indentation scheme.
29222 #
29223 #     Both types of indentation may be nested arbitrarily deep and
29224 #     interlaced.  The distinction between the two is somewhat arbitrary.
29225 #
29226 #     For each token, we will define two variables which would apply if
29227 #     the current statement were broken just before that token, so that
29228 #     that token started a new line:
29229 #
29230 #     $level = the structural indentation level,
29231 #     $ci_level = the continuation indentation level
29232 #
29233 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
29234 #     assuming defaults.  However, in some special cases it is customary
29235 #     to modify $ci_level from this strict value.
29236 #
29237 #     The total structural indentation is easy to compute by adding and
29238 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
29239 #     running value of this variable is $level_in_tokenizer.
29240 #
29241 #     The total continuation is much more difficult to compute, and requires
29242 #     several variables.  These variables are:
29243 #
29244 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
29245 #       each indentation level, if there are intervening open secondary
29246 #       structures just prior to that level.
29247 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
29248 #       if the last token at that level is "continued", meaning that it
29249 #       is not the first token of an expression.
29250 #     $nesting_block_string = a string of 1's and 0's indicating, for each
29251 #       indentation level, if the level is of type BLOCK or not.
29252 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
29253 #     $nesting_list_string = a string of 1's and 0's indicating, for each
29254 #       indentation level, if it is appropriate for list formatting.
29255 #       If so, continuation indentation is used to indent long list items.
29256 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
29257 #     @{$rslevel_stack} = a stack of total nesting depths at each
29258 #       structural indentation level, where "total nesting depth" means
29259 #       the nesting depth that would occur if every nesting token -- '{', '[',
29260 #       and '(' -- , regardless of context, is used to compute a nesting
29261 #       depth.
29262
29263         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
29264         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
29265
29266         my ( $ci_string_i, $level_i, $nesting_block_string_i,
29267             $nesting_list_string_i, $nesting_token_string_i,
29268             $nesting_type_string_i, );
29269
29270         foreach my $i ( @{$routput_token_list} )
29271         {    # scan the list of pre-tokens indexes
29272
29273             # self-checking for valid token types
29274             my $type                    = $routput_token_type->[$i];
29275             my $forced_indentation_flag = $routput_indent_flag->[$i];
29276
29277             # See if we should undo the $forced_indentation_flag.
29278             # Forced indentation after 'if', 'unless', 'while' and 'until'
29279             # expressions without trailing parens is optional and doesn't
29280             # always look good.  It is usually okay for a trailing logical
29281             # expression, but if the expression is a function call, code block,
29282             # or some kind of list it puts in an unwanted extra indentation
29283             # level which is hard to remove.
29284             #
29285             # Example where extra indentation looks ok:
29286             # return 1
29287             #   if $det_a < 0 and $det_b > 0
29288             #       or $det_a > 0 and $det_b < 0;
29289             #
29290             # Example where extra indentation is not needed because
29291             # the eval brace also provides indentation:
29292             # print "not " if defined eval {
29293             #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
29294             # };
29295             #
29296             # The following rule works fairly well:
29297             #   Undo the flag if the end of this line, or start of the next
29298             #   line, is an opening container token or a comma.
29299             # This almost always works, but if not after another pass it will
29300             # be stable.
29301             if ( $forced_indentation_flag && $type eq 'k' ) {
29302                 my $ixlast  = -1;
29303                 my $ilast   = $routput_token_list->[$ixlast];
29304                 my $toklast = $routput_token_type->[$ilast];
29305                 if ( $toklast eq '#' ) {
29306                     $ixlast--;
29307                     $ilast   = $routput_token_list->[$ixlast];
29308                     $toklast = $routput_token_type->[$ilast];
29309                 }
29310                 if ( $toklast eq 'b' ) {
29311                     $ixlast--;
29312                     $ilast   = $routput_token_list->[$ixlast];
29313                     $toklast = $routput_token_type->[$ilast];
29314                 }
29315                 if ( $toklast =~ /^[\{,]$/ ) {
29316                     $forced_indentation_flag = 0;
29317                 }
29318                 else {
29319                     ( $toklast, my $i_next ) =
29320                       find_next_nonblank_token( $max_token_index, $rtokens,
29321                         $max_token_index );
29322                     if ( $toklast =~ /^[\{,]$/ ) {
29323                         $forced_indentation_flag = 0;
29324                     }
29325                 }
29326             }
29327
29328             # if we are already in an indented if, see if we should outdent
29329             if ($indented_if_level) {
29330
29331                 # don't try to nest trailing if's - shouldn't happen
29332                 if ( $type eq 'k' ) {
29333                     $forced_indentation_flag = 0;
29334                 }
29335
29336                 # check for the normal case - outdenting at next ';'
29337                 elsif ( $type eq ';' ) {
29338                     if ( $level_in_tokenizer == $indented_if_level ) {
29339                         $forced_indentation_flag = -1;
29340                         $indented_if_level       = 0;
29341                     }
29342                 }
29343
29344                 # handle case of missing semicolon
29345                 elsif ( $type eq '}' ) {
29346                     if ( $level_in_tokenizer == $indented_if_level ) {
29347                         $indented_if_level = 0;
29348
29349                         # TBD: This could be a subroutine call
29350                         $level_in_tokenizer--;
29351                         if ( @{$rslevel_stack} > 1 ) {
29352                             pop( @{$rslevel_stack} );
29353                         }
29354                         if ( length($nesting_block_string) > 1 )
29355                         {    # true for valid script
29356                             chop $nesting_block_string;
29357                             chop $nesting_list_string;
29358                         }
29359
29360                     }
29361                 }
29362             }
29363
29364             my $tok = $rtokens->[$i];  # the token, but ONLY if same as pretoken
29365             $level_i = $level_in_tokenizer;
29366
29367             # This can happen by running perltidy on non-scripts
29368             # although it could also be bug introduced by programming change.
29369             # Perl silently accepts a 032 (^Z) and takes it as the end
29370             if ( !$is_valid_token_type{$type} ) {
29371                 my $val = ord($type);
29372                 warning(
29373                     "unexpected character decimal $val ($type) in script\n");
29374                 $tokenizer_self->{_in_error} = 1;
29375             }
29376
29377             # ----------------------------------------------------------------
29378             # TOKEN TYPE PATCHES
29379             #  output __END__, __DATA__, and format as type 'k' instead of ';'
29380             # to make html colors correct, etc.
29381             my $fix_type = $type;
29382             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
29383
29384             # output anonymous 'sub' as keyword
29385             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
29386
29387             # -----------------------------------------------------------------
29388
29389             $nesting_token_string_i = $nesting_token_string;
29390             $nesting_type_string_i  = $nesting_type_string;
29391             $nesting_block_string_i = $nesting_block_string;
29392             $nesting_list_string_i  = $nesting_list_string;
29393
29394             # set primary indentation levels based on structural braces
29395             # Note: these are set so that the leading braces have a HIGHER
29396             # level than their CONTENTS, which is convenient for indentation
29397             # Also, define continuation indentation for each token.
29398             if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
29399             {
29400
29401                 # use environment before updating
29402                 $container_environment =
29403                     $nesting_block_flag ? 'BLOCK'
29404                   : $nesting_list_flag  ? 'LIST'
29405                   :                       "";
29406
29407                 # if the difference between total nesting levels is not 1,
29408                 # there are intervening non-structural nesting types between
29409                 # this '{' and the previous unclosed '{'
29410                 my $intervening_secondary_structure = 0;
29411                 if ( @{$rslevel_stack} ) {
29412                     $intervening_secondary_structure =
29413                       $slevel_in_tokenizer - $rslevel_stack->[-1];
29414                 }
29415
29416      # Continuation Indentation
29417      #
29418      # Having tried setting continuation indentation both in the formatter and
29419      # in the tokenizer, I can say that setting it in the tokenizer is much,
29420      # much easier.  The formatter already has too much to do, and can't
29421      # make decisions on line breaks without knowing what 'ci' will be at
29422      # arbitrary locations.
29423      #
29424      # But a problem with setting the continuation indentation (ci) here
29425      # in the tokenizer is that we do not know where line breaks will actually
29426      # be.  As a result, we don't know if we should propagate continuation
29427      # indentation to higher levels of structure.
29428      #
29429      # For nesting of only structural indentation, we never need to do this.
29430      # For example, in a long if statement, like this
29431      #
29432      #   if ( !$output_block_type[$i]
29433      #     && ($in_statement_continuation) )
29434      #   {           <--outdented
29435      #       do_something();
29436      #   }
29437      #
29438      # the second line has ci but we do normally give the lines within the BLOCK
29439      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
29440      #
29441      # But consider something like this, where we have created a break after
29442      # an opening paren on line 1, and the paren is not (currently) a
29443      # structural indentation token:
29444      #
29445      # my $file = $menubar->Menubutton(
29446      #   qw/-text File -underline 0 -menuitems/ => [
29447      #       [
29448      #           Cascade    => '~View',
29449      #           -menuitems => [
29450      #           ...
29451      #
29452      # The second line has ci, so it would seem reasonable to propagate it
29453      # down, giving the third line 1 ci + 1 indentation.  This suggests the
29454      # following rule, which is currently used to propagating ci down: if there
29455      # are any non-structural opening parens (or brackets, or braces), before
29456      # an opening structural brace, then ci is propagated down, and otherwise
29457      # not.  The variable $intervening_secondary_structure contains this
29458      # information for the current token, and the string
29459      # "$ci_string_in_tokenizer" is a stack of previous values of this
29460      # variable.
29461
29462                 # save the current states
29463                 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
29464                 $level_in_tokenizer++;
29465
29466                 if ($forced_indentation_flag) {
29467
29468                     # break BEFORE '?' when there is forced indentation
29469                     if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
29470                     if ( $type eq 'k' ) {
29471                         $indented_if_level = $level_in_tokenizer;
29472                     }
29473
29474                     # do not change container environment here if we are not
29475                     # at a real list. Adding this check prevents "blinkers"
29476                     # often near 'unless" clauses, such as in the following
29477                     # code:
29478 ##          next
29479 ##            unless -e (
29480 ##                    $archive =
29481 ##                      File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
29482 ##            );
29483
29484                     $nesting_block_string .= "$nesting_block_flag";
29485                 }
29486                 else {
29487
29488                     if ( $routput_block_type->[$i] ) {
29489                         $nesting_block_flag = 1;
29490                         $nesting_block_string .= '1';
29491                     }
29492                     else {
29493                         $nesting_block_flag = 0;
29494                         $nesting_block_string .= '0';
29495                     }
29496                 }
29497
29498                 # we will use continuation indentation within containers
29499                 # which are not blocks and not logical expressions
29500                 my $bit = 0;
29501                 if ( !$routput_block_type->[$i] ) {
29502
29503                     # propagate flag down at nested open parens
29504                     if ( $routput_container_type->[$i] eq '(' ) {
29505                         $bit = 1 if $nesting_list_flag;
29506                     }
29507
29508                   # use list continuation if not a logical grouping
29509                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
29510                     else {
29511                         $bit = 1
29512                           unless
29513                           $is_logical_container{ $routput_container_type->[$i]
29514                           };
29515                     }
29516                 }
29517                 $nesting_list_string .= $bit;
29518                 $nesting_list_flag = $bit;
29519
29520                 $ci_string_in_tokenizer .=
29521                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
29522                 $ci_string_sum = ones_count($ci_string_in_tokenizer);
29523                 $continuation_string_in_tokenizer .=
29524                   ( $in_statement_continuation > 0 ) ? '1' : '0';
29525
29526    #  Sometimes we want to give an opening brace continuation indentation,
29527    #  and sometimes not.  For code blocks, we don't do it, so that the leading
29528    #  '{' gets outdented, like this:
29529    #
29530    #   if ( !$output_block_type[$i]
29531    #     && ($in_statement_continuation) )
29532    #   {           <--outdented
29533    #
29534    #  For other types, we will give them continuation indentation.  For example,
29535    #  here is how a list looks with the opening paren indented:
29536    #
29537    #     @LoL =
29538    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
29539    #         [ "homer", "marge", "bart" ], );
29540    #
29541    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
29542
29543                 my $total_ci = $ci_string_sum;
29544                 if (
29545                     !$routput_block_type->[$i]    # patch: skip for BLOCK
29546                     && ($in_statement_continuation)
29547                     && !( $forced_indentation_flag && $type eq ':' )
29548                   )
29549                 {
29550                     $total_ci += $in_statement_continuation
29551                       unless ( $ci_string_in_tokenizer =~ /1$/ );
29552                 }
29553
29554                 $ci_string_i               = $total_ci;
29555                 $in_statement_continuation = 0;
29556             }
29557
29558             elsif ($type eq '}'
29559                 || $type eq 'R'
29560                 || $forced_indentation_flag < 0 )
29561             {
29562
29563                 # only a nesting error in the script would prevent popping here
29564                 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
29565
29566                 $level_i = --$level_in_tokenizer;
29567
29568                 # restore previous level values
29569                 if ( length($nesting_block_string) > 1 )
29570                 {    # true for valid script
29571                     chop $nesting_block_string;
29572                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
29573                     chop $nesting_list_string;
29574                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
29575
29576                     chop $ci_string_in_tokenizer;
29577                     $ci_string_sum = ones_count($ci_string_in_tokenizer);
29578
29579                     $in_statement_continuation =
29580                       chop $continuation_string_in_tokenizer;
29581
29582                     # zero continuation flag at terminal BLOCK '}' which
29583                     # ends a statement.
29584                     if ( $routput_block_type->[$i] ) {
29585
29586                         # ...These include non-anonymous subs
29587                         # note: could be sub ::abc { or sub 'abc
29588                         if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
29589
29590                          # note: older versions of perl require the /gc modifier
29591                          # here or else the \G does not work.
29592                             if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
29593                             {
29594                                 $in_statement_continuation = 0;
29595                             }
29596                         }
29597
29598 # ...and include all block types except user subs with
29599 # block prototypes and these: (sort|grep|map|do|eval)
29600 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
29601                         elsif (
29602                             $is_zero_continuation_block_type{
29603                                 $routput_block_type->[$i]
29604                             } )
29605                         {
29606                             $in_statement_continuation = 0;
29607                         }
29608
29609                         # ..but these are not terminal types:
29610                         #     /^(sort|grep|map|do|eval)$/ )
29611                         elsif (
29612                             $is_not_zero_continuation_block_type{
29613                                 $routput_block_type->[$i]
29614                             } )
29615                         {
29616                         }
29617
29618                         # ..and a block introduced by a label
29619                         # /^\w+\s*:$/gc ) {
29620                         elsif ( $routput_block_type->[$i] =~ /:$/ ) {
29621                             $in_statement_continuation = 0;
29622                         }
29623
29624                         # user function with block prototype
29625                         else {
29626                             $in_statement_continuation = 0;
29627                         }
29628                     }
29629
29630                     # If we are in a list, then
29631                     # we must set continuation indentation at the closing
29632                     # paren of something like this (paren after $check):
29633                     #     assert(
29634                     #         __LINE__,
29635                     #         ( not defined $check )
29636                     #           or ref $check
29637                     #           or $check eq "new"
29638                     #           or $check eq "old",
29639                     #     );
29640                     elsif ( $tok eq ')' ) {
29641                         $in_statement_continuation = 1
29642                           if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
29643                     }
29644
29645                     elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
29646                 }
29647
29648                 # use environment after updating
29649                 $container_environment =
29650                     $nesting_block_flag ? 'BLOCK'
29651                   : $nesting_list_flag  ? 'LIST'
29652                   :                       "";
29653                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
29654                 $nesting_block_string_i = $nesting_block_string;
29655                 $nesting_list_string_i  = $nesting_list_string;
29656             }
29657
29658             # not a structural indentation type..
29659             else {
29660
29661                 $container_environment =
29662                     $nesting_block_flag ? 'BLOCK'
29663                   : $nesting_list_flag  ? 'LIST'
29664                   :                       "";
29665
29666                 # zero the continuation indentation at certain tokens so
29667                 # that they will be at the same level as its container.  For
29668                 # commas, this simplifies the -lp indentation logic, which
29669                 # counts commas.  For ?: it makes them stand out.
29670                 if ($nesting_list_flag) {
29671                     if ( $type =~ /^[,\?\:]$/ ) {
29672                         $in_statement_continuation = 0;
29673                     }
29674                 }
29675
29676                 # be sure binary operators get continuation indentation
29677                 if (
29678                     $container_environment
29679                     && (   $type eq 'k' && $is_binary_keyword{$tok}
29680                         || $is_binary_type{$type} )
29681                   )
29682                 {
29683                     $in_statement_continuation = 1;
29684                 }
29685
29686                 # continuation indentation is sum of any open ci from previous
29687                 # levels plus the current level
29688                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
29689
29690                 # update continuation flag ...
29691                 # if this isn't a blank or comment..
29692                 if ( $type ne 'b' && $type ne '#' ) {
29693
29694                     # and we are in a BLOCK
29695                     if ($nesting_block_flag) {
29696
29697                         # the next token after a ';' and label starts a new stmt
29698                         if ( $type eq ';' || $type eq 'J' ) {
29699                             $in_statement_continuation = 0;
29700                         }
29701
29702                         # otherwise, we are continuing the current statement
29703                         else {
29704                             $in_statement_continuation = 1;
29705                         }
29706                     }
29707
29708                     # if we are not in a BLOCK..
29709                     else {
29710
29711                         # do not use continuation indentation if not list
29712                         # environment (could be within if/elsif clause)
29713                         if ( !$nesting_list_flag ) {
29714                             $in_statement_continuation = 0;
29715                         }
29716
29717                         # otherwise, the token after a ',' starts a new term
29718
29719                         # Patch FOR RT#99961; no continuation after a ';'
29720                         # This is needed because perltidy currently marks
29721                         # a block preceded by a type character like % or @
29722                         # as a non block, to simplify formatting. But these
29723                         # are actually blocks and can have semicolons.
29724                         # See code_block_type() and is_non_structural_brace().
29725                         elsif ( $type eq ',' || $type eq ';' ) {
29726                             $in_statement_continuation = 0;
29727                         }
29728
29729                         # otherwise, we are continuing the current term
29730                         else {
29731                             $in_statement_continuation = 1;
29732                         }
29733                     }
29734                 }
29735             }
29736
29737             if ( $level_in_tokenizer < 0 ) {
29738                 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
29739                     $tokenizer_self->{_saw_negative_indentation} = 1;
29740                     warning("Starting negative indentation\n");
29741                 }
29742             }
29743
29744             # set secondary nesting levels based on all containment token types
29745             # Note: these are set so that the nesting depth is the depth
29746             # of the PREVIOUS TOKEN, which is convenient for setting
29747             # the strength of token bonds
29748             my $slevel_i = $slevel_in_tokenizer;
29749
29750             #    /^[L\{\(\[]$/
29751             if ( $is_opening_type{$type} ) {
29752                 $slevel_in_tokenizer++;
29753                 $nesting_token_string .= $tok;
29754                 $nesting_type_string  .= $type;
29755             }
29756
29757             #       /^[R\}\)\]]$/
29758             elsif ( $is_closing_type{$type} ) {
29759                 $slevel_in_tokenizer--;
29760                 my $char = chop $nesting_token_string;
29761
29762                 if ( $char ne $matching_start_token{$tok} ) {
29763                     $nesting_token_string .= $char . $tok;
29764                     $nesting_type_string  .= $type;
29765                 }
29766                 else {
29767                     chop $nesting_type_string;
29768                 }
29769             }
29770
29771             push( @block_type,            $routput_block_type->[$i] );
29772             push( @ci_string,             $ci_string_i );
29773             push( @container_environment, $container_environment );
29774             push( @container_type,        $routput_container_type->[$i] );
29775             push( @levels,                $level_i );
29776             push( @nesting_tokens,        $nesting_token_string_i );
29777             push( @nesting_types,         $nesting_type_string_i );
29778             push( @slevels,               $slevel_i );
29779             push( @token_type,            $fix_type );
29780             push( @type_sequence,         $routput_type_sequence->[$i] );
29781             push( @nesting_blocks,        $nesting_block_string );
29782             push( @nesting_lists,         $nesting_list_string );
29783
29784             # now form the previous token
29785             if ( $im >= 0 ) {
29786                 $num =
29787                   $rtoken_map->[$i] - $rtoken_map->[$im];  # how many characters
29788
29789                 if ( $num > 0 ) {
29790                     push( @tokens,
29791                         substr( $input_line, $rtoken_map->[$im], $num ) );
29792                 }
29793             }
29794             $im = $i;
29795         }
29796
29797         $num = length($input_line) - $rtoken_map->[$im];   # make the last token
29798         if ( $num > 0 ) {
29799             push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) );
29800         }
29801
29802         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
29803         $tokenizer_self->{_in_quote}          = $in_quote;
29804         $tokenizer_self->{_quote_target} =
29805           $in_quote ? matching_end_token($quote_character) : "";
29806         $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
29807
29808         $line_of_tokens->{_rtoken_type}            = \@token_type;
29809         $line_of_tokens->{_rtokens}                = \@tokens;
29810         $line_of_tokens->{_rblock_type}            = \@block_type;
29811         $line_of_tokens->{_rcontainer_type}        = \@container_type;
29812         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
29813         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
29814         $line_of_tokens->{_rlevels}                = \@levels;
29815         $line_of_tokens->{_rslevels}               = \@slevels;
29816         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
29817         $line_of_tokens->{_rci_levels}             = \@ci_string;
29818         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
29819
29820         return;
29821     }
29822 }    # end tokenize_this_line
29823
29824 #########i#############################################################
29825 # Tokenizer routines which assist in identifying token types
29826 #######################################################################
29827
29828 sub operator_expected {
29829
29830     # Many perl symbols have two or more meanings.  For example, '<<'
29831     # can be a shift operator or a here-doc operator.  The
29832     # interpretation of these symbols depends on the current state of
29833     # the tokenizer, which may either be expecting a term or an
29834     # operator.  For this example, a << would be a shift if an operator
29835     # is expected, and a here-doc if a term is expected.  This routine
29836     # is called to make this decision for any current token.  It returns
29837     # one of three possible values:
29838     #
29839     #     OPERATOR - operator expected (or at least, not a term)
29840     #     UNKNOWN  - can't tell
29841     #     TERM     - a term is expected (or at least, not an operator)
29842     #
29843     # The decision is based on what has been seen so far.  This
29844     # information is stored in the "$last_nonblank_type" and
29845     # "$last_nonblank_token" variables.  For example, if the
29846     # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
29847     # if $last_nonblank_type is 'n' (numeric), we are expecting an
29848     # OPERATOR.
29849     #
29850     # If a UNKNOWN is returned, the calling routine must guess. A major
29851     # goal of this tokenizer is to minimize the possibility of returning
29852     # UNKNOWN, because a wrong guess can spoil the formatting of a
29853     # script.
29854     #
29855     # adding NEW_TOKENS: it is critically important that this routine be
29856     # updated to allow it to determine if an operator or term is to be
29857     # expected after the new token.  Doing this simply involves adding
29858     # the new token character to one of the regexes in this routine or
29859     # to one of the hash lists
29860     # that it uses, which are initialized in the BEGIN section.
29861     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
29862     # $statement_type
29863
29864     my ( $prev_type, $tok, $next_type ) = @_;
29865
29866     my $op_expected = UNKNOWN;
29867
29868 ##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
29869
29870 # Note: function prototype is available for token type 'U' for future
29871 # program development.  It contains the leading and trailing parens,
29872 # and no blanks.  It might be used to eliminate token type 'C', for
29873 # example (prototype = '()'). Thus:
29874 # if ($last_nonblank_type eq 'U') {
29875 #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
29876 # }
29877
29878     # A possible filehandle (or object) requires some care...
29879     if ( $last_nonblank_type eq 'Z' ) {
29880
29881         # angle.t
29882         if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
29883             $op_expected = UNKNOWN;
29884         }
29885
29886         # For possible file handle like "$a", Perl uses weird parsing rules.
29887         # For example:
29888         # print $a/2,"/hi";   - division
29889         # print $a / 2,"/hi"; - division
29890         # print $a/ 2,"/hi";  - division
29891         # print $a /2,"/hi";  - pattern (and error)!
29892         elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
29893             $op_expected = TERM;
29894         }
29895
29896         # Note when an operation is being done where a
29897         # filehandle might be expected, since a change in whitespace
29898         # could change the interpretation of the statement.
29899         else {
29900             if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
29901                 complain("operator in print statement not recommended\n");
29902                 $op_expected = OPERATOR;
29903             }
29904         }
29905     }
29906
29907     # Check for smartmatch operator before preceding brace or square bracket.
29908     # For example, at the ? after the ] in the following expressions we are
29909     # expecting an operator:
29910     #
29911     # qr/3/ ~~ ['1234'] ? 1 : 0;
29912     # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
29913     elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
29914         $op_expected = OPERATOR;
29915     }
29916
29917     # handle something after 'do' and 'eval'
29918     elsif ( $is_block_operator{$last_nonblank_token} ) {
29919
29920         # something like $a = eval "expression";
29921         #                          ^
29922         if ( $last_nonblank_type eq 'k' ) {
29923             $op_expected = TERM;    # expression or list mode following keyword
29924         }
29925
29926         # something like $a = do { BLOCK } / 2;
29927         # or this ? after a smartmatch anonynmous hash or array reference:
29928         #   qr/3/ ~~ ['1234'] ? 1 : 0;
29929         #                                  ^
29930         else {
29931             $op_expected = OPERATOR;    # block mode following }
29932         }
29933     }
29934
29935     # handle bare word..
29936     elsif ( $last_nonblank_type eq 'w' ) {
29937
29938         # unfortunately, we can't tell what type of token to expect next
29939         # after most bare words
29940         $op_expected = UNKNOWN;
29941     }
29942
29943     # operator, but not term possible after these types
29944     # Note: moved ')' from type to token because parens in list context
29945     # get marked as '{' '}' now.  This is a minor glitch in the following:
29946     #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
29947     #
29948     elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
29949         || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
29950     {
29951         $op_expected = OPERATOR;
29952
29953         # in a 'use' statement, numbers and v-strings are not true
29954         # numbers, so to avoid incorrect error messages, we will
29955         # mark them as unknown for now (use.t)
29956         # TODO: it would be much nicer to create a new token V for VERSION
29957         # number in a use statement.  Then this could be a check on type V
29958         # and related patches which change $statement_type for '=>'
29959         # and ',' could be removed.  Further, it would clean things up to
29960         # scan the 'use' statement with a separate subroutine.
29961         if (   ( $statement_type eq 'use' )
29962             && ( $last_nonblank_type =~ /^[nv]$/ ) )
29963         {
29964             $op_expected = UNKNOWN;
29965         }
29966
29967         # expecting VERSION or {} after package NAMESPACE
29968         elsif ($statement_type =~ /^package\b/
29969             && $last_nonblank_token =~ /^package\b/ )
29970         {
29971             $op_expected = TERM;
29972         }
29973     }
29974
29975     # no operator after many keywords, such as "die", "warn", etc
29976     elsif ( $expecting_term_token{$last_nonblank_token} ) {
29977
29978         # patch for dor.t (defined or).
29979         # perl functions which may be unary operators
29980         # TODO: This list is incomplete, and these should be put
29981         # into a hash.
29982         if (   $tok eq '/'
29983             && $next_type eq '/'
29984             && $last_nonblank_type eq 'k'
29985             && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
29986         {
29987             $op_expected = OPERATOR;
29988         }
29989         else {
29990             $op_expected = TERM;
29991         }
29992     }
29993
29994     # no operator after things like + - **  (i.e., other operators)
29995     elsif ( $expecting_term_types{$last_nonblank_type} ) {
29996         $op_expected = TERM;
29997     }
29998
29999     # a few operators, like "time", have an empty prototype () and so
30000     # take no parameters but produce a value to operate on
30001     elsif ( $expecting_operator_token{$last_nonblank_token} ) {
30002         $op_expected = OPERATOR;
30003     }
30004
30005     # post-increment and decrement produce values to be operated on
30006     elsif ( $expecting_operator_types{$last_nonblank_type} ) {
30007         $op_expected = OPERATOR;
30008     }
30009
30010     # no value to operate on after sub block
30011     elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
30012
30013     # a right brace here indicates the end of a simple block.
30014     # all non-structural right braces have type 'R'
30015     # all braces associated with block operator keywords have been given those
30016     # keywords as "last_nonblank_token" and caught above.
30017     # (This statement is order dependent, and must come after checking
30018     # $last_nonblank_token).
30019     elsif ( $last_nonblank_type eq '}' ) {
30020
30021         # patch for dor.t (defined or).
30022         if (   $tok eq '/'
30023             && $next_type eq '/'
30024             && $last_nonblank_token eq ']' )
30025         {
30026             $op_expected = OPERATOR;
30027         }
30028
30029         # Patch for RT #116344: misparse a ternary operator after an anonymous
30030         # hash, like this:
30031         #   return ref {} ? 1 : 0;
30032         # The right brace should really be marked type 'R' in this case, and
30033         # it is safest to return an UNKNOWN here. Expecting a TERM will
30034         # cause the '?' to always be interpreted as a pattern delimiter
30035         # rather than introducing a ternary operator.
30036         elsif ( $tok eq '?' ) {
30037             $op_expected = UNKNOWN;
30038         }
30039         else {
30040             $op_expected = TERM;
30041         }
30042     }
30043
30044     # something else..what did I forget?
30045     else {
30046
30047         # collecting diagnostics on unknown operator types..see what was missed
30048         $op_expected = UNKNOWN;
30049         write_diagnostics(
30050 "OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
30051         );
30052     }
30053
30054     TOKENIZER_DEBUG_FLAG_EXPECT && do {
30055         print STDOUT
30056 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
30057     };
30058     return $op_expected;
30059 }
30060
30061 sub new_statement_ok {
30062
30063     # return true if the current token can start a new statement
30064     # USES GLOBAL VARIABLES: $last_nonblank_type
30065
30066     return label_ok()    # a label would be ok here
30067
30068       || $last_nonblank_type eq 'J';    # or we follow a label
30069
30070 }
30071
30072 sub label_ok {
30073
30074     # Decide if a bare word followed by a colon here is a label
30075     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
30076     # $brace_depth, @brace_type
30077
30078     # if it follows an opening or closing code block curly brace..
30079     if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
30080         && $last_nonblank_type eq $last_nonblank_token )
30081     {
30082
30083         # it is a label if and only if the curly encloses a code block
30084         return $brace_type[$brace_depth];
30085     }
30086
30087     # otherwise, it is a label if and only if it follows a ';' (real or fake)
30088     # or another label
30089     else {
30090         return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
30091     }
30092 }
30093
30094 sub code_block_type {
30095
30096     # Decide if this is a block of code, and its type.
30097     # Must be called only when $type = $token = '{'
30098     # The problem is to distinguish between the start of a block of code
30099     # and the start of an anonymous hash reference
30100     # Returns "" if not code block, otherwise returns 'last_nonblank_token'
30101     # to indicate the type of code block.  (For example, 'last_nonblank_token'
30102     # might be 'if' for an if block, 'else' for an else block, etc).
30103     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
30104     # $last_nonblank_block_type, $brace_depth, @brace_type
30105
30106     # handle case of multiple '{'s
30107
30108 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
30109
30110     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
30111     if (   $last_nonblank_token eq '{'
30112         && $last_nonblank_type eq $last_nonblank_token )
30113     {
30114
30115         # opening brace where a statement may appear is probably
30116         # a code block but might be and anonymous hash reference
30117         if ( $brace_type[$brace_depth] ) {
30118             return decide_if_code_block( $i, $rtokens, $rtoken_type,
30119                 $max_token_index );
30120         }
30121
30122         # cannot start a code block within an anonymous hash
30123         else {
30124             return "";
30125         }
30126     }
30127
30128     elsif ( $last_nonblank_token eq ';' ) {
30129
30130         # an opening brace where a statement may appear is probably
30131         # a code block but might be and anonymous hash reference
30132         return decide_if_code_block( $i, $rtokens, $rtoken_type,
30133             $max_token_index );
30134     }
30135
30136     # handle case of '}{'
30137     elsif ($last_nonblank_token eq '}'
30138         && $last_nonblank_type eq $last_nonblank_token )
30139     {
30140
30141         # a } { situation ...
30142         # could be hash reference after code block..(blktype1.t)
30143         if ($last_nonblank_block_type) {
30144             return decide_if_code_block( $i, $rtokens, $rtoken_type,
30145                 $max_token_index );
30146         }
30147
30148         # must be a block if it follows a closing hash reference
30149         else {
30150             return $last_nonblank_token;
30151         }
30152     }
30153
30154     ################################################################
30155     # NOTE: braces after type characters start code blocks, but for
30156     # simplicity these are not identified as such.  See also
30157     # sub is_non_structural_brace.
30158     ################################################################
30159
30160 ##    elsif ( $last_nonblank_type eq 't' ) {
30161 ##       return $last_nonblank_token;
30162 ##    }
30163
30164     # brace after label:
30165     elsif ( $last_nonblank_type eq 'J' ) {
30166         return $last_nonblank_token;
30167     }
30168
30169 # otherwise, look at previous token.  This must be a code block if
30170 # it follows any of these:
30171 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
30172     elsif ( $is_code_block_token{$last_nonblank_token} ) {
30173
30174         # Bug Patch: Note that the opening brace after the 'if' in the following
30175         # snippet is an anonymous hash ref and not a code block!
30176         #   print 'hi' if { x => 1, }->{x};
30177         # We can identify this situation because the last nonblank type
30178         # will be a keyword (instead of a closing peren)
30179         if (   $last_nonblank_token =~ /^(if|unless)$/
30180             && $last_nonblank_type eq 'k' )
30181         {
30182             return "";
30183         }
30184         else {
30185             return $last_nonblank_token;
30186         }
30187     }
30188
30189     # or a sub or package BLOCK
30190     elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
30191         && $last_nonblank_token =~ /^(sub|package)\b/ )
30192     {
30193         return $last_nonblank_token;
30194     }
30195
30196     elsif ( $statement_type =~ /^(sub|package)\b/ ) {
30197         return $statement_type;
30198     }
30199
30200     # user-defined subs with block parameters (like grep/map/eval)
30201     elsif ( $last_nonblank_type eq 'G' ) {
30202         return $last_nonblank_token;
30203     }
30204
30205     # check bareword
30206     elsif ( $last_nonblank_type eq 'w' ) {
30207         return decide_if_code_block( $i, $rtokens, $rtoken_type,
30208             $max_token_index );
30209     }
30210
30211     # Patch for bug # RT #94338 reported by Daniel Trizen
30212     # for-loop in a parenthesized block-map triggering an error message:
30213     #    map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
30214     # Check for a code block within a parenthesized function call
30215     elsif ( $last_nonblank_token eq '(' ) {
30216         my $paren_type = $paren_type[$paren_depth];
30217         if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
30218
30219             # We will mark this as a code block but use type 't' instead
30220             # of the name of the contining function.  This will allow for
30221             # correct parsing but will usually produce better formatting.
30222             # Braces with block type 't' are not broken open automatically
30223             # in the formatter as are other code block types, and this usually
30224             # works best.
30225             return 't';    # (Not $paren_type)
30226         }
30227         else {
30228             return "";
30229         }
30230     }
30231
30232     # handle unknown syntax ') {'
30233     # we previously appended a '()' to mark this case
30234     elsif ( $last_nonblank_token =~ /\(\)$/ ) {
30235         return $last_nonblank_token;
30236     }
30237
30238     # anything else must be anonymous hash reference
30239     else {
30240         return "";
30241     }
30242 }
30243
30244 sub decide_if_code_block {
30245
30246     # USES GLOBAL VARIABLES: $last_nonblank_token
30247     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
30248
30249     my ( $next_nonblank_token, $i_next ) =
30250       find_next_nonblank_token( $i, $rtokens, $max_token_index );
30251
30252     # we are at a '{' where a statement may appear.
30253     # We must decide if this brace starts an anonymous hash or a code
30254     # block.
30255     # return "" if anonymous hash, and $last_nonblank_token otherwise
30256
30257     # initialize to be code BLOCK
30258     my $code_block_type = $last_nonblank_token;
30259
30260     # Check for the common case of an empty anonymous hash reference:
30261     # Maybe something like sub { { } }
30262     if ( $next_nonblank_token eq '}' ) {
30263         $code_block_type = "";
30264     }
30265
30266     else {
30267
30268         # To guess if this '{' is an anonymous hash reference, look ahead
30269         # and test as follows:
30270         #
30271         # it is a hash reference if next come:
30272         #   - a string or digit followed by a comma or =>
30273         #   - bareword followed by =>
30274         # otherwise it is a code block
30275         #
30276         # Examples of anonymous hash ref:
30277         # {'aa',};
30278         # {1,2}
30279         #
30280         # Examples of code blocks:
30281         # {1; print "hello\n", 1;}
30282         # {$a,1};
30283
30284         # We are only going to look ahead one more (nonblank/comment) line.
30285         # Strange formatting could cause a bad guess, but that's unlikely.
30286         my @pre_types;
30287         my @pre_tokens;
30288
30289         # Ignore the rest of this line if it is a side comment
30290         if ( $next_nonblank_token ne '#' ) {
30291             @pre_types  = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
30292             @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
30293         }
30294         my ( $rpre_tokens, $rpre_types ) =
30295           peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
30296                                                        # generous, and prevents
30297                                                        # wasting lots of
30298                                                        # time in mangled files
30299         if ( defined($rpre_types) && @{$rpre_types} ) {
30300             push @pre_types,  @{$rpre_types};
30301             push @pre_tokens, @{$rpre_tokens};
30302         }
30303
30304         # put a sentinel token to simplify stopping the search
30305         push @pre_types, '}';
30306         push @pre_types, '}';
30307
30308         my $jbeg = 0;
30309         $jbeg = 1 if $pre_types[0] eq 'b';
30310
30311         # first look for one of these
30312         #  - bareword
30313         #  - bareword with leading -
30314         #  - digit
30315         #  - quoted string
30316         my $j = $jbeg;
30317         if ( $pre_types[$j] =~ /^[\'\"]/ ) {
30318
30319             # find the closing quote; don't worry about escapes
30320             my $quote_mark = $pre_types[$j];
30321             foreach my $k ( $j + 1 .. $#pre_types - 1 ) {
30322                 if ( $pre_types[$k] eq $quote_mark ) {
30323                     $j = $k + 1;
30324                     my $next = $pre_types[$j];
30325                     last;
30326                 }
30327             }
30328         }
30329         elsif ( $pre_types[$j] eq 'd' ) {
30330             $j++;
30331         }
30332         elsif ( $pre_types[$j] eq 'w' ) {
30333             $j++;
30334         }
30335         elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
30336             $j++;
30337         }
30338         if ( $j > $jbeg ) {
30339
30340             $j++ if $pre_types[$j] eq 'b';
30341
30342             # Patched for RT #95708
30343             if (
30344
30345                 # it is a comma which is not a pattern delimeter except for qw
30346                 (
30347                        $pre_types[$j] eq ','
30348                     && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
30349                 )
30350
30351                 # or a =>
30352                 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
30353               )
30354             {
30355                 $code_block_type = "";
30356             }
30357         }
30358     }
30359
30360     return $code_block_type;
30361 }
30362
30363 sub report_unexpected {
30364
30365     # report unexpected token type and show where it is
30366     # USES GLOBAL VARIABLES: $tokenizer_self
30367     my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
30368         $rpretoken_type, $input_line )
30369       = @_;
30370
30371     if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
30372         my $msg = "found $found where $expecting expected";
30373         my $pos = $rpretoken_map->[$i_tok];
30374         interrupt_logfile();
30375         my $input_line_number = $tokenizer_self->{_last_line_number};
30376         my ( $offset, $numbered_line, $underline ) =
30377           make_numbered_line( $input_line_number, $input_line, $pos );
30378         $underline = write_on_underline( $underline, $pos - $offset, '^' );
30379
30380         my $trailer = "";
30381         if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
30382             my $pos_prev = $rpretoken_map->[$last_nonblank_i];
30383             my $num;
30384             if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
30385                 $num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
30386             }
30387             else {
30388                 $num = $pos - $pos_prev;
30389             }
30390             if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
30391
30392             $underline =
30393               write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
30394             $trailer = " (previous token underlined)";
30395         }
30396         warning( $numbered_line . "\n" );
30397         warning( $underline . "\n" );
30398         warning( $msg . $trailer . "\n" );
30399         resume_logfile();
30400     }
30401     return;
30402 }
30403
30404 sub is_non_structural_brace {
30405
30406     # Decide if a brace or bracket is structural or non-structural
30407     # by looking at the previous token and type
30408     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
30409
30410     # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
30411     # Tentatively deactivated because it caused the wrong operator expectation
30412     # for this code:
30413     #      $user = @vars[1] / 100;
30414     # Must update sub operator_expected before re-implementing.
30415     # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
30416     #    return 0;
30417     # }
30418
30419     ################################################################
30420     # NOTE: braces after type characters start code blocks, but for
30421     # simplicity these are not identified as such.  See also
30422     # sub code_block_type
30423     ################################################################
30424
30425     ##if ($last_nonblank_type eq 't') {return 0}
30426
30427     # otherwise, it is non-structural if it is decorated
30428     # by type information.
30429     # For example, the '{' here is non-structural:   ${xxx}
30430     return (
30431         $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
30432
30433           # or if we follow a hash or array closing curly brace or bracket
30434           # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
30435           # because the first '}' would have been given type 'R'
30436           || $last_nonblank_type =~ /^([R\]])$/
30437     );
30438 }
30439
30440 #########i#############################################################
30441 # Tokenizer routines for tracking container nesting depths
30442 #######################################################################
30443
30444 # The following routines keep track of nesting depths of the nesting
30445 # types, ( [ { and ?.  This is necessary for determining the indentation
30446 # level, and also for debugging programs.  Not only do they keep track of
30447 # nesting depths of the individual brace types, but they check that each
30448 # of the other brace types is balanced within matching pairs.  For
30449 # example, if the program sees this sequence:
30450 #
30451 #         {  ( ( ) }
30452 #
30453 # then it can determine that there is an extra left paren somewhere
30454 # between the { and the }.  And so on with every other possible
30455 # combination of outer and inner brace types.  For another
30456 # example:
30457 #
30458 #         ( [ ..... ]  ] )
30459 #
30460 # which has an extra ] within the parens.
30461 #
30462 # The brace types have indexes 0 .. 3 which are indexes into
30463 # the matrices.
30464 #
30465 # The pair ? : are treated as just another nesting type, with ? acting
30466 # as the opening brace and : acting as the closing brace.
30467 #
30468 # The matrix
30469 #
30470 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
30471 #
30472 # saves the nesting depth of brace type $b (where $b is either of the other
30473 # nesting types) when brace type $a enters a new depth.  When this depth
30474 # decreases, a check is made that the current depth of brace types $b is
30475 # unchanged, or otherwise there must have been an error.  This can
30476 # be very useful for localizing errors, particularly when perl runs to
30477 # the end of a large file (such as this one) and announces that there
30478 # is a problem somewhere.
30479 #
30480 # A numerical sequence number is maintained for every nesting type,
30481 # so that each matching pair can be uniquely identified in a simple
30482 # way.
30483
30484 sub increase_nesting_depth {
30485     my ( $aa, $pos ) = @_;
30486
30487     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
30488     # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
30489     # $statement_type
30490     $current_depth[$aa]++;
30491     $total_depth++;
30492     $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
30493     my $input_line_number = $tokenizer_self->{_last_line_number};
30494     my $input_line        = $tokenizer_self->{_line_text};
30495
30496     # Sequence numbers increment by number of items.  This keeps
30497     # a unique set of numbers but still allows the relative location
30498     # of any type to be determined.
30499     $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
30500     my $seqno = $nesting_sequence_number[$aa];
30501     $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
30502
30503     $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
30504       [ $input_line_number, $input_line, $pos ];
30505
30506     for my $bb ( 0 .. $#closing_brace_names ) {
30507         next if ( $bb == $aa );
30508         $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
30509     }
30510
30511     # set a flag for indenting a nested ternary statement
30512     my $indent = 0;
30513     if ( $aa == QUESTION_COLON ) {
30514         $nested_ternary_flag[ $current_depth[$aa] ] = 0;
30515         if ( $current_depth[$aa] > 1 ) {
30516             if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
30517                 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
30518                 if ( $pdepth == $total_depth - 1 ) {
30519                     $indent = 1;
30520                     $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
30521                 }
30522             }
30523         }
30524     }
30525     $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
30526     $statement_type = "";
30527     return ( $seqno, $indent );
30528 }
30529
30530 sub decrease_nesting_depth {
30531
30532     my ( $aa, $pos ) = @_;
30533
30534     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
30535     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
30536     # $statement_type
30537     my $seqno             = 0;
30538     my $input_line_number = $tokenizer_self->{_last_line_number};
30539     my $input_line        = $tokenizer_self->{_line_text};
30540
30541     my $outdent = 0;
30542     $total_depth--;
30543     if ( $current_depth[$aa] > 0 ) {
30544
30545         # set a flag for un-indenting after seeing a nested ternary statement
30546         $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
30547         if ( $aa == QUESTION_COLON ) {
30548             $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
30549         }
30550         $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
30551
30552         # check that any brace types $bb contained within are balanced
30553         for my $bb ( 0 .. $#closing_brace_names ) {
30554             next if ( $bb == $aa );
30555
30556             unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
30557                 $current_depth[$bb] )
30558             {
30559                 my $diff =
30560                   $current_depth[$bb] -
30561                   $depth_array[$aa][$bb][ $current_depth[$aa] ];
30562
30563                 # don't whine too many times
30564                 my $saw_brace_error = get_saw_brace_error();
30565                 if (
30566                     $saw_brace_error <= MAX_NAG_MESSAGES
30567
30568                     # if too many closing types have occurred, we probably
30569                     # already caught this error
30570                     && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
30571                   )
30572                 {
30573                     interrupt_logfile();
30574                     my $rsl =
30575                       $starting_line_of_current_depth[$aa]
30576                       [ $current_depth[$aa] ];
30577                     my $sl  = $rsl->[0];
30578                     my $rel = [ $input_line_number, $input_line, $pos ];
30579                     my $el  = $rel->[0];
30580                     my ($ess);
30581
30582                     if ( $diff == 1 || $diff == -1 ) {
30583                         $ess = '';
30584                     }
30585                     else {
30586                         $ess = 's';
30587                     }
30588                     my $bname =
30589                       ( $diff > 0 )
30590                       ? $opening_brace_names[$bb]
30591                       : $closing_brace_names[$bb];
30592                     write_error_indicator_pair( @{$rsl}, '^' );
30593                     my $msg = <<"EOM";
30594 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
30595 EOM
30596
30597                     if ( $diff > 0 ) {
30598                         my $rml =
30599                           $starting_line_of_current_depth[$bb]
30600                           [ $current_depth[$bb] ];
30601                         my $ml = $rml->[0];
30602                         $msg .=
30603 "    The most recent un-matched $bname is on line $ml\n";
30604                         write_error_indicator_pair( @{$rml}, '^' );
30605                     }
30606                     write_error_indicator_pair( @{$rel}, '^' );
30607                     warning($msg);
30608                     resume_logfile();
30609                 }
30610                 increment_brace_error();
30611             }
30612         }
30613         $current_depth[$aa]--;
30614     }
30615     else {
30616
30617         my $saw_brace_error = get_saw_brace_error();
30618         if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
30619             my $msg = <<"EOM";
30620 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
30621 EOM
30622             indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
30623         }
30624         increment_brace_error();
30625     }
30626     return ( $seqno, $outdent );
30627 }
30628
30629 sub check_final_nesting_depths {
30630
30631     # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
30632
30633     for my $aa ( 0 .. $#closing_brace_names ) {
30634
30635         if ( $current_depth[$aa] ) {
30636             my $rsl =
30637               $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
30638             my $sl  = $rsl->[0];
30639             my $msg = <<"EOM";
30640 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
30641 The most recent un-matched $opening_brace_names[$aa] is on line $sl
30642 EOM
30643             indicate_error( $msg, @{$rsl}, '^' );
30644             increment_brace_error();
30645         }
30646     }
30647     return;
30648 }
30649
30650 #########i#############################################################
30651 # Tokenizer routines for looking ahead in input stream
30652 #######################################################################
30653
30654 sub peek_ahead_for_n_nonblank_pre_tokens {
30655
30656     # returns next n pretokens if they exist
30657     # returns undef's if hits eof without seeing any pretokens
30658     # USES GLOBAL VARIABLES: $tokenizer_self
30659     my $max_pretokens = shift;
30660     my $line;
30661     my $i = 0;
30662     my ( $rpre_tokens, $rmap, $rpre_types );
30663
30664     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
30665     {
30666         $line =~ s/^\s*//;    # trim leading blanks
30667         next if ( length($line) <= 0 );    # skip blank
30668         next if ( $line =~ /^#/ );         # skip comment
30669         ( $rpre_tokens, $rmap, $rpre_types ) =
30670           pre_tokenize( $line, $max_pretokens );
30671         last;
30672     }
30673     return ( $rpre_tokens, $rpre_types );
30674 }
30675
30676 # look ahead for next non-blank, non-comment line of code
30677 sub peek_ahead_for_nonblank_token {
30678
30679     # USES GLOBAL VARIABLES: $tokenizer_self
30680     my ( $rtokens, $max_token_index ) = @_;
30681     my $line;
30682     my $i = 0;
30683
30684     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
30685     {
30686         $line =~ s/^\s*//;    # trim leading blanks
30687         next if ( length($line) <= 0 );    # skip blank
30688         next if ( $line =~ /^#/ );         # skip comment
30689         my ( $rtok, $rmap, $rtype ) =
30690           pre_tokenize( $line, 2 );        # only need 2 pre-tokens
30691         my $j = $max_token_index + 1;
30692
30693         foreach my $tok ( @{$rtok} ) {
30694             last if ( $tok =~ "\n" );
30695             $rtokens->[ ++$j ] = $tok;
30696         }
30697         last;
30698     }
30699     return $rtokens;
30700 }
30701
30702 #########i#############################################################
30703 # Tokenizer guessing routines for ambiguous situations
30704 #######################################################################
30705
30706 sub guess_if_pattern_or_conditional {
30707
30708     # this routine is called when we have encountered a ? following an
30709     # unknown bareword, and we must decide if it starts a pattern or not
30710     # input parameters:
30711     #   $i - token index of the ? starting possible pattern
30712     # output parameters:
30713     #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
30714     #   msg = a warning or diagnostic message
30715     # USES GLOBAL VARIABLES: $last_nonblank_token
30716
30717     # FIXME: this needs to be rewritten
30718
30719     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
30720     my $is_pattern = 0;
30721     my $msg        = "guessing that ? after $last_nonblank_token starts a ";
30722
30723     if ( $i >= $max_token_index ) {
30724         $msg .= "conditional (no end to pattern found on the line)\n";
30725     }
30726     else {
30727         my $ibeg = $i;
30728         $i = $ibeg + 1;
30729         my $next_token = $rtokens->[$i];    # first token after ?
30730
30731         # look for a possible ending ? on this line..
30732         my $in_quote        = 1;
30733         my $quote_depth     = 0;
30734         my $quote_character = '';
30735         my $quote_pos       = 0;
30736         my $quoted_string;
30737         (
30738             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
30739             $quoted_string
30740           )
30741           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
30742             $quote_pos, $quote_depth, $max_token_index );
30743
30744         if ($in_quote) {
30745
30746             # we didn't find an ending ? on this line,
30747             # so we bias towards conditional
30748             $is_pattern = 0;
30749             $msg .= "conditional (no ending ? on this line)\n";
30750
30751             # we found an ending ?, so we bias towards a pattern
30752         }
30753         else {
30754
30755             # Watch out for an ending ? in quotes, like this
30756             #    my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
30757             my $s_quote = 0;
30758             my $d_quote = 0;
30759             my $colons  = 0;
30760             foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
30761                 my $tok = $rtokens->[$ii];
30762                 if ( $tok eq ":" ) { $colons++ }
30763                 if ( $tok eq "'" ) { $s_quote++ }
30764                 if ( $tok eq '"' ) { $d_quote++ }
30765             }
30766             if ( $s_quote % 2 || $d_quote % 2 || $colons ) {
30767                 $is_pattern = 0;
30768                 $msg .= "found ending ? but unbalanced quote chars\n";
30769             }
30770             elsif ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
30771                 $is_pattern = 1;
30772                 $msg .= "pattern (found ending ? and pattern expected)\n";
30773             }
30774             else {
30775                 $msg .= "pattern (uncertain, but found ending ?)\n";
30776             }
30777         }
30778     }
30779     return ( $is_pattern, $msg );
30780 }
30781
30782 sub guess_if_pattern_or_division {
30783
30784     # this routine is called when we have encountered a / following an
30785     # unknown bareword, and we must decide if it starts a pattern or is a
30786     # division
30787     # input parameters:
30788     #   $i - token index of the / starting possible pattern
30789     # output parameters:
30790     #   $is_pattern = 0 if probably division,  =1 if probably a pattern
30791     #   msg = a warning or diagnostic message
30792     # USES GLOBAL VARIABLES: $last_nonblank_token
30793     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
30794     my $is_pattern = 0;
30795     my $msg        = "guessing that / after $last_nonblank_token starts a ";
30796
30797     if ( $i >= $max_token_index ) {
30798         $msg .= "division (no end to pattern found on the line)\n";
30799     }
30800     else {
30801         my $ibeg = $i;
30802         my $divide_expected =
30803           numerator_expected( $i, $rtokens, $max_token_index );
30804         $i = $ibeg + 1;
30805         my $next_token = $rtokens->[$i];    # first token after slash
30806
30807         # look for a possible ending / on this line..
30808         my $in_quote        = 1;
30809         my $quote_depth     = 0;
30810         my $quote_character = '';
30811         my $quote_pos       = 0;
30812         my $quoted_string;
30813         (
30814             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
30815             $quoted_string
30816           )
30817           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
30818             $quote_pos, $quote_depth, $max_token_index );
30819
30820         if ($in_quote) {
30821
30822             # we didn't find an ending / on this line,
30823             # so we bias towards division
30824             if ( $divide_expected >= 0 ) {
30825                 $is_pattern = 0;
30826                 $msg .= "division (no ending / on this line)\n";
30827             }
30828             else {
30829                 $msg        = "multi-line pattern (division not possible)\n";
30830                 $is_pattern = 1;
30831             }
30832
30833         }
30834
30835         # we found an ending /, so we bias towards a pattern
30836         else {
30837
30838             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
30839
30840                 if ( $divide_expected >= 0 ) {
30841
30842                     if ( $i - $ibeg > 60 ) {
30843                         $msg .= "division (matching / too distant)\n";
30844                         $is_pattern = 0;
30845                     }
30846                     else {
30847                         $msg .= "pattern (but division possible too)\n";
30848                         $is_pattern = 1;
30849                     }
30850                 }
30851                 else {
30852                     $is_pattern = 1;
30853                     $msg .= "pattern (division not possible)\n";
30854                 }
30855             }
30856             else {
30857
30858                 if ( $divide_expected >= 0 ) {
30859                     $is_pattern = 0;
30860                     $msg .= "division (pattern not possible)\n";
30861                 }
30862                 else {
30863                     $is_pattern = 1;
30864                     $msg .=
30865                       "pattern (uncertain, but division would not work here)\n";
30866                 }
30867             }
30868         }
30869     }
30870     return ( $is_pattern, $msg );
30871 }
30872
30873 # try to resolve here-doc vs. shift by looking ahead for
30874 # non-code or the end token (currently only looks for end token)
30875 # returns 1 if it is probably a here doc, 0 if not
30876 sub guess_if_here_doc {
30877
30878     # This is how many lines we will search for a target as part of the
30879     # guessing strategy.  It is a constant because there is probably
30880     # little reason to change it.
30881     # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
30882     # %is_constant,
30883     my $HERE_DOC_WINDOW = 40;
30884
30885     my $next_token        = shift;
30886     my $here_doc_expected = 0;
30887     my $line;
30888     my $k   = 0;
30889     my $msg = "checking <<";
30890
30891     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
30892     {
30893         chomp $line;
30894
30895         if ( $line =~ /^$next_token$/ ) {
30896             $msg .= " -- found target $next_token ahead $k lines\n";
30897             $here_doc_expected = 1;    # got it
30898             last;
30899         }
30900         last if ( $k >= $HERE_DOC_WINDOW );
30901     }
30902
30903     unless ($here_doc_expected) {
30904
30905         if ( !defined($line) ) {
30906             $here_doc_expected = -1;    # hit eof without seeing target
30907             $msg .= " -- must be shift; target $next_token not in file\n";
30908
30909         }
30910         else {                          # still unsure..taking a wild guess
30911
30912             if ( !$is_constant{$current_package}{$next_token} ) {
30913                 $here_doc_expected = 1;
30914                 $msg .=
30915                   " -- guessing it's a here-doc ($next_token not a constant)\n";
30916             }
30917             else {
30918                 $msg .=
30919                   " -- guessing it's a shift ($next_token is a constant)\n";
30920             }
30921         }
30922     }
30923     write_logfile_entry($msg);
30924     return $here_doc_expected;
30925 }
30926
30927 #########i#############################################################
30928 # Tokenizer Routines for scanning identifiers and related items
30929 #######################################################################
30930
30931 sub scan_bare_identifier_do {
30932
30933     # this routine is called to scan a token starting with an alphanumeric
30934     # variable or package separator, :: or '.
30935     # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
30936     # $last_nonblank_type,@paren_type, $paren_depth
30937
30938     my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
30939         $max_token_index )
30940       = @_;
30941     my $i_begin = $i;
30942     my $package = undef;
30943
30944     my $i_beg = $i;
30945
30946     # we have to back up one pretoken at a :: since each : is one pretoken
30947     if ( $tok eq '::' ) { $i_beg-- }
30948     if ( $tok eq '->' ) { $i_beg-- }
30949     my $pos_beg = $rtoken_map->[$i_beg];
30950     pos($input_line) = $pos_beg;
30951
30952     #  Examples:
30953     #   A::B::C
30954     #   A::
30955     #   ::A
30956     #   A'B
30957     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
30958
30959         my $pos  = pos($input_line);
30960         my $numc = $pos - $pos_beg;
30961         $tok = substr( $input_line, $pos_beg, $numc );
30962
30963         # type 'w' includes anything without leading type info
30964         # ($,%,@,*) including something like abc::def::ghi
30965         $type = 'w';
30966
30967         my $sub_name = "";
30968         if ( defined($2) ) { $sub_name = $2; }
30969         if ( defined($1) ) {
30970             $package = $1;
30971
30972             # patch: don't allow isolated package name which just ends
30973             # in the old style package separator (single quote).  Example:
30974             #   use CGI':all';
30975             if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
30976                 $pos--;
30977             }
30978
30979             $package =~ s/\'/::/g;
30980             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
30981             $package =~ s/::$//;
30982         }
30983         else {
30984             $package = $current_package;
30985
30986             if ( $is_keyword{$tok} ) {
30987                 $type = 'k';
30988             }
30989         }
30990
30991         # if it is a bareword..
30992         if ( $type eq 'w' ) {
30993
30994             # check for v-string with leading 'v' type character
30995             # (This seems to have precedence over filehandle, type 'Y')
30996             if ( $tok =~ /^v\d[_\d]*$/ ) {
30997
30998                 # we only have the first part - something like 'v101' -
30999                 # look for more
31000                 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
31001                     $pos  = pos($input_line);
31002                     $numc = $pos - $pos_beg;
31003                     $tok  = substr( $input_line, $pos_beg, $numc );
31004                 }
31005                 $type = 'v';
31006
31007                 # warn if this version can't handle v-strings
31008                 report_v_string($tok);
31009             }
31010
31011             elsif ( $is_constant{$package}{$sub_name} ) {
31012                 $type = 'C';
31013             }
31014
31015             # bareword after sort has implied empty prototype; for example:
31016             # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
31017             # This has priority over whatever the user has specified.
31018             elsif ($last_nonblank_token eq 'sort'
31019                 && $last_nonblank_type eq 'k' )
31020             {
31021                 $type = 'Z';
31022             }
31023
31024             # Note: strangely, perl does not seem to really let you create
31025             # functions which act like eval and do, in the sense that eval
31026             # and do may have operators following the final }, but any operators
31027             # that you create with prototype (&) apparently do not allow
31028             # trailing operators, only terms.  This seems strange.
31029             # If this ever changes, here is the update
31030             # to make perltidy behave accordingly:
31031
31032             # elsif ( $is_block_function{$package}{$tok} ) {
31033             #    $tok='eval'; # patch to do braces like eval  - doesn't work
31034             #    $type = 'k';
31035             #}
31036             # FIXME: This could become a separate type to allow for different
31037             # future behavior:
31038             elsif ( $is_block_function{$package}{$sub_name} ) {
31039                 $type = 'G';
31040             }
31041
31042             elsif ( $is_block_list_function{$package}{$sub_name} ) {
31043                 $type = 'G';
31044             }
31045             elsif ( $is_user_function{$package}{$sub_name} ) {
31046                 $type      = 'U';
31047                 $prototype = $user_function_prototype{$package}{$sub_name};
31048             }
31049
31050             # check for indirect object
31051             elsif (
31052
31053                 # added 2001-03-27: must not be followed immediately by '('
31054                 # see fhandle.t
31055                 ( $input_line !~ m/\G\(/gc )
31056
31057                 # and
31058                 && (
31059
31060                     # preceded by keyword like 'print', 'printf' and friends
31061                     $is_indirect_object_taker{$last_nonblank_token}
31062
31063                     # or preceded by something like 'print(' or 'printf('
31064                     || (
31065                         ( $last_nonblank_token eq '(' )
31066                         && $is_indirect_object_taker{ $paren_type[$paren_depth]
31067                         }
31068
31069                     )
31070                 )
31071               )
31072             {
31073
31074                 # may not be indirect object unless followed by a space
31075                 if ( $input_line =~ m/\G\s+/gc ) {
31076                     $type = 'Y';
31077
31078                     # Abandon Hope ...
31079                     # Perl's indirect object notation is a very bad
31080                     # thing and can cause subtle bugs, especially for
31081                     # beginning programmers.  And I haven't even been
31082                     # able to figure out a sane warning scheme which
31083                     # doesn't get in the way of good scripts.
31084
31085                     # Complain if a filehandle has any lower case
31086                     # letters.  This is suggested good practice.
31087                     # Use 'sub_name' because something like
31088                     # main::MYHANDLE is ok for filehandle
31089                     if ( $sub_name =~ /[a-z]/ ) {
31090
31091                         # could be bug caused by older perltidy if
31092                         # followed by '('
31093                         if ( $input_line =~ m/\G\s*\(/gc ) {
31094                             complain(
31095 "Caution: unknown word '$tok' in indirect object slot\n"
31096                             );
31097                         }
31098                     }
31099                 }
31100
31101                 # bareword not followed by a space -- may not be filehandle
31102                 # (may be function call defined in a 'use' statement)
31103                 else {
31104                     $type = 'Z';
31105                 }
31106             }
31107         }
31108
31109         # Now we must convert back from character position
31110         # to pre_token index.
31111         # I don't think an error flag can occur here ..but who knows
31112         my $error;
31113         ( $i, $error ) =
31114           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
31115         if ($error) {
31116             warning("scan_bare_identifier: Possibly invalid tokenization\n");
31117         }
31118     }
31119
31120     # no match but line not blank - could be syntax error
31121     # perl will take '::' alone without complaint
31122     else {
31123         $type = 'w';
31124
31125         # change this warning to log message if it becomes annoying
31126         warning("didn't find identifier after leading ::\n");
31127     }
31128     return ( $i, $tok, $type, $prototype );
31129 }
31130
31131 sub scan_id_do {
31132
31133 # This is the new scanner and will eventually replace scan_identifier.
31134 # Only type 'sub' and 'package' are implemented.
31135 # Token types $ * % @ & -> are not yet implemented.
31136 #
31137 # Scan identifier following a type token.
31138 # The type of call depends on $id_scan_state: $id_scan_state = ''
31139 # for starting call, in which case $tok must be the token defining
31140 # the type.
31141 #
31142 # If the type token is the last nonblank token on the line, a value
31143 # of $id_scan_state = $tok is returned, indicating that further
31144 # calls must be made to get the identifier.  If the type token is
31145 # not the last nonblank token on the line, the identifier is
31146 # scanned and handled and a value of '' is returned.
31147 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
31148 # $statement_type, $tokenizer_self
31149
31150     my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
31151         $max_token_index )
31152       = @_;
31153     my $type = '';
31154     my ( $i_beg, $pos_beg );
31155
31156     #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
31157     #my ($a,$b,$c) = caller;
31158     #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
31159
31160     # on re-entry, start scanning at first token on the line
31161     if ($id_scan_state) {
31162         $i_beg = $i;
31163         $type  = '';
31164     }
31165
31166     # on initial entry, start scanning just after type token
31167     else {
31168         $i_beg         = $i + 1;
31169         $id_scan_state = $tok;
31170         $type          = 't';
31171     }
31172
31173     # find $i_beg = index of next nonblank token,
31174     # and handle empty lines
31175     my $blank_line          = 0;
31176     my $next_nonblank_token = $rtokens->[$i_beg];
31177     if ( $i_beg > $max_token_index ) {
31178         $blank_line = 1;
31179     }
31180     else {
31181
31182         # only a '#' immediately after a '$' is not a comment
31183         if ( $next_nonblank_token eq '#' ) {
31184             unless ( $tok eq '$' ) {
31185                 $blank_line = 1;
31186             }
31187         }
31188
31189         if ( $next_nonblank_token =~ /^\s/ ) {
31190             ( $next_nonblank_token, $i_beg ) =
31191               find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
31192                 $max_token_index );
31193             if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
31194                 $blank_line = 1;
31195             }
31196         }
31197     }
31198
31199     # handle non-blank line; identifier, if any, must follow
31200     unless ($blank_line) {
31201
31202         if ( $id_scan_state eq 'sub' ) {
31203             ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
31204                 $input_line, $i,             $i_beg,
31205                 $tok,        $type,          $rtokens,
31206                 $rtoken_map, $id_scan_state, $max_token_index
31207             );
31208         }
31209
31210         elsif ( $id_scan_state eq 'package' ) {
31211             ( $i, $tok, $type ) =
31212               do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
31213                 $rtoken_map, $max_token_index );
31214             $id_scan_state = '';
31215         }
31216
31217         else {
31218             warning("invalid token in scan_id: $tok\n");
31219             $id_scan_state = '';
31220         }
31221     }
31222
31223     if ( $id_scan_state && ( !defined($type) || !$type ) ) {
31224
31225         # shouldn't happen:
31226         warning(
31227 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
31228         );
31229         report_definite_bug();
31230     }
31231
31232     TOKENIZER_DEBUG_FLAG_NSCAN && do {
31233         print STDOUT
31234           "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
31235     };
31236     return ( $i, $tok, $type, $id_scan_state );
31237 }
31238
31239 sub check_prototype {
31240     my ( $proto, $package, $subname ) = @_;
31241     return unless ( defined($package) && defined($subname) );
31242     if ( defined($proto) ) {
31243         $proto =~ s/^\s*\(\s*//;
31244         $proto =~ s/\s*\)$//;
31245         if ($proto) {
31246             $is_user_function{$package}{$subname}        = 1;
31247             $user_function_prototype{$package}{$subname} = "($proto)";
31248
31249             # prototypes containing '&' must be treated specially..
31250             if ( $proto =~ /\&/ ) {
31251
31252                 # right curly braces of prototypes ending in
31253                 # '&' may be followed by an operator
31254                 if ( $proto =~ /\&$/ ) {
31255                     $is_block_function{$package}{$subname} = 1;
31256                 }
31257
31258                 # right curly braces of prototypes NOT ending in
31259                 # '&' may NOT be followed by an operator
31260                 elsif ( $proto !~ /\&$/ ) {
31261                     $is_block_list_function{$package}{$subname} = 1;
31262                 }
31263             }
31264         }
31265         else {
31266             $is_constant{$package}{$subname} = 1;
31267         }
31268     }
31269     else {
31270         $is_user_function{$package}{$subname} = 1;
31271     }
31272     return;
31273 }
31274
31275 sub do_scan_package {
31276
31277     # do_scan_package parses a package name
31278     # it is called with $i_beg equal to the index of the first nonblank
31279     # token following a 'package' token.
31280     # USES GLOBAL VARIABLES: $current_package,
31281
31282     # package NAMESPACE
31283     # package NAMESPACE VERSION
31284     # package NAMESPACE BLOCK
31285     # package NAMESPACE VERSION BLOCK
31286     #
31287     # If VERSION is provided, package sets the $VERSION variable in the given
31288     # namespace to a version object with the VERSION provided. VERSION must be
31289     # a "strict" style version number as defined by the version module: a
31290     # positive decimal number (integer or decimal-fraction) without
31291     # exponentiation or else a dotted-decimal v-string with a leading 'v'
31292     # character and at least three components.
31293     # reference http://perldoc.perl.org/functions/package.html
31294
31295     my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
31296         $max_token_index )
31297       = @_;
31298     my $package = undef;
31299     my $pos_beg = $rtoken_map->[$i_beg];
31300     pos($input_line) = $pos_beg;
31301
31302     # handle non-blank line; package name, if any, must follow
31303     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
31304         $package = $1;
31305         $package = ( defined($1) && $1 ) ? $1 : 'main';
31306         $package =~ s/\'/::/g;
31307         if ( $package =~ /^\:/ ) { $package = 'main' . $package }
31308         $package =~ s/::$//;
31309         my $pos  = pos($input_line);
31310         my $numc = $pos - $pos_beg;
31311         $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
31312         $type = 'i';
31313
31314         # Now we must convert back from character position
31315         # to pre_token index.
31316         # I don't think an error flag can occur here ..but ?
31317         my $error;
31318         ( $i, $error ) =
31319           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
31320         if ($error) { warning("Possibly invalid package\n") }
31321         $current_package = $package;
31322
31323         # we should now have package NAMESPACE
31324         # now expecting VERSION, BLOCK, or ; to follow ...
31325         # package NAMESPACE VERSION
31326         # package NAMESPACE BLOCK
31327         # package NAMESPACE VERSION BLOCK
31328         my ( $next_nonblank_token, $i_next ) =
31329           find_next_nonblank_token( $i, $rtokens, $max_token_index );
31330
31331         # check that something recognizable follows, but do not parse.
31332         # A VERSION number will be parsed later as a number or v-string in the
31333         # normal way.  What is important is to set the statement type if
31334         # everything looks okay so that the operator_expected() routine
31335         # knows that the number is in a package statement.
31336         # Examples of valid primitive tokens that might follow are:
31337         #  1235  . ; { } v3  v
31338         if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
31339             $statement_type = $tok;
31340         }
31341         else {
31342             warning(
31343                 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
31344             );
31345         }
31346     }
31347
31348     # no match but line not blank --
31349     # could be a label with name package, like package:  , for example.
31350     else {
31351         $type = 'k';
31352     }
31353
31354     return ( $i, $tok, $type );
31355 }
31356
31357 sub scan_identifier_do {
31358
31359     # This routine assembles tokens into identifiers.  It maintains a
31360     # scan state, id_scan_state.  It updates id_scan_state based upon
31361     # current id_scan_state and token, and returns an updated
31362     # id_scan_state and the next index after the identifier.
31363     # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
31364     # $last_nonblank_type
31365
31366     my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
31367         $expecting, $container_type )
31368       = @_;
31369     my $i_begin   = $i;
31370     my $type      = '';
31371     my $tok_begin = $rtokens->[$i_begin];
31372     if ( $tok_begin eq ':' ) { $tok_begin = '::' }
31373     my $id_scan_state_begin = $id_scan_state;
31374     my $identifier_begin    = $identifier;
31375     my $tok                 = $tok_begin;
31376     my $message             = "";
31377
31378     my $in_prototype_or_signature = $container_type =~ /^sub/;
31379
31380     # these flags will be used to help figure out the type:
31381     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
31382     my $saw_type;
31383
31384     # allow old package separator (') except in 'use' statement
31385     my $allow_tick = ( $last_nonblank_token ne 'use' );
31386
31387     # get started by defining a type and a state if necessary
31388     unless ($id_scan_state) {
31389         $context = UNKNOWN_CONTEXT;
31390
31391         # fixup for digraph
31392         if ( $tok eq '>' ) {
31393             $tok       = '->';
31394             $tok_begin = $tok;
31395         }
31396         $identifier = $tok;
31397
31398         if ( $tok eq '$' || $tok eq '*' ) {
31399             $id_scan_state = '$';
31400             $context       = SCALAR_CONTEXT;
31401         }
31402         elsif ( $tok eq '%' || $tok eq '@' ) {
31403             $id_scan_state = '$';
31404             $context       = LIST_CONTEXT;
31405         }
31406         elsif ( $tok eq '&' ) {
31407             $id_scan_state = '&';
31408         }
31409         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
31410             $saw_alpha     = 0;     # 'sub' is considered type info here
31411             $id_scan_state = '$';
31412             $identifier .= ' ';     # need a space to separate sub from sub name
31413         }
31414         elsif ( $tok eq '::' ) {
31415             $id_scan_state = 'A';
31416         }
31417         elsif ( $tok =~ /^[A-Za-z_]/ ) {
31418             $id_scan_state = ':';
31419         }
31420         elsif ( $tok eq '->' ) {
31421             $id_scan_state = '$';
31422         }
31423         else {
31424
31425             # shouldn't happen
31426             my ( $a, $b, $c ) = caller;
31427             warning("Program Bug: scan_identifier given bad token = $tok \n");
31428             warning("   called from sub $a  line: $c\n");
31429             report_definite_bug();
31430         }
31431         $saw_type = !$saw_alpha;
31432     }
31433     else {
31434         $i--;
31435         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
31436     }
31437
31438     # now loop to gather the identifier
31439     my $i_save = $i;
31440
31441     while ( $i < $max_token_index ) {
31442         $i_save = $i unless ( $tok =~ /^\s*$/ );
31443         $tok = $rtokens->[ ++$i ];
31444
31445         if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
31446             $tok = '::';
31447             $i++;
31448         }
31449
31450         if ( $id_scan_state eq '$' ) {    # starting variable name
31451
31452             if ( $tok eq '$' ) {
31453
31454                 $identifier .= $tok;
31455
31456                 # we've got a punctuation variable if end of line (punct.t)
31457                 if ( $i == $max_token_index ) {
31458                     $type          = 'i';
31459                     $id_scan_state = '';
31460                     last;
31461                 }
31462             }
31463
31464             # POSTDEFREF ->@ ->% ->& ->*
31465             elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
31466                 $identifier .= $tok;
31467             }
31468             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
31469                 $saw_alpha     = 1;
31470                 $id_scan_state = ':';           # now need ::
31471                 $identifier .= $tok;
31472             }
31473             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
31474                 $saw_alpha     = 1;
31475                 $id_scan_state = ':';                 # now need ::
31476                 $identifier .= $tok;
31477
31478                 # Perl will accept leading digits in identifiers,
31479                 # although they may not always produce useful results.
31480                 # Something like $main::0 is ok.  But this also works:
31481                 #
31482                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
31483                 #  howdy::123::bubba();
31484                 #
31485             }
31486             elsif ( $tok =~ /^[0-9]/ ) {    # numeric
31487                 $saw_alpha     = 1;
31488                 $id_scan_state = ':';       # now need ::
31489                 $identifier .= $tok;
31490             }
31491             elsif ( $tok eq '::' ) {
31492                 $id_scan_state = 'A';
31493                 $identifier .= $tok;
31494             }
31495
31496             # $# and POSTDEFREF ->$#
31497             elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) {    # $#array
31498                 $identifier .= $tok;    # keep same state, a $ could follow
31499             }
31500             elsif ( $tok eq '{' ) {
31501
31502                 # check for something like ${#} or ${©}
31503                 if (
31504                     (
31505                            $identifier eq '$'
31506                         || $identifier eq '@'
31507                         || $identifier eq '$#'
31508                     )
31509                     && $i + 2 <= $max_token_index
31510                     && $rtokens->[ $i + 2 ] eq '}'
31511                     && $rtokens->[ $i + 1 ] !~ /[\s\w]/
31512                   )
31513                 {
31514                     my $next2 = $rtokens->[ $i + 2 ];
31515                     my $next1 = $rtokens->[ $i + 1 ];
31516                     $identifier .= $tok . $next1 . $next2;
31517                     $i += 2;
31518                     $id_scan_state = '';
31519                     last;
31520                 }
31521
31522                 # skip something like ${xxx} or ->{
31523                 $id_scan_state = '';
31524
31525                 # if this is the first token of a line, any tokens for this
31526                 # identifier have already been accumulated
31527                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
31528                 $i = $i_save;
31529                 last;
31530             }
31531
31532             # space ok after leading $ % * & @
31533             elsif ( $tok =~ /^\s*$/ ) {
31534
31535                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
31536
31537                     if ( length($identifier) > 1 ) {
31538                         $id_scan_state = '';
31539                         $i             = $i_save;
31540                         $type          = 'i';    # probably punctuation variable
31541                         last;
31542                     }
31543                     else {
31544
31545                         # spaces after $'s are common, and space after @
31546                         # is harmless, so only complain about space
31547                         # after other type characters. Space after $ and
31548                         # @ will be removed in formatting.  Report space
31549                         # after % and * because they might indicate a
31550                         # parsing error.  In other words '% ' might be a
31551                         # modulo operator.  Delete this warning if it
31552                         # gets annoying.
31553                         if ( $identifier !~ /^[\@\$]$/ ) {
31554                             $message =
31555                               "Space in identifier, following $identifier\n";
31556                         }
31557                     }
31558                 }
31559
31560                 # else:
31561                 # space after '->' is ok
31562             }
31563             elsif ( $tok eq '^' ) {
31564
31565                 # check for some special variables like $^W
31566                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
31567                     $identifier .= $tok;
31568                     $id_scan_state = 'A';
31569
31570                     # Perl accepts '$^]' or '@^]', but
31571                     # there must not be a space before the ']'.
31572                     my $next1 = $rtokens->[ $i + 1 ];
31573                     if ( $next1 eq ']' ) {
31574                         $i++;
31575                         $identifier .= $next1;
31576                         $id_scan_state = "";
31577                         last;
31578                     }
31579                 }
31580                 else {
31581                     $id_scan_state = '';
31582                 }
31583             }
31584             else {    # something else
31585
31586                 if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
31587                     $id_scan_state = '';
31588                     $i             = $i_save;
31589                     $type          = 'i';       # probably punctuation variable
31590                     last;
31591                 }
31592
31593                 # check for various punctuation variables
31594                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
31595                     $identifier .= $tok;
31596                 }
31597
31598                 # POSTDEFREF: Postfix reference ->$* ->%*  ->@* ->** ->&* ->$#*
31599                 elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
31600                     $identifier .= $tok;
31601                 }
31602
31603                 elsif ( $identifier eq '$#' ) {
31604
31605                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
31606
31607                     # perl seems to allow just these: $#: $#- $#+
31608                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
31609                         $type = 'i';
31610                         $identifier .= $tok;
31611                     }
31612                     else {
31613                         $i = $i_save;
31614                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
31615                     }
31616                 }
31617                 elsif ( $identifier eq '$$' ) {
31618
31619                     # perl does not allow references to punctuation
31620                     # variables without braces.  For example, this
31621                     # won't work:
31622                     #  $:=\4;
31623                     #  $a = $$:;
31624                     # You would have to use
31625                     #  $a = ${$:};
31626
31627                     $i = $i_save;
31628                     if   ( $tok eq '{' ) { $type = 't' }
31629                     else                 { $type = 'i' }
31630                 }
31631                 elsif ( $identifier eq '->' ) {
31632                     $i = $i_save;
31633                 }
31634                 else {
31635                     $i = $i_save;
31636                     if ( length($identifier) == 1 ) { $identifier = ''; }
31637                 }
31638                 $id_scan_state = '';
31639                 last;
31640             }
31641         }
31642         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
31643
31644             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
31645                 $id_scan_state = ':';          # now need ::
31646                 $saw_alpha     = 1;
31647                 $identifier .= $tok;
31648             }
31649             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
31650                 $id_scan_state = ':';                 # now need ::
31651                 $saw_alpha     = 1;
31652                 $identifier .= $tok;
31653             }
31654             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
31655                 $id_scan_state = ':';       # now need ::
31656                 $saw_alpha     = 1;
31657                 $identifier .= $tok;
31658             }
31659             elsif ( $tok =~ /^\s*$/ ) {     # allow space
31660             }
31661             elsif ( $tok eq '::' ) {        # leading ::
31662                 $id_scan_state = 'A';       # accept alpha next
31663                 $identifier .= $tok;
31664             }
31665             elsif ( $tok eq '{' ) {
31666                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
31667                 $i             = $i_save;
31668                 $id_scan_state = '';
31669                 last;
31670             }
31671             else {
31672
31673                 # punctuation variable?
31674                 # testfile: cunningham4.pl
31675                 #
31676                 # We have to be careful here.  If we are in an unknown state,
31677                 # we will reject the punctuation variable.  In the following
31678                 # example the '&' is a binary operator but we are in an unknown
31679                 # state because there is no sigil on 'Prima', so we don't
31680                 # know what it is.  But it is a bad guess that
31681                 # '&~' is a function variable.
31682                 # $self->{text}->{colorMap}->[
31683                 #   Prima::PodView::COLOR_CODE_FOREGROUND
31684                 #   & ~tb::COLOR_INDEX ] =
31685                 #   $sec->{ColorCode}
31686                 if ( $identifier eq '&' && $expecting ) {
31687                     $identifier .= $tok;
31688                 }
31689                 else {
31690                     $identifier = '';
31691                     $i          = $i_save;
31692                     $type       = '&';
31693                 }
31694                 $id_scan_state = '';
31695                 last;
31696             }
31697         }
31698         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
31699
31700             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
31701                 $identifier .= $tok;
31702                 $id_scan_state = ':';        # now need ::
31703                 $saw_alpha     = 1;
31704             }
31705             elsif ( $tok eq "'" && $allow_tick ) {
31706                 $identifier .= $tok;
31707                 $id_scan_state = ':';        # now need ::
31708                 $saw_alpha     = 1;
31709             }
31710             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
31711                 $identifier .= $tok;
31712                 $id_scan_state = ':';        # now need ::
31713                 $saw_alpha     = 1;
31714             }
31715             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
31716                 $id_scan_state = '(';
31717                 $identifier .= $tok;
31718             }
31719             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
31720                 $id_scan_state = ')';
31721                 $identifier .= $tok;
31722             }
31723             else {
31724                 $id_scan_state = '';
31725                 $i             = $i_save;
31726                 last;
31727             }
31728         }
31729         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
31730
31731             if ( $tok eq '::' ) {            # got it
31732                 $identifier .= $tok;
31733                 $id_scan_state = 'A';        # now require alpha
31734             }
31735             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
31736                 $identifier .= $tok;
31737                 $id_scan_state = ':';           # now need ::
31738                 $saw_alpha     = 1;
31739             }
31740             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
31741                 $identifier .= $tok;
31742                 $id_scan_state = ':';           # now need ::
31743                 $saw_alpha     = 1;
31744             }
31745             elsif ( $tok eq "'" && $allow_tick ) {    # tick
31746
31747                 if ( $is_keyword{$identifier} ) {
31748                     $id_scan_state = '';              # that's all
31749                     $i             = $i_save;
31750                 }
31751                 else {
31752                     $identifier .= $tok;
31753                 }
31754             }
31755             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
31756                 $id_scan_state = '(';
31757                 $identifier .= $tok;
31758             }
31759             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
31760                 $id_scan_state = ')';
31761                 $identifier .= $tok;
31762             }
31763             else {
31764                 $id_scan_state = '';        # that's all
31765                 $i             = $i_save;
31766                 last;
31767             }
31768         }
31769         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
31770
31771             if ( $tok eq '(' ) {             # got it
31772                 $identifier .= $tok;
31773                 $id_scan_state = ')';        # now find the end of it
31774             }
31775             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
31776                 $identifier .= $tok;
31777             }
31778             else {
31779                 $id_scan_state = '';         # that's all - no prototype
31780                 $i             = $i_save;
31781                 last;
31782             }
31783         }
31784         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
31785
31786             if ( $tok eq ')' ) {             # got it
31787                 $identifier .= $tok;
31788                 $id_scan_state = '';         # all done
31789                 last;
31790             }
31791             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
31792                 $identifier .= $tok;
31793             }
31794             else {    # probable error in script, but keep going
31795                 warning("Unexpected '$tok' while seeking end of prototype\n");
31796                 $identifier .= $tok;
31797             }
31798         }
31799         else {        # can get here due to error in initialization
31800             $id_scan_state = '';
31801             $i             = $i_save;
31802             last;
31803         }
31804     }
31805
31806     if ( $id_scan_state eq ')' ) {
31807         warning("Hit end of line while seeking ) to end prototype\n");
31808     }
31809
31810     # once we enter the actual identifier, it may not extend beyond
31811     # the end of the current line
31812     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
31813         $id_scan_state = '';
31814     }
31815     if ( $i < 0 ) { $i = 0 }
31816
31817     unless ($type) {
31818
31819         if ($saw_type) {
31820
31821             if ($saw_alpha) {
31822                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
31823                     $type = 'w';
31824                 }
31825                 else { $type = 'i' }
31826             }
31827             elsif ( $identifier eq '->' ) {
31828                 $type = '->';
31829             }
31830             elsif (
31831                 ( length($identifier) > 1 )
31832
31833                 # In something like '@$=' we have an identifier '@$'
31834                 # In something like '$${' we have type '$$' (and only
31835                 # part of an identifier)
31836                 && !( $identifier =~ /\$$/ && $tok eq '{' )
31837                 && ( $identifier !~ /^(sub |package )$/ )
31838               )
31839             {
31840                 $type = 'i';
31841             }
31842             else { $type = 't' }
31843         }
31844         elsif ($saw_alpha) {
31845
31846             # type 'w' includes anything without leading type info
31847             # ($,%,@,*) including something like abc::def::ghi
31848             $type = 'w';
31849         }
31850         else {
31851             $type = '';
31852         }    # this can happen on a restart
31853     }
31854
31855     if ($identifier) {
31856         $tok = $identifier;
31857         if ($message) { write_logfile_entry($message) }
31858     }
31859     else {
31860         $tok = $tok_begin;
31861         $i   = $i_begin;
31862     }
31863
31864     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
31865         my ( $a, $b, $c ) = caller;
31866         print STDOUT
31867 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
31868         print STDOUT
31869 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
31870     };
31871     return ( $i, $tok, $type, $id_scan_state, $identifier );
31872 }
31873
31874 {
31875
31876     # saved package and subnames in case prototype is on separate line
31877     my ( $package_saved, $subname_saved );
31878
31879     sub do_scan_sub {
31880
31881         # do_scan_sub parses a sub name and prototype
31882         # it is called with $i_beg equal to the index of the first nonblank
31883         # token following a 'sub' token.
31884
31885         # TODO: add future error checks to be sure we have a valid
31886         # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
31887         # a name is given if and only if a non-anonymous sub is
31888         # appropriate.
31889         # USES GLOBAL VARS: $current_package, $last_nonblank_token,
31890         # $in_attribute_list, %saw_function_definition,
31891         # $statement_type
31892
31893         my (
31894             $input_line, $i,             $i_beg,
31895             $tok,        $type,          $rtokens,
31896             $rtoken_map, $id_scan_state, $max_token_index
31897         ) = @_;
31898         $id_scan_state = "";    # normally we get everything in one call
31899         my $subname = undef;
31900         my $package = undef;
31901         my $proto   = undef;
31902         my $attrs   = undef;
31903         my $match;
31904
31905         my $pos_beg = $rtoken_map->[$i_beg];
31906         pos($input_line) = $pos_beg;
31907
31908         # Look for the sub NAME
31909         if (
31910             $input_line =~ m/\G\s*
31911         ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
31912         (\w+)               # NAME    - required
31913         /gcx
31914           )
31915         {
31916             $match   = 1;
31917             $subname = $2;
31918
31919             $package = ( defined($1) && $1 ) ? $1 : $current_package;
31920             $package =~ s/\'/::/g;
31921             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
31922             $package =~ s/::$//;
31923             my $pos  = pos($input_line);
31924             my $numc = $pos - $pos_beg;
31925             $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
31926             $type = 'i';
31927         }
31928
31929         # Now look for PROTO ATTRS
31930         # Look for prototype/attributes which are usually on the same
31931         # line as the sub name but which might be on a separate line.
31932         # For example, we might have an anonymous sub with attributes,
31933         # or a prototype on a separate line from its sub name
31934
31935         # NOTE: We only want to parse PROTOTYPES here. If we see anything that
31936         # does not look like a prototype, we assume it is a SIGNATURE and we
31937         # will stop and let the the standard tokenizer handle it.  In
31938         # particular, we stop if we see any nested parens, braces, or commas.
31939         my $saw_opening_paren = $input_line =~ /\G\s*\(/;
31940         if (
31941             $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))?  # PROTO
31942             (\s*:)?                              # ATTRS leading ':'
31943             /gcx
31944             && ( $1 || $2 )
31945           )
31946         {
31947             $proto = $1;
31948             $attrs = $2;
31949
31950             # If we also found the sub name on this call then append PROTO.
31951             # This is not necessary but for compatability with previous
31952             # versions when the -csc flag is used:
31953             if ( $match && $proto ) {
31954                 $tok .= $proto;
31955             }
31956             $match ||= 1;
31957
31958             # Handle prototype on separate line from subname
31959             if ($subname_saved) {
31960                 $package = $package_saved;
31961                 $subname = $subname_saved;
31962                 $tok     = $last_nonblank_token;
31963             }
31964             $type = 'i';
31965         }
31966
31967         if ($match) {
31968
31969             # ATTRS: if there are attributes, back up and let the ':' be
31970             # found later by the scanner.
31971             my $pos = pos($input_line);
31972             if ($attrs) {
31973                 $pos -= length($attrs);
31974             }
31975
31976             my $next_nonblank_token = $tok;
31977
31978             # catch case of line with leading ATTR ':' after anonymous sub
31979             if ( $pos == $pos_beg && $tok eq ':' ) {
31980                 $type              = 'A';
31981                 $in_attribute_list = 1;
31982             }
31983
31984             # Otherwise, if we found a match we must convert back from
31985             # string position to the pre_token index for continued parsing.
31986             else {
31987
31988                 # I don't think an error flag can occur here ..but ?
31989                 my $error;
31990                 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
31991                     $max_token_index );
31992                 if ($error) { warning("Possibly invalid sub\n") }
31993
31994                 # check for multiple definitions of a sub
31995                 ( $next_nonblank_token, my $i_next ) =
31996                   find_next_nonblank_token_on_this_line( $i, $rtokens,
31997                     $max_token_index );
31998             }
31999
32000             if ( $next_nonblank_token =~ /^(\s*|#)$/ )
32001             {    # skip blank or side comment
32002                 my ( $rpre_tokens, $rpre_types ) =
32003                   peek_ahead_for_n_nonblank_pre_tokens(1);
32004                 if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
32005                     $next_nonblank_token = $rpre_tokens->[0];
32006                 }
32007                 else {
32008                     $next_nonblank_token = '}';
32009                 }
32010             }
32011             $package_saved = "";
32012             $subname_saved = "";
32013
32014             # See what's next...
32015             if ( $next_nonblank_token eq '{' ) {
32016                 if ($subname) {
32017
32018                     # Check for multiple definitions of a sub, but
32019                     # it is ok to have multiple sub BEGIN, etc,
32020                     # so we do not complain if name is all caps
32021                     if (   $saw_function_definition{$package}{$subname}
32022                         && $subname !~ /^[A-Z]+$/ )
32023                     {
32024                         my $lno = $saw_function_definition{$package}{$subname};
32025                         warning(
32026 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
32027                         );
32028                     }
32029                     $saw_function_definition{$package}{$subname} =
32030                       $tokenizer_self->{_last_line_number};
32031                 }
32032             }
32033             elsif ( $next_nonblank_token eq ';' ) {
32034             }
32035             elsif ( $next_nonblank_token eq '}' ) {
32036             }
32037
32038             # ATTRS - if an attribute list follows, remember the name
32039             # of the sub so the next opening brace can be labeled.
32040             # Setting 'statement_type' causes any ':'s to introduce
32041             # attributes.
32042             elsif ( $next_nonblank_token eq ':' ) {
32043                 $statement_type = $tok;
32044             }
32045
32046             # if we stopped before an open paren ...
32047             elsif ( $next_nonblank_token eq '(' ) {
32048
32049                 # If we DID NOT see this paren above then it must be on the
32050                 # next line so we will set a flag to come back here and see if
32051                 # it is a PROTOTYPE
32052
32053                 # Otherwise, we assume it is a SIGNATURE rather than a
32054                 # PROTOTYPE and let the normal tokenizer handle it as a list
32055                 if ( !$saw_opening_paren ) {
32056                     $id_scan_state = 'sub';     # we must come back to get proto
32057                     $package_saved = $package;
32058                     $subname_saved = $subname;
32059                 }
32060                 $statement_type = $tok;
32061             }
32062             elsif ($next_nonblank_token) {      # EOF technically ok
32063                 warning(
32064 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
32065                 );
32066             }
32067             check_prototype( $proto, $package, $subname );
32068         }
32069
32070         # no match but line not blank
32071         else {
32072         }
32073         return ( $i, $tok, $type, $id_scan_state );
32074     }
32075 }
32076
32077 #########i###############################################################
32078 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
32079 #########################################################################
32080
32081 sub find_next_nonblank_token {
32082     my ( $i, $rtokens, $max_token_index ) = @_;
32083
32084     if ( $i >= $max_token_index ) {
32085         if ( !peeked_ahead() ) {
32086             peeked_ahead(1);
32087             $rtokens =
32088               peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
32089         }
32090     }
32091     my $next_nonblank_token = $rtokens->[ ++$i ];
32092
32093     if ( $next_nonblank_token =~ /^\s*$/ ) {
32094         $next_nonblank_token = $rtokens->[ ++$i ];
32095     }
32096     return ( $next_nonblank_token, $i );
32097 }
32098
32099 sub numerator_expected {
32100
32101     # this is a filter for a possible numerator, in support of guessing
32102     # for the / pattern delimiter token.
32103     # returns -
32104     #   1 - yes
32105     #   0 - can't tell
32106     #  -1 - no
32107     # Note: I am using the convention that variables ending in
32108     # _expected have these 3 possible values.
32109     my ( $i, $rtokens, $max_token_index ) = @_;
32110     my $numerator_expected = 0;
32111
32112     my $next_token = $rtokens->[ $i + 1 ];
32113     if ( $next_token eq '=' ) { $i++; }    # handle /=
32114     my ( $next_nonblank_token, $i_next ) =
32115       find_next_nonblank_token( $i, $rtokens, $max_token_index );
32116
32117     if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
32118         $numerator_expected = 1;
32119     }
32120     else {
32121
32122         if ( $next_nonblank_token =~ /^\s*$/ ) {
32123             $numerator_expected = 0;
32124         }
32125         else {
32126             $numerator_expected = -1;
32127         }
32128     }
32129     return $numerator_expected;
32130 }
32131
32132 sub pattern_expected {
32133
32134     # This is the start of a filter for a possible pattern.
32135     # It looks at the token after a possible pattern and tries to
32136     # determine if that token could end a pattern.
32137     # returns -
32138     #   1 - yes
32139     #   0 - can't tell
32140     #  -1 - no
32141     my ( $i, $rtokens, $max_token_index ) = @_;
32142     my $is_pattern = 0;
32143
32144     my $next_token = $rtokens->[ $i + 1 ];
32145     if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; }   # skip possible modifier
32146     my ( $next_nonblank_token, $i_next ) =
32147       find_next_nonblank_token( $i, $rtokens, $max_token_index );
32148
32149     # list of tokens which may follow a pattern
32150     # (can probably be expanded)
32151     if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
32152     {
32153         $is_pattern = 1;
32154     }
32155     else {
32156
32157         if ( $next_nonblank_token =~ /^\s*$/ ) {
32158             $is_pattern = 0;
32159         }
32160         else {
32161             $is_pattern = -1;
32162         }
32163     }
32164     return $is_pattern;
32165 }
32166
32167 sub find_next_nonblank_token_on_this_line {
32168     my ( $i, $rtokens, $max_token_index ) = @_;
32169     my $next_nonblank_token;
32170
32171     if ( $i < $max_token_index ) {
32172         $next_nonblank_token = $rtokens->[ ++$i ];
32173
32174         if ( $next_nonblank_token =~ /^\s*$/ ) {
32175
32176             if ( $i < $max_token_index ) {
32177                 $next_nonblank_token = $rtokens->[ ++$i ];
32178             }
32179         }
32180     }
32181     else {
32182         $next_nonblank_token = "";
32183     }
32184     return ( $next_nonblank_token, $i );
32185 }
32186
32187 sub find_angle_operator_termination {
32188
32189     # We are looking at a '<' and want to know if it is an angle operator.
32190     # We are to return:
32191     #   $i = pretoken index of ending '>' if found, current $i otherwise
32192     #   $type = 'Q' if found, '>' otherwise
32193     my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
32194     my $i    = $i_beg;
32195     my $type = '<';
32196     pos($input_line) = 1 + $rtoken_map->[$i];
32197
32198     my $filter;
32199
32200     # we just have to find the next '>' if a term is expected
32201     if ( $expecting == TERM ) { $filter = '[\>]' }
32202
32203     # we have to guess if we don't know what is expected
32204     elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
32205
32206     # shouldn't happen - we shouldn't be here if operator is expected
32207     else { warning("Program Bug in find_angle_operator_termination\n") }
32208
32209     # To illustrate what we might be looking at, in case we are
32210     # guessing, here are some examples of valid angle operators
32211     # (or file globs):
32212     #  <tmp_imp/*>
32213     #  <FH>
32214     #  <$fh>
32215     #  <*.c *.h>
32216     #  <_>
32217     #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
32218     #  <${PREFIX}*img*.$IMAGE_TYPE>
32219     #  <img*.$IMAGE_TYPE>
32220     #  <Timg*.$IMAGE_TYPE>
32221     #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
32222     #
32223     # Here are some examples of lines which do not have angle operators:
32224     #  return undef unless $self->[2]++ < $#{$self->[1]};
32225     #  < 2  || @$t >
32226     #
32227     # the following line from dlister.pl caused trouble:
32228     #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
32229     #
32230     # If the '<' starts an angle operator, it must end on this line and
32231     # it must not have certain characters like ';' and '=' in it.  I use
32232     # this to limit the testing.  This filter should be improved if
32233     # possible.
32234
32235     if ( $input_line =~ /($filter)/g ) {
32236
32237         if ( $1 eq '>' ) {
32238
32239             # We MAY have found an angle operator termination if we get
32240             # here, but we need to do more to be sure we haven't been
32241             # fooled.
32242             my $pos = pos($input_line);
32243
32244             my $pos_beg = $rtoken_map->[$i];
32245             my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
32246
32247             # Reject if the closing '>' follows a '-' as in:
32248             # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
32249             if ( $expecting eq UNKNOWN ) {
32250                 my $check = substr( $input_line, $pos - 2, 1 );
32251                 if ( $check eq '-' ) {
32252                     return ( $i, $type );
32253                 }
32254             }
32255
32256             ######################################debug#####
32257             #write_diagnostics( "ANGLE? :$str\n");
32258             #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
32259             ######################################debug#####
32260             $type = 'Q';
32261             my $error;
32262             ( $i, $error ) =
32263               inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
32264
32265             # It may be possible that a quote ends midway in a pretoken.
32266             # If this happens, it may be necessary to split the pretoken.
32267             if ($error) {
32268                 warning(
32269                     "Possible tokinization error..please check this line\n");
32270                 report_possible_bug();
32271             }
32272
32273             # Now let's see where we stand....
32274             # OK if math op not possible
32275             if ( $expecting == TERM ) {
32276             }
32277
32278             # OK if there are no more than 2 pre-tokens inside
32279             # (not possible to write 2 token math between < and >)
32280             # This catches most common cases
32281             elsif ( $i <= $i_beg + 3 ) {
32282                 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
32283             }
32284
32285             # Not sure..
32286             else {
32287
32288                 # Let's try a Brace Test: any braces inside must balance
32289                 my $br = 0;
32290                 while ( $str =~ /\{/g ) { $br++ }
32291                 while ( $str =~ /\}/g ) { $br-- }
32292                 my $sb = 0;
32293                 while ( $str =~ /\[/g ) { $sb++ }
32294                 while ( $str =~ /\]/g ) { $sb-- }
32295                 my $pr = 0;
32296                 while ( $str =~ /\(/g ) { $pr++ }
32297                 while ( $str =~ /\)/g ) { $pr-- }
32298
32299                 # if braces do not balance - not angle operator
32300                 if ( $br || $sb || $pr ) {
32301                     $i    = $i_beg;
32302                     $type = '<';
32303                     write_diagnostics(
32304                         "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
32305                 }
32306
32307                 # we should keep doing more checks here...to be continued
32308                 # Tentatively accepting this as a valid angle operator.
32309                 # There are lots more things that can be checked.
32310                 else {
32311                     write_diagnostics(
32312                         "ANGLE-Guessing yes: $str expecting=$expecting\n");
32313                     write_logfile_entry("Guessing angle operator here: $str\n");
32314                 }
32315             }
32316         }
32317
32318         # didn't find ending >
32319         else {
32320             if ( $expecting == TERM ) {
32321                 warning("No ending > for angle operator\n");
32322             }
32323         }
32324     }
32325     return ( $i, $type );
32326 }
32327
32328 sub scan_number_do {
32329
32330     #  scan a number in any of the formats that Perl accepts
32331     #  Underbars (_) are allowed in decimal numbers.
32332     #  input parameters -
32333     #      $input_line  - the string to scan
32334     #      $i           - pre_token index to start scanning
32335     #    $rtoken_map    - reference to the pre_token map giving starting
32336     #                    character position in $input_line of token $i
32337     #  output parameters -
32338     #    $i            - last pre_token index of the number just scanned
32339     #    number        - the number (characters); or undef if not a number
32340
32341     my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
32342     my $pos_beg = $rtoken_map->[$i];
32343     my $pos;
32344     my $i_begin = $i;
32345     my $number  = undef;
32346     my $type    = $input_type;
32347
32348     my $first_char = substr( $input_line, $pos_beg, 1 );
32349
32350     # Look for bad starting characters; Shouldn't happen..
32351     if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
32352         warning("Program bug - scan_number given character $first_char\n");
32353         report_definite_bug();
32354         return ( $i, $type, $number );
32355     }
32356
32357     # handle v-string without leading 'v' character ('Two Dot' rule)
32358     # (vstring.t)
32359     # TODO: v-strings may contain underscores
32360     pos($input_line) = $pos_beg;
32361     if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
32362         $pos = pos($input_line);
32363         my $numc = $pos - $pos_beg;
32364         $number = substr( $input_line, $pos_beg, $numc );
32365         $type = 'v';
32366         report_v_string($number);
32367     }
32368
32369     # handle octal, hex, binary
32370     if ( !defined($number) ) {
32371         pos($input_line) = $pos_beg;
32372         if ( $input_line =~
32373             /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
32374         {
32375             $pos = pos($input_line);
32376             my $numc = $pos - $pos_beg;
32377             $number = substr( $input_line, $pos_beg, $numc );
32378             $type = 'n';
32379         }
32380     }
32381
32382     # handle decimal
32383     if ( !defined($number) ) {
32384         pos($input_line) = $pos_beg;
32385
32386         if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
32387             $pos = pos($input_line);
32388
32389             # watch out for things like 0..40 which would give 0. by this;
32390             if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
32391                 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
32392             {
32393                 $pos--;
32394             }
32395             my $numc = $pos - $pos_beg;
32396             $number = substr( $input_line, $pos_beg, $numc );
32397             $type = 'n';
32398         }
32399     }
32400
32401     # filter out non-numbers like e + - . e2  .e3 +e6
32402     # the rule: at least one digit, and any 'e' must be preceded by a digit
32403     if (
32404         $number !~ /\d/    # no digits
32405         || (   $number =~ /^(.*)[eE]/
32406             && $1 !~ /\d/ )    # or no digits before the 'e'
32407       )
32408     {
32409         $number = undef;
32410         $type   = $input_type;
32411         return ( $i, $type, $number );
32412     }
32413
32414     # Found a number; now we must convert back from character position
32415     # to pre_token index. An error here implies user syntax error.
32416     # An example would be an invalid octal number like '009'.
32417     my $error;
32418     ( $i, $error ) =
32419       inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
32420     if ($error) { warning("Possibly invalid number\n") }
32421
32422     return ( $i, $type, $number );
32423 }
32424
32425 sub inverse_pretoken_map {
32426
32427     # Starting with the current pre_token index $i, scan forward until
32428     # finding the index of the next pre_token whose position is $pos.
32429     my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
32430     my $error = 0;
32431
32432     while ( ++$i <= $max_token_index ) {
32433
32434         if ( $pos <= $rtoken_map->[$i] ) {
32435
32436             # Let the calling routine handle errors in which we do not
32437             # land on a pre-token boundary.  It can happen by running
32438             # perltidy on some non-perl scripts, for example.
32439             if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
32440             $i--;
32441             last;
32442         }
32443     }
32444     return ( $i, $error );
32445 }
32446
32447 sub find_here_doc {
32448
32449     # find the target of a here document, if any
32450     # input parameters:
32451     #   $i - token index of the second < of <<
32452     #   ($i must be less than the last token index if this is called)
32453     # output parameters:
32454     #   $found_target = 0 didn't find target; =1 found target
32455     #   HERE_TARGET - the target string (may be empty string)
32456     #   $i - unchanged if not here doc,
32457     #    or index of the last token of the here target
32458     #   $saw_error - flag noting unbalanced quote on here target
32459     my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
32460     my $ibeg                 = $i;
32461     my $found_target         = 0;
32462     my $here_doc_target      = '';
32463     my $here_quote_character = '';
32464     my $saw_error            = 0;
32465     my ( $next_nonblank_token, $i_next_nonblank, $next_token );
32466     $next_token = $rtokens->[ $i + 1 ];
32467
32468     # perl allows a backslash before the target string (heredoc.t)
32469     my $backslash = 0;
32470     if ( $next_token eq '\\' ) {
32471         $backslash  = 1;
32472         $next_token = $rtokens->[ $i + 2 ];
32473     }
32474
32475     ( $next_nonblank_token, $i_next_nonblank ) =
32476       find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
32477
32478     if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
32479
32480         my $in_quote    = 1;
32481         my $quote_depth = 0;
32482         my $quote_pos   = 0;
32483         my $quoted_string;
32484
32485         (
32486             $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
32487             $quoted_string
32488           )
32489           = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
32490             $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
32491
32492         if ($in_quote) {    # didn't find end of quote, so no target found
32493             $i = $ibeg;
32494             if ( $expecting == TERM ) {
32495                 warning(
32496 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
32497                 );
32498                 $saw_error = 1;
32499             }
32500         }
32501         else {              # found ending quote
32502             ##my $j;
32503             $found_target = 1;
32504
32505             my $tokj;
32506             foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
32507                 $tokj = $rtokens->[$j];
32508
32509                 # we have to remove any backslash before the quote character
32510                 # so that the here-doc-target exactly matches this string
32511                 next
32512                   if ( $tokj eq "\\"
32513                     && $j < $i - 1
32514                     && $rtokens->[ $j + 1 ] eq $here_quote_character );
32515                 $here_doc_target .= $tokj;
32516             }
32517         }
32518     }
32519
32520     elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
32521         $found_target = 1;
32522         write_logfile_entry(
32523             "found blank here-target after <<; suggest using \"\"\n");
32524         $i = $ibeg;
32525     }
32526     elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
32527
32528         my $here_doc_expected;
32529         if ( $expecting == UNKNOWN ) {
32530             $here_doc_expected = guess_if_here_doc($next_token);
32531         }
32532         else {
32533             $here_doc_expected = 1;
32534         }
32535
32536         if ($here_doc_expected) {
32537             $found_target    = 1;
32538             $here_doc_target = $next_token;
32539             $i               = $ibeg + 1;
32540         }
32541
32542     }
32543     else {
32544
32545         if ( $expecting == TERM ) {
32546             $found_target = 1;
32547             write_logfile_entry("Note: bare here-doc operator <<\n");
32548         }
32549         else {
32550             $i = $ibeg;
32551         }
32552     }
32553
32554     # patch to neglect any prepended backslash
32555     if ( $found_target && $backslash ) { $i++ }
32556
32557     return ( $found_target, $here_doc_target, $here_quote_character, $i,
32558         $saw_error );
32559 }
32560
32561 sub do_quote {
32562
32563     # follow (or continue following) quoted string(s)
32564     # $in_quote return code:
32565     #   0 - ok, found end
32566     #   1 - still must find end of quote whose target is $quote_character
32567     #   2 - still looking for end of first of two quotes
32568     #
32569     # Returns updated strings:
32570     #  $quoted_string_1 = quoted string seen while in_quote=1
32571     #  $quoted_string_2 = quoted string seen while in_quote=2
32572     my (
32573         $i,               $in_quote,    $quote_character,
32574         $quote_pos,       $quote_depth, $quoted_string_1,
32575         $quoted_string_2, $rtokens,     $rtoken_map,
32576         $max_token_index
32577     ) = @_;
32578
32579     my $in_quote_starting = $in_quote;
32580
32581     my $quoted_string;
32582     if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
32583         my $ibeg = $i;
32584         (
32585             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
32586             $quoted_string
32587           )
32588           = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
32589             $quote_pos, $quote_depth, $max_token_index );
32590         $quoted_string_2 .= $quoted_string;
32591         if ( $in_quote == 1 ) {
32592             if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
32593             $quote_character = '';
32594         }
32595         else {
32596             $quoted_string_2 .= "\n";
32597         }
32598     }
32599
32600     if ( $in_quote == 1 ) {    # one (more) quote to follow
32601         my $ibeg = $i;
32602         (
32603             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
32604             $quoted_string
32605           )
32606           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
32607             $quote_pos, $quote_depth, $max_token_index );
32608         $quoted_string_1 .= $quoted_string;
32609         if ( $in_quote == 1 ) {
32610             $quoted_string_1 .= "\n";
32611         }
32612     }
32613     return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
32614         $quoted_string_1, $quoted_string_2 );
32615 }
32616
32617 sub follow_quoted_string {
32618
32619     # scan for a specific token, skipping escaped characters
32620     # if the quote character is blank, use the first non-blank character
32621     # input parameters:
32622     #   $rtokens = reference to the array of tokens
32623     #   $i = the token index of the first character to search
32624     #   $in_quote = number of quoted strings being followed
32625     #   $beginning_tok = the starting quote character
32626     #   $quote_pos = index to check next for alphanumeric delimiter
32627     # output parameters:
32628     #   $i = the token index of the ending quote character
32629     #   $in_quote = decremented if found end, unchanged if not
32630     #   $beginning_tok = the starting quote character
32631     #   $quote_pos = index to check next for alphanumeric delimiter
32632     #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
32633     #   $quoted_string = the text of the quote (without quotation tokens)
32634     my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
32635         $max_token_index )
32636       = @_;
32637     my ( $tok, $end_tok );
32638     my $i             = $i_beg - 1;
32639     my $quoted_string = "";
32640
32641     TOKENIZER_DEBUG_FLAG_QUOTE && do {
32642         print STDOUT
32643 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
32644     };
32645
32646     # get the corresponding end token
32647     if ( $beginning_tok !~ /^\s*$/ ) {
32648         $end_tok = matching_end_token($beginning_tok);
32649     }
32650
32651     # a blank token means we must find and use the first non-blank one
32652     else {
32653         my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
32654
32655         while ( $i < $max_token_index ) {
32656             $tok = $rtokens->[ ++$i ];
32657
32658             if ( $tok !~ /^\s*$/ ) {
32659
32660                 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
32661                     $i = $max_token_index;
32662                 }
32663                 else {
32664
32665                     if ( length($tok) > 1 ) {
32666                         if ( $quote_pos <= 0 ) { $quote_pos = 1 }
32667                         $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
32668                     }
32669                     else {
32670                         $beginning_tok = $tok;
32671                         $quote_pos     = 0;
32672                     }
32673                     $end_tok     = matching_end_token($beginning_tok);
32674                     $quote_depth = 1;
32675                     last;
32676                 }
32677             }
32678             else {
32679                 $allow_quote_comments = 1;
32680             }
32681         }
32682     }
32683
32684     # There are two different loops which search for the ending quote
32685     # character.  In the rare case of an alphanumeric quote delimiter, we
32686     # have to look through alphanumeric tokens character-by-character, since
32687     # the pre-tokenization process combines multiple alphanumeric
32688     # characters, whereas for a non-alphanumeric delimiter, only tokens of
32689     # length 1 can match.
32690
32691     ###################################################################
32692     # Case 1 (rare): loop for case of alphanumeric quote delimiter..
32693     # "quote_pos" is the position the current word to begin searching
32694     ###################################################################
32695     if ( $beginning_tok =~ /\w/ ) {
32696
32697         # Note this because it is not recommended practice except
32698         # for obfuscated perl contests
32699         if ( $in_quote == 1 ) {
32700             write_logfile_entry(
32701                 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
32702         }
32703
32704         while ( $i < $max_token_index ) {
32705
32706             if ( $quote_pos == 0 || ( $i < 0 ) ) {
32707                 $tok = $rtokens->[ ++$i ];
32708
32709                 if ( $tok eq '\\' ) {
32710
32711                     # retain backslash unless it hides the end token
32712                     $quoted_string .= $tok
32713                       unless $rtokens->[ $i + 1 ] eq $end_tok;
32714                     $quote_pos++;
32715                     last if ( $i >= $max_token_index );
32716                     $tok = $rtokens->[ ++$i ];
32717                 }
32718             }
32719             my $old_pos = $quote_pos;
32720
32721             unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
32722             {
32723
32724             }
32725             $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
32726
32727             if ( $quote_pos > 0 ) {
32728
32729                 $quoted_string .=
32730                   substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
32731
32732                 $quote_depth--;
32733
32734                 if ( $quote_depth == 0 ) {
32735                     $in_quote--;
32736                     last;
32737                 }
32738             }
32739             else {
32740                 $quoted_string .= substr( $tok, $old_pos );
32741             }
32742         }
32743     }
32744
32745     ########################################################################
32746     # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
32747     ########################################################################
32748     else {
32749
32750         while ( $i < $max_token_index ) {
32751             $tok = $rtokens->[ ++$i ];
32752
32753             if ( $tok eq $end_tok ) {
32754                 $quote_depth--;
32755
32756                 if ( $quote_depth == 0 ) {
32757                     $in_quote--;
32758                     last;
32759                 }
32760             }
32761             elsif ( $tok eq $beginning_tok ) {
32762                 $quote_depth++;
32763             }
32764             elsif ( $tok eq '\\' ) {
32765
32766                 # retain backslash unless it hides the beginning or end token
32767                 $tok = $rtokens->[ ++$i ];
32768                 $quoted_string .= '\\'
32769                   unless ( $tok eq $end_tok || $tok eq $beginning_tok );
32770             }
32771             $quoted_string .= $tok;
32772         }
32773     }
32774     if ( $i > $max_token_index ) { $i = $max_token_index }
32775     return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
32776         $quoted_string );
32777 }
32778
32779 sub indicate_error {
32780     my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
32781     interrupt_logfile();
32782     warning($msg);
32783     write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
32784     resume_logfile();
32785     return;
32786 }
32787
32788 sub write_error_indicator_pair {
32789     my ( $line_number, $input_line, $pos, $carrat ) = @_;
32790     my ( $offset, $numbered_line, $underline ) =
32791       make_numbered_line( $line_number, $input_line, $pos );
32792     $underline = write_on_underline( $underline, $pos - $offset, $carrat );
32793     warning( $numbered_line . "\n" );
32794     $underline =~ s/\s*$//;
32795     warning( $underline . "\n" );
32796     return;
32797 }
32798
32799 sub make_numbered_line {
32800
32801     #  Given an input line, its line number, and a character position of
32802     #  interest, create a string not longer than 80 characters of the form
32803     #     $lineno: sub_string
32804     #  such that the sub_string of $str contains the position of interest
32805     #
32806     #  Here is an example of what we want, in this case we add trailing
32807     #  '...' because the line is long.
32808     #
32809     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
32810     #
32811     #  Here is another example, this time in which we used leading '...'
32812     #  because of excessive length:
32813     #
32814     # 2: ... er of the World Wide Web Consortium's
32815     #
32816     #  input parameters are:
32817     #   $lineno = line number
32818     #   $str = the text of the line
32819     #   $pos = position of interest (the error) : 0 = first character
32820     #
32821     #   We return :
32822     #     - $offset = an offset which corrects the position in case we only
32823     #       display part of a line, such that $pos-$offset is the effective
32824     #       position from the start of the displayed line.
32825     #     - $numbered_line = the numbered line as above,
32826     #     - $underline = a blank 'underline' which is all spaces with the same
32827     #       number of characters as the numbered line.
32828
32829     my ( $lineno, $str, $pos ) = @_;
32830     my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
32831     my $excess = length($str) - $offset - 68;
32832     my $numc   = ( $excess > 0 ) ? 68 : undef;
32833
32834     if ( defined($numc) ) {
32835         if ( $offset == 0 ) {
32836             $str = substr( $str, $offset, $numc - 4 ) . " ...";
32837         }
32838         else {
32839             $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
32840         }
32841     }
32842     else {
32843
32844         if ( $offset == 0 ) {
32845         }
32846         else {
32847             $str = "... " . substr( $str, $offset + 4 );
32848         }
32849     }
32850
32851     my $numbered_line = sprintf( "%d: ", $lineno );
32852     $offset -= length($numbered_line);
32853     $numbered_line .= $str;
32854     my $underline = " " x length($numbered_line);
32855     return ( $offset, $numbered_line, $underline );
32856 }
32857
32858 sub write_on_underline {
32859
32860     # The "underline" is a string that shows where an error is; it starts
32861     # out as a string of blanks with the same length as the numbered line of
32862     # code above it, and we have to add marking to show where an error is.
32863     # In the example below, we want to write the string '--^' just below
32864     # the line of bad code:
32865     #
32866     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
32867     #                 ---^
32868     # We are given the current underline string, plus a position and a
32869     # string to write on it.
32870     #
32871     # In the above example, there will be 2 calls to do this:
32872     # First call:  $pos=19, pos_chr=^
32873     # Second call: $pos=16, pos_chr=---
32874     #
32875     # This is a trivial thing to do with substr, but there is some
32876     # checking to do.
32877
32878     my ( $underline, $pos, $pos_chr ) = @_;
32879
32880     # check for error..shouldn't happen
32881     unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
32882         return $underline;
32883     }
32884     my $excess = length($pos_chr) + $pos - length($underline);
32885     if ( $excess > 0 ) {
32886         $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
32887     }
32888     substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
32889     return ($underline);
32890 }
32891
32892 sub pre_tokenize {
32893
32894     # Break a string, $str, into a sequence of preliminary tokens.  We
32895     # are interested in these types of tokens:
32896     #   words       (type='w'),            example: 'max_tokens_wanted'
32897     #   digits      (type = 'd'),          example: '0755'
32898     #   whitespace  (type = 'b'),          example: '   '
32899     #   any other single character (i.e. punct; type = the character itself).
32900     # We cannot do better than this yet because we might be in a quoted
32901     # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
32902     # tokens.
32903     my ( $str, $max_tokens_wanted ) = @_;
32904
32905     # we return references to these 3 arrays:
32906     my @tokens    = ();     # array of the tokens themselves
32907     my @token_map = (0);    # string position of start of each token
32908     my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
32909
32910     do {
32911
32912         # whitespace
32913         if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
32914
32915         # numbers
32916         # note that this must come before words!
32917         elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
32918
32919         # words
32920         elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
32921
32922         # single-character punctuation
32923         elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
32924
32925         # that's all..
32926         else {
32927             return ( \@tokens, \@token_map, \@type );
32928         }
32929
32930         push @tokens,    $1;
32931         push @token_map, pos($str);
32932
32933     } while ( --$max_tokens_wanted != 0 );
32934
32935     return ( \@tokens, \@token_map, \@type );
32936 }
32937
32938 sub show_tokens {
32939
32940     # this is an old debug routine
32941     # not called, but saved for reference
32942     my ( $rtokens, $rtoken_map ) = @_;
32943     my $num = scalar( @{$rtokens} );
32944
32945     foreach my $i ( 0 .. $num - 1 ) {
32946         my $len = length( $rtokens->[$i] );
32947         print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
32948     }
32949     return;
32950 }
32951
32952 {
32953     my %matching_end_token;
32954
32955     BEGIN {
32956         %matching_end_token = (
32957             '{' => '}',
32958             '(' => ')',
32959             '[' => ']',
32960             '<' => '>',
32961         );
32962     }
32963
32964     sub matching_end_token {
32965
32966         # return closing character for a pattern
32967         my $beginning_token = shift;
32968         if ( $matching_end_token{$beginning_token} ) {
32969             return $matching_end_token{$beginning_token};
32970         }
32971         return ($beginning_token);
32972     }
32973 }
32974
32975 sub dump_token_types {
32976     my ( $class, $fh ) = @_;
32977
32978     # This should be the latest list of token types in use
32979     # adding NEW_TOKENS: add a comment here
32980     print $fh <<'END_OF_LIST';
32981
32982 Here is a list of the token types currently used for lines of type 'CODE'.
32983 For the following tokens, the "type" of a token is just the token itself.
32984
32985 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
32986 ( ) <= >= == =~ !~ != ++ -- /= x=
32987 ... **= <<= >>= &&= ||= //= <=>
32988 , + - / * | % ! x ~ = \ ? : . < > ^ &
32989
32990 The following additional token types are defined:
32991
32992  type    meaning
32993     b    blank (white space)
32994     {    indent: opening structural curly brace or square bracket or paren
32995          (code block, anonymous hash reference, or anonymous array reference)
32996     }    outdent: right structural curly brace or square bracket or paren
32997     [    left non-structural square bracket (enclosing an array index)
32998     ]    right non-structural square bracket
32999     (    left non-structural paren (all but a list right of an =)
33000     )    right non-structural paren
33001     L    left non-structural curly brace (enclosing a key)
33002     R    right non-structural curly brace
33003     ;    terminal semicolon
33004     f    indicates a semicolon in a "for" statement
33005     h    here_doc operator <<
33006     #    a comment
33007     Q    indicates a quote or pattern
33008     q    indicates a qw quote block
33009     k    a perl keyword
33010     C    user-defined constant or constant function (with void prototype = ())
33011     U    user-defined function taking parameters
33012     G    user-defined function taking block parameter (like grep/map/eval)
33013     M    (unused, but reserved for subroutine definition name)
33014     P    (unused, but -html uses it to label pod text)
33015     t    type indicater such as %,$,@,*,&,sub
33016     w    bare word (perhaps a subroutine call)
33017     i    identifier of some type (with leading %, $, @, *, &, sub, -> )
33018     n    a number
33019     v    a v-string
33020     F    a file test operator (like -e)
33021     Y    File handle
33022     Z    identifier in indirect object slot: may be file handle, object
33023     J    LABEL:  code block label
33024     j    LABEL after next, last, redo, goto
33025     p    unary +
33026     m    unary -
33027     pp   pre-increment operator ++
33028     mm   pre-decrement operator --
33029     A    : used as attribute separator
33030
33031     Here are the '_line_type' codes used internally:
33032     SYSTEM         - system-specific code before hash-bang line
33033     CODE           - line of perl code (including comments)
33034     POD_START      - line starting pod, such as '=head'
33035     POD            - pod documentation text
33036     POD_END        - last line of pod section, '=cut'
33037     HERE           - text of here-document
33038     HERE_END       - last line of here-doc (target word)
33039     FORMAT         - format section
33040     FORMAT_END     - last line of format section, '.'
33041     DATA_START     - __DATA__ line
33042     DATA           - unidentified text following __DATA__
33043     END_START      - __END__ line
33044     END            - unidentified text following __END__
33045     ERROR          - we are in big trouble, probably not a perl script
33046 END_OF_LIST
33047
33048     return;
33049 }
33050
33051 BEGIN {
33052
33053     # These names are used in error messages
33054     @opening_brace_names = qw# '{' '[' '(' '?' #;
33055     @closing_brace_names = qw# '}' ']' ')' ':' #;
33056
33057     my @q;
33058
33059     my @digraphs = qw(
33060       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
33061       <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
33062     );
33063     @is_digraph{@digraphs} = (1) x scalar(@digraphs);
33064
33065     my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
33066     @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
33067
33068     my @tetragraphs = qw( <<>> );
33069     @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
33070
33071     # make a hash of all valid token types for self-checking the tokenizer
33072     # (adding NEW_TOKENS : select a new character and add to this list)
33073     my @valid_token_types = qw#
33074       A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
33075       { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
33076       #;
33077     push( @valid_token_types, @digraphs );
33078     push( @valid_token_types, @trigraphs );
33079     push( @valid_token_types, @tetragraphs );
33080     push( @valid_token_types, ( '#', ',', 'CORE::' ) );
33081     @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
33082
33083     # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
33084     my @file_test_operators =
33085       qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
33086     @is_file_test_operator{@file_test_operators} =
33087       (1) x scalar(@file_test_operators);
33088
33089     # these functions have prototypes of the form (&), so when they are
33090     # followed by a block, that block MAY BE followed by an operator.
33091     # Smartmatch operator ~~ may be followed by anonymous hash or array ref
33092     @q = qw( do eval );
33093     @is_block_operator{@q} = (1) x scalar(@q);
33094
33095     # these functions allow an identifier in the indirect object slot
33096     @q = qw( print printf sort exec system say);
33097     @is_indirect_object_taker{@q} = (1) x scalar(@q);
33098
33099     # These tokens may precede a code block
33100     # patched for SWITCH/CASE/CATCH.  Actually these could be removed
33101     # now and we could let the extended-syntax coding handle them
33102     @q =
33103       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
33104       unless do while until eval for foreach map grep sort
33105       switch case given when catch try finally);
33106     @is_code_block_token{@q} = (1) x scalar(@q);
33107
33108     # I'll build the list of keywords incrementally
33109     my @Keywords = ();
33110
33111     # keywords and tokens after which a value or pattern is expected,
33112     # but not an operator.  In other words, these should consume terms
33113     # to their right, or at least they are not expected to be followed
33114     # immediately by operators.
33115     my @value_requestor = qw(
33116       AUTOLOAD
33117       BEGIN
33118       CHECK
33119       DESTROY
33120       END
33121       EQ
33122       GE
33123       GT
33124       INIT
33125       LE
33126       LT
33127       NE
33128       UNITCHECK
33129       abs
33130       accept
33131       alarm
33132       and
33133       atan2
33134       bind
33135       binmode
33136       bless
33137       break
33138       caller
33139       chdir
33140       chmod
33141       chomp
33142       chop
33143       chown
33144       chr
33145       chroot
33146       close
33147       closedir
33148       cmp
33149       connect
33150       continue
33151       cos
33152       crypt
33153       dbmclose
33154       dbmopen
33155       defined
33156       delete
33157       die
33158       dump
33159       each
33160       else
33161       elsif
33162       eof
33163       eq
33164       exec
33165       exists
33166       exit
33167       exp
33168       fcntl
33169       fileno
33170       flock
33171       for
33172       foreach
33173       formline
33174       ge
33175       getc
33176       getgrgid
33177       getgrnam
33178       gethostbyaddr
33179       gethostbyname
33180       getnetbyaddr
33181       getnetbyname
33182       getpeername
33183       getpgrp
33184       getpriority
33185       getprotobyname
33186       getprotobynumber
33187       getpwnam
33188       getpwuid
33189       getservbyname
33190       getservbyport
33191       getsockname
33192       getsockopt
33193       glob
33194       gmtime
33195       goto
33196       grep
33197       gt
33198       hex
33199       if
33200       index
33201       int
33202       ioctl
33203       join
33204       keys
33205       kill
33206       last
33207       lc
33208       lcfirst
33209       le
33210       length
33211       link
33212       listen
33213       local
33214       localtime
33215       lock
33216       log
33217       lstat
33218       lt
33219       map
33220       mkdir
33221       msgctl
33222       msgget
33223       msgrcv
33224       msgsnd
33225       my
33226       ne
33227       next
33228       no
33229       not
33230       oct
33231       open
33232       opendir
33233       or
33234       ord
33235       our
33236       pack
33237       pipe
33238       pop
33239       pos
33240       print
33241       printf
33242       prototype
33243       push
33244       quotemeta
33245       rand
33246       read
33247       readdir
33248       readlink
33249       readline
33250       readpipe
33251       recv
33252       redo
33253       ref
33254       rename
33255       require
33256       reset
33257       return
33258       reverse
33259       rewinddir
33260       rindex
33261       rmdir
33262       scalar
33263       seek
33264       seekdir
33265       select
33266       semctl
33267       semget
33268       semop
33269       send
33270       sethostent
33271       setnetent
33272       setpgrp
33273       setpriority
33274       setprotoent
33275       setservent
33276       setsockopt
33277       shift
33278       shmctl
33279       shmget
33280       shmread
33281       shmwrite
33282       shutdown
33283       sin
33284       sleep
33285       socket
33286       socketpair
33287       sort
33288       splice
33289       split
33290       sprintf
33291       sqrt
33292       srand
33293       stat
33294       study
33295       substr
33296       symlink
33297       syscall
33298       sysopen
33299       sysread
33300       sysseek
33301       system
33302       syswrite
33303       tell
33304       telldir
33305       tie
33306       tied
33307       truncate
33308       uc
33309       ucfirst
33310       umask
33311       undef
33312       unless
33313       unlink
33314       unpack
33315       unshift
33316       untie
33317       until
33318       use
33319       utime
33320       values
33321       vec
33322       waitpid
33323       warn
33324       while
33325       write
33326       xor
33327
33328       switch
33329       case
33330       given
33331       when
33332       err
33333       say
33334
33335       catch
33336     );
33337
33338     # patched above for SWITCH/CASE given/when err say
33339     # 'err' is a fairly safe addition.
33340     # TODO: 'default' still needed if appropriate
33341     # 'use feature' seen, but perltidy works ok without it.
33342     # Concerned that 'default' could break code.
33343     push( @Keywords, @value_requestor );
33344
33345     # These are treated the same but are not keywords:
33346     my @extra_vr = qw(
33347       constant
33348       vars
33349     );
33350     push( @value_requestor, @extra_vr );
33351
33352     @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
33353
33354     # this list contains keywords which do not look for arguments,
33355     # so that they might be followed by an operator, or at least
33356     # not a term.
33357     my @operator_requestor = qw(
33358       endgrent
33359       endhostent
33360       endnetent
33361       endprotoent
33362       endpwent
33363       endservent
33364       fork
33365       getgrent
33366       gethostent
33367       getlogin
33368       getnetent
33369       getppid
33370       getprotoent
33371       getpwent
33372       getservent
33373       setgrent
33374       setpwent
33375       time
33376       times
33377       wait
33378       wantarray
33379     );
33380
33381     push( @Keywords, @operator_requestor );
33382
33383     # These are treated the same but are not considered keywords:
33384     my @extra_or = qw(
33385       STDERR
33386       STDIN
33387       STDOUT
33388     );
33389
33390     push( @operator_requestor, @extra_or );
33391
33392     @expecting_operator_token{@operator_requestor} =
33393       (1) x scalar(@operator_requestor);
33394
33395     # these token TYPES expect trailing operator but not a term
33396     # note: ++ and -- are post-increment and decrement, 'C' = constant
33397     my @operator_requestor_types = qw( ++ -- C <> q );
33398     @expecting_operator_types{@operator_requestor_types} =
33399       (1) x scalar(@operator_requestor_types);
33400
33401     # these token TYPES consume values (terms)
33402     # note: pp and mm are pre-increment and decrement
33403     # f=semicolon in for,  F=file test operator
33404     my @value_requestor_type = qw#
33405       L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
33406       **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
33407       <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
33408       f F pp mm Y p m U J G j >> << ^ t
33409       ~. ^. |. &. ^.= |.= &.=
33410       #;
33411     push( @value_requestor_type, ',' )
33412       ;    # (perl doesn't like a ',' in a qw block)
33413     @expecting_term_types{@value_requestor_type} =
33414       (1) x scalar(@value_requestor_type);
33415
33416     # Note: the following valid token types are not assigned here to
33417     # hashes requesting to be followed by values or terms, but are
33418     # instead currently hard-coded into sub operator_expected:
33419     # ) -> :: Q R Z ] b h i k n v w } #
33420
33421     # For simple syntax checking, it is nice to have a list of operators which
33422     # will really be unhappy if not followed by a term.  This includes most
33423     # of the above...
33424     %really_want_term = %expecting_term_types;
33425
33426     # with these exceptions...
33427     delete $really_want_term{'U'}; # user sub, depends on prototype
33428     delete $really_want_term{'F'}; # file test works on $_ if no following term
33429     delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
33430                                    # let perl do it
33431
33432     @q = qw(q qq qw qx qr s y tr m);
33433     @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
33434
33435     # These keywords are handled specially in the tokenizer code:
33436     my @special_keywords = qw(
33437       do
33438       eval
33439       format
33440       m
33441       package
33442       q
33443       qq
33444       qr
33445       qw
33446       qx
33447       s
33448       sub
33449       tr
33450       y
33451     );
33452     push( @Keywords, @special_keywords );
33453
33454     # Keywords after which list formatting may be used
33455     # WARNING: do not include |map|grep|eval or perl may die on
33456     # syntax errors (map1.t).
33457     my @keyword_taking_list = qw(
33458       and
33459       chmod
33460       chomp
33461       chop
33462       chown
33463       dbmopen
33464       die
33465       elsif
33466       exec
33467       fcntl
33468       for
33469       foreach
33470       formline
33471       getsockopt
33472       if
33473       index
33474       ioctl
33475       join
33476       kill
33477       local
33478       msgctl
33479       msgrcv
33480       msgsnd
33481       my
33482       open
33483       or
33484       our
33485       pack
33486       print
33487       printf
33488       push
33489       read
33490       readpipe
33491       recv
33492       return
33493       reverse
33494       rindex
33495       seek
33496       select
33497       semctl
33498       semget
33499       send
33500       setpriority
33501       setsockopt
33502       shmctl
33503       shmget
33504       shmread
33505       shmwrite
33506       socket
33507       socketpair
33508       sort
33509       splice
33510       split
33511       sprintf
33512       substr
33513       syscall
33514       sysopen
33515       sysread
33516       sysseek
33517       system
33518       syswrite
33519       tie
33520       unless
33521       unlink
33522       unpack
33523       unshift
33524       until
33525       vec
33526       warn
33527       while
33528       given
33529       when
33530     );
33531     @is_keyword_taking_list{@keyword_taking_list} =
33532       (1) x scalar(@keyword_taking_list);
33533
33534     # These are not used in any way yet
33535     #    my @unused_keywords = qw(
33536     #     __FILE__
33537     #     __LINE__
33538     #     __PACKAGE__
33539     #     );
33540
33541     #  The list of keywords was originally extracted from function 'keyword' in
33542     #  perl file toke.c version 5.005.03, using this utility, plus a
33543     #  little editing: (file getkwd.pl):
33544     #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
33545     #  Add 'get' prefix where necessary, then split into the above lists.
33546     #  This list should be updated as necessary.
33547     #  The list should not contain these special variables:
33548     #  ARGV DATA ENV SIG STDERR STDIN STDOUT
33549     #  __DATA__ __END__
33550
33551     @is_keyword{@Keywords} = (1) x scalar(@Keywords);
33552 }
33553 1;