lib/Perl/Tidy.pm

   1 ############################################################
   2 #
   3 #    perltidy - a perl script indenter and formatter
   4 #
   5 #    Copyright (c) 2000-2009 by Steve Hancock
   6 #    Distributed under the GPL license agreement; see file COPYING
   7 #
   8 #    This program is free software; you can redistribute it and/or modify
   9 #    it under the terms of the GNU General Public License as published by
  10 #    the Free Software Foundation; either version 2 of the License, or
  11 #    (at your option) any later version.
  12 #
  13 #    This program is distributed in the hope that it will be useful,
  14 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 #    GNU General Public License for more details.
  17 #
  18 #    You should have received a copy of the GNU General Public License
  19 #    along with this program; if not, write to the Free Software
  20 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 #
  22 #    For brief instructions instructions, try 'perltidy -h'.
  23 #    For more complete documentation, try 'man perltidy'
  24 #    or visit http://perltidy.sourceforge.net
  25 #
  26 #    This script is an example of the default style.  It was formatted with:
  27 #
  28 #      perltidy Tidy.pm
  29 #
  30 #    Code Contributions:
  31 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  32 #        v-strings.
  33 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  34 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  35 #      Yves Orton supplied coding to help detect Windows versions.
  36 #      Axel Rose supplied a patch for MacPerl.
  37 #      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
  38 #      Dan Tyrell contributed a patch for binary I/O.
  39 #      Ueli Hugenschmidt contributed a patch for -fpsc
  40 #      Many others have supplied key ideas, suggestions, and bug reports;
  41 #        see the CHANGES file.
  42 #
  43 ############################################################
  44
  45 package Perl::Tidy;
  46 use 5.004;    # need IO::File from 5.004 or later
  47 BEGIN { $^W = 1; }    # turn on warnings
  48
  49 use strict;
  50 use Exporter;
  51 use Carp;
  52 $|++;
  53
  54 use vars qw{
  55   $VERSION
  56   @ISA
  57   @EXPORT
  58   $missing_file_spec
  59 };
  60
  61 @ISA    = qw( Exporter );
  62 @EXPORT = qw( &perltidy );
  63
  64 use IO::File;
  65 use File::Basename;
  66
  67 BEGIN {
  68     ( $VERSION = q($Id: Tidy.pm,v 1.74 2009/06/16 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  69 }
  70
  71 sub streamhandle {
  72
  73     # given filename and mode (r or w), create an object which:
  74     #   has a 'getline' method if mode='r', and
  75     #   has a 'print' method if mode='w'.
  76     # The objects also need a 'close' method.
  77     #
  78     # How the object is made:
  79     #
  80     # if $filename is:     Make object using:
  81     # ----------------     -----------------
  82     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
  83     # string               IO::File
  84     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
  85     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
  86     # object               object
  87     #                      (check for 'print' method for 'w' mode)
  88     #                      (check for 'getline' method for 'r' mode)
  89     my $ref = ref( my $filename = shift );
  90     my $mode = shift;
  91     my $New;
  92     my $fh;
  93
  94     # handle a reference
  95     if ($ref) {
  96         if ( $ref eq 'ARRAY' ) {
  97             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
  98         }
  99         elsif ( $ref eq 'SCALAR' ) {
 100             $New = sub { Perl::Tidy::IOScalar->new(@_) };
 101         }
 102         else {
 103
 104             # Accept an object with a getline method for reading. Note:
 105             # IO::File is built-in and does not respond to the defined
 106             # operator.  If this causes trouble, the check can be
 107             # skipped and we can just let it crash if there is no
 108             # getline.
 109             if ( $mode =~ /[rR]/ ) {
 110                 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 111                     $New = sub { $filename };
 112                 }
 113                 else {
 114                     $New = sub { undef };
 115                     confess <<EOM;
 116 ------------------------------------------------------------------------
 117 No 'getline' method is defined for object of class $ref
 118 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 119 ------------------------------------------------------------------------
 120 EOM
 121                 }
 122             }
 123
 124             # Accept an object with a print method for writing.
 125             # See note above about IO::File
 126             if ( $mode =~ /[wW]/ ) {
 127                 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 128                     $New = sub { $filename };
 129                 }
 130                 else {
 131                     $New = sub { undef };
 132                     confess <<EOM;
 133 ------------------------------------------------------------------------
 134 No 'print' method is defined for object of class $ref
 135 Please check your call to Perl::Tidy::perltidy. Trace follows.
 136 ------------------------------------------------------------------------
 137 EOM
 138                 }
 139             }
 140         }
 141     }
 142
 143     # handle a string
 144     else {
 145         if ( $filename eq '-' ) {
 146             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 147         }
 148         else {
 149             $New = sub { IO::File->new(@_) };
 150         }
 151     }
 152     $fh = $New->( $filename, $mode )
 153       or warn "Couldn't open file:$filename in mode:$mode : $!\n";
 154     return $fh, ( $ref or $filename );
 155 }
 156
 157 sub find_input_line_ending {
 158
 159     # Peek at a file and return first line ending character.
 160     # Quietly return undef in case of any trouble.
 161     my ($input_file) = @_;
 162     my $ending;
 163
 164     # silently ignore input from object or stdin
 165     if ( ref($input_file) || $input_file eq '-' ) {
 166         return $ending;
 167     }
 168     open( INFILE, $input_file ) || return $ending;
 169
 170     binmode INFILE;
 171     my $buf;
 172     read( INFILE, $buf, 1024 );
 173     close INFILE;
 174     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 175         my $test = $1;
 176
 177         # dos
 178         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 179
 180         # mac
 181         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 182
 183         # unix
 184         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 185
 186         # unknown
 187         else { }
 188     }
 189
 190     # no ending seen
 191     else { }
 192
 193     return $ending;
 194 }
 195
 196 sub catfile {
 197
 198     # concatenate a path and file basename
 199     # returns undef in case of error
 200
 201     BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 202
 203     # use File::Spec if we can
 204     unless ($missing_file_spec) {
 205         return File::Spec->catfile(@_);
 206     }
 207
 208     # Perl 5.004 systems may not have File::Spec so we'll make
 209     # a simple try.  We assume File::Basename is available.
 210     # return undef if not successful.
 211     my $name      = pop @_;
 212     my $path      = join '/', @_;
 213     my $test_file = $path . $name;
 214     my ( $test_name, $test_path ) = fileparse($test_file);
 215     return $test_file if ( $test_name eq $name );
 216     return undef if ( $^O eq 'VMS' );
 217
 218     # this should work at least for Windows and Unix:
 219     $test_file = $path . '/' . $name;
 220     ( $test_name, $test_path ) = fileparse($test_file);
 221     return $test_file if ( $test_name eq $name );
 222     return undef;
 223 }
 224
 225 sub make_temporary_filename {
 226
 227     # Make a temporary filename.
 228     #
 229     # The POSIX tmpnam() function tends to be unreliable for non-unix
 230     # systems (at least for the win32 systems that I've tested), so use
 231     # a pre-defined name.  A slight disadvantage of this is that two
 232     # perltidy runs in the same working directory may conflict.
 233     # However, the chance of that is small and managable by the user.
 234     # An alternative would be to check for the file's existance and use,
 235     # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
 236     # keep it simple.
 237     my $name = "perltidy.TMP";
 238     if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
 239         return $name;
 240     }
 241     eval "use POSIX qw(tmpnam)";
 242     if ($@) { return $name }
 243     use IO::File;
 244
 245     # just make a couple of tries before giving up and using the default
 246     for ( 0 .. 1 ) {
 247         my $tmpname = tmpnam();
 248         my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
 249         if ($fh) {
 250             $fh->close();
 251             return ($tmpname);
 252             last;
 253         }
 254     }
 255     return ($name);
 256 }
 257
 258 # Here is a map of the flow of data from the input source to the output
 259 # line sink:
 260 #
 261 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 262 #       input                         groups                 output
 263 #       lines   tokens      lines       of          lines    lines
 264 #                                      lines
 265 #
 266 # The names correspond to the package names responsible for the unit processes.
 267 #
 268 # The overall process is controlled by the "main" package.
 269 #
 270 # LineSource is the stream of input lines
 271 #
 272 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 273 # if necessary.  A token is any section of the input line which should be
 274 # manipulated as a single entity during formatting.  For example, a single
 275 # ',' character is a token, and so is an entire side comment.  It handles
 276 # the complexities of Perl syntax, such as distinguishing between '<<' as
 277 # a shift operator and as a here-document, or distinguishing between '/'
 278 # as a divide symbol and as a pattern delimiter.
 279 #
 280 # Formatter inserts and deletes whitespace between tokens, and breaks
 281 # sequences of tokens at appropriate points as output lines.  It bases its
 282 # decisions on the default rules as modified by any command-line options.
 283 #
 284 # VerticalAligner collects groups of lines together and tries to line up
 285 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 286 #
 287 # FileWriter simply writes lines to the output stream.
 288 #
 289 # The Logger package, not shown, records significant events and warning
 290 # messages.  It writes a .LOG file, which may be saved with a
 291 # '-log' or a '-g' flag.
 292
 293 {
 294
 295     # variables needed by interrupt handler:
 296     my $tokenizer;
 297     my $input_file;
 298
 299     # this routine may be called to give a status report if interrupted.  If a
 300     # parameter is given, it will call exit with that parameter.  This is no
 301     # longer used because it works under Unix but not under Windows.
 302     sub interrupt_handler {
 303
 304         my $exit_flag = shift;
 305         print STDERR "perltidy interrupted";
 306         if ($tokenizer) {
 307             my $input_line_number =
 308               Perl::Tidy::Tokenizer::get_input_line_number();
 309             print STDERR " at line $input_line_number";
 310         }
 311         if ($input_file) {
 312
 313             if   ( ref $input_file ) { print STDERR " of reference to:" }
 314             else                     { print STDERR " of file:" }
 315             print STDERR " $input_file";
 316         }
 317         print STDERR "\n";
 318         exit $exit_flag if defined($exit_flag);
 319     }
 320
 321     sub perltidy {
 322
 323         my %defaults = (
 324             argv                  => undef,
 325             destination           => undef,
 326             formatter             => undef,
 327             logfile               => undef,
 328             errorfile             => undef,
 329             perltidyrc            => undef,
 330             source                => undef,
 331             stderr                => undef,
 332             dump_options          => undef,
 333             dump_options_type     => undef,
 334             dump_getopt_flags     => undef,
 335             dump_options_category => undef,
 336             dump_options_range    => undef,
 337             dump_abbreviations    => undef,
 338         );
 339
 340         # don't overwrite callers ARGV
 341         local @ARGV = @ARGV;
 342
 343         my %input_hash = @_;
 344
 345         if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 346             local $" = ')(';
 347             my @good_keys = sort keys %defaults;
 348             @bad_keys = sort @bad_keys;
 349             confess <<EOM;
 350 ------------------------------------------------------------------------
 351 Unknown perltidy parameter : (@bad_keys)
 352 perltidy only understands : (@good_keys)
 353 ------------------------------------------------------------------------
 354
 355 EOM
 356         }
 357
 358         my $get_hash_ref = sub {
 359             my ($key) = @_;
 360             my $hash_ref = $input_hash{$key};
 361             if ( defined($hash_ref) ) {
 362                 unless ( ref($hash_ref) eq 'HASH' ) {
 363                     my $what = ref($hash_ref);
 364                     my $but_is =
 365                       $what ? "but is ref to $what" : "but is not a reference";
 366                     croak <<EOM;
 367 ------------------------------------------------------------------------
 368 error in call to perltidy:
 369 -$key must be reference to HASH $but_is
 370 ------------------------------------------------------------------------
 371 EOM
 372                 }
 373             }
 374             return $hash_ref;
 375         };
 376
 377         %input_hash = ( %defaults, %input_hash );
 378         my $argv               = $input_hash{'argv'};
 379         my $destination_stream = $input_hash{'destination'};
 380         my $errorfile_stream   = $input_hash{'errorfile'};
 381         my $logfile_stream     = $input_hash{'logfile'};
 382         my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 383         my $source_stream      = $input_hash{'source'};
 384         my $stderr_stream      = $input_hash{'stderr'};
 385         my $user_formatter     = $input_hash{'formatter'};
 386
 387         # various dump parameters
 388         my $dump_options_type     = $input_hash{'dump_options_type'};
 389         my $dump_options          = $get_hash_ref->('dump_options');
 390         my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
 391         my $dump_options_category = $get_hash_ref->('dump_options_category');
 392         my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
 393         my $dump_options_range    = $get_hash_ref->('dump_options_range');
 394
 395         # validate dump_options_type
 396         if ( defined($dump_options) ) {
 397             unless ( defined($dump_options_type) ) {
 398                 $dump_options_type = 'perltidyrc';
 399             }
 400             unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
 401                 croak <<EOM;
 402 ------------------------------------------------------------------------
 403 Please check value of -dump_options_type in call to perltidy;
 404 saw: '$dump_options_type'
 405 expecting: 'perltidyrc' or 'full'
 406 ------------------------------------------------------------------------
 407 EOM
 408
 409             }
 410         }
 411         else {
 412             $dump_options_type = "";
 413         }
 414
 415         if ($user_formatter) {
 416
 417             # if the user defines a formatter, there is no output stream,
 418             # but we need a null stream to keep coding simple
 419             $destination_stream = Perl::Tidy::DevNull->new();
 420         }
 421
 422         # see if ARGV is overridden
 423         if ( defined($argv) ) {
 424
 425             my $rargv = ref $argv;
 426             if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
 427
 428             # ref to ARRAY
 429             if ($rargv) {
 430                 if ( $rargv eq 'ARRAY' ) {
 431                     @ARGV = @$argv;
 432                 }
 433                 else {
 434                     croak <<EOM;
 435 ------------------------------------------------------------------------
 436 Please check value of -argv in call to perltidy;
 437 it must be a string or ref to ARRAY but is: $rargv
 438 ------------------------------------------------------------------------
 439 EOM
 440                 }
 441             }
 442
 443             # string
 444             else {
 445                 my ( $rargv, $msg ) = parse_args($argv);
 446                 if ($msg) {
 447                     die <<EOM;
 448 Error parsing this string passed to to perltidy with 'argv':
 449 $msg
 450 EOM
 451                 }
 452                 @ARGV = @{$rargv};
 453             }
 454         }
 455
 456         # redirect STDERR if requested
 457         if ($stderr_stream) {
 458             my ( $fh_stderr, $stderr_file ) =
 459               Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 460             if ($fh_stderr) { *STDERR = $fh_stderr }
 461             else {
 462                 croak <<EOM;
 463 ------------------------------------------------------------------------
 464 Unable to redirect STDERR to $stderr_stream
 465 Please check value of -stderr in call to perltidy
 466 ------------------------------------------------------------------------
 467 EOM
 468             }
 469         }
 470
 471         my $rpending_complaint;
 472         $$rpending_complaint = "";
 473         my $rpending_logfile_message;
 474         $$rpending_logfile_message = "";
 475
 476         my ( $is_Windows, $Windows_type ) =
 477           look_for_Windows($rpending_complaint);
 478
 479         # VMS file names are restricted to a 40.40 format, so we append _tdy
 480         # instead of .tdy, etc. (but see also sub check_vms_filename)
 481         my $dot;
 482         my $dot_pattern;
 483         if ( $^O eq 'VMS' ) {
 484             $dot         = '_';
 485             $dot_pattern = '_';
 486         }
 487         else {
 488             $dot         = '.';
 489             $dot_pattern = '\.';    # must escape for use in regex
 490         }
 491
 492         # handle command line options
 493         my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
 494             $rexpansion, $roption_category, $roption_range )
 495           = process_command_line(
 496             $perltidyrc_stream,  $is_Windows, $Windows_type,
 497             $rpending_complaint, $dump_options_type,
 498           );
 499
 500         # return or exit immediately after all dumps
 501         my $quit_now = 0;
 502
 503         # Getopt parameters and their flags
 504         if ( defined($dump_getopt_flags) ) {
 505             $quit_now = 1;
 506             foreach my $op ( @{$roption_string} ) {
 507                 my $opt  = $op;
 508                 my $flag = "";
 509
 510                 # Examples:
 511                 #  some-option=s
 512                 #  some-option=i
 513                 #  some-option:i
 514                 #  some-option!
 515                 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
 516                     $opt  = $1;
 517                     $flag = $2;
 518                 }
 519                 $dump_getopt_flags->{$opt} = $flag;
 520             }
 521         }
 522
 523         if ( defined($dump_options_category) ) {
 524             $quit_now = 1;
 525             %{$dump_options_category} = %{$roption_category};
 526         }
 527
 528         if ( defined($dump_options_range) ) {
 529             $quit_now = 1;
 530             %{$dump_options_range} = %{$roption_range};
 531         }
 532
 533         if ( defined($dump_abbreviations) ) {
 534             $quit_now = 1;
 535             %{$dump_abbreviations} = %{$rexpansion};
 536         }
 537
 538         if ( defined($dump_options) ) {
 539             $quit_now = 1;
 540             %{$dump_options} = %{$rOpts};
 541         }
 542
 543         return if ($quit_now);
 544
 545         # make printable string of options for this run as possible diagnostic
 546         my $readable_options = readable_options( $rOpts, $roption_string );
 547
 548         # dump from command line
 549         if ( $rOpts->{'dump-options'} ) {
 550             print STDOUT $readable_options;
 551             exit 1;
 552         }
 553
 554         check_options( $rOpts, $is_Windows, $Windows_type,
 555             $rpending_complaint );
 556
 557         if ($user_formatter) {
 558             $rOpts->{'format'} = 'user';
 559         }
 560
 561         # there must be one entry here for every possible format
 562         my %default_file_extension = (
 563             tidy => 'tdy',
 564             html => 'html',
 565             user => '',
 566         );
 567
 568         # be sure we have a valid output format
 569         unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 570             my $formats = join ' ',
 571               sort map { "'" . $_ . "'" } keys %default_file_extension;
 572             my $fmt = $rOpts->{'format'};
 573             die "-format='$fmt' but must be one of: $formats\n";
 574         }
 575
 576         my $output_extension =
 577           make_extension( $rOpts->{'output-file-extension'},
 578             $default_file_extension{ $rOpts->{'format'} }, $dot );
 579
 580         my $backup_extension =
 581           make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 582
 583         my $html_toc_extension =
 584           make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 585
 586         my $html_src_extension =
 587           make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 588
 589         # check for -b option;
 590         my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 591           && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
 592           && @ARGV > 0;    # silently ignore if standard input;
 593                            # this allows -b to be in a .perltidyrc file
 594                            # without error messages when running from an editor
 595
 596         # turn off -b with warnings in case of conflicts with other options
 597         if ($in_place_modify) {
 598             if ( $rOpts->{'standard-output'} ) {
 599                 warn "Ignoring -b; you may not use -b and -st together\n";
 600                 $in_place_modify = 0;
 601             }
 602             if ($destination_stream) {
 603                 warn
 604 "Ignoring -b; you may not specify a destination array and -b together\n";
 605                 $in_place_modify = 0;
 606             }
 607             if ($source_stream) {
 608                 warn
 609 "Ignoring -b; you may not specify a source array and -b together\n";
 610                 $in_place_modify = 0;
 611             }
 612             if ( $rOpts->{'outfile'} ) {
 613                 warn "Ignoring -b; you may not use -b and -o together\n";
 614                 $in_place_modify = 0;
 615             }
 616             if ( defined( $rOpts->{'output-path'} ) ) {
 617                 warn "Ignoring -b; you may not use -b and -opath together\n";
 618                 $in_place_modify = 0;
 619             }
 620         }
 621
 622         Perl::Tidy::Formatter::check_options($rOpts);
 623         if ( $rOpts->{'format'} eq 'html' ) {
 624             Perl::Tidy::HtmlWriter->check_options($rOpts);
 625         }
 626
 627         # make the pattern of file extensions that we shouldn't touch
 628         my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 629         if ($output_extension) {
 630             my $ext = quotemeta($output_extension);
 631             $forbidden_file_extensions .= "|$ext";
 632         }
 633         if ( $in_place_modify && $backup_extension ) {
 634             my $ext = quotemeta($backup_extension);
 635             $forbidden_file_extensions .= "|$ext";
 636         }
 637         $forbidden_file_extensions .= ')$';
 638
 639         # Create a diagnostics object if requested;
 640         # This is only useful for code development
 641         my $diagnostics_object = undef;
 642         if ( $rOpts->{'DIAGNOSTICS'} ) {
 643             $diagnostics_object = Perl::Tidy::Diagnostics->new();
 644         }
 645
 646         # no filenames should be given if input is from an array
 647         if ($source_stream) {
 648             if ( @ARGV > 0 ) {
 649                 die
 650 "You may not specify any filenames when a source array is given\n";
 651             }
 652
 653             # we'll stuff the source array into ARGV
 654             unshift( @ARGV, $source_stream );
 655
 656             # No special treatment for source stream which is a filename.
 657             # This will enable checks for binary files and other bad stuff.
 658             $source_stream = undef unless ref($source_stream);
 659         }
 660
 661         # use stdin by default if no source array and no args
 662         else {
 663             unshift( @ARGV, '-' ) unless @ARGV;
 664         }
 665
 666         # loop to process all files in argument list
 667         my $number_of_files = @ARGV;
 668         my $formatter       = undef;
 669         $tokenizer = undef;
 670         while ( $input_file = shift @ARGV ) {
 671             my $fileroot;
 672             my $input_file_permissions;
 673
 674             #---------------------------------------------------------------
 675             # determine the input file name
 676             #---------------------------------------------------------------
 677             if ($source_stream) {
 678                 $fileroot = "perltidy";
 679             }
 680             elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 681                 $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
 682                 $in_place_modify = 0;
 683             }
 684             else {
 685                 $fileroot = $input_file;
 686                 unless ( -e $input_file ) {
 687
 688                     # file doesn't exist - check for a file glob
 689                     if ( $input_file =~ /([\?\*\[\{])/ ) {
 690
 691                         # Windows shell may not remove quotes, so do it
 692                         my $input_file = $input_file;
 693                         if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 694                         if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 695                         my $pattern = fileglob_to_re($input_file);
 696                         ##eval "/$pattern/";
 697                         if ( !$@ && opendir( DIR, './' ) ) {
 698                             my @files =
 699                               grep { /$pattern/ && !-d $_ } readdir(DIR);
 700                             closedir(DIR);
 701                             if (@files) {
 702                                 unshift @ARGV, @files;
 703                                 next;
 704                             }
 705                         }
 706                     }
 707                     print "skipping file: '$input_file': no matches found\n";
 708                     next;
 709                 }
 710
 711                 unless ( -f $input_file ) {
 712                     print "skipping file: $input_file: not a regular file\n";
 713                     next;
 714                 }
 715
 716                 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 717                     print
 718 "skipping file: $input_file: Non-text (override with -f)\n";
 719                     next;
 720                 }
 721
 722                 # we should have a valid filename now
 723                 $fileroot               = $input_file;
 724                 $input_file_permissions = ( stat $input_file )[2] & 07777;
 725
 726                 if ( $^O eq 'VMS' ) {
 727                     ( $fileroot, $dot ) = check_vms_filename($fileroot);
 728                 }
 729
 730                 # add option to change path here
 731                 if ( defined( $rOpts->{'output-path'} ) ) {
 732
 733                     my ( $base, $old_path ) = fileparse($fileroot);
 734                     my $new_path = $rOpts->{'output-path'};
 735                     unless ( -d $new_path ) {
 736                         unless ( mkdir $new_path, 0777 ) {
 737                             die "unable to create directory $new_path: $!\n";
 738                         }
 739                     }
 740                     my $path = $new_path;
 741                     $fileroot = catfile( $path, $base );
 742                     unless ($fileroot) {
 743                         die <<EOM;
 744 ------------------------------------------------------------------------
 745 Problem combining $new_path and $base to make a filename; check -opath
 746 ------------------------------------------------------------------------
 747 EOM
 748                     }
 749                 }
 750             }
 751
 752             # Skip files with same extension as the output files because
 753             # this can lead to a messy situation with files like
 754             # script.tdy.tdy.tdy ... or worse problems ...  when you
 755             # rerun perltidy over and over with wildcard input.
 756             if (
 757                 !$source_stream
 758                 && (   $input_file =~ /$forbidden_file_extensions/o
 759                     || $input_file eq 'DIAGNOSTICS' )
 760               )
 761             {
 762                 print "skipping file: $input_file: wrong extension\n";
 763                 next;
 764             }
 765
 766             # the 'source_object' supplies a method to read the input file
 767             my $source_object =
 768               Perl::Tidy::LineSource->new( $input_file, $rOpts,
 769                 $rpending_logfile_message );
 770             next unless ($source_object);
 771
 772             # register this file name with the Diagnostics package
 773             $diagnostics_object->set_input_file($input_file)
 774               if $diagnostics_object;
 775
 776             #---------------------------------------------------------------
 777             # determine the output file name
 778             #---------------------------------------------------------------
 779             my $output_file = undef;
 780             my $actual_output_extension;
 781
 782             if ( $rOpts->{'outfile'} ) {
 783
 784                 if ( $number_of_files <= 1 ) {
 785
 786                     if ( $rOpts->{'standard-output'} ) {
 787                         die "You may not use -o and -st together\n";
 788                     }
 789                     elsif ($destination_stream) {
 790                         die
 791 "You may not specify a destination array and -o together\n";
 792                     }
 793                     elsif ( defined( $rOpts->{'output-path'} ) ) {
 794                         die "You may not specify -o and -opath together\n";
 795                     }
 796                     elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 797                         die "You may not specify -o and -oext together\n";
 798                     }
 799                     $output_file = $rOpts->{outfile};
 800
 801                     # make sure user gives a file name after -o
 802                     if ( $output_file =~ /^-/ ) {
 803                         die "You must specify a valid filename after -o\n";
 804                     }
 805
 806                     # do not overwrite input file with -o
 807                     if ( defined($input_file_permissions)
 808                         && ( $output_file eq $input_file ) )
 809                     {
 810                         die
 811                           "Use 'perltidy -b $input_file' to modify in-place\n";
 812                     }
 813                 }
 814                 else {
 815                     die "You may not use -o with more than one input file\n";
 816                 }
 817             }
 818             elsif ( $rOpts->{'standard-output'} ) {
 819                 if ($destination_stream) {
 820                     die
 821 "You may not specify a destination array and -st together\n";
 822                 }
 823                 $output_file = '-';
 824
 825                 if ( $number_of_files <= 1 ) {
 826                 }
 827                 else {
 828                     die "You may not use -st with more than one input file\n";
 829                 }
 830             }
 831             elsif ($destination_stream) {
 832                 $output_file = $destination_stream;
 833             }
 834             elsif ($source_stream) {  # source but no destination goes to stdout
 835                 $output_file = '-';
 836             }
 837             elsif ( $input_file eq '-' ) {
 838                 $output_file = '-';
 839             }
 840             else {
 841                 if ($in_place_modify) {
 842                     $output_file = IO::File->new_tmpfile()
 843                       or die "cannot open temp file for -b option: $!\n";
 844                 }
 845                 else {
 846                     $actual_output_extension = $output_extension;
 847                     $output_file             = $fileroot . $output_extension;
 848                 }
 849             }
 850
 851             # the 'sink_object' knows how to write the output file
 852             my $tee_file = $fileroot . $dot . "TEE";
 853
 854             my $line_separator = $rOpts->{'output-line-ending'};
 855             if ( $rOpts->{'preserve-line-endings'} ) {
 856                 $line_separator = find_input_line_ending($input_file);
 857             }
 858
 859             # Eventually all I/O may be done with binmode, but for now it is
 860             # only done when a user requests a particular line separator
 861             # through the -ple or -ole flags
 862             my $binmode = 0;
 863             if   ( defined($line_separator) ) { $binmode        = 1 }
 864             else                              { $line_separator = "\n" }
 865
 866             my $sink_object =
 867               Perl::Tidy::LineSink->new( $output_file, $tee_file,
 868                 $line_separator, $rOpts, $rpending_logfile_message, $binmode );
 869
 870             #---------------------------------------------------------------
 871             # initialize the error logger
 872             #---------------------------------------------------------------
 873             my $warning_file = $fileroot . $dot . "ERR";
 874             if ($errorfile_stream) { $warning_file = $errorfile_stream }
 875             my $log_file = $fileroot . $dot . "LOG";
 876             if ($logfile_stream) { $log_file = $logfile_stream }
 877
 878             my $logger_object =
 879               Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 880                 $saw_extrude );
 881             write_logfile_header(
 882                 $rOpts,        $logger_object, $config_file,
 883                 $rraw_options, $Windows_type,  $readable_options,
 884             );
 885             if ($$rpending_logfile_message) {
 886                 $logger_object->write_logfile_entry($$rpending_logfile_message);
 887             }
 888             if ($$rpending_complaint) {
 889                 $logger_object->complain($$rpending_complaint);
 890             }
 891
 892             #---------------------------------------------------------------
 893             # initialize the debug object, if any
 894             #---------------------------------------------------------------
 895             my $debugger_object = undef;
 896             if ( $rOpts->{DEBUG} ) {
 897                 $debugger_object =
 898                   Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 899             }
 900
 901             #---------------------------------------------------------------
 902             # create a formatter for this file : html writer or pretty printer
 903             #---------------------------------------------------------------
 904
 905             # we have to delete any old formatter because, for safety,
 906             # the formatter will check to see that there is only one.
 907             $formatter = undef;
 908
 909             if ($user_formatter) {
 910                 $formatter = $user_formatter;
 911             }
 912             elsif ( $rOpts->{'format'} eq 'html' ) {
 913                 $formatter =
 914                   Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
 915                     $actual_output_extension, $html_toc_extension,
 916                     $html_src_extension );
 917             }
 918             elsif ( $rOpts->{'format'} eq 'tidy' ) {
 919                 $formatter = Perl::Tidy::Formatter->new(
 920                     logger_object      => $logger_object,
 921                     diagnostics_object => $diagnostics_object,
 922                     sink_object        => $sink_object,
 923                 );
 924             }
 925             else {
 926                 die "I don't know how to do -format=$rOpts->{'format'}\n";
 927             }
 928
 929             unless ($formatter) {
 930                 die "Unable to continue with $rOpts->{'format'} formatting\n";
 931             }
 932
 933             #---------------------------------------------------------------
 934             # create the tokenizer for this file
 935             #---------------------------------------------------------------
 936             $tokenizer = undef;                     # must destroy old tokenizer
 937             $tokenizer = Perl::Tidy::Tokenizer->new(
 938                 source_object       => $source_object,
 939                 logger_object       => $logger_object,
 940                 debugger_object     => $debugger_object,
 941                 diagnostics_object  => $diagnostics_object,
 942                 starting_level      => $rOpts->{'starting-indentation-level'},
 943                 tabs                => $rOpts->{'tabs'},
 944                 indent_columns      => $rOpts->{'indent-columns'},
 945                 look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
 946                 look_for_autoloader => $rOpts->{'look-for-autoloader'},
 947                 look_for_selfloader => $rOpts->{'look-for-selfloader'},
 948                 trim_qw             => $rOpts->{'trim-qw'},
 949             );
 950
 951             #---------------------------------------------------------------
 952             # now we can do it
 953             #---------------------------------------------------------------
 954             process_this_file( $tokenizer, $formatter );
 955
 956             #---------------------------------------------------------------
 957             # close the input source and report errors
 958             #---------------------------------------------------------------
 959             $source_object->close_input_file();
 960
 961             # get file names to use for syntax check
 962             my $ifname = $source_object->get_input_file_copy_name();
 963             my $ofname = $sink_object->get_output_file_copy();
 964
 965             #---------------------------------------------------------------
 966             # handle the -b option (backup and modify in-place)
 967             #---------------------------------------------------------------
 968             if ($in_place_modify) {
 969                 unless ( -f $input_file ) {
 970
 971                     # oh, oh, no real file to backup ..
 972                     # shouldn't happen because of numerous preliminary checks
 973                     die print
 974 "problem with -b backing up input file '$input_file': not a file\n";
 975                 }
 976                 my $backup_name = $input_file . $backup_extension;
 977                 if ( -f $backup_name ) {
 978                     unlink($backup_name)
 979                       or die
 980 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
 981                 }
 982                 rename( $input_file, $backup_name )
 983                   or die
 984 "problem renaming $input_file to $backup_name for -b option: $!\n";
 985                 $ifname = $backup_name;
 986
 987                 seek( $output_file, 0, 0 )
 988                   or die "unable to rewind tmp file for -b option: $!\n";
 989
 990                 my $fout = IO::File->new("> $input_file")
 991                   or die
 992 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
 993                 binmode $fout;
 994                 my $line;
 995                 while ( $line = $output_file->getline() ) {
 996                     $fout->print($line);
 997                 }
 998                 $fout->close();
 999                 $output_file = $input_file;
1000                 $ofname      = $input_file;
1001             }
1002
1003             #---------------------------------------------------------------
1004             # clean up and report errors
1005             #---------------------------------------------------------------
1006             $sink_object->close_output_file()    if $sink_object;
1007             $debugger_object->close_debug_file() if $debugger_object;
1008
1009             my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1010             if ($output_file) {
1011
1012                 if ($input_file_permissions) {
1013
1014                     # give output script same permissions as input script, but
1015                     # make it user-writable or else we can't run perltidy again.
1016                     # Thus we retain whatever executable flags were set.
1017                     if ( $rOpts->{'format'} eq 'tidy' ) {
1018                         chmod( $input_file_permissions | 0600, $output_file );
1019                     }
1020
1021                     # else use default permissions for html and any other format
1022
1023                 }
1024                 if ( $logger_object && $rOpts->{'check-syntax'} ) {
1025                     $infile_syntax_ok =
1026                       check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1027                 }
1028             }
1029
1030             $logger_object->finish( $infile_syntax_ok, $formatter )
1031               if $logger_object;
1032         }    # end of loop to process all files
1033     }    # end of main program
1034 }
1035
1036 sub fileglob_to_re {
1037
1038     # modified (corrected) from version in find2perl
1039     my $x = shift;
1040     $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
1041     $x =~ s#\*#.*#g;               # '*' -> '.*'
1042     $x =~ s#\?#.#g;                # '?' -> '.'
1043     "^$x\\z";                      # match whole word
1044 }
1045
1046 sub make_extension {
1047
1048     # Make a file extension, including any leading '.' if necessary
1049     # The '.' may actually be an '_' under VMS
1050     my ( $extension, $default, $dot ) = @_;
1051
1052     # Use the default if none specified
1053     $extension = $default unless ($extension);
1054
1055     # Only extensions with these leading characters get a '.'
1056     # This rule gives the user some freedom
1057     if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1058         $extension = $dot . $extension;
1059     }
1060     return $extension;
1061 }
1062
1063 sub write_logfile_header {
1064     my (
1065         $rOpts,        $logger_object, $config_file,
1066         $rraw_options, $Windows_type,  $readable_options
1067     ) = @_;
1068     $logger_object->write_logfile_entry(
1069 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1070     );
1071     if ($Windows_type) {
1072         $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1073     }
1074     my $options_string = join( ' ', @$rraw_options );
1075
1076     if ($config_file) {
1077         $logger_object->write_logfile_entry(
1078             "Found Configuration File >>> $config_file \n");
1079     }
1080     $logger_object->write_logfile_entry(
1081         "Configuration and command line parameters for this run:\n");
1082     $logger_object->write_logfile_entry("$options_string\n");
1083
1084     if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1085         $rOpts->{'logfile'} = 1;    # force logfile to be saved
1086         $logger_object->write_logfile_entry(
1087             "Final parameter set for this run\n");
1088         $logger_object->write_logfile_entry(
1089             "------------------------------------\n");
1090
1091         $logger_object->write_logfile_entry($readable_options);
1092
1093         $logger_object->write_logfile_entry(
1094             "------------------------------------\n");
1095     }
1096     $logger_object->write_logfile_entry(
1097         "To find error messages search for 'WARNING' with your editor\n");
1098 }
1099
1100 sub generate_options {
1101
1102     ######################################################################
1103     # Generate and return references to:
1104     #  @option_string - the list of options to be passed to Getopt::Long
1105     #  @defaults - the list of default options
1106     #  %expansion - a hash showing how all abbreviations are expanded
1107     #  %category - a hash giving the general category of each option
1108     #  %option_range - a hash giving the valid ranges of certain options
1109
1110     # Note: a few options are not documented in the man page and usage
1111     # message. This is because these are experimental or debug options and
1112     # may or may not be retained in future versions.
1113     #
1114     # Here are the undocumented flags as far as I know.  Any of them
1115     # may disappear at any time.  They are mainly for fine-tuning
1116     # and debugging.
1117     #
1118     # fll --> fuzzy-line-length           # a trivial parameter which gets
1119     #                                       turned off for the extrude option
1120     #                                       which is mainly for debugging
1121     # chk --> check-multiline-quotes      # check for old bug; to be deleted
1122     # scl --> short-concatenation-item-length   # helps break at '.'
1123     # recombine                           # for debugging line breaks
1124     # valign                              # for debugging vertical alignment
1125     # I   --> DIAGNOSTICS                 # for debugging
1126     ######################################################################
1127
1128     # here is a summary of the Getopt codes:
1129     # <none> does not take an argument
1130     # =s takes a mandatory string
1131     # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1132     # =i takes a mandatory integer
1133     # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1134     # ! does not take an argument and may be negated
1135     #  i.e., -foo and -nofoo are allowed
1136     # a double dash signals the end of the options list
1137     #
1138     #---------------------------------------------------------------
1139     # Define the option string passed to GetOptions.
1140     #---------------------------------------------------------------
1141
1142     my @option_string   = ();
1143     my %expansion       = ();
1144     my %option_category = ();
1145     my %option_range    = ();
1146     my $rexpansion      = \%expansion;
1147
1148     # names of categories in manual
1149     # leading integers will allow sorting
1150     my @category_name = (
1151         '0. I/O control',
1152         '1. Basic formatting options',
1153         '2. Code indentation control',
1154         '3. Whitespace control',
1155         '4. Comment controls',
1156         '5. Linebreak controls',
1157         '6. Controlling list formatting',
1158         '7. Retaining or ignoring existing line breaks',
1159         '8. Blank line control',
1160         '9. Other controls',
1161         '10. HTML options',
1162         '11. pod2html options',
1163         '12. Controlling HTML properties',
1164         '13. Debugging',
1165     );
1166
1167     #  These options are parsed directly by perltidy:
1168     #    help h
1169     #    version v
1170     #  However, they are included in the option set so that they will
1171     #  be seen in the options dump.
1172
1173     # These long option names have no abbreviations or are treated specially
1174     @option_string = qw(
1175       html!
1176       noprofile
1177       no-profile
1178       npro
1179       recombine!
1180       valign!
1181     );
1182
1183     my $category = 13;    # Debugging
1184     foreach (@option_string) {
1185         my $opt = $_;     # must avoid changing the actual flag
1186         $opt =~ s/!$//;
1187         $option_category{$opt} = $category_name[$category];
1188     }
1189
1190     $category = 11;                                       # HTML
1191     $option_category{html} = $category_name[$category];
1192
1193     # routine to install and check options
1194     my $add_option = sub {
1195         my ( $long_name, $short_name, $flag ) = @_;
1196         push @option_string, $long_name . $flag;
1197         $option_category{$long_name} = $category_name[$category];
1198         if ($short_name) {
1199             if ( $expansion{$short_name} ) {
1200                 my $existing_name = $expansion{$short_name}[0];
1201                 die
1202 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1203             }
1204             $expansion{$short_name} = [$long_name];
1205             if ( $flag eq '!' ) {
1206                 my $nshort_name = 'n' . $short_name;
1207                 my $nolong_name = 'no' . $long_name;
1208                 if ( $expansion{$nshort_name} ) {
1209                     my $existing_name = $expansion{$nshort_name}[0];
1210                     die
1211 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1212                 }
1213                 $expansion{$nshort_name} = [$nolong_name];
1214             }
1215         }
1216     };
1217
1218     # Install long option names which have a simple abbreviation.
1219     # Options with code '!' get standard negation ('no' for long names,
1220     # 'n' for abbreviations).  Categories follow the manual.
1221
1222     ###########################
1223     $category = 0;    # I/O_Control
1224     ###########################
1225     $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1226     $add_option->( 'backup-file-extension',      'bext',  '=s' );
1227     $add_option->( 'force-read-binary',          'f',     '!' );
1228     $add_option->( 'format',                     'fmt',   '=s' );
1229     $add_option->( 'logfile',                    'log',   '!' );
1230     $add_option->( 'logfile-gap',                'g',     ':i' );
1231     $add_option->( 'outfile',                    'o',     '=s' );
1232     $add_option->( 'output-file-extension',      'oext',  '=s' );
1233     $add_option->( 'output-path',                'opath', '=s' );
1234     $add_option->( 'profile',                    'pro',   '=s' );
1235     $add_option->( 'quiet',                      'q',     '!' );
1236     $add_option->( 'standard-error-output',      'se',    '!' );
1237     $add_option->( 'standard-output',            'st',    '!' );
1238     $add_option->( 'warning-output',             'w',     '!' );
1239
1240     # options which are both toggle switches and values moved here
1241     # to hide from tidyview (which does not show category 0 flags):
1242     # -ole moved here from category 1
1243     # -sil moved here from category 2
1244     $add_option->( 'output-line-ending',         'ole', '=s' );
1245     $add_option->( 'starting-indentation-level', 'sil', '=i' );
1246
1247     ########################################
1248     $category = 1;    # Basic formatting options
1249     ########################################
1250     $add_option->( 'check-syntax',             'syn',  '!' );
1251     $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
1252     $add_option->( 'indent-columns',           'i',    '=i' );
1253     $add_option->( 'maximum-line-length',      'l',    '=i' );
1254     $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
1255     $add_option->( 'preserve-line-endings',    'ple',  '!' );
1256     $add_option->( 'tabs',                     't',    '!' );
1257
1258     ########################################
1259     $category = 2;    # Code indentation control
1260     ########################################
1261     $add_option->( 'continuation-indentation',           'ci',   '=i' );
1262     $add_option->( 'line-up-parentheses',                'lp',   '!' );
1263     $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1264     $add_option->( 'outdent-keywords',                   'okw',  '!' );
1265     $add_option->( 'outdent-labels',                     'ola',  '!' );
1266     $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1267     $add_option->( 'indent-closing-brace',               'icb',  '!' );
1268     $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1269     $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1270     $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1271     $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1272     $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1273     $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1274
1275     ########################################
1276     $category = 3;    # Whitespace control
1277     ########################################
1278     $add_option->( 'add-semicolons',                            'asc',   '!' );
1279     $add_option->( 'add-whitespace',                            'aws',   '!' );
1280     $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1281     $add_option->( 'brace-tightness',                           'bt',    '=i' );
1282     $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1283     $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1284     $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1285     $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1286     $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1287     $add_option->( 'paren-tightness',                           'pt',    '=i' );
1288     $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1289     $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1290     $add_option->( 'space-function-paren',                      'sfp',   '!' );
1291     $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1292     $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1293     $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1294     $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1295     $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1296     $add_option->( 'trim-qw',                                   'tqw',   '!' );
1297     $add_option->( 'want-left-space',                           'wls',   '=s' );
1298     $add_option->( 'want-right-space',                          'wrs',   '=s' );
1299
1300     ########################################
1301     $category = 4;    # Comment controls
1302     ########################################
1303     $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1304     $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1305     $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1306     $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1307     $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1308     $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1309     $add_option->( 'closing-side-comments',             'csc',  '!' );
1310     $add_option->( 'closing-side-comments-balanced',    'cscb', '!' );
1311     $add_option->( 'format-skipping',                   'fs',   '!' );
1312     $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1313     $add_option->( 'format-skipping-end',               'fse',  '=s' );
1314     $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1315     $add_option->( 'indent-block-comments',             'ibc',  '!' );
1316     $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1317     $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
1318     $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1319     $add_option->( 'outdent-long-comments',             'olc',  '!' );
1320     $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1321     $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1322     $add_option->( 'static-block-comments',             'sbc',  '!' );
1323     $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1324     $add_option->( 'static-side-comments',              'ssc',  '!' );
1325
1326     ########################################
1327     $category = 5;    # Linebreak controls
1328     ########################################
1329     $add_option->( 'add-newlines',                            'anl',   '!' );
1330     $add_option->( 'block-brace-vertical-tightness',          'bbvt',  '=i' );
1331     $add_option->( 'block-brace-vertical-tightness-list',     'bbvtl', '=s' );
1332     $add_option->( 'brace-vertical-tightness',                'bvt',   '=i' );
1333     $add_option->( 'brace-vertical-tightness-closing',        'bvtc',  '=i' );
1334     $add_option->( 'cuddled-else',                            'ce',    '!' );
1335     $add_option->( 'delete-old-newlines',                     'dnl',   '!' );
1336     $add_option->( 'opening-brace-always-on-right',           'bar',   '!' );
1337     $add_option->( 'opening-brace-on-new-line',               'bl',    '!' );
1338     $add_option->( 'opening-hash-brace-right',                'ohbr',  '!' );
1339     $add_option->( 'opening-paren-right',                     'opr',   '!' );
1340     $add_option->( 'opening-square-bracket-right',            'osbr',  '!' );
1341     $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl',  '!' );
1342     $add_option->( 'opening-sub-brace-on-new-line',           'sbl',   '!' );
1343     $add_option->( 'paren-vertical-tightness',                'pvt',   '=i' );
1344     $add_option->( 'paren-vertical-tightness-closing',        'pvtc',  '=i' );
1345     $add_option->( 'stack-closing-hash-brace',                'schb',  '!' );
1346     $add_option->( 'stack-closing-paren',                     'scp',   '!' );
1347     $add_option->( 'stack-closing-square-bracket',            'scsb',  '!' );
1348     $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
1349     $add_option->( 'stack-opening-paren',                     'sop',   '!' );
1350     $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
1351     $add_option->( 'vertical-tightness',                      'vt',    '=i' );
1352     $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
1353     $add_option->( 'want-break-after',                        'wba',   '=s' );
1354     $add_option->( 'want-break-before',                       'wbb',   '=s' );
1355     $add_option->( 'break-after-all-operators',               'baao',  '!' );
1356     $add_option->( 'break-before-all-operators',              'bbao',  '!' );
1357     $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
1358
1359     ########################################
1360     $category = 6;    # Controlling list formatting
1361     ########################################
1362     $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1363     $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1364     $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1365
1366     ########################################
1367     $category = 7;    # Retaining or ignoring existing line breaks
1368     ########################################
1369     $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1370     $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1371     $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1372     $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
1373
1374     ########################################
1375     $category = 8;    # Blank line control
1376     ########################################
1377     $add_option->( 'blanks-before-blocks',            'bbb', '!' );
1378     $add_option->( 'blanks-before-comments',          'bbc', '!' );
1379     $add_option->( 'blanks-before-subs',              'bbs', '!' );
1380     $add_option->( 'long-block-line-count',           'lbl', '=i' );
1381     $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1382     $add_option->( 'keep-old-blank-lines',            'kbl', '=i' );
1383
1384     ########################################
1385     $category = 9;    # Other controls
1386     ########################################
1387     $add_option->( 'delete-block-comments',        'dbc',  '!' );
1388     $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1389     $add_option->( 'delete-pod',                   'dp',   '!' );
1390     $add_option->( 'delete-side-comments',         'dsc',  '!' );
1391     $add_option->( 'tee-block-comments',           'tbc',  '!' );
1392     $add_option->( 'tee-pod',                      'tp',   '!' );
1393     $add_option->( 'tee-side-comments',            'tsc',  '!' );
1394     $add_option->( 'look-for-autoloader',          'lal',  '!' );
1395     $add_option->( 'look-for-hash-bang',           'x',    '!' );
1396     $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1397     $add_option->( 'pass-version-line',            'pvl',  '!' );
1398
1399     ########################################
1400     $category = 13;    # Debugging
1401     ########################################
1402     $add_option->( 'DEBUG',                           'D',    '!' );
1403     $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1404     $add_option->( 'check-multiline-quotes',          'chk',  '!' );
1405     $add_option->( 'dump-defaults',                   'ddf',  '!' );
1406     $add_option->( 'dump-long-names',                 'dln',  '!' );
1407     $add_option->( 'dump-options',                    'dop',  '!' );
1408     $add_option->( 'dump-profile',                    'dpro', '!' );
1409     $add_option->( 'dump-short-names',                'dsn',  '!' );
1410     $add_option->( 'dump-token-types',                'dtt',  '!' );
1411     $add_option->( 'dump-want-left-space',            'dwls', '!' );
1412     $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1413     $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1414     $add_option->( 'help',                            'h',    '' );
1415     $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1416     $add_option->( 'show-options',                    'opt',  '!' );
1417     $add_option->( 'version',                         'v',    '' );
1418
1419     #---------------------------------------------------------------------
1420
1421     # The Perl::Tidy::HtmlWriter will add its own options to the string
1422     Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1423
1424     ########################################
1425     # Set categories 10, 11, 12
1426     ########################################
1427     # Based on their known order
1428     $category = 12;    # HTML properties
1429     foreach my $opt (@option_string) {
1430         my $long_name = $opt;
1431         $long_name =~ s/(!|=.*|:.*)$//;
1432         unless ( defined( $option_category{$long_name} ) ) {
1433             if ( $long_name =~ /^html-linked/ ) {
1434                 $category = 10;    # HTML options
1435             }
1436             elsif ( $long_name =~ /^pod2html/ ) {
1437                 $category = 11;    # Pod2html
1438             }
1439             $option_category{$long_name} = $category_name[$category];
1440         }
1441     }
1442
1443     #---------------------------------------------------------------
1444     # Assign valid ranges to certain options
1445     #---------------------------------------------------------------
1446     # In the future, these may be used to make preliminary checks
1447     # hash keys are long names
1448     # If key or value is undefined:
1449     #   strings may have any value
1450     #   integer ranges are >=0
1451     # If value is defined:
1452     #   value is [qw(any valid words)] for strings
1453     #   value is [min, max] for integers
1454     #   if min is undefined, there is no lower limit
1455     #   if max is undefined, there is no upper limit
1456     # Parameters not listed here have defaults
1457     %option_range = (
1458         'format'             => [ 'tidy', 'html', 'user' ],
1459         'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
1460
1461         'block-brace-tightness'    => [ 0, 2 ],
1462         'brace-tightness'          => [ 0, 2 ],
1463         'paren-tightness'          => [ 0, 2 ],
1464         'square-bracket-tightness' => [ 0, 2 ],
1465
1466         'block-brace-vertical-tightness'            => [ 0, 2 ],
1467         'brace-vertical-tightness'                  => [ 0, 2 ],
1468         'brace-vertical-tightness-closing'          => [ 0, 2 ],
1469         'paren-vertical-tightness'                  => [ 0, 2 ],
1470         'paren-vertical-tightness-closing'          => [ 0, 2 ],
1471         'square-bracket-vertical-tightness'         => [ 0, 2 ],
1472         'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1473         'vertical-tightness'                        => [ 0, 2 ],
1474         'vertical-tightness-closing'                => [ 0, 2 ],
1475
1476         'closing-brace-indentation'          => [ 0, 3 ],
1477         'closing-paren-indentation'          => [ 0, 3 ],
1478         'closing-square-bracket-indentation' => [ 0, 3 ],
1479         'closing-token-indentation'          => [ 0, 3 ],
1480
1481         'closing-side-comment-else-flag' => [ 0, 2 ],
1482         'comma-arrow-breakpoints'        => [ 0, 3 ],
1483     );
1484
1485     # Note: we could actually allow negative ci if someone really wants it:
1486     # $option_range{'continuation-indentation'} = [ undef, undef ];
1487
1488     #---------------------------------------------------------------
1489     # Assign default values to the above options here, except
1490     # for 'outfile' and 'help'.
1491     # These settings should approximate the perlstyle(1) suggestions.
1492     #---------------------------------------------------------------
1493     my @defaults = qw(
1494       add-newlines
1495       add-semicolons
1496       add-whitespace
1497       blanks-before-blocks
1498       blanks-before-comments
1499       blanks-before-subs
1500       block-brace-tightness=0
1501       block-brace-vertical-tightness=0
1502       brace-tightness=1
1503       brace-vertical-tightness-closing=0
1504       brace-vertical-tightness=0
1505       break-at-old-logical-breakpoints
1506       break-at-old-ternary-breakpoints
1507       break-at-old-keyword-breakpoints
1508       comma-arrow-breakpoints=1
1509       nocheck-syntax
1510       closing-side-comment-interval=6
1511       closing-side-comment-maximum-text=20
1512       closing-side-comment-else-flag=0
1513       closing-side-comments-balanced
1514       closing-paren-indentation=0
1515       closing-brace-indentation=0
1516       closing-square-bracket-indentation=0
1517       continuation-indentation=2
1518       delete-old-newlines
1519       delete-semicolons
1520       fuzzy-line-length
1521       hanging-side-comments
1522       indent-block-comments
1523       indent-columns=4
1524       keep-old-blank-lines=1
1525       long-block-line-count=8
1526       look-for-autoloader
1527       look-for-selfloader
1528       maximum-consecutive-blank-lines=1
1529       maximum-fields-per-table=0
1530       maximum-line-length=80
1531       minimum-space-to-comment=4
1532       nobrace-left-and-indent
1533       nocuddled-else
1534       nodelete-old-whitespace
1535       nohtml
1536       nologfile
1537       noquiet
1538       noshow-options
1539       nostatic-side-comments
1540       notabs
1541       nowarning-output
1542       outdent-labels
1543       outdent-long-quotes
1544       outdent-long-comments
1545       paren-tightness=1
1546       paren-vertical-tightness-closing=0
1547       paren-vertical-tightness=0
1548       pass-version-line
1549       recombine
1550       valign
1551       short-concatenation-item-length=8
1552       space-for-semicolon
1553       square-bracket-tightness=1
1554       square-bracket-vertical-tightness-closing=0
1555       square-bracket-vertical-tightness=0
1556       static-block-comments
1557       trim-qw
1558       format=tidy
1559       backup-file-extension=bak
1560       format-skipping
1561
1562       pod2html
1563       html-table-of-contents
1564       html-entities
1565     );
1566
1567     push @defaults, "perl-syntax-check-flags=-c -T";
1568
1569     #---------------------------------------------------------------
1570     # Define abbreviations which will be expanded into the above primitives.
1571     # These may be defined recursively.
1572     #---------------------------------------------------------------
1573     %expansion = (
1574         %expansion,
1575         'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
1576         'fnl'               => [qw(freeze-newlines)],
1577         'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1578         'fws'               => [qw(freeze-whitespace)],
1579         'freeze-blank-lines' =>
1580           [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1581         'fbl'                => [qw(freeze-blank-lines)],
1582         'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1583         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1584         'nooutdent-long-lines' =>
1585           [qw(nooutdent-long-quotes nooutdent-long-comments)],
1586         'noll' => [qw(nooutdent-long-lines)],
1587         'io'   => [qw(indent-only)],
1588         'delete-all-comments' =>
1589           [qw(delete-block-comments delete-side-comments delete-pod)],
1590         'nodelete-all-comments' =>
1591           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1592         'dac'  => [qw(delete-all-comments)],
1593         'ndac' => [qw(nodelete-all-comments)],
1594         'gnu'  => [qw(gnu-style)],
1595         'pbp'  => [qw(perl-best-practices)],
1596         'tee-all-comments' =>
1597           [qw(tee-block-comments tee-side-comments tee-pod)],
1598         'notee-all-comments' =>
1599           [qw(notee-block-comments notee-side-comments notee-pod)],
1600         'tac'   => [qw(tee-all-comments)],
1601         'ntac'  => [qw(notee-all-comments)],
1602         'html'  => [qw(format=html)],
1603         'nhtml' => [qw(format=tidy)],
1604         'tidy'  => [qw(format=tidy)],
1605
1606         'swallow-optional-blank-lines'   => [qw(kbl=0)],
1607         'noswallow-optional-blank-lines' => [qw(kbl=1)],
1608         'sob'                            => [qw(kbl=0)],
1609         'nsob'                           => [qw(kbl=1)],
1610
1611         'break-after-comma-arrows'   => [qw(cab=0)],
1612         'nobreak-after-comma-arrows' => [qw(cab=1)],
1613         'baa'                        => [qw(cab=0)],
1614         'nbaa'                       => [qw(cab=1)],
1615
1616         'break-at-old-trinary-breakpoints' => [qw(bot)],
1617
1618         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1619         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1620         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1621         'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1622         'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1623
1624         'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1625         'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1626         'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1627         'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1628         'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1629
1630         'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1631         'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1632         'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1633
1634         'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1635         'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1636         'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1637
1638         'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1639         'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1640         'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1641
1642         'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1643         'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1644         'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1645
1646         'otr'                   => [qw(opr ohbr osbr)],
1647         'opening-token-right'   => [qw(opr ohbr osbr)],
1648         'notr'                  => [qw(nopr nohbr nosbr)],
1649         'noopening-token-right' => [qw(nopr nohbr nosbr)],
1650
1651         'sot'                    => [qw(sop sohb sosb)],
1652         'nsot'                   => [qw(nsop nsohb nsosb)],
1653         'stack-opening-tokens'   => [qw(sop sohb sosb)],
1654         'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1655
1656         'sct'                    => [qw(scp schb scsb)],
1657         'stack-closing-tokens'   => => [qw(scp schb scsb)],
1658         'nsct'                   => [qw(nscp nschb nscsb)],
1659         'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1660
1661         # 'mangle' originally deleted pod and comments, but to keep it
1662         # reversible, it no longer does.  But if you really want to
1663         # delete them, just use:
1664         #   -mangle -dac
1665
1666         # An interesting use for 'mangle' is to do this:
1667         #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1668         # which will form as many one-line blocks as possible
1669
1670         'mangle' => [
1671             qw(
1672               check-syntax
1673               keep-old-blank-lines=0
1674               delete-old-newlines
1675               delete-old-whitespace
1676               delete-semicolons
1677               indent-columns=0
1678               maximum-consecutive-blank-lines=0
1679               maximum-line-length=100000
1680               noadd-newlines
1681               noadd-semicolons
1682               noadd-whitespace
1683               noblanks-before-blocks
1684               noblanks-before-subs
1685               notabs
1686               )
1687         ],
1688
1689         # 'extrude' originally deleted pod and comments, but to keep it
1690         # reversible, it no longer does.  But if you really want to
1691         # delete them, just use
1692         #   extrude -dac
1693         #
1694         # An interesting use for 'extrude' is to do this:
1695         #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1696         # which will break up all one-line blocks.
1697
1698         'extrude' => [
1699             qw(
1700               check-syntax
1701               ci=0
1702               delete-old-newlines
1703               delete-old-whitespace
1704               delete-semicolons
1705               indent-columns=0
1706               maximum-consecutive-blank-lines=0
1707               maximum-line-length=1
1708               noadd-semicolons
1709               noadd-whitespace
1710               noblanks-before-blocks
1711               noblanks-before-subs
1712               nofuzzy-line-length
1713               notabs
1714               norecombine
1715               )
1716         ],
1717
1718         # this style tries to follow the GNU Coding Standards (which do
1719         # not really apply to perl but which are followed by some perl
1720         # programmers).
1721         'gnu-style' => [
1722             qw(
1723               lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1724               )
1725         ],
1726
1727         # Style suggested in Damian Conway's Perl Best Practices
1728         'perl-best-practices' => [
1729             qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1730 q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
1731         ],
1732
1733         # Additional styles can be added here
1734     );
1735
1736     Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1737
1738     # Uncomment next line to dump all expansions for debugging:
1739     # dump_short_names(\%expansion);
1740     return (
1741         \@option_string,   \@defaults, \%expansion,
1742         \%option_category, \%option_range
1743     );
1744
1745 }    # end of generate_options
1746
1747 sub process_command_line {
1748
1749     my (
1750         $perltidyrc_stream,  $is_Windows, $Windows_type,
1751         $rpending_complaint, $dump_options_type
1752     ) = @_;
1753
1754     use Getopt::Long;
1755
1756     my (
1757         $roption_string,   $rdefaults, $rexpansion,
1758         $roption_category, $roption_range
1759     ) = generate_options();
1760
1761     #---------------------------------------------------------------
1762     # set the defaults by passing the above list through GetOptions
1763     #---------------------------------------------------------------
1764     my %Opts = ();
1765     {
1766         local @ARGV;
1767         my $i;
1768
1769         # do not load the defaults if we are just dumping perltidyrc
1770         unless ( $dump_options_type eq 'perltidyrc' ) {
1771             for $i (@$rdefaults) { push @ARGV, "--" . $i }
1772         }
1773
1774         # Patch to save users Getopt::Long configuration
1775         # and set to Getopt::Long defaults.  Use eval to avoid
1776         # breaking old versions of Perl without these routines.
1777         my $glc;
1778         eval { $glc = Getopt::Long::Configure() };
1779         unless ($@) {
1780             eval { Getopt::Long::ConfigDefaults() };
1781         }
1782         else { $glc = undef }
1783
1784         if ( !GetOptions( \%Opts, @$roption_string ) ) {
1785             die "Programming Bug: error in setting default options";
1786         }
1787
1788         # Patch to put the previous Getopt::Long configuration back
1789         eval { Getopt::Long::Configure($glc) } if defined $glc;
1790     }
1791
1792     my $word;
1793     my @raw_options        = ();
1794     my $config_file        = "";
1795     my $saw_ignore_profile = 0;
1796     my $saw_extrude        = 0;
1797     my $saw_dump_profile   = 0;
1798     my $i;
1799
1800     #---------------------------------------------------------------
1801     # Take a first look at the command-line parameters.  Do as many
1802     # immediate dumps as possible, which can avoid confusion if the
1803     # perltidyrc file has an error.
1804     #---------------------------------------------------------------
1805     foreach $i (@ARGV) {
1806
1807         $i =~ s/^--/-/;
1808         if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1809             $saw_ignore_profile = 1;
1810         }
1811
1812         # note: this must come before -pro and -profile, below:
1813         elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1814             $saw_dump_profile = 1;
1815         }
1816         elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1817             if ($config_file) {
1818                 warn
1819 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1820             }
1821             $config_file = $2;
1822             unless ( -e $config_file ) {
1823                 warn "cannot find file given with -pro=$config_file: $!\n";
1824                 $config_file = "";
1825             }
1826         }
1827         elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1828             die "usage: -pro=filename or --profile=filename, no spaces\n";
1829         }
1830         elsif ( $i =~ /^-extrude$/ ) {
1831             $saw_extrude = 1;
1832         }
1833         elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1834             usage();
1835             exit 1;
1836         }
1837         elsif ( $i =~ /^-(version|v)$/ ) {
1838             show_version();
1839             exit 1;
1840         }
1841         elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1842             dump_defaults(@$rdefaults);
1843             exit 1;
1844         }
1845         elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1846             dump_long_names(@$roption_string);
1847             exit 1;
1848         }
1849         elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1850             dump_short_names($rexpansion);
1851             exit 1;
1852         }
1853         elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1854             Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1855             exit 1;
1856         }
1857     }
1858
1859     if ( $saw_dump_profile && $saw_ignore_profile ) {
1860         warn "No profile to dump because of -npro\n";
1861         exit 1;
1862     }
1863
1864     #---------------------------------------------------------------
1865     # read any .perltidyrc configuration file
1866     #---------------------------------------------------------------
1867     unless ($saw_ignore_profile) {
1868
1869         # resolve possible conflict between $perltidyrc_stream passed
1870         # as call parameter to perltidy and -pro=filename on command
1871         # line.
1872         if ($perltidyrc_stream) {
1873             if ($config_file) {
1874                 warn <<EOM;
1875  Conflict: a perltidyrc configuration file was specified both as this
1876  perltidy call parameter: $perltidyrc_stream
1877  and with this -profile=$config_file.
1878  Using -profile=$config_file.
1879 EOM
1880             }
1881             else {
1882                 $config_file = $perltidyrc_stream;
1883             }
1884         }
1885
1886         # look for a config file if we don't have one yet
1887         my $rconfig_file_chatter;
1888         $$rconfig_file_chatter = "";
1889         $config_file =
1890           find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1891             $rpending_complaint )
1892           unless $config_file;
1893
1894         # open any config file
1895         my $fh_config;
1896         if ($config_file) {
1897             ( $fh_config, $config_file ) =
1898               Perl::Tidy::streamhandle( $config_file, 'r' );
1899             unless ($fh_config) {
1900                 $$rconfig_file_chatter .=
1901                   "# $config_file exists but cannot be opened\n";
1902             }
1903         }
1904
1905         if ($saw_dump_profile) {
1906             if ($saw_dump_profile) {
1907                 dump_config_file( $fh_config, $config_file,
1908                     $rconfig_file_chatter );
1909                 exit 1;
1910             }
1911         }
1912
1913         if ($fh_config) {
1914
1915             my ( $rconfig_list, $death_message ) =
1916               read_config_file( $fh_config, $config_file, $rexpansion );
1917             die $death_message if ($death_message);
1918
1919             # process any .perltidyrc parameters right now so we can
1920             # localize errors
1921             if (@$rconfig_list) {
1922                 local @ARGV = @$rconfig_list;
1923
1924                 expand_command_abbreviations( $rexpansion, \@raw_options,
1925                     $config_file );
1926
1927                 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1928                     die
1929 "Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
1930                 }
1931
1932                 # Anything left in this local @ARGV is an error and must be
1933                 # invalid bare words from the configuration file.  We cannot
1934                 # check this earlier because bare words may have been valid
1935                 # values for parameters.  We had to wait for GetOptions to have
1936                 # a look at @ARGV.
1937                 if (@ARGV) {
1938                     my $count = @ARGV;
1939                     my $str   = "\'" . pop(@ARGV) . "\'";
1940                     while ( my $param = pop(@ARGV) ) {
1941                         if ( length($str) < 70 ) {
1942                             $str .= ", '$param'";
1943                         }
1944                         else {
1945                             $str .= ", ...";
1946                             last;
1947                         }
1948                     }
1949                     die <<EOM;
1950 There are $count unrecognized values in the configuration file '$config_file':
1951 $str
1952 Use leading dashes for parameters.  Use -npro to ignore this file.
1953 EOM
1954                 }
1955
1956                 # Undo any options which cause premature exit.  They are not
1957                 # appropriate for a config file, and it could be hard to
1958                 # diagnose the cause of the premature exit.
1959                 foreach (
1960                     qw{
1961                     dump-defaults
1962                     dump-long-names
1963                     dump-options
1964                     dump-profile
1965                     dump-short-names
1966                     dump-token-types
1967                     dump-want-left-space
1968                     dump-want-right-space
1969                     help
1970                     stylesheet
1971                     version
1972                     }
1973                   )
1974                 {
1975
1976                     if ( defined( $Opts{$_} ) ) {
1977                         delete $Opts{$_};
1978                         warn "ignoring --$_ in config file: $config_file\n";
1979                     }
1980                 }
1981             }
1982         }
1983     }
1984
1985     #---------------------------------------------------------------
1986     # now process the command line parameters
1987     #---------------------------------------------------------------
1988     expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
1989
1990     if ( !GetOptions( \%Opts, @$roption_string ) ) {
1991         die "Error on command line; for help try 'perltidy -h'\n";
1992     }
1993
1994     return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
1995         $rexpansion, $roption_category, $roption_range );
1996 }    # end of process_command_line
1997
1998 sub check_options {
1999
2000     my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2001
2002     #---------------------------------------------------------------
2003     # check and handle any interactions among the basic options..
2004     #---------------------------------------------------------------
2005
2006     # Since -vt, -vtc, and -cti are abbreviations, but under
2007     # msdos, an unquoted input parameter like vtc=1 will be
2008     # seen as 2 parameters, vtc and 1, so the abbreviations
2009     # won't be seen.  Therefore, we will catch them here if
2010     # they get through.
2011
2012     if ( defined $rOpts->{'vertical-tightness'} ) {
2013         my $vt = $rOpts->{'vertical-tightness'};
2014         $rOpts->{'paren-vertical-tightness'}          = $vt;
2015         $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2016         $rOpts->{'brace-vertical-tightness'}          = $vt;
2017     }
2018
2019     if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2020         my $vtc = $rOpts->{'vertical-tightness-closing'};
2021         $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
2022         $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2023         $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
2024     }
2025
2026     if ( defined $rOpts->{'closing-token-indentation'} ) {
2027         my $cti = $rOpts->{'closing-token-indentation'};
2028         $rOpts->{'closing-square-bracket-indentation'} = $cti;
2029         $rOpts->{'closing-brace-indentation'}          = $cti;
2030         $rOpts->{'closing-paren-indentation'}          = $cti;
2031     }
2032
2033     # In quiet mode, there is no log file and hence no way to report
2034     # results of syntax check, so don't do it.
2035     if ( $rOpts->{'quiet'} ) {
2036         $rOpts->{'check-syntax'} = 0;
2037     }
2038
2039     # can't check syntax if no output
2040     if ( $rOpts->{'format'} ne 'tidy' ) {
2041         $rOpts->{'check-syntax'} = 0;
2042     }
2043
2044     # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2045     # wide variety of nasty problems on these systems, because they cannot
2046     # reliably run backticks.  Don't even think about changing this!
2047     if (   $rOpts->{'check-syntax'}
2048         && $is_Windows
2049         && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2050     {
2051         $rOpts->{'check-syntax'} = 0;
2052     }
2053
2054     # It's really a bad idea to check syntax as root unless you wrote
2055     # the script yourself.  FIXME: not sure if this works with VMS
2056     unless ($is_Windows) {
2057
2058         if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2059             $rOpts->{'check-syntax'} = 0;
2060             $$rpending_complaint .=
2061 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2062         }
2063     }
2064
2065     # see if user set a non-negative logfile-gap
2066     if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2067
2068         # a zero gap will be taken as a 1
2069         if ( $rOpts->{'logfile-gap'} == 0 ) {
2070             $rOpts->{'logfile-gap'} = 1;
2071         }
2072
2073         # setting a non-negative logfile gap causes logfile to be saved
2074         $rOpts->{'logfile'} = 1;
2075     }
2076
2077     # not setting logfile gap, or setting it negative, causes default of 50
2078     else {
2079         $rOpts->{'logfile-gap'} = 50;
2080     }
2081
2082     # set short-cut flag when only indentation is to be done.
2083     # Note that the user may or may not have already set the
2084     # indent-only flag.
2085     if (   !$rOpts->{'add-whitespace'}
2086         && !$rOpts->{'delete-old-whitespace'}
2087         && !$rOpts->{'add-newlines'}
2088         && !$rOpts->{'delete-old-newlines'} )
2089     {
2090         $rOpts->{'indent-only'} = 1;
2091     }
2092
2093     # -isbc implies -ibc
2094     if ( $rOpts->{'indent-spaced-block-comments'} ) {
2095         $rOpts->{'indent-block-comments'} = 1;
2096     }
2097
2098     # -bli flag implies -bl
2099     if ( $rOpts->{'brace-left-and-indent'} ) {
2100         $rOpts->{'opening-brace-on-new-line'} = 1;
2101     }
2102
2103     if (   $rOpts->{'opening-brace-always-on-right'}
2104         && $rOpts->{'opening-brace-on-new-line'} )
2105     {
2106         warn <<EOM;
2107  Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2108   'opening-brace-on-new-line' (-bl).  Ignoring -bl.
2109 EOM
2110         $rOpts->{'opening-brace-on-new-line'} = 0;
2111     }
2112
2113     # it simplifies things if -bl is 0 rather than undefined
2114     if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2115         $rOpts->{'opening-brace-on-new-line'} = 0;
2116     }
2117
2118     # -sbl defaults to -bl if not defined
2119     if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2120         $rOpts->{'opening-sub-brace-on-new-line'} =
2121           $rOpts->{'opening-brace-on-new-line'};
2122     }
2123
2124     if ( $rOpts->{'entab-leading-whitespace'} ) {
2125         if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2126             warn "-et=n must use a positive integer; ignoring -et\n";
2127             $rOpts->{'entab-leading-whitespace'} = undef;
2128         }
2129
2130         # entab leading whitespace has priority over the older 'tabs' option
2131         if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2132     }
2133 }
2134
2135 sub expand_command_abbreviations {
2136
2137     # go through @ARGV and expand any abbreviations
2138
2139     my ( $rexpansion, $rraw_options, $config_file ) = @_;
2140     my ($word);
2141
2142     # set a pass limit to prevent an infinite loop;
2143     # 10 should be plenty, but it may be increased to allow deeply
2144     # nested expansions.
2145     my $max_passes = 10;
2146     my @new_argv   = ();
2147
2148     # keep looping until all expansions have been converted into actual
2149     # dash parameters..
2150     for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2151         my @new_argv     = ();
2152         my $abbrev_count = 0;
2153
2154         # loop over each item in @ARGV..
2155         foreach $word (@ARGV) {
2156
2157             # convert any leading 'no-' to just 'no'
2158             if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2159
2160             # if it is a dash flag (instead of a file name)..
2161             if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2162
2163                 my $abr   = $1;
2164                 my $flags = $2;
2165
2166                 # save the raw input for debug output in case of circular refs
2167                 if ( $pass_count == 0 ) {
2168                     push( @$rraw_options, $word );
2169                 }
2170
2171                 # recombine abbreviation and flag, if necessary,
2172                 # to allow abbreviations with arguments such as '-vt=1'
2173                 if ( $rexpansion->{ $abr . $flags } ) {
2174                     $abr   = $abr . $flags;
2175                     $flags = "";
2176                 }
2177
2178                 # if we see this dash item in the expansion hash..
2179                 if ( $rexpansion->{$abr} ) {
2180                     $abbrev_count++;
2181
2182                     # stuff all of the words that it expands to into the
2183                     # new arg list for the next pass
2184                     foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2185                         next unless $abbrev;    # for safety; shouldn't happen
2186                         push( @new_argv, '--' . $abbrev . $flags );
2187                     }
2188                 }
2189
2190                 # not in expansion hash, must be actual long name
2191                 else {
2192                     push( @new_argv, $word );
2193                 }
2194             }
2195
2196             # not a dash item, so just save it for the next pass
2197             else {
2198                 push( @new_argv, $word );
2199             }
2200         }    # end of this pass
2201
2202         # update parameter list @ARGV to the new one
2203         @ARGV = @new_argv;
2204         last unless ( $abbrev_count > 0 );
2205
2206         # make sure we are not in an infinite loop
2207         if ( $pass_count == $max_passes ) {
2208             print STDERR
2209 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2210             print STDERR "Here are the raw options\n";
2211             local $" = ')(';
2212             print STDERR "(@$rraw_options)\n";
2213             my $num = @new_argv;
2214
2215             if ( $num < 50 ) {
2216                 print STDERR "After $max_passes passes here is ARGV\n";
2217                 print STDERR "(@new_argv)\n";
2218             }
2219             else {
2220                 print STDERR "After $max_passes passes ARGV has $num entries\n";
2221             }
2222
2223             if ($config_file) {
2224                 die <<"DIE";
2225 Please check your configuration file $config_file for circular-references.
2226 To deactivate it, use -npro.
2227 DIE
2228             }
2229             else {
2230                 die <<'DIE';
2231 Program bug - circular-references in the %expansion hash, probably due to
2232 a recent program change.
2233 DIE
2234             }
2235         }    # end of check for circular references
2236     }    # end of loop over all passes
2237 }
2238
2239 # Debug routine -- this will dump the expansion hash
2240 sub dump_short_names {
2241     my $rexpansion = shift;
2242     print STDOUT <<EOM;
2243 List of short names.  This list shows how all abbreviations are
2244 translated into other abbreviations and, eventually, into long names.
2245 New abbreviations may be defined in a .perltidyrc file.
2246 For a list of all long names, use perltidy --dump-long-names (-dln).
2247 --------------------------------------------------------------------------
2248 EOM
2249     foreach my $abbrev ( sort keys %$rexpansion ) {
2250         my @list = @{ $$rexpansion{$abbrev} };
2251         print STDOUT "$abbrev --> @list\n";
2252     }
2253 }
2254
2255 sub check_vms_filename {
2256
2257     # given a valid filename (the perltidy input file)
2258     # create a modified filename and separator character
2259     # suitable for VMS.
2260     #
2261     # Contributed by Michael Cartmell
2262     #
2263     my ( $base, $path ) = fileparse( $_[0] );
2264
2265     # remove explicit ; version
2266     $base =~ s/;-?\d*$//
2267
2268       # remove explicit . version ie two dots in filename NB ^ escapes a dot
2269       or $base =~ s/(          # begin capture $1
2270                   (?:^|[^^])\. # match a dot not preceded by a caret
2271                   (?:          # followed by nothing
2272                     |          # or
2273                     .*[^^]     # anything ending in a non caret
2274                   )
2275                 )              # end capture $1
2276                 \.-?\d*$       # match . version number
2277               /$1/x;
2278
2279     # normalise filename, if there are no unescaped dots then append one
2280     $base .= '.' unless $base =~ /(?:^|[^^])\./;
2281
2282     # if we don't already have an extension then we just append the extention
2283     my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2284     return ( $path . $base, $separator );
2285 }
2286
2287 sub Win_OS_Type {
2288
2289     # TODO: are these more standard names?
2290     # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2291
2292     # Returns a string that determines what MS OS we are on.
2293     # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2294     # Returns blank string if not an MS system.
2295     # Original code contributed by: Yves Orton
2296     # We need to know this to decide where to look for config files
2297
2298     my $rpending_complaint = shift;
2299     my $os                 = "";
2300     return $os unless $^O =~ /win32|dos/i;    # is it a MS box?
2301
2302     # Systems built from Perl source may not have Win32.pm
2303     # But probably have Win32::GetOSVersion() anyway so the
2304     # following line is not 'required':
2305     # return $os unless eval('require Win32');
2306
2307     # Use the standard API call to determine the version
2308     my ( $undef, $major, $minor, $build, $id );
2309     eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2310
2311     #
2312     #    NAME                   ID   MAJOR  MINOR
2313     #    Windows NT 4           2      4       0
2314     #    Windows 2000           2      5       0
2315     #    Windows XP             2      5       1
2316     #    Windows Server 2003    2      5       2
2317
2318     return "win32s" unless $id;    # If id==0 then its a win32s box.
2319     $os = {                        # Magic numbers from MSDN
2320                                    # documentation of GetOSVersion
2321         1 => {
2322             0  => "95",
2323             10 => "98",
2324             90 => "Me"
2325         },
2326         2 => {
2327             0  => "2000",          # or NT 4, see below
2328             1  => "XP/.Net",
2329             2  => "Win2003",
2330             51 => "NT3.51"
2331         }
2332     }->{$id}->{$minor};
2333
2334     # If $os is undefined, the above code is out of date.  Suggested updates
2335     # are welcome.
2336     unless ( defined $os ) {
2337         $os = "";
2338         $$rpending_complaint .= <<EOS;
2339 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2340 We won't be able to look for a system-wide config file.
2341 EOS
2342     }
2343
2344     # Unfortunately the logic used for the various versions isnt so clever..
2345     # so we have to handle an outside case.
2346     return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2347 }
2348
2349 sub is_unix {
2350     return
2351          ( $^O !~ /win32|dos/i )
2352       && ( $^O ne 'VMS' )
2353       && ( $^O ne 'OS2' )
2354       && ( $^O ne 'MacOS' );
2355 }
2356
2357 sub look_for_Windows {
2358
2359     # determine Windows sub-type and location of
2360     # system-wide configuration files
2361     my $rpending_complaint = shift;
2362     my $is_Windows         = ( $^O =~ /win32|dos/i );
2363     my $Windows_type       = Win_OS_Type($rpending_complaint) if $is_Windows;
2364     return ( $is_Windows, $Windows_type );
2365 }
2366
2367 sub find_config_file {
2368
2369     # look for a .perltidyrc configuration file
2370     # For Windows also look for a file named perltidy.ini
2371     my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2372         $rpending_complaint ) = @_;
2373
2374     $$rconfig_file_chatter .= "# Config file search...system reported as:";
2375     if ($is_Windows) {
2376         $$rconfig_file_chatter .= "Windows $Windows_type\n";
2377     }
2378     else {
2379         $$rconfig_file_chatter .= " $^O\n";
2380     }
2381
2382     # sub to check file existance and record all tests
2383     my $exists_config_file = sub {
2384         my $config_file = shift;
2385         return 0 unless $config_file;
2386         $$rconfig_file_chatter .= "# Testing: $config_file\n";
2387         return -f $config_file;
2388     };
2389
2390     my $config_file;
2391
2392     # look in current directory first
2393     $config_file = ".perltidyrc";
2394     return $config_file if $exists_config_file->($config_file);
2395     if ($is_Windows) {
2396         $config_file = "perltidy.ini";
2397         return $config_file if $exists_config_file->($config_file);
2398     }
2399
2400     # Default environment vars.
2401     my @envs = qw(PERLTIDY HOME);
2402
2403     # Check the NT/2k/XP locations, first a local machine def, then a
2404     # network def
2405     push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2406
2407     # Now go through the enviornment ...
2408     foreach my $var (@envs) {
2409         $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2410         if ( defined( $ENV{$var} ) ) {
2411             $$rconfig_file_chatter .= " = $ENV{$var}\n";
2412
2413             # test ENV{ PERLTIDY } as file:
2414             if ( $var eq 'PERLTIDY' ) {
2415                 $config_file = "$ENV{$var}";
2416                 return $config_file if $exists_config_file->($config_file);
2417             }
2418
2419             # test ENV as directory:
2420             $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2421             return $config_file if $exists_config_file->($config_file);
2422
2423             if ($is_Windows) {
2424                 $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2425                 return $config_file if $exists_config_file->($config_file);
2426             }
2427         }
2428         else {
2429             $$rconfig_file_chatter .= "\n";
2430         }
2431     }
2432
2433     # then look for a system-wide definition
2434     # where to look varies with OS
2435     if ($is_Windows) {
2436
2437         if ($Windows_type) {
2438             my ( $os, $system, $allusers ) =
2439               Win_Config_Locs( $rpending_complaint, $Windows_type );
2440
2441             # Check All Users directory, if there is one.
2442             # i.e. C:\Documents and Settings\User\perltidy.ini
2443             if ($allusers) {
2444
2445                 $config_file = catfile( $allusers, ".perltidyrc" );
2446                 return $config_file if $exists_config_file->($config_file);
2447
2448                 $config_file = catfile( $allusers, "perltidy.ini" );
2449                 return $config_file if $exists_config_file->($config_file);
2450             }
2451
2452             # Check system directory.
2453             # retain old code in case someone has been able to create
2454             # a file with a leading period.
2455             $config_file = catfile( $system, ".perltidyrc" );
2456             return $config_file if $exists_config_file->($config_file);
2457
2458             $config_file = catfile( $system, "perltidy.ini" );
2459             return $config_file if $exists_config_file->($config_file);
2460         }
2461     }
2462
2463     # Place to add customization code for other systems
2464     elsif ( $^O eq 'OS2' ) {
2465     }
2466     elsif ( $^O eq 'MacOS' ) {
2467     }
2468     elsif ( $^O eq 'VMS' ) {
2469     }
2470
2471     # Assume some kind of Unix
2472     else {
2473
2474         $config_file = "/usr/local/etc/perltidyrc";
2475         return $config_file if $exists_config_file->($config_file);
2476
2477         $config_file = "/etc/perltidyrc";
2478         return $config_file if $exists_config_file->($config_file);
2479     }
2480
2481     # Couldn't find a config file
2482     return;
2483 }
2484
2485 sub Win_Config_Locs {
2486
2487     # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2488     # or undef if its not a win32 OS.  In list context returns OS, System
2489     # Directory, and All Users Directory.  All Users will be empty on a
2490     # 9x/Me box.  Contributed by: Yves Orton.
2491
2492     my $rpending_complaint = shift;
2493     my $os = (@_) ? shift : Win_OS_Type();
2494     return unless $os;
2495
2496     my $system   = "";
2497     my $allusers = "";
2498
2499     if ( $os =~ /9[58]|Me/ ) {
2500         $system = "C:/Windows";
2501     }
2502     elsif ( $os =~ /NT|XP|200?/ ) {
2503         $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2504         $allusers =
2505           ( $os =~ /NT/ )
2506           ? "C:/WinNT/profiles/All Users/"
2507           : "C:/Documents and Settings/All Users/";
2508     }
2509     else {
2510
2511         # This currently would only happen on a win32s computer.  I dont have
2512         # one to test, so I am unsure how to proceed.  Suggestions welcome!
2513         $$rpending_complaint .=
2514 "I dont know a sensible place to look for config files on an $os system.\n";
2515         return;
2516     }
2517     return wantarray ? ( $os, $system, $allusers ) : $os;
2518 }
2519
2520 sub dump_config_file {
2521     my $fh                   = shift;
2522     my $config_file          = shift;
2523     my $rconfig_file_chatter = shift;
2524     print STDOUT "$$rconfig_file_chatter";
2525     if ($fh) {
2526         print STDOUT "# Dump of file: '$config_file'\n";
2527         while ( my $line = $fh->getline() ) { print STDOUT $line }
2528         eval { $fh->close() };
2529     }
2530     else {
2531         print STDOUT "# ...no config file found\n";
2532     }
2533 }
2534
2535 sub read_config_file {
2536
2537     my ( $fh, $config_file, $rexpansion ) = @_;
2538     my @config_list = ();
2539
2540     # file is bad if non-empty $death_message is returned
2541     my $death_message = "";
2542
2543     my $name = undef;
2544     my $line_no;
2545     while ( my $line = $fh->getline() ) {
2546         $line_no++;
2547         chomp $line;
2548         next if $line =~ /^\s*#/;    # skip full-line comment
2549         ( $line, $death_message ) =
2550           strip_comment( $line, $config_file, $line_no );
2551         last if ($death_message);
2552         $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
2553         next unless $line;
2554
2555         # look for something of the general form
2556         #    newname { body }
2557         # or just
2558         #    body
2559
2560         if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2561             my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2562
2563             # handle a new alias definition
2564             if ($newname) {
2565                 if ($name) {
2566                     $death_message =
2567 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2568                     last;
2569                 }
2570                 $name = $newname;
2571
2572                 if ( ${$rexpansion}{$name} ) {
2573                     local $" = ')(';
2574                     my @names = sort keys %$rexpansion;
2575                     $death_message =
2576                         "Here is a list of all installed aliases\n(@names)\n"
2577                       . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2578                     last;
2579                 }
2580                 ${$rexpansion}{$name} = [];
2581             }
2582
2583             # now do the body
2584             if ($body) {
2585
2586                 my ( $rbody_parts, $msg ) = parse_args($body);
2587                 if ($msg) {
2588                     $death_message = <<EOM;
2589 Error reading file '$config_file' at line number $line_no.
2590 $msg
2591 Please fix this line or use -npro to avoid reading this file
2592 EOM
2593                     last;
2594                 }
2595
2596                 if ($name) {
2597
2598                     # remove leading dashes if this is an alias
2599                     foreach (@$rbody_parts) { s/^\-+//; }
2600                     push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2601                 }
2602                 else {
2603                     push( @config_list, @$rbody_parts );
2604                 }
2605             }
2606
2607             if ($curly) {
2608                 unless ($name) {
2609                     $death_message =
2610 "Unexpected '}' seen in config file $config_file line $.\n";
2611                     last;
2612                 }
2613                 $name = undef;
2614             }
2615         }
2616     }
2617     eval { $fh->close() };
2618     return ( \@config_list, $death_message );
2619 }
2620
2621 sub strip_comment {
2622
2623     my ( $instr, $config_file, $line_no ) = @_;
2624     my $msg = "";
2625
2626     # nothing to do if no comments
2627     if ( $instr !~ /#/ ) {
2628         return ( $instr, $msg );
2629     }
2630
2631     # use simple method of no quotes
2632     elsif ( $instr !~ /['"]/ ) {
2633         $instr =~ s/\s*\#.*$//;    # simple trim
2634         return ( $instr, $msg );
2635     }
2636
2637     # handle comments and quotes
2638     my $outstr     = "";
2639     my $quote_char = "";
2640     while (1) {
2641
2642         # looking for ending quote character
2643         if ($quote_char) {
2644             if ( $instr =~ /\G($quote_char)/gc ) {
2645                 $quote_char = "";
2646                 $outstr .= $1;
2647             }
2648             elsif ( $instr =~ /\G(.)/gc ) {
2649                 $outstr .= $1;
2650             }
2651
2652             # error..we reached the end without seeing the ending quote char
2653             else {
2654                 $msg = <<EOM;
2655 Error reading file $config_file at line number $line_no.
2656 Did not see ending quote character <$quote_char> in this text:
2657 $instr
2658 Please fix this line or use -npro to avoid reading this file
2659 EOM
2660                 last;
2661             }
2662         }
2663
2664         # accumulating characters and looking for start of a quoted string
2665         else {
2666             if ( $instr =~ /\G([\"\'])/gc ) {
2667                 $outstr .= $1;
2668                 $quote_char = $1;
2669             }
2670             elsif ( $instr =~ /\G#/gc ) {
2671                 last;
2672             }
2673             elsif ( $instr =~ /\G(.)/gc ) {
2674                 $outstr .= $1;
2675             }
2676             else {
2677                 last;
2678             }
2679         }
2680     }
2681     return ( $outstr, $msg );
2682 }
2683
2684 sub parse_args {
2685
2686     # Parse a command string containing multiple string with possible
2687     # quotes, into individual commands.  It might look like this, for example:
2688     #
2689     #    -wba=" + - "  -some-thing -wbb='. && ||'
2690     #
2691     # There is no need, at present, to handle escaped quote characters.
2692     # (They are not perltidy tokens, so needn't be in strings).
2693
2694     my ($body)     = @_;
2695     my @body_parts = ();
2696     my $quote_char = "";
2697     my $part       = "";
2698     my $msg        = "";
2699     while (1) {
2700
2701         # looking for ending quote character
2702         if ($quote_char) {
2703             if ( $body =~ /\G($quote_char)/gc ) {
2704                 $quote_char = "";
2705             }
2706             elsif ( $body =~ /\G(.)/gc ) {
2707                 $part .= $1;
2708             }
2709
2710             # error..we reached the end without seeing the ending quote char
2711             else {
2712                 if ( length($part) ) { push @body_parts, $part; }
2713                 $msg = <<EOM;
2714 Did not see ending quote character <$quote_char> in this text:
2715 $body
2716 EOM
2717                 last;
2718             }
2719         }
2720
2721         # accumulating characters and looking for start of a quoted string
2722         else {
2723             if ( $body =~ /\G([\"\'])/gc ) {
2724                 $quote_char = $1;
2725             }
2726             elsif ( $body =~ /\G(\s+)/gc ) {
2727                 if ( length($part) ) { push @body_parts, $part; }
2728                 $part = "";
2729             }
2730             elsif ( $body =~ /\G(.)/gc ) {
2731                 $part .= $1;
2732             }
2733             else {
2734                 if ( length($part) ) { push @body_parts, $part; }
2735                 last;
2736             }
2737         }
2738     }
2739     return ( \@body_parts, $msg );
2740 }
2741
2742 sub dump_long_names {
2743
2744     my @names = sort @_;
2745     print STDOUT <<EOM;
2746 # Command line long names (passed to GetOptions)
2747 #---------------------------------------------------------------
2748 # here is a summary of the Getopt codes:
2749 # <none> does not take an argument
2750 # =s takes a mandatory string
2751 # :s takes an optional string
2752 # =i takes a mandatory integer
2753 # :i takes an optional integer
2754 # ! does not take an argument and may be negated
2755 #  i.e., -foo and -nofoo are allowed
2756 # a double dash signals the end of the options list
2757 #
2758 #---------------------------------------------------------------
2759 EOM
2760
2761     foreach (@names) { print STDOUT "$_\n" }
2762 }
2763
2764 sub dump_defaults {
2765     my @defaults = sort @_;
2766     print STDOUT "Default command line options:\n";
2767     foreach (@_) { print STDOUT "$_\n" }
2768 }
2769
2770 sub readable_options {
2771
2772     # return options for this run as a string which could be
2773     # put in a perltidyrc file
2774     my ( $rOpts, $roption_string ) = @_;
2775     my %Getopt_flags;
2776     my $rGetopt_flags    = \%Getopt_flags;
2777     my $readable_options = "# Final parameter set for this run.\n";
2778     $readable_options .=
2779       "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
2780     foreach my $opt ( @{$roption_string} ) {
2781         my $flag = "";
2782         if ( $opt =~ /(.*)(!|=.*)$/ ) {
2783             $opt  = $1;
2784             $flag = $2;
2785         }
2786         if ( defined( $rOpts->{$opt} ) ) {
2787             $rGetopt_flags->{$opt} = $flag;
2788         }
2789     }
2790     foreach my $key ( sort keys %{$rOpts} ) {
2791         my $flag   = $rGetopt_flags->{$key};
2792         my $value  = $rOpts->{$key};
2793         my $prefix = '--';
2794         my $suffix = "";
2795         if ($flag) {
2796             if ( $flag =~ /^=/ ) {
2797                 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2798                 $suffix = "=" . $value;
2799             }
2800             elsif ( $flag =~ /^!/ ) {
2801                 $prefix .= "no" unless ($value);
2802             }
2803             else {
2804
2805                 # shouldn't happen
2806                 $readable_options .=
2807                   "# ERROR in dump_options: unrecognized flag $flag for $key\n";
2808             }
2809         }
2810         $readable_options .= $prefix . $key . $suffix . "\n";
2811     }
2812     return $readable_options;
2813 }
2814
2815 sub show_version {
2816     print <<"EOM";
2817 This is perltidy, v$VERSION
2818
2819 Copyright 2000-2009, Steve Hancock
2820
2821 Perltidy is free software and may be copied under the terms of the GNU
2822 General Public License, which is included in the distribution files.
2823
2824 Complete documentation for perltidy can be found using 'man perltidy'
2825 or on the internet at http://perltidy.sourceforge.net.
2826 EOM
2827 }
2828
2829 sub usage {
2830
2831     print STDOUT <<EOF;
2832 This is perltidy version $VERSION, a perl script indenter.  Usage:
2833
2834     perltidy [ options ] file1 file2 file3 ...
2835             (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2836     perltidy [ options ] file1 -o outfile
2837     perltidy [ options ] file1 -st >outfile
2838     perltidy [ options ] <infile >outfile
2839
2840 Options have short and long forms. Short forms are shown; see
2841 man pages for long forms.  Note: '=s' indicates a required string,
2842 and '=n' indicates a required integer.
2843
2844 I/O control
2845  -h      show this help
2846  -o=file name of the output file (only if single input file)
2847  -oext=s change output extension from 'tdy' to s
2848  -opath=path  change path to be 'path' for output files
2849  -b      backup original to .bak and modify file in-place
2850  -bext=s change default backup extension from 'bak' to s
2851  -q      deactivate error messages (for running under editor)
2852  -w      include non-critical warning messages in the .ERR error output
2853  -syn    run perl -c to check syntax (default under unix systems)
2854  -log    save .LOG file, which has useful diagnostics
2855  -f      force perltidy to read a binary file
2856  -g      like -log but writes more detailed .LOG file, for debugging scripts
2857  -opt    write the set of options actually used to a .LOG file
2858  -npro   ignore .perltidyrc configuration command file
2859  -pro=file   read configuration commands from file instead of .perltidyrc
2860  -st     send output to standard output, STDOUT
2861  -se     send error output to standard error output, STDERR
2862  -v      display version number to standard output and quit
2863
2864 Basic Options:
2865  -i=n    use n columns per indentation level (default n=4)
2866  -t      tabs: use one tab character per indentation level, not recommeded
2867  -nt     no tabs: use n spaces per indentation level (default)
2868  -et=n   entab leading whitespace n spaces per tab; not recommended
2869  -io     "indent only": just do indentation, no other formatting.
2870  -sil=n  set starting indentation level to n;  use if auto detection fails
2871  -ole=s  specify output line ending (s=dos or win, mac, unix)
2872  -ple    keep output line endings same as input (input must be filename)
2873
2874 Whitespace Control
2875  -fws    freeze whitespace; this disables all whitespace changes
2876            and disables the following switches:
2877  -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
2878  -bbt    same as -bt but for code block braces; same as -bt if not given
2879  -bbvt   block braces vertically tight; use with -bl or -bli
2880  -bbvtl=s  make -bbvt to apply to selected list of block types
2881  -pt=n   paren tightness (n=0, 1 or 2)
2882  -sbt=n  square bracket tightness (n=0, 1, or 2)
2883  -bvt=n  brace vertical tightness,
2884          n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2885  -pvt=n  paren vertical tightness (see -bvt for n)
2886  -sbvt=n square bracket vertical tightness (see -bvt for n)
2887  -bvtc=n closing brace vertical tightness:
2888          n=(0=open, 1=sometimes close, 2=always close)
2889  -pvtc=n closing paren vertical tightness, see -bvtc for n.
2890  -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2891  -ci=n   sets continuation indentation=n,  default is n=2 spaces
2892  -lp     line up parentheses, brackets, and non-BLOCK braces
2893  -sfs    add space before semicolon in for( ; ; )
2894  -aws    allow perltidy to add whitespace (default)
2895  -dws    delete all old non-essential whitespace
2896  -icb    indent closing brace of a code block
2897  -cti=n  closing indentation of paren, square bracket, or non-block brace:
2898          n=0 none, =1 align with opening, =2 one full indentation level
2899  -icp    equivalent to -cti=2
2900  -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
2901  -wrs=s  want space right of tokens in string;
2902  -sts    put space before terminal semicolon of a statement
2903  -sak=s  put space between keywords given in s and '(';
2904  -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2905
2906 Line Break Control
2907  -fnl    freeze newlines; this disables all line break changes
2908             and disables the following switches:
2909  -anl    add newlines;  ok to introduce new line breaks
2910  -bbs    add blank line before subs and packages
2911  -bbc    add blank line before block comments
2912  -bbb    add blank line between major blocks
2913  -kbl=n  keep old blank lines? 0=no, 1=some, 2=all
2914  -mbl=n  maximum consecutive blank lines to output (default=1)
2915  -ce     cuddled else; use this style: '} else {'
2916  -dnl    delete old newlines (default)
2917  -l=n    maximum line length;  default n=80
2918  -bl     opening brace on new line
2919  -sbl    opening sub brace on new line.  value of -bl is used if not given.
2920  -bli    opening brace on new line and indented
2921  -bar    opening brace always on right, even for long clauses
2922  -vt=n   vertical tightness (requires -lp); n controls break after opening
2923          token: 0=never  1=no break if next line balanced   2=no break
2924  -vtc=n  vertical tightness of closing container; n controls if closing
2925          token starts new line: 0=always  1=not unless list  1=never
2926  -wba=s  want break after tokens in string; i.e. wba=': .'
2927  -wbb=s  want break before tokens in string
2928
2929 Following Old Breakpoints
2930  -kis    keep interior semicolons.  Allows multiple statements per line.
2931  -boc    break at old comma breaks: turns off all automatic list formatting
2932  -bol    break at old logical breakpoints: or, and, ||, && (default)
2933  -bok    break at old list keyword breakpoints such as map, sort (default)
2934  -bot    break at old conditional (ternary ?:) operator breakpoints (default)
2935  -cab=n  break at commas after a comma-arrow (=>):
2936          n=0 break at all commas after =>
2937          n=1 stable: break unless this breaks an existing one-line container
2938          n=2 break only if a one-line container cannot be formed
2939          n=3 do not treat commas after => specially at all
2940
2941 Comment controls
2942  -ibc    indent block comments (default)
2943  -isbc   indent spaced block comments; may indent unless no leading space
2944  -msc=n  minimum desired spaces to side comment, default 4
2945  -fpsc=n fix position for side comments; default 0;
2946  -csc    add or update closing side comments after closing BLOCK brace
2947  -dcsc   delete closing side comments created by a -csc command
2948  -cscp=s change closing side comment prefix to be other than '## end'
2949  -cscl=s change closing side comment to apply to selected list of blocks
2950  -csci=n minimum number of lines needed to apply a -csc tag, default n=6
2951  -csct=n maximum number of columns of appended text, default n=20
2952  -cscw   causes warning if old side comment is overwritten with -csc
2953
2954  -sbc    use 'static block comments' identified by leading '##' (default)
2955  -sbcp=s change static block comment identifier to be other than '##'
2956  -osbc   outdent static block comments
2957
2958  -ssc    use 'static side comments' identified by leading '##' (default)
2959  -sscp=s change static side comment identifier to be other than '##'
2960
2961 Delete selected text
2962  -dac    delete all comments AND pod
2963  -dbc    delete block comments
2964  -dsc    delete side comments
2965  -dp     delete pod
2966
2967 Send selected text to a '.TEE' file
2968  -tac    tee all comments AND pod
2969  -tbc    tee block comments
2970  -tsc    tee side comments
2971  -tp     tee pod
2972
2973 Outdenting
2974  -olq    outdent long quoted strings (default)
2975  -olc    outdent a long block comment line
2976  -ola    outdent statement labels
2977  -okw    outdent control keywords (redo, next, last, goto, return)
2978  -okwl=s specify alternative keywords for -okw command
2979
2980 Other controls
2981  -mft=n  maximum fields per table; default n=40
2982  -x      do not format lines before hash-bang line (i.e., for VMS)
2983  -asc    allows perltidy to add a ';' when missing (default)
2984  -dsm    allows perltidy to delete an unnecessary ';'  (default)
2985
2986 Combinations of other parameters
2987  -gnu     attempt to follow GNU Coding Standards as applied to perl
2988  -mangle  remove as many newlines as possible (but keep comments and pods)
2989  -extrude  insert as many newlines as possible
2990
2991 Dump and die, debugging
2992  -dop    dump options used in this run to standard output and quit
2993  -ddf    dump default options to standard output and quit
2994  -dsn    dump all option short names to standard output and quit
2995  -dln    dump option long names to standard output and quit
2996  -dpro   dump whatever configuration file is in effect to standard output
2997  -dtt    dump all token types to standard output and quit
2998
2999 HTML
3000  -html write an html file (see 'man perl2web' for many options)
3001        Note: when -html is used, no indentation or formatting are done.
3002        Hint: try perltidy -html -css=mystyle.css filename.pl
3003        and edit mystyle.css to change the appearance of filename.html.
3004        -nnn gives line numbers
3005        -pre only writes out <pre>..</pre> code section
3006        -toc places a table of contents to subs at the top (default)
3007        -pod passes pod text through pod2html (default)
3008        -frm write html as a frame (3 files)
3009        -text=s extra extension for table of contents if -frm, default='toc'
3010        -sext=s extra extension for file content if -frm, default='src'
3011
3012 A prefix of "n" negates short form toggle switches, and a prefix of "no"
3013 negates the long forms.  For example, -nasc means don't add missing
3014 semicolons.
3015
3016 If you are unable to see this entire text, try "perltidy -h | more"
3017 For more detailed information, and additional options, try "man perltidy",
3018 or go to the perltidy home page at http://perltidy.sourceforge.net
3019 EOF
3020
3021 }
3022
3023 sub process_this_file {
3024
3025     my ( $truth, $beauty ) = @_;
3026
3027     # loop to process each line of this file
3028     while ( my $line_of_tokens = $truth->get_line() ) {
3029         $beauty->write_line($line_of_tokens);
3030     }
3031
3032     # finish up
3033     eval { $beauty->finish_formatting() };
3034     $truth->report_tokenization_errors();
3035 }
3036
3037 sub check_syntax {
3038
3039     # Use 'perl -c' to make sure that we did not create bad syntax
3040     # This is a very good independent check for programming errors
3041     #
3042     # Given names of the input and output files, ($ifname, $ofname),
3043     # we do the following:
3044     # - check syntax of the input file
3045     # - if bad, all done (could be an incomplete code snippet)
3046     # - if infile syntax ok, then check syntax of the output file;
3047     #   - if outfile syntax bad, issue warning; this implies a code bug!
3048     # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3049
3050     my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
3051     my $infile_syntax_ok = 0;
3052     my $line_of_dashes   = '-' x 42 . "\n";
3053
3054     my $flags = $rOpts->{'perl-syntax-check-flags'};
3055
3056     # be sure we invoke perl with -c
3057     # note: perl will accept repeated flags like '-c -c'.  It is safest
3058     # to append another -c than try to find an interior bundled c, as
3059     # in -Tc, because such a 'c' might be in a quoted string, for example.
3060     if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3061
3062     # be sure we invoke perl with -x if requested
3063     # same comments about repeated parameters applies
3064     if ( $rOpts->{'look-for-hash-bang'} ) {
3065         if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3066     }
3067
3068     # this shouldn't happen unless a termporary file couldn't be made
3069     if ( $ifname eq '-' ) {
3070         $logger_object->write_logfile_entry(
3071             "Cannot run perl -c on STDIN and STDOUT\n");
3072         return $infile_syntax_ok;
3073     }
3074
3075     $logger_object->write_logfile_entry(
3076         "checking input file syntax with perl $flags\n");
3077     $logger_object->write_logfile_entry($line_of_dashes);
3078
3079     # Not all operating systems/shells support redirection of the standard
3080     # error output.
3081     my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3082
3083     my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3084     $logger_object->write_logfile_entry("$perl_output\n");
3085
3086     if ( $perl_output =~ /syntax\s*OK/ ) {
3087         $infile_syntax_ok = 1;
3088         $logger_object->write_logfile_entry($line_of_dashes);
3089         $logger_object->write_logfile_entry(
3090             "checking output file syntax with perl $flags ...\n");
3091         $logger_object->write_logfile_entry($line_of_dashes);
3092
3093         my $perl_output =
3094           do_syntax_check( $ofname, $flags, $error_redirection );
3095         $logger_object->write_logfile_entry("$perl_output\n");
3096
3097         unless ( $perl_output =~ /syntax\s*OK/ ) {
3098             $logger_object->write_logfile_entry($line_of_dashes);
3099             $logger_object->warning(
3100 "The output file has a syntax error when tested with perl $flags $ofname !\n"
3101             );
3102             $logger_object->warning(
3103                 "This implies an error in perltidy; the file $ofname is bad\n");
3104             $logger_object->report_definite_bug();
3105
3106             # the perl version number will be helpful for diagnosing the problem
3107             $logger_object->write_logfile_entry(
3108                 qx/perl -v $error_redirection/ . "\n" );
3109         }
3110     }
3111     else {
3112
3113         # Only warn of perl -c syntax errors.  Other messages,
3114         # such as missing modules, are too common.  They can be
3115         # seen by running with perltidy -w
3116         $logger_object->complain("A syntax check using perl $flags gives: \n");
3117         $logger_object->complain($line_of_dashes);
3118         $logger_object->complain("$perl_output\n");
3119         $logger_object->complain($line_of_dashes);
3120         $infile_syntax_ok = -1;
3121         $logger_object->write_logfile_entry($line_of_dashes);
3122         $logger_object->write_logfile_entry(
3123 "The output file will not be checked because of input file problems\n"
3124         );
3125     }
3126     return $infile_syntax_ok;
3127 }
3128
3129 sub do_syntax_check {
3130     my ( $fname, $flags, $error_redirection ) = @_;
3131
3132     # We have to quote the filename in case it has unusual characters
3133     # or spaces.  Example: this filename #CM11.pm# gives trouble.
3134     $fname = '"' . $fname . '"';
3135
3136     # Under VMS something like -T will become -t (and an error) so we
3137     # will put quotes around the flags.  Double quotes seem to work on
3138     # Unix/Windows/VMS, but this may not work on all systems.  (Single
3139     # quotes do not work under Windows).  It could become necessary to
3140     # put double quotes around each flag, such as:  -"c"  -"T"
3141     # We may eventually need some system-dependent coding here.
3142     $flags = '"' . $flags . '"';
3143
3144     # now wish for luck...
3145     return qx/perl $flags $fname $error_redirection/;
3146 }
3147
3148 #####################################################################
3149 #
3150 # This is a stripped down version of IO::Scalar
3151 # Given a reference to a scalar, it supplies either:
3152 # a getline method which reads lines (mode='r'), or
3153 # a print method which reads lines (mode='w')
3154 #
3155 #####################################################################
3156 package Perl::Tidy::IOScalar;
3157 use Carp;
3158
3159 sub new {
3160     my ( $package, $rscalar, $mode ) = @_;
3161     my $ref = ref $rscalar;
3162     if ( $ref ne 'SCALAR' ) {
3163         confess <<EOM;
3164 ------------------------------------------------------------------------
3165 expecting ref to SCALAR but got ref to ($ref); trace follows:
3166 ------------------------------------------------------------------------
3167 EOM
3168
3169     }
3170     if ( $mode eq 'w' ) {
3171         $$rscalar = "";
3172         return bless [ $rscalar, $mode ], $package;
3173     }
3174     elsif ( $mode eq 'r' ) {
3175
3176         # Convert a scalar to an array.
3177         # This avoids looking for "\n" on each call to getline
3178         my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3179         my $i_next = 0;
3180         return bless [ \@array, $mode, $i_next ], $package;
3181     }
3182     else {
3183         confess <<EOM;
3184 ------------------------------------------------------------------------
3185 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3186 ------------------------------------------------------------------------
3187 EOM
3188     }
3189 }
3190
3191 sub getline {
3192     my $self = shift;
3193     my $mode = $self->[1];
3194     if ( $mode ne 'r' ) {
3195         confess <<EOM;
3196 ------------------------------------------------------------------------
3197 getline call requires mode = 'r' but mode = ($mode); trace follows:
3198 ------------------------------------------------------------------------
3199 EOM
3200     }
3201     my $i = $self->[2]++;
3202     ##my $line = $self->[0]->[$i];
3203     return $self->[0]->[$i];
3204 }
3205
3206 sub print {
3207     my $self = shift;
3208     my $mode = $self->[1];
3209     if ( $mode ne 'w' ) {
3210         confess <<EOM;
3211 ------------------------------------------------------------------------
3212 print call requires mode = 'w' but mode = ($mode); trace follows:
3213 ------------------------------------------------------------------------
3214 EOM
3215     }
3216     ${ $self->[0] } .= $_[0];
3217 }
3218 sub close { return }
3219
3220 #####################################################################
3221 #
3222 # This is a stripped down version of IO::ScalarArray
3223 # Given a reference to an array, it supplies either:
3224 # a getline method which reads lines (mode='r'), or
3225 # a print method which reads lines (mode='w')
3226 #
3227 # NOTE: this routine assumes that that there aren't any embedded
3228 # newlines within any of the array elements.  There are no checks
3229 # for that.
3230 #
3231 #####################################################################
3232 package Perl::Tidy::IOScalarArray;
3233 use Carp;
3234
3235 sub new {
3236     my ( $package, $rarray, $mode ) = @_;
3237     my $ref = ref $rarray;
3238     if ( $ref ne 'ARRAY' ) {
3239         confess <<EOM;
3240 ------------------------------------------------------------------------
3241 expecting ref to ARRAY but got ref to ($ref); trace follows:
3242 ------------------------------------------------------------------------
3243 EOM
3244
3245     }
3246     if ( $mode eq 'w' ) {
3247         @$rarray = ();
3248         return bless [ $rarray, $mode ], $package;
3249     }
3250     elsif ( $mode eq 'r' ) {
3251         my $i_next = 0;
3252         return bless [ $rarray, $mode, $i_next ], $package;
3253     }
3254     else {
3255         confess <<EOM;
3256 ------------------------------------------------------------------------
3257 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3258 ------------------------------------------------------------------------
3259 EOM
3260     }
3261 }
3262
3263 sub getline {
3264     my $self = shift;
3265     my $mode = $self->[1];
3266     if ( $mode ne 'r' ) {
3267         confess <<EOM;
3268 ------------------------------------------------------------------------
3269 getline requires mode = 'r' but mode = ($mode); trace follows:
3270 ------------------------------------------------------------------------
3271 EOM
3272     }
3273     my $i = $self->[2]++;
3274     return $self->[0]->[$i];
3275 }
3276
3277 sub print {
3278     my $self = shift;
3279     my $mode = $self->[1];
3280     if ( $mode ne 'w' ) {
3281         confess <<EOM;
3282 ------------------------------------------------------------------------
3283 print requires mode = 'w' but mode = ($mode); trace follows:
3284 ------------------------------------------------------------------------
3285 EOM
3286     }
3287     push @{ $self->[0] }, $_[0];
3288 }
3289 sub close { return }
3290
3291 #####################################################################
3292 #
3293 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3294 # which returns the next line to be parsed
3295 #
3296 #####################################################################
3297
3298 package Perl::Tidy::LineSource;
3299
3300 sub new {
3301
3302     my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3303     my $input_file_copy = undef;
3304     my $fh_copy;
3305
3306     my $input_line_ending;
3307     if ( $rOpts->{'preserve-line-endings'} ) {
3308         $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3309     }
3310
3311     ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3312     return undef unless $fh;
3313
3314     # in order to check output syntax when standard output is used,
3315     # or when it is an object, we have to make a copy of the file
3316     if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3317     {
3318
3319         # Turning off syntax check when input output is used.
3320         # The reason is that temporary files cause problems on
3321         # on many systems.
3322         $rOpts->{'check-syntax'} = 0;
3323         $input_file_copy = '-';
3324
3325         $$rpending_logfile_message .= <<EOM;
3326 Note: --syntax check will be skipped because standard input is used
3327 EOM
3328
3329     }
3330
3331     return bless {
3332         _fh                => $fh,
3333         _fh_copy           => $fh_copy,
3334         _filename          => $input_file,
3335         _input_file_copy   => $input_file_copy,
3336         _input_line_ending => $input_line_ending,
3337         _rinput_buffer     => [],
3338         _started           => 0,
3339     }, $class;
3340 }
3341
3342 sub get_input_file_copy_name {
3343     my $self   = shift;
3344     my $ifname = $self->{_input_file_copy};
3345     unless ($ifname) {
3346         $ifname = $self->{_filename};
3347     }
3348     return $ifname;
3349 }
3350
3351 sub close_input_file {
3352     my $self = shift;
3353     eval { $self->{_fh}->close() };
3354     eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3355 }
3356
3357 sub get_line {
3358     my $self          = shift;
3359     my $line          = undef;
3360     my $fh            = $self->{_fh};
3361     my $fh_copy       = $self->{_fh_copy};
3362     my $rinput_buffer = $self->{_rinput_buffer};
3363
3364     if ( scalar(@$rinput_buffer) ) {
3365         $line = shift @$rinput_buffer;
3366     }
3367     else {
3368         $line = $fh->getline();
3369
3370         # patch to read raw mac files under unix, dos
3371         # see if the first line has embedded \r's
3372         if ( $line && !$self->{_started} ) {
3373             if ( $line =~ /[\015][^\015\012]/ ) {
3374
3375                 # found one -- break the line up and store in a buffer
3376                 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3377                 my $count = @$rinput_buffer;
3378                 $line = shift @$rinput_buffer;
3379             }
3380             $self->{_started}++;
3381         }
3382     }
3383     if ( $line && $fh_copy ) { $fh_copy->print($line); }
3384     return $line;
3385 }
3386
3387 #####################################################################
3388 #
3389 # the Perl::Tidy::LineSink class supplies a write_line method for
3390 # actual file writing
3391 #
3392 #####################################################################
3393
3394 package Perl::Tidy::LineSink;
3395
3396 sub new {
3397
3398     my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3399         $rpending_logfile_message, $binmode )
3400       = @_;
3401     my $fh               = undef;
3402     my $fh_copy          = undef;
3403     my $fh_tee           = undef;
3404     my $output_file_copy = "";
3405     my $output_file_open = 0;
3406
3407     if ( $rOpts->{'format'} eq 'tidy' ) {
3408         ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3409         unless ($fh) { die "Cannot write to output stream\n"; }
3410         $output_file_open = 1;
3411         if ($binmode) {
3412             if ( ref($fh) eq 'IO::File' ) {
3413                 binmode $fh;
3414             }
3415             if ( $output_file eq '-' ) { binmode STDOUT }
3416         }
3417     }
3418
3419     # in order to check output syntax when standard output is used,
3420     # or when it is an object, we have to make a copy of the file
3421     if ( $output_file eq '-' || ref $output_file ) {
3422         if ( $rOpts->{'check-syntax'} ) {
3423
3424             # Turning off syntax check when standard output is used.
3425             # The reason is that temporary files cause problems on
3426             # on many systems.
3427             $rOpts->{'check-syntax'} = 0;
3428             $output_file_copy = '-';
3429             $$rpending_logfile_message .= <<EOM;
3430 Note: --syntax check will be skipped because standard output is used
3431 EOM
3432
3433         }
3434     }
3435
3436     bless {
3437         _fh               => $fh,
3438         _fh_copy          => $fh_copy,
3439         _fh_tee           => $fh_tee,
3440         _output_file      => $output_file,
3441         _output_file_open => $output_file_open,
3442         _output_file_copy => $output_file_copy,
3443         _tee_flag         => 0,
3444         _tee_file         => $tee_file,
3445         _tee_file_opened  => 0,
3446         _line_separator   => $line_separator,
3447         _binmode          => $binmode,
3448     }, $class;
3449 }
3450
3451 sub write_line {
3452
3453     my $self    = shift;
3454     my $fh      = $self->{_fh};
3455     my $fh_copy = $self->{_fh_copy};
3456
3457     my $output_file_open = $self->{_output_file_open};
3458     chomp $_[0];
3459     $_[0] .= $self->{_line_separator};
3460
3461     $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3462     print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3463
3464     if ( $self->{_tee_flag} ) {
3465         unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3466         my $fh_tee = $self->{_fh_tee};
3467         print $fh_tee $_[0];
3468     }
3469 }
3470
3471 sub get_output_file_copy {
3472     my $self   = shift;
3473     my $ofname = $self->{_output_file_copy};
3474     unless ($ofname) {
3475         $ofname = $self->{_output_file};
3476     }
3477     return $ofname;
3478 }
3479
3480 sub tee_on {
3481     my $self = shift;
3482     $self->{_tee_flag} = 1;
3483 }
3484
3485 sub tee_off {
3486     my $self = shift;
3487     $self->{_tee_flag} = 0;
3488 }
3489
3490 sub really_open_tee_file {
3491     my $self     = shift;
3492     my $tee_file = $self->{_tee_file};
3493     my $fh_tee;
3494     $fh_tee = IO::File->new(">$tee_file")
3495       or die("couldn't open TEE file $tee_file: $!\n");
3496     binmode $fh_tee if $self->{_binmode};
3497     $self->{_tee_file_opened} = 1;
3498     $self->{_fh_tee}          = $fh_tee;
3499 }
3500
3501 sub close_output_file {
3502     my $self = shift;
3503     eval { $self->{_fh}->close() }      if $self->{_output_file_open};
3504     eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3505     $self->close_tee_file();
3506 }
3507
3508 sub close_tee_file {
3509     my $self = shift;
3510
3511     if ( $self->{_tee_file_opened} ) {
3512         eval { $self->{_fh_tee}->close() };
3513         $self->{_tee_file_opened} = 0;
3514     }
3515 }
3516
3517 #####################################################################
3518 #
3519 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3520 # useful for program development.
3521 #
3522 # Only one such file is created regardless of the number of input
3523 # files processed.  This allows the results of processing many files
3524 # to be summarized in a single file.
3525 #
3526 #####################################################################
3527
3528 package Perl::Tidy::Diagnostics;
3529
3530 sub new {
3531
3532     my $class = shift;
3533     bless {
3534         _write_diagnostics_count => 0,
3535         _last_diagnostic_file    => "",
3536         _input_file              => "",
3537         _fh                      => undef,
3538     }, $class;
3539 }
3540
3541 sub set_input_file {
3542     my $self = shift;
3543     $self->{_input_file} = $_[0];
3544 }
3545
3546 # This is a diagnostic routine which is useful for program development.
3547 # Output from debug messages go to a file named DIAGNOSTICS, where
3548 # they are labeled by file and line.  This allows many files to be
3549 # scanned at once for some particular condition of interest.
3550 sub write_diagnostics {
3551     my $self = shift;
3552
3553     unless ( $self->{_write_diagnostics_count} ) {
3554         open DIAGNOSTICS, ">DIAGNOSTICS"
3555           or death("couldn't open DIAGNOSTICS: $!\n");
3556     }
3557
3558     my $last_diagnostic_file = $self->{_last_diagnostic_file};
3559     my $input_file           = $self->{_input_file};
3560     if ( $last_diagnostic_file ne $input_file ) {
3561         print DIAGNOSTICS "\nFILE:$input_file\n";
3562     }
3563     $self->{_last_diagnostic_file} = $input_file;
3564     my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3565     print DIAGNOSTICS "$input_line_number:\t@_";
3566     $self->{_write_diagnostics_count}++;
3567 }
3568
3569 #####################################################################
3570 #
3571 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3572 #
3573 #####################################################################
3574
3575 package Perl::Tidy::Logger;
3576
3577 sub new {
3578     my $class = shift;
3579     my $fh;
3580     my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3581
3582     # remove any old error output file
3583     unless ( ref($warning_file) ) {
3584         if ( -e $warning_file ) { unlink($warning_file) }
3585     }
3586
3587     bless {
3588         _log_file                      => $log_file,
3589         _fh_warnings                   => undef,
3590         _rOpts                         => $rOpts,
3591         _fh_warnings                   => undef,
3592         _last_input_line_written       => 0,
3593         _at_end_of_file                => 0,
3594         _use_prefix                    => 1,
3595         _block_log_output              => 0,
3596         _line_of_tokens                => undef,
3597         _output_line_number            => undef,
3598         _wrote_line_information_string => 0,
3599         _wrote_column_headings         => 0,
3600         _warning_file                  => $warning_file,
3601         _warning_count                 => 0,
3602         _complaint_count               => 0,
3603         _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
3604         _saw_brace_error => 0,
3605         _saw_extrude     => $saw_extrude,
3606         _output_array    => [],
3607     }, $class;
3608 }
3609
3610 sub close_log_file {
3611
3612     my $self = shift;
3613     if ( $self->{_fh_warnings} ) {
3614         eval { $self->{_fh_warnings}->close() };
3615         $self->{_fh_warnings} = undef;
3616     }
3617 }
3618
3619 sub get_warning_count {
3620     my $self = shift;
3621     return $self->{_warning_count};
3622 }
3623
3624 sub get_use_prefix {
3625     my $self = shift;
3626     return $self->{_use_prefix};
3627 }
3628
3629 sub block_log_output {
3630     my $self = shift;
3631     $self->{_block_log_output} = 1;
3632 }
3633
3634 sub unblock_log_output {
3635     my $self = shift;
3636     $self->{_block_log_output} = 0;
3637 }
3638
3639 sub interrupt_logfile {
3640     my $self = shift;
3641     $self->{_use_prefix} = 0;
3642     $self->warning("\n");
3643     $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
3644 }
3645
3646 sub resume_logfile {
3647     my $self = shift;
3648     $self->write_logfile_entry( '#' x 60 . "\n" );
3649     $self->{_use_prefix} = 1;
3650 }
3651
3652 sub we_are_at_the_last_line {
3653     my $self = shift;
3654     unless ( $self->{_wrote_line_information_string} ) {
3655         $self->write_logfile_entry("Last line\n\n");
3656     }
3657     $self->{_at_end_of_file} = 1;
3658 }
3659
3660 # record some stuff in case we go down in flames
3661 sub black_box {
3662     my $self = shift;
3663     my ( $line_of_tokens, $output_line_number ) = @_;
3664     my $input_line        = $line_of_tokens->{_line_text};
3665     my $input_line_number = $line_of_tokens->{_line_number};
3666
3667     # save line information in case we have to write a logfile message
3668     $self->{_line_of_tokens}                = $line_of_tokens;
3669     $self->{_output_line_number}            = $output_line_number;
3670     $self->{_wrote_line_information_string} = 0;
3671
3672     my $last_input_line_written = $self->{_last_input_line_written};
3673     my $rOpts                   = $self->{_rOpts};
3674     if (
3675         (
3676             ( $input_line_number - $last_input_line_written ) >=
3677             $rOpts->{'logfile-gap'}
3678         )
3679         || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3680       )
3681     {
3682         my $rlevels                      = $line_of_tokens->{_rlevels};
3683         my $structural_indentation_level = $$rlevels[0];
3684         $self->{_last_input_line_written} = $input_line_number;
3685         ( my $out_str = $input_line ) =~ s/^\s*//;
3686         chomp $out_str;
3687
3688         $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3689
3690         if ( length($out_str) > 35 ) {
3691             $out_str = substr( $out_str, 0, 35 ) . " ....";
3692         }
3693         $self->logfile_output( "", "$out_str\n" );
3694     }
3695 }
3696
3697 sub write_logfile_entry {
3698     my $self = shift;
3699
3700     # add leading >>> to avoid confusing error mesages and code
3701     $self->logfile_output( ">>>", "@_" );
3702 }
3703
3704 sub write_column_headings {
3705     my $self = shift;
3706
3707     $self->{_wrote_column_headings} = 1;
3708     my $routput_array = $self->{_output_array};
3709     push @{$routput_array}, <<EOM;
3710 The nesting depths in the table below are at the start of the lines.
3711 The indicated output line numbers are not always exact.
3712 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3713
3714 in:out indent c b  nesting   code + messages; (messages begin with >>>)
3715 lines  levels i k            (code begins with one '.' per indent level)
3716 ------  ----- - - --------   -------------------------------------------
3717 EOM
3718 }
3719
3720 sub make_line_information_string {
3721
3722     # make columns of information when a logfile message needs to go out
3723     my $self                    = shift;
3724     my $line_of_tokens          = $self->{_line_of_tokens};
3725     my $input_line_number       = $line_of_tokens->{_line_number};
3726     my $line_information_string = "";
3727     if ($input_line_number) {
3728
3729         my $output_line_number   = $self->{_output_line_number};
3730         my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
3731         my $paren_depth          = $line_of_tokens->{_paren_depth};
3732         my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3733         my $python_indentation_level =
3734           $line_of_tokens->{_python_indentation_level};
3735         my $rlevels         = $line_of_tokens->{_rlevels};
3736         my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3737         my $rci_levels      = $line_of_tokens->{_rci_levels};
3738         my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3739
3740         my $structural_indentation_level = $$rlevels[0];
3741
3742         $self->write_column_headings() unless $self->{_wrote_column_headings};
3743
3744         # keep logfile columns aligned for scripts up to 999 lines;
3745         # for longer scripts it doesn't really matter
3746         my $extra_space = "";
3747         $extra_space .=
3748             ( $input_line_number < 10 )  ? "  "
3749           : ( $input_line_number < 100 ) ? " "
3750           :                                "";
3751         $extra_space .=
3752             ( $output_line_number < 10 )  ? "  "
3753           : ( $output_line_number < 100 ) ? " "
3754           :                                 "";
3755
3756         # there are 2 possible nesting strings:
3757         # the original which looks like this:  (0 [1 {2
3758         # the new one, which looks like this:  {{[
3759         # the new one is easier to read, and shows the order, but
3760         # could be arbitrarily long, so we use it unless it is too long
3761         my $nesting_string =
3762           "($paren_depth [$square_bracket_depth {$brace_depth";
3763         my $nesting_string_new = $$rnesting_tokens[0];
3764
3765         my $ci_level = $$rci_levels[0];
3766         if ( $ci_level > 9 ) { $ci_level = '*' }
3767         my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3768
3769         if ( length($nesting_string_new) <= 8 ) {
3770             $nesting_string =
3771               $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3772         }
3773         if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3774         $line_information_string =
3775 "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3776     }
3777     return $line_information_string;
3778 }
3779
3780 sub logfile_output {
3781     my $self = shift;
3782     my ( $prompt, $msg ) = @_;
3783     return if ( $self->{_block_log_output} );
3784
3785     my $routput_array = $self->{_output_array};
3786     if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3787         push @{$routput_array}, "$msg";
3788     }
3789     else {
3790         my $line_information_string = $self->make_line_information_string();
3791         $self->{_wrote_line_information_string} = 1;
3792
3793         if ($line_information_string) {
3794             push @{$routput_array}, "$line_information_string   $prompt$msg";
3795         }
3796         else {
3797             push @{$routput_array}, "$msg";
3798         }
3799     }
3800 }
3801
3802 sub get_saw_brace_error {
3803     my $self = shift;
3804     return $self->{_saw_brace_error};
3805 }
3806
3807 sub increment_brace_error {
3808     my $self = shift;
3809     $self->{_saw_brace_error}++;
3810 }
3811
3812 sub brace_warning {
3813     my $self = shift;
3814     use constant BRACE_WARNING_LIMIT => 10;
3815     my $saw_brace_error = $self->{_saw_brace_error};
3816
3817     if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3818         $self->warning(@_);
3819     }
3820     $saw_brace_error++;
3821     $self->{_saw_brace_error} = $saw_brace_error;
3822
3823     if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3824         $self->warning("No further warnings of this type will be given\n");
3825     }
3826 }
3827
3828 sub complain {
3829
3830     # handle non-critical warning messages based on input flag
3831     my $self  = shift;
3832     my $rOpts = $self->{_rOpts};
3833
3834     # these appear in .ERR output only if -w flag is used
3835     if ( $rOpts->{'warning-output'} ) {
3836         $self->warning(@_);
3837     }
3838
3839     # otherwise, they go to the .LOG file
3840     else {
3841         $self->{_complaint_count}++;
3842         $self->write_logfile_entry(@_);
3843     }
3844 }
3845
3846 sub warning {
3847
3848     # report errors to .ERR file (or stdout)
3849     my $self = shift;
3850     use constant WARNING_LIMIT => 50;
3851
3852     my $rOpts = $self->{_rOpts};
3853     unless ( $rOpts->{'quiet'} ) {
3854
3855         my $warning_count = $self->{_warning_count};
3856         unless ($warning_count) {
3857             my $warning_file = $self->{_warning_file};
3858             my $fh_warnings;
3859             if ( $rOpts->{'standard-error-output'} ) {
3860                 $fh_warnings = *STDERR;
3861             }
3862             else {
3863                 ( $fh_warnings, my $filename ) =
3864                   Perl::Tidy::streamhandle( $warning_file, 'w' );
3865                 $fh_warnings or die("couldn't open $filename $!\n");
3866                 warn "## Please see file $filename\n";
3867             }
3868             $self->{_fh_warnings} = $fh_warnings;
3869         }
3870
3871         my $fh_warnings = $self->{_fh_warnings};
3872         if ( $warning_count < WARNING_LIMIT ) {
3873             if ( $self->get_use_prefix() > 0 ) {
3874                 my $input_line_number =
3875                   Perl::Tidy::Tokenizer::get_input_line_number();
3876                 $fh_warnings->print("$input_line_number:\t@_");
3877                 $self->write_logfile_entry("WARNING: @_");
3878             }
3879             else {
3880                 $fh_warnings->print(@_);
3881                 $self->write_logfile_entry(@_);
3882             }
3883         }
3884         $warning_count++;
3885         $self->{_warning_count} = $warning_count;
3886
3887         if ( $warning_count == WARNING_LIMIT ) {
3888             $fh_warnings->print("No further warnings will be given\n");
3889         }
3890     }
3891 }
3892
3893 # programming bug codes:
3894 #   -1 = no bug
3895 #    0 = maybe, not sure.
3896 #    1 = definitely
3897 sub report_possible_bug {
3898     my $self         = shift;
3899     my $saw_code_bug = $self->{_saw_code_bug};
3900     $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3901 }
3902
3903 sub report_definite_bug {
3904     my $self = shift;
3905     $self->{_saw_code_bug} = 1;
3906 }
3907
3908 sub ask_user_for_bug_report {
3909     my $self = shift;
3910
3911     my ( $infile_syntax_ok, $formatter ) = @_;
3912     my $saw_code_bug = $self->{_saw_code_bug};
3913     if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3914         $self->warning(<<EOM);
3915
3916 You may have encountered a code bug in perltidy.  If you think so, and
3917 the problem is not listed in the BUGS file at
3918 http://perltidy.sourceforge.net, please report it so that it can be
3919 corrected.  Include the smallest possible script which has the problem,
3920 along with the .LOG file. See the manual pages for contact information.
3921 Thank you!
3922 EOM
3923
3924     }
3925     elsif ( $saw_code_bug == 1 ) {
3926         if ( $self->{_saw_extrude} ) {
3927             $self->warning(<<EOM);
3928
3929 You may have encountered a bug in perltidy.  However, since you are using the
3930 -extrude option, the problem may be with perl or one of its modules, which have
3931 occasional problems with this type of file.  If you believe that the
3932 problem is with perltidy, and the problem is not listed in the BUGS file at
3933 http://perltidy.sourceforge.net, please report it so that it can be corrected.
3934 Include the smallest possible script which has the problem, along with the .LOG
3935 file. See the manual pages for contact information.
3936 Thank you!
3937 EOM
3938         }
3939         else {
3940             $self->warning(<<EOM);
3941
3942 Oops, you seem to have encountered a bug in perltidy.  Please check the
3943 BUGS file at http://perltidy.sourceforge.net.  If the problem is not
3944 listed there, please report it so that it can be corrected.  Include the
3945 smallest possible script which produces this message, along with the
3946 .LOG file if appropriate.  See the manual pages for contact information.
3947 Your efforts are appreciated.
3948 Thank you!
3949 EOM
3950             my $added_semicolon_count = 0;
3951             eval {
3952                 $added_semicolon_count =
3953                   $formatter->get_added_semicolon_count();
3954             };
3955             if ( $added_semicolon_count > 0 ) {
3956                 $self->warning(<<EOM);
3957
3958 The log file shows that perltidy added $added_semicolon_count semicolons.
3959 Please rerun with -nasc to see if that is the cause of the syntax error.  Even
3960 if that is the problem, please report it so that it can be fixed.
3961 EOM
3962
3963             }
3964         }
3965     }
3966 }
3967
3968 sub finish {
3969
3970     # called after all formatting to summarize errors
3971     my $self = shift;
3972     my ( $infile_syntax_ok, $formatter ) = @_;
3973
3974     my $rOpts         = $self->{_rOpts};
3975     my $warning_count = $self->{_warning_count};
3976     my $saw_code_bug  = $self->{_saw_code_bug};
3977
3978     my $save_logfile =
3979          ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3980       || $saw_code_bug == 1
3981       || $rOpts->{'logfile'};
3982     my $log_file = $self->{_log_file};
3983     if ($warning_count) {
3984         if ($save_logfile) {
3985             $self->block_log_output();    # avoid echoing this to the logfile
3986             $self->warning(
3987                 "The logfile $log_file may contain useful information\n");
3988             $self->unblock_log_output();
3989         }
3990
3991         if ( $self->{_complaint_count} > 0 ) {
3992             $self->warning(
3993 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3994             );
3995         }
3996
3997         if ( $self->{_saw_brace_error}
3998             && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3999         {
4000             $self->warning("To save a full .LOG file rerun with -g\n");
4001         }
4002     }
4003     $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4004
4005     if ($save_logfile) {
4006         my $log_file = $self->{_log_file};
4007         my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4008         if ($fh) {
4009             my $routput_array = $self->{_output_array};
4010             foreach ( @{$routput_array} ) { $fh->print($_) }
4011             eval { $fh->close() };
4012         }
4013     }
4014 }
4015
4016 #####################################################################
4017 #
4018 # The Perl::Tidy::DevNull class supplies a dummy print method
4019 #
4020 #####################################################################
4021
4022 package Perl::Tidy::DevNull;
4023 sub new { return bless {}, $_[0] }
4024 sub print { return }
4025 sub close { return }
4026
4027 #####################################################################
4028 #
4029 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4030 #
4031 #####################################################################
4032
4033 package Perl::Tidy::HtmlWriter;
4034
4035 use File::Basename;
4036
4037 # class variables
4038 use vars qw{
4039   %html_color
4040   %html_bold
4041   %html_italic
4042   %token_short_names
4043   %short_to_long_names
4044   $rOpts
4045   $css_filename
4046   $css_linkname
4047   $missing_html_entities
4048 };
4049
4050 # replace unsafe characters with HTML entity representation if HTML::Entities
4051 # is available
4052 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4053
4054 sub new {
4055
4056     my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4057         $html_src_extension )
4058       = @_;
4059
4060     my $html_file_opened = 0;
4061     my $html_fh;
4062     ( $html_fh, my $html_filename ) =
4063       Perl::Tidy::streamhandle( $html_file, 'w' );
4064     unless ($html_fh) {
4065         warn("can't open $html_file: $!\n");
4066         return undef;
4067     }
4068     $html_file_opened = 1;
4069
4070     if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4071         $input_file = "NONAME";
4072     }
4073
4074     # write the table of contents to a string
4075     my $toc_string;
4076     my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4077
4078     my $html_pre_fh;
4079     my @pre_string_stack;
4080     if ( $rOpts->{'html-pre-only'} ) {
4081
4082         # pre section goes directly to the output stream
4083         $html_pre_fh = $html_fh;
4084         $html_pre_fh->print( <<"PRE_END");
4085 <pre>
4086 PRE_END
4087     }
4088     else {
4089
4090         # pre section go out to a temporary string
4091         my $pre_string;
4092         $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4093         push @pre_string_stack, \$pre_string;
4094     }
4095
4096     # pod text gets diverted if the 'pod2html' is used
4097     my $html_pod_fh;
4098     my $pod_string;
4099     if ( $rOpts->{'pod2html'} ) {
4100         if ( $rOpts->{'html-pre-only'} ) {
4101             undef $rOpts->{'pod2html'};
4102         }
4103         else {
4104             eval "use Pod::Html";
4105             if ($@) {
4106                 warn
4107 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4108                 undef $rOpts->{'pod2html'};
4109             }
4110             else {
4111                 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4112             }
4113         }
4114     }
4115
4116     my $toc_filename;
4117     my $src_filename;
4118     if ( $rOpts->{'frames'} ) {
4119         unless ($extension) {
4120             warn
4121 "cannot use frames without a specified output extension; ignoring -frm\n";
4122             undef $rOpts->{'frames'};
4123         }
4124         else {
4125             $toc_filename = $input_file . $html_toc_extension . $extension;
4126             $src_filename = $input_file . $html_src_extension . $extension;
4127         }
4128     }
4129
4130     # ----------------------------------------------------------
4131     # Output is now directed as follows:
4132     # html_toc_fh <-- table of contents items
4133     # html_pre_fh <-- the <pre> section of formatted code, except:
4134     # html_pod_fh <-- pod goes here with the pod2html option
4135     # ----------------------------------------------------------
4136
4137     my $title = $rOpts->{'title'};
4138     unless ($title) {
4139         ( $title, my $path ) = fileparse($input_file);
4140     }
4141     my $toc_item_count = 0;
4142     my $in_toc_package = "";
4143     my $last_level     = 0;
4144     bless {
4145         _input_file        => $input_file,          # name of input file
4146         _title             => $title,               # title, unescaped
4147         _html_file         => $html_file,           # name of .html output file
4148         _toc_filename      => $toc_filename,        # for frames option
4149         _src_filename      => $src_filename,        # for frames option
4150         _html_file_opened  => $html_file_opened,    # a flag
4151         _html_fh           => $html_fh,             # the output stream
4152         _html_pre_fh       => $html_pre_fh,         # pre section goes here
4153         _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
4154         _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
4155         _rpod_string       => \$pod_string,         # string holding pod
4156         _pod_cut_count     => 0,                    # how many =cut's?
4157         _html_toc_fh       => $html_toc_fh,         # fh for table of contents
4158         _rtoc_string       => \$toc_string,         # string holding toc
4159         _rtoc_item_count   => \$toc_item_count,     # how many toc items
4160         _rin_toc_package   => \$in_toc_package,     # package name
4161         _rtoc_name_count   => {},                   # hash to track unique names
4162         _rpackage_stack    => [],                   # stack to check for package
4163                                                     # name changes
4164         _rlast_level       => \$last_level,         # brace indentation level
4165     }, $class;
4166 }
4167
4168 sub add_toc_item {
4169
4170     # Add an item to the html table of contents.
4171     # This is called even if no table of contents is written,
4172     # because we still want to put the anchors in the <pre> text.
4173     # We are given an anchor name and its type; types are:
4174     #      'package', 'sub', '__END__', '__DATA__', 'EOF'
4175     # There must be an 'EOF' call at the end to wrap things up.
4176     my $self = shift;
4177     my ( $name, $type ) = @_;
4178     my $html_toc_fh     = $self->{_html_toc_fh};
4179     my $html_pre_fh     = $self->{_html_pre_fh};
4180     my $rtoc_name_count = $self->{_rtoc_name_count};
4181     my $rtoc_item_count = $self->{_rtoc_item_count};
4182     my $rlast_level     = $self->{_rlast_level};
4183     my $rin_toc_package = $self->{_rin_toc_package};
4184     my $rpackage_stack  = $self->{_rpackage_stack};
4185
4186     # packages contain sublists of subs, so to avoid errors all package
4187     # items are written and finished with the following routines
4188     my $end_package_list = sub {
4189         if ($$rin_toc_package) {
4190             $html_toc_fh->print("</ul>\n</li>\n");
4191             $$rin_toc_package = "";
4192         }
4193     };
4194
4195     my $start_package_list = sub {
4196         my ( $unique_name, $package ) = @_;
4197         if ($$rin_toc_package) { $end_package_list->() }
4198         $html_toc_fh->print(<<EOM);
4199 <li><a href=\"#$unique_name\">package $package</a>
4200 <ul>
4201 EOM
4202         $$rin_toc_package = $package;
4203     };
4204
4205     # start the table of contents on the first item
4206     unless ($$rtoc_item_count) {
4207
4208         # but just quit if we hit EOF without any other entries
4209         # in this case, there will be no toc
4210         return if ( $type eq 'EOF' );
4211         $html_toc_fh->print( <<"TOC_END");
4212 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4213 <ul>
4214 TOC_END
4215     }
4216     $$rtoc_item_count++;
4217
4218     # make a unique anchor name for this location:
4219     #   - packages get a 'package-' prefix
4220     #   - subs use their names
4221     my $unique_name = $name;
4222     if ( $type eq 'package' ) { $unique_name = "package-$name" }
4223
4224     # append '-1', '-2', etc if necessary to make unique; this will
4225     # be unique because subs and packages cannot have a '-'
4226     if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4227         $unique_name .= "-$count";
4228     }
4229
4230     #   - all names get terminal '-' if pod2html is used, to avoid
4231     #     conflicts with anchor names created by pod2html
4232     if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4233
4234     # start/stop lists of subs
4235     if ( $type eq 'sub' ) {
4236         my $package = $rpackage_stack->[$$rlast_level];
4237         unless ($package) { $package = 'main' }
4238
4239         # if we're already in a package/sub list, be sure its the right
4240         # package or else close it
4241         if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4242             $end_package_list->();
4243         }
4244
4245         # start a package/sub list if necessary
4246         unless ($$rin_toc_package) {
4247             $start_package_list->( $unique_name, $package );
4248         }
4249     }
4250
4251     # now write an entry in the toc for this item
4252     if ( $type eq 'package' ) {
4253         $start_package_list->( $unique_name, $name );
4254     }
4255     elsif ( $type eq 'sub' ) {
4256         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4257     }
4258     else {
4259         $end_package_list->();
4260         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4261     }
4262
4263     # write the anchor in the <pre> section
4264     $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4265
4266     # end the table of contents, if any, on the end of file
4267     if ( $type eq 'EOF' ) {
4268         $html_toc_fh->print( <<"TOC_END");
4269 </ul>
4270 <!-- END CODE INDEX -->
4271 TOC_END
4272     }
4273 }
4274
4275 BEGIN {
4276
4277     # This is the official list of tokens which may be identified by the
4278     # user.  Long names are used as getopt keys.  Short names are
4279     # convenient short abbreviations for specifying input.  Short names
4280     # somewhat resemble token type characters, but are often different
4281     # because they may only be alphanumeric, to allow command line
4282     # input.  Also, note that because of case insensitivity of html,
4283     # this table must be in a single case only (I've chosen to use all
4284     # lower case).
4285     # When adding NEW_TOKENS: update this hash table
4286     # short names => long names
4287     %short_to_long_names = (
4288         'n'  => 'numeric',
4289         'p'  => 'paren',
4290         'q'  => 'quote',
4291         's'  => 'structure',
4292         'c'  => 'comment',
4293         'v'  => 'v-string',
4294         'cm' => 'comma',
4295         'w'  => 'bareword',
4296         'co' => 'colon',
4297         'pu' => 'punctuation',
4298         'i'  => 'identifier',
4299         'j'  => 'label',
4300         'h'  => 'here-doc-target',
4301         'hh' => 'here-doc-text',
4302         'k'  => 'keyword',
4303         'sc' => 'semicolon',
4304         'm'  => 'subroutine',
4305         'pd' => 'pod-text',
4306     );
4307
4308     # Now we have to map actual token types into one of the above short
4309     # names; any token types not mapped will get 'punctuation'
4310     # properties.
4311
4312     # The values of this hash table correspond to the keys of the
4313     # previous hash table.
4314     # The keys of this hash table are token types and can be seen
4315     # by running with --dump-token-types (-dtt).
4316
4317     # When adding NEW_TOKENS: update this hash table
4318     # $type => $short_name
4319     %token_short_names = (
4320         '#'  => 'c',
4321         'n'  => 'n',
4322         'v'  => 'v',
4323         'k'  => 'k',
4324         'F'  => 'k',
4325         'Q'  => 'q',
4326         'q'  => 'q',
4327         'J'  => 'j',
4328         'j'  => 'j',
4329         'h'  => 'h',
4330         'H'  => 'hh',
4331         'w'  => 'w',
4332         ','  => 'cm',
4333         '=>' => 'cm',
4334         ';'  => 'sc',
4335         ':'  => 'co',
4336         'f'  => 'sc',
4337         '('  => 'p',
4338         ')'  => 'p',
4339         'M'  => 'm',
4340         'P'  => 'pd',
4341         'A'  => 'co',
4342     );
4343
4344     # These token types will all be called identifiers for now
4345     # FIXME: need to separate user defined modules as separate type
4346     my @identifier = qw" i t U C Y Z G :: ";
4347     @token_short_names{@identifier} = ('i') x scalar(@identifier);
4348
4349     # These token types will be called 'structure'
4350     my @structure = qw" { } ";
4351     @token_short_names{@structure} = ('s') x scalar(@structure);
4352
4353     # OLD NOTES: save for reference
4354     # Any of these could be added later if it would be useful.
4355     # For now, they will by default become punctuation
4356     #    my @list = qw" L R [ ] ";
4357     #    @token_long_names{@list} = ('non-structure') x scalar(@list);
4358     #
4359     #    my @list = qw"
4360     #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4361     #      ";
4362     #    @token_long_names{@list} = ('math') x scalar(@list);
4363     #
4364     #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
4365     #    @token_long_names{@list} = ('bit') x scalar(@list);
4366     #
4367     #    my @list = qw" == != < > <= <=> ";
4368     #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4369     #
4370     #    my @list = qw" && || ! &&= ||= //= ";
4371     #    @token_long_names{@list} = ('logical') x scalar(@list);
4372     #
4373     #    my @list = qw" . .= =~ !~ x x= ";
4374     #    @token_long_names{@list} = ('string-operators') x scalar(@list);
4375     #
4376     #    # Incomplete..
4377     #    my @list = qw" .. -> <> ... \ ? ";
4378     #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
4379
4380 }
4381
4382 sub make_getopt_long_names {
4383     my $class = shift;
4384     my ($rgetopt_names) = @_;
4385     while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4386         push @$rgetopt_names, "html-color-$name=s";
4387         push @$rgetopt_names, "html-italic-$name!";
4388         push @$rgetopt_names, "html-bold-$name!";
4389     }
4390     push @$rgetopt_names, "html-color-background=s";
4391     push @$rgetopt_names, "html-linked-style-sheet=s";
4392     push @$rgetopt_names, "nohtml-style-sheets";
4393     push @$rgetopt_names, "html-pre-only";
4394     push @$rgetopt_names, "html-line-numbers";
4395     push @$rgetopt_names, "html-entities!";
4396     push @$rgetopt_names, "stylesheet";
4397     push @$rgetopt_names, "html-table-of-contents!";
4398     push @$rgetopt_names, "pod2html!";
4399     push @$rgetopt_names, "frames!";
4400     push @$rgetopt_names, "html-toc-extension=s";
4401     push @$rgetopt_names, "html-src-extension=s";
4402
4403     # Pod::Html parameters:
4404     push @$rgetopt_names, "backlink=s";
4405     push @$rgetopt_names, "cachedir=s";
4406     push @$rgetopt_names, "htmlroot=s";
4407     push @$rgetopt_names, "libpods=s";
4408     push @$rgetopt_names, "podpath=s";
4409     push @$rgetopt_names, "podroot=s";
4410     push @$rgetopt_names, "title=s";
4411
4412     # Pod::Html parameters with leading 'pod' which will be removed
4413     # before the call to Pod::Html
4414     push @$rgetopt_names, "podquiet!";
4415     push @$rgetopt_names, "podverbose!";
4416     push @$rgetopt_names, "podrecurse!";
4417     push @$rgetopt_names, "podflush";
4418     push @$rgetopt_names, "podheader!";
4419     push @$rgetopt_names, "podindex!";
4420 }
4421
4422 sub make_abbreviated_names {
4423
4424     # We're appending things like this to the expansion list:
4425     #      'hcc'    => [qw(html-color-comment)],
4426     #      'hck'    => [qw(html-color-keyword)],
4427     #  etc
4428     my $class = shift;
4429     my ($rexpansion) = @_;
4430
4431     # abbreviations for color/bold/italic properties
4432     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4433         ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
4434         ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
4435         ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
4436         ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4437         ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4438     }
4439
4440     # abbreviations for all other html options
4441     ${$rexpansion}{"hcbg"}  = ["html-color-background"];
4442     ${$rexpansion}{"pre"}   = ["html-pre-only"];
4443     ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
4444     ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
4445     ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
4446     ${$rexpansion}{"hent"}  = ["html-entities"];
4447     ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4448     ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
4449     ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
4450     ${$rexpansion}{"ss"}    = ["stylesheet"];
4451     ${$rexpansion}{"pod"}   = ["pod2html"];
4452     ${$rexpansion}{"npod"}  = ["nopod2html"];
4453     ${$rexpansion}{"frm"}   = ["frames"];
4454     ${$rexpansion}{"nfrm"}  = ["noframes"];
4455     ${$rexpansion}{"text"}  = ["html-toc-extension"];
4456     ${$rexpansion}{"sext"}  = ["html-src-extension"];
4457 }
4458
4459 sub check_options {
4460
4461     # This will be called once after options have been parsed
4462     my $class = shift;
4463     $rOpts = shift;
4464
4465     # X11 color names for default settings that seemed to look ok
4466     # (these color names are only used for programming clarity; the hex
4467     # numbers are actually written)
4468     use constant ForestGreen   => "#228B22";
4469     use constant SaddleBrown   => "#8B4513";
4470     use constant magenta4      => "#8B008B";
4471     use constant IndianRed3    => "#CD5555";
4472     use constant DeepSkyBlue4  => "#00688B";
4473     use constant MediumOrchid3 => "#B452CD";
4474     use constant black         => "#000000";
4475     use constant white         => "#FFFFFF";
4476     use constant red           => "#FF0000";
4477
4478     # set default color, bold, italic properties
4479     # anything not listed here will be given the default (punctuation) color --
4480     # these types currently not listed and get default: ws pu s sc cm co p
4481     # When adding NEW_TOKENS: add an entry here if you don't want defaults
4482
4483     # set_default_properties( $short_name, default_color, bold?, italic? );
4484     set_default_properties( 'c',  ForestGreen,   0, 0 );
4485     set_default_properties( 'pd', ForestGreen,   0, 1 );
4486     set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
4487     set_default_properties( 'q',  IndianRed3,    0, 0 );
4488     set_default_properties( 'hh', IndianRed3,    0, 1 );
4489     set_default_properties( 'h',  IndianRed3,    1, 0 );
4490     set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
4491     set_default_properties( 'w',  black,         0, 0 );
4492     set_default_properties( 'n',  MediumOrchid3, 0, 0 );
4493     set_default_properties( 'v',  MediumOrchid3, 0, 0 );
4494     set_default_properties( 'j',  IndianRed3,    1, 0 );
4495     set_default_properties( 'm',  red,           1, 0 );
4496
4497     set_default_color( 'html-color-background',  white );
4498     set_default_color( 'html-color-punctuation', black );
4499
4500     # setup property lookup tables for tokens based on their short names
4501     # every token type has a short name, and will use these tables
4502     # to do the html markup
4503     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4504         $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
4505         $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
4506         $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4507     }
4508
4509     # write style sheet to STDOUT and die if requested
4510     if ( defined( $rOpts->{'stylesheet'} ) ) {
4511         write_style_sheet_file('-');
4512         exit 1;
4513     }
4514
4515     # make sure user gives a file name after -css
4516     if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4517         $css_linkname = $rOpts->{'html-linked-style-sheet'};
4518         if ( $css_linkname =~ /^-/ ) {
4519             die "You must specify a valid filename after -css\n";
4520         }
4521     }
4522
4523     # check for conflict
4524     if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4525         $rOpts->{'nohtml-style-sheets'} = 0;
4526         warning("You can't specify both -css and -nss; -nss ignored\n");
4527     }
4528
4529     # write a style sheet file if necessary
4530     if ($css_linkname) {
4531
4532         # if the selected filename exists, don't write, because user may
4533         # have done some work by hand to create it; use backup name instead
4534         # Also, this will avoid a potential disaster in which the user
4535         # forgets to specify the style sheet, like this:
4536         #    perltidy -html -css myfile1.pl myfile2.pl
4537         # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4538         my $css_filename = $css_linkname;
4539         unless ( -e $css_filename ) {
4540             write_style_sheet_file($css_filename);
4541         }
4542     }
4543     $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4544 }
4545
4546 sub write_style_sheet_file {
4547
4548     my $css_filename = shift;
4549     my $fh;
4550     unless ( $fh = IO::File->new("> $css_filename") ) {
4551         die "can't open $css_filename: $!\n";
4552     }
4553     write_style_sheet_data($fh);
4554     eval { $fh->close };
4555 }
4556
4557 sub write_style_sheet_data {
4558
4559     # write the style sheet data to an open file handle
4560     my $fh = shift;
4561
4562     my $bg_color   = $rOpts->{'html-color-background'};
4563     my $text_color = $rOpts->{'html-color-punctuation'};
4564
4565     # pre-bgcolor is new, and may not be defined
4566     my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4567     $pre_bg_color = $bg_color unless $pre_bg_color;
4568
4569     $fh->print(<<"EOM");
4570 /* default style sheet generated by perltidy */
4571 body {background: $bg_color; color: $text_color}
4572 pre { color: $text_color;
4573       background: $pre_bg_color;
4574       font-family: courier;
4575     }
4576
4577 EOM
4578
4579     foreach my $short_name ( sort keys %short_to_long_names ) {
4580         my $long_name = $short_to_long_names{$short_name};
4581
4582         my $abbrev = '.' . $short_name;
4583         if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
4584         my $color = $html_color{$short_name};
4585         if ( !defined($color) ) { $color = $text_color }
4586         $fh->print("$abbrev \{ color: $color;");
4587
4588         if ( $html_bold{$short_name} ) {
4589             $fh->print(" font-weight:bold;");
4590         }
4591
4592         if ( $html_italic{$short_name} ) {
4593             $fh->print(" font-style:italic;");
4594         }
4595         $fh->print("} /* $long_name */\n");
4596     }
4597 }
4598
4599 sub set_default_color {
4600
4601     # make sure that options hash $rOpts->{$key} contains a valid color
4602     my ( $key, $color ) = @_;
4603     if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4604     $rOpts->{$key} = check_RGB($color);
4605 }
4606
4607 sub check_RGB {
4608
4609     # if color is a 6 digit hex RGB value, prepend a #, otherwise
4610     # assume that it is a valid ascii color name
4611     my ($color) = @_;
4612     if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4613     return $color;
4614 }
4615
4616 sub set_default_properties {
4617     my ( $short_name, $color, $bold, $italic ) = @_;
4618
4619     set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4620     my $key;
4621     $key = "html-bold-$short_to_long_names{$short_name}";
4622     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4623     $key = "html-italic-$short_to_long_names{$short_name}";
4624     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4625 }
4626
4627 sub pod_to_html {
4628
4629     # Use Pod::Html to process the pod and make the page
4630     # then merge the perltidy code sections into it.
4631     # return 1 if success, 0 otherwise
4632     my $self = shift;
4633     my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4634     my $input_file   = $self->{_input_file};
4635     my $title        = $self->{_title};
4636     my $success_flag = 0;
4637
4638     # don't try to use pod2html if no pod
4639     unless ($pod_string) {
4640         return $success_flag;
4641     }
4642
4643     # Pod::Html requires a real temporary filename
4644     # If we are making a frame, we have a name available
4645     # Otherwise, we have to fine one
4646     my $tmpfile;
4647     if ( $rOpts->{'frames'} ) {
4648         $tmpfile = $self->{_toc_filename};
4649     }
4650     else {
4651         $tmpfile = Perl::Tidy::make_temporary_filename();
4652     }
4653     my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4654     unless ($fh_tmp) {
4655         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4656         return $success_flag;
4657     }
4658
4659     #------------------------------------------------------------------
4660     # Warning: a temporary file is open; we have to clean up if
4661     # things go bad.  From here on all returns should be by going to
4662     # RETURN so that the temporary file gets unlinked.
4663     #------------------------------------------------------------------
4664
4665     # write the pod text to the temporary file
4666     $fh_tmp->print($pod_string);
4667     $fh_tmp->close();
4668
4669     # Hand off the pod to pod2html.
4670     # Note that we can use the same temporary filename for input and output
4671     # because of the way pod2html works.
4672     {
4673
4674         my @args;
4675         push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4676         my $kw;
4677
4678         # Flags with string args:
4679         # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4680         # "podpath=s", "podroot=s"
4681         # Note: -css=s is handled by perltidy itself
4682         foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4683             if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4684         }
4685
4686         # Toggle switches; these have extra leading 'pod'
4687         # "header!", "index!", "recurse!", "quiet!", "verbose!"
4688         foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4689             my $kwd = $kw;    # allows us to strip 'pod'
4690             if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4691             elsif ( defined( $rOpts->{$kw} ) ) {
4692                 $kwd =~ s/^pod//;
4693                 push @args, "--no$kwd";
4694             }
4695         }
4696
4697         # "flush",
4698         $kw = 'podflush';
4699         if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4700
4701         # Must clean up if pod2html dies (it can);
4702         # Be careful not to overwrite callers __DIE__ routine
4703         local $SIG{__DIE__} = sub {
4704             print $_[0];
4705             unlink $tmpfile if -e $tmpfile;
4706             exit 1;
4707         };
4708
4709         pod2html(@args);
4710     }
4711     $fh_tmp = IO::File->new( $tmpfile, 'r' );
4712     unless ($fh_tmp) {
4713
4714         # this error shouldn't happen ... we just used this filename
4715         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4716         goto RETURN;
4717     }
4718
4719     my $html_fh = $self->{_html_fh};
4720     my @toc;
4721     my $in_toc;
4722     my $no_print;
4723
4724     # This routine will write the html selectively and store the toc
4725     my $html_print = sub {
4726         foreach (@_) {
4727             $html_fh->print($_) unless ($no_print);
4728             if ($in_toc) { push @toc, $_ }
4729         }
4730     };
4731
4732     # loop over lines of html output from pod2html and merge in
4733     # the necessary perltidy html sections
4734     my ( $saw_body, $saw_index, $saw_body_end );
4735     while ( my $line = $fh_tmp->getline() ) {
4736
4737         if ( $line =~ /^\s*<html>\s*$/i ) {
4738             my $date = localtime;
4739             $html_print->("<!-- Generated by perltidy on $date -->\n");
4740             $html_print->($line);
4741         }
4742
4743         # Copy the perltidy css, if any, after <body> tag
4744         elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4745             $saw_body = 1;
4746             $html_print->($css_string) if $css_string;
4747             $html_print->($line);
4748
4749             # add a top anchor and heading
4750             $html_print->("<a name=\"-top-\"></a>\n");
4751             $title = escape_html($title);
4752             $html_print->("<h1>$title</h1>\n");
4753         }
4754         elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4755             $in_toc = 1;
4756
4757             # when frames are used, an extra table of contents in the
4758             # contents panel is confusing, so don't print it
4759             $no_print = $rOpts->{'frames'}
4760               || !$rOpts->{'html-table-of-contents'};
4761             $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4762             $html_print->($line);
4763         }
4764
4765         # Copy the perltidy toc, if any, after the Pod::Html toc
4766         elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4767             $saw_index = 1;
4768             $html_print->($line);
4769             if ($toc_string) {
4770                 $html_print->("<hr />\n") if $rOpts->{'frames'};
4771                 $html_print->("<h2>Code Index:</h2>\n");
4772                 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4773                 $html_print->(@toc);
4774             }
4775             $in_toc   = 0;
4776             $no_print = 0;
4777         }
4778
4779         # Copy one perltidy section after each marker
4780         elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4781             $line = $2;
4782             $html_print->($1) if $1;
4783
4784             # Intermingle code and pod sections if we saw multiple =cut's.
4785             if ( $self->{_pod_cut_count} > 1 ) {
4786                 my $rpre_string = shift(@$rpre_string_stack);
4787                 if ($$rpre_string) {
4788                     $html_print->('<pre>');
4789                     $html_print->($$rpre_string);
4790                     $html_print->('</pre>');
4791                 }
4792                 else {
4793
4794                     # shouldn't happen: we stored a string before writing
4795                     # each marker.
4796                     warn
4797 "Problem merging html stream with pod2html; order may be wrong\n";
4798                 }
4799                 $html_print->($line);
4800             }
4801
4802             # If didn't see multiple =cut lines, we'll put the pod out first
4803             # and then the code, because it's less confusing.
4804             else {
4805
4806                 # since we are not intermixing code and pod, we don't need
4807                 # or want any <hr> lines which separated pod and code
4808                 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4809             }
4810         }
4811
4812         # Copy any remaining code section before the </body> tag
4813         elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4814             $saw_body_end = 1;
4815             if (@$rpre_string_stack) {
4816                 unless ( $self->{_pod_cut_count} > 1 ) {
4817                     $html_print->('<hr />');
4818                 }
4819                 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4820                     $html_print->('<pre>');
4821                     $html_print->($$rpre_string);
4822                     $html_print->('</pre>');
4823                 }
4824             }
4825             $html_print->($line);
4826         }
4827         else {
4828             $html_print->($line);
4829         }
4830     }
4831
4832     $success_flag = 1;
4833     unless ($saw_body) {
4834         warn "Did not see <body> in pod2html output\n";
4835         $success_flag = 0;
4836     }
4837     unless ($saw_body_end) {
4838         warn "Did not see </body> in pod2html output\n";
4839         $success_flag = 0;
4840     }
4841     unless ($saw_index) {
4842         warn "Did not find INDEX END in pod2html output\n";
4843         $success_flag = 0;
4844     }
4845
4846   RETURN:
4847     eval { $html_fh->close() };
4848
4849     # note that we have to unlink tmpfile before making frames
4850     # because the tmpfile may be one of the names used for frames
4851     unlink $tmpfile if -e $tmpfile;
4852     if ( $success_flag && $rOpts->{'frames'} ) {
4853         $self->make_frame( \@toc );
4854     }
4855     return $success_flag;
4856 }
4857
4858 sub make_frame {
4859
4860     # Make a frame with table of contents in the left panel
4861     # and the text in the right panel.
4862     # On entry:
4863     #  $html_filename contains the no-frames html output
4864     #  $rtoc is a reference to an array with the table of contents
4865     my $self          = shift;
4866     my ($rtoc)        = @_;
4867     my $input_file    = $self->{_input_file};
4868     my $html_filename = $self->{_html_file};
4869     my $toc_filename  = $self->{_toc_filename};
4870     my $src_filename  = $self->{_src_filename};
4871     my $title         = $self->{_title};
4872     $title = escape_html($title);
4873
4874     # FUTURE input parameter:
4875     my $top_basename = "";
4876
4877     # We need to produce 3 html files:
4878     # 1. - the table of contents
4879     # 2. - the contents (source code) itself
4880     # 3. - the frame which contains them
4881
4882     # get basenames for relative links
4883     my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4884     my ( $src_basename, $src_path ) = fileparse($src_filename);
4885
4886     # 1. Make the table of contents panel, with appropriate changes
4887     # to the anchor names
4888     my $src_frame_name = 'SRC';
4889     my $first_anchor =
4890       write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4891         $src_frame_name );
4892
4893     # 2. The current .html filename is renamed to be the contents panel
4894     rename( $html_filename, $src_filename )
4895       or die "Cannot rename $html_filename to $src_filename:$!\n";
4896
4897     # 3. Then use the original html filename for the frame
4898     write_frame_html(
4899         $title,        $html_filename, $top_basename,
4900         $toc_basename, $src_basename,  $src_frame_name
4901     );
4902 }
4903
4904 sub write_toc_html {
4905
4906     # write a separate html table of contents file for frames
4907     my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4908     my $fh = IO::File->new( $toc_filename, 'w' )
4909       or die "Cannot open $toc_filename:$!\n";
4910     $fh->print(<<EOM);
4911 <html>
4912 <head>
4913 <title>$title</title>
4914 </head>
4915 <body>
4916 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4917 EOM
4918
4919     my $first_anchor =
4920       change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4921     $fh->print( join "", @$rtoc );
4922
4923     $fh->print(<<EOM);
4924 </body>
4925 </html>
4926 EOM
4927
4928 }
4929
4930 sub write_frame_html {
4931
4932     # write an html file to be the table of contents frame
4933     my (
4934         $title,        $frame_filename, $top_basename,
4935         $toc_basename, $src_basename,   $src_frame_name
4936     ) = @_;
4937
4938     my $fh = IO::File->new( $frame_filename, 'w' )
4939       or die "Cannot open $toc_basename:$!\n";
4940
4941     $fh->print(<<EOM);
4942 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4943     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4944 <?xml version="1.0" encoding="iso-8859-1" ?>
4945 <html xmlns="http://www.w3.org/1999/xhtml">
4946 <head>
4947 <title>$title</title>
4948 </head>
4949 EOM
4950
4951     # two left panels, one right, if master index file
4952     if ($top_basename) {
4953         $fh->print(<<EOM);
4954 <frameset cols="20%,80%">
4955 <frameset rows="30%,70%">
4956 <frame src = "$top_basename" />
4957 <frame src = "$toc_basename" />
4958 </frameset>
4959 EOM
4960     }
4961
4962     # one left panels, one right, if no master index file
4963     else {
4964         $fh->print(<<EOM);
4965 <frameset cols="20%,*">
4966 <frame src = "$toc_basename" />
4967 EOM
4968     }
4969     $fh->print(<<EOM);
4970 <frame src = "$src_basename" name = "$src_frame_name" />
4971 <noframes>
4972 <body>
4973 <p>If you see this message, you are using a non-frame-capable web client.</p>
4974 <p>This document contains:</p>
4975 <ul>
4976 <li><a href="$toc_basename">A table of contents</a></li>
4977 <li><a href="$src_basename">The source code</a></li>
4978 </ul>
4979 </body>
4980 </noframes>
4981 </frameset>
4982 </html>
4983 EOM
4984 }
4985
4986 sub change_anchor_names {
4987
4988     # add a filename and target to anchors
4989     # also return the first anchor
4990     my ( $rlines, $filename, $target ) = @_;
4991     my $first_anchor;
4992     foreach my $line (@$rlines) {
4993
4994         #  We're looking for lines like this:
4995         #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4996         #  ----  -       --------  -----------------
4997         #  $1              $4            $5
4998         if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4999             my $pre  = $1;
5000             my $name = $4;
5001             my $post = $5;
5002             my $href = "$filename#$name";
5003             $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5004             unless ($first_anchor) { $first_anchor = $href }
5005         }
5006     }
5007     return $first_anchor;
5008 }
5009
5010 sub close_html_file {
5011     my $self = shift;
5012     return unless $self->{_html_file_opened};
5013
5014     my $html_fh     = $self->{_html_fh};
5015     my $rtoc_string = $self->{_rtoc_string};
5016
5017     # There are 3 basic paths to html output...
5018
5019     # ---------------------------------
5020     # Path 1: finish up if in -pre mode
5021     # ---------------------------------
5022     if ( $rOpts->{'html-pre-only'} ) {
5023         $html_fh->print( <<"PRE_END");
5024 </pre>
5025 PRE_END
5026         eval { $html_fh->close() };
5027         return;
5028     }
5029
5030     # Finish the index
5031     $self->add_toc_item( 'EOF', 'EOF' );
5032
5033     my $rpre_string_stack = $self->{_rpre_string_stack};
5034
5035     # Patch to darken the <pre> background color in case of pod2html and
5036     # interleaved code/documentation.  Otherwise, the distinction
5037     # between code and documentation is blurred.
5038     if (   $rOpts->{pod2html}
5039         && $self->{_pod_cut_count} >= 1
5040         && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5041     {
5042         $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5043     }
5044
5045     # put the css or its link into a string, if used
5046     my $css_string;
5047     my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5048
5049     # use css linked to another file
5050     if ( $rOpts->{'html-linked-style-sheet'} ) {
5051         $fh_css->print(
5052             qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5053         );
5054     }
5055
5056     # use css embedded in this file
5057     elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5058         $fh_css->print( <<'ENDCSS');
5059 <style type="text/css">
5060 <!--
5061 ENDCSS
5062         write_style_sheet_data($fh_css);
5063         $fh_css->print( <<"ENDCSS");
5064 -->
5065 </style>
5066 ENDCSS
5067     }
5068
5069     # -----------------------------------------------------------
5070     # path 2: use pod2html if requested
5071     #         If we fail for some reason, continue on to path 3
5072     # -----------------------------------------------------------
5073     if ( $rOpts->{'pod2html'} ) {
5074         my $rpod_string = $self->{_rpod_string};
5075         $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5076             $rpre_string_stack )
5077           && return;
5078     }
5079
5080     # --------------------------------------------------
5081     # path 3: write code in html, with pod only in italics
5082     # --------------------------------------------------
5083     my $input_file = $self->{_input_file};
5084     my $title      = escape_html($input_file);
5085     my $date       = localtime;
5086     $html_fh->print( <<"HTML_START");
5087 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5088    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5089 <!-- Generated by perltidy on $date -->
5090 <html xmlns="http://www.w3.org/1999/xhtml">
5091 <head>
5092 <title>$title</title>
5093 HTML_START
5094
5095     # output the css, if used
5096     if ($css_string) {
5097         $html_fh->print($css_string);
5098         $html_fh->print( <<"ENDCSS");
5099 </head>
5100 <body>
5101 ENDCSS
5102     }
5103     else {
5104
5105         $html_fh->print( <<"HTML_START");
5106 </head>
5107 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5108 HTML_START
5109     }
5110
5111     $html_fh->print("<a name=\"-top-\"></a>\n");
5112     $html_fh->print( <<"EOM");
5113 <h1>$title</h1>
5114 EOM
5115
5116     # copy the table of contents
5117     if (   $$rtoc_string
5118         && !$rOpts->{'frames'}
5119         && $rOpts->{'html-table-of-contents'} )
5120     {
5121         $html_fh->print($$rtoc_string);
5122     }
5123
5124     # copy the pre section(s)
5125     my $fname_comment = $input_file;
5126     $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
5127     $html_fh->print( <<"END_PRE");
5128 <hr />
5129 <!-- contents of filename: $fname_comment -->
5130 <pre>
5131 END_PRE
5132
5133     foreach my $rpre_string (@$rpre_string_stack) {
5134         $html_fh->print($$rpre_string);
5135     }
5136
5137     # and finish the html page
5138     $html_fh->print( <<"HTML_END");
5139 </pre>
5140 </body>
5141 </html>
5142 HTML_END
5143     eval { $html_fh->close() };    # could be object without close method
5144
5145     if ( $rOpts->{'frames'} ) {
5146         my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5147         $self->make_frame( \@toc );
5148     }
5149 }
5150
5151 sub markup_tokens {
5152     my $self = shift;
5153     my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5154     my ( @colored_tokens, $j, $string, $type, $token, $level );
5155     my $rlast_level    = $self->{_rlast_level};
5156     my $rpackage_stack = $self->{_rpackage_stack};
5157
5158     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5159         $type  = $$rtoken_type[$j];
5160         $token = $$rtokens[$j];
5161         $level = $$rlevels[$j];
5162         $level = 0 if ( $level < 0 );
5163
5164         #-------------------------------------------------------
5165         # Update the package stack.  The package stack is needed to keep
5166         # the toc correct because some packages may be declared within
5167         # blocks and go out of scope when we leave the block.
5168         #-------------------------------------------------------
5169         if ( $level > $$rlast_level ) {
5170             unless ( $rpackage_stack->[ $level - 1 ] ) {
5171                 $rpackage_stack->[ $level - 1 ] = 'main';
5172             }
5173             $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5174         }
5175         elsif ( $level < $$rlast_level ) {
5176             my $package = $rpackage_stack->[$level];
5177             unless ($package) { $package = 'main' }
5178
5179             # if we change packages due to a nesting change, we
5180             # have to make an entry in the toc
5181             if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5182                 $self->add_toc_item( $package, 'package' );
5183             }
5184         }
5185         $$rlast_level = $level;
5186
5187         #-------------------------------------------------------
5188         # Intercept a sub name here; split it
5189         # into keyword 'sub' and sub name; and add an
5190         # entry in the toc
5191         #-------------------------------------------------------
5192         if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5193             $token = $self->markup_html_element( $1, 'k' );
5194             push @colored_tokens, $token;
5195             $token = $2;
5196             $type  = 'M';
5197
5198             # but don't include sub declarations in the toc;
5199             # these wlll have leading token types 'i;'
5200             my $signature = join "", @$rtoken_type;
5201             unless ( $signature =~ /^i;/ ) {
5202                 my $subname = $token;
5203                 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5204                 $self->add_toc_item( $subname, 'sub' );
5205             }
5206         }
5207
5208         #-------------------------------------------------------
5209         # Intercept a package name here; split it
5210         # into keyword 'package' and name; add to the toc,
5211         # and update the package stack
5212         #-------------------------------------------------------
5213         if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5214             $token = $self->markup_html_element( $1, 'k' );
5215             push @colored_tokens, $token;
5216             $token = $2;
5217             $type  = 'i';
5218             $self->add_toc_item( "$token", 'package' );
5219             $rpackage_stack->[$level] = $token;
5220         }
5221
5222         $token = $self->markup_html_element( $token, $type );
5223         push @colored_tokens, $token;
5224     }
5225     return ( \@colored_tokens );
5226 }
5227
5228 sub markup_html_element {
5229     my $self = shift;
5230     my ( $token, $type ) = @_;
5231
5232     return $token if ( $type eq 'b' );    # skip a blank token
5233     return $token if ( $token =~ /^\s*$/ );    # skip a blank line
5234     $token = escape_html($token);
5235
5236     # get the short abbreviation for this token type
5237     my $short_name = $token_short_names{$type};
5238     if ( !defined($short_name) ) {
5239         $short_name = "pu";                    # punctuation is default
5240     }
5241
5242     # handle style sheets..
5243     if ( !$rOpts->{'nohtml-style-sheets'} ) {
5244         if ( $short_name ne 'pu' ) {
5245             $token = qq(<span class="$short_name">) . $token . "</span>";
5246         }
5247     }
5248
5249     # handle no style sheets..
5250     else {
5251         my $color = $html_color{$short_name};
5252
5253         if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5254             $token = qq(<font color="$color">) . $token . "</font>";
5255         }
5256         if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5257         if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
5258     }
5259     return $token;
5260 }
5261
5262 sub escape_html {
5263
5264     my $token = shift;
5265     if ($missing_html_entities) {
5266         $token =~ s/\&/&amp;/g;
5267         $token =~ s/\</&lt;/g;
5268         $token =~ s/\>/&gt;/g;
5269         $token =~ s/\"/&quot;/g;
5270     }
5271     else {
5272         HTML::Entities::encode_entities($token);
5273     }
5274     return $token;
5275 }
5276
5277 sub finish_formatting {
5278
5279     # called after last line
5280     my $self = shift;
5281     $self->close_html_file();
5282     return;
5283 }
5284
5285 sub write_line {
5286
5287     my $self = shift;
5288     return unless $self->{_html_file_opened};
5289     my $html_pre_fh      = $self->{_html_pre_fh};
5290     my ($line_of_tokens) = @_;
5291     my $line_type        = $line_of_tokens->{_line_type};
5292     my $input_line       = $line_of_tokens->{_line_text};
5293     my $line_number      = $line_of_tokens->{_line_number};
5294     chomp $input_line;
5295
5296     # markup line of code..
5297     my $html_line;
5298     if ( $line_type eq 'CODE' ) {
5299         my $rtoken_type = $line_of_tokens->{_rtoken_type};
5300         my $rtokens     = $line_of_tokens->{_rtokens};
5301         my $rlevels     = $line_of_tokens->{_rlevels};
5302
5303         if ( $input_line =~ /(^\s*)/ ) {
5304             $html_line = $1;
5305         }
5306         else {
5307             $html_line = "";
5308         }
5309         my ($rcolored_tokens) =
5310           $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5311         $html_line .= join '', @$rcolored_tokens;
5312     }
5313
5314     # markup line of non-code..
5315     else {
5316         my $line_character;
5317         if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
5318         elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
5319         elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
5320         elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5321         elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
5322         elsif ( $line_type eq 'END_START' ) {
5323             $line_character = 'k';
5324             $self->add_toc_item( '__END__', '__END__' );
5325         }
5326         elsif ( $line_type eq 'DATA_START' ) {
5327             $line_character = 'k';
5328             $self->add_toc_item( '__DATA__', '__DATA__' );
5329         }
5330         elsif ( $line_type =~ /^POD/ ) {
5331             $line_character = 'P';
5332             if ( $rOpts->{'pod2html'} ) {
5333                 my $html_pod_fh = $self->{_html_pod_fh};
5334                 if ( $line_type eq 'POD_START' ) {
5335
5336                     my $rpre_string_stack = $self->{_rpre_string_stack};
5337                     my $rpre_string       = $rpre_string_stack->[-1];
5338
5339                     # if we have written any non-blank lines to the
5340                     # current pre section, start writing to a new output
5341                     # string
5342                     if ( $$rpre_string =~ /\S/ ) {
5343                         my $pre_string;
5344                         $html_pre_fh =
5345                           Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5346                         $self->{_html_pre_fh} = $html_pre_fh;
5347                         push @$rpre_string_stack, \$pre_string;
5348
5349                         # leave a marker in the pod stream so we know
5350                         # where to put the pre section we just
5351                         # finished.
5352                         my $for_html = '=for html';    # don't confuse pod utils
5353                         $html_pod_fh->print(<<EOM);
5354
5355 $for_html
5356 <!-- pERLTIDY sECTION -->
5357
5358 EOM
5359                     }
5360
5361                     # otherwise, just clear the current string and start
5362                     # over
5363                     else {
5364                         $$rpre_string = "";
5365                         $html_pod_fh->print("\n");
5366                     }
5367                 }
5368                 $html_pod_fh->print( $input_line . "\n" );
5369                 if ( $line_type eq 'POD_END' ) {
5370                     $self->{_pod_cut_count}++;
5371                     $html_pod_fh->print("\n");
5372                 }
5373                 return;
5374             }
5375         }
5376         else { $line_character = 'Q' }
5377         $html_line = $self->markup_html_element( $input_line, $line_character );
5378     }
5379
5380     # add the line number if requested
5381     if ( $rOpts->{'html-line-numbers'} ) {
5382         my $extra_space .=
5383             ( $line_number < 10 )   ? "   "
5384           : ( $line_number < 100 )  ? "  "
5385           : ( $line_number < 1000 ) ? " "
5386           :                           "";
5387         $html_line = $extra_space . $line_number . " " . $html_line;
5388     }
5389
5390     # write the line
5391     $html_pre_fh->print("$html_line\n");
5392 }
5393
5394 #####################################################################
5395 #
5396 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5397 # line breaks to the token stream
5398 #
5399 # WARNING: This is not a real class for speed reasons.  Only one
5400 # Formatter may be used.
5401 #
5402 #####################################################################
5403
5404 package Perl::Tidy::Formatter;
5405
5406 BEGIN {
5407
5408     # Caution: these debug flags produce a lot of output
5409     # They should all be 0 except when debugging small scripts
5410     use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
5411     use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
5412     use constant FORMATTER_DEBUG_FLAG_CI      => 0;
5413     use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
5414     use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
5415     use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
5416     use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5417     use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
5418     use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
5419     use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
5420     use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
5421     use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;
5422
5423     my $debug_warning = sub {
5424         print "FORMATTER_DEBUGGING with key $_[0]\n";
5425     };
5426
5427     FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
5428     FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
5429     FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
5430     FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
5431     FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
5432     FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
5433     FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5434     FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
5435     FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
5436     FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
5437     FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
5438     FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
5439 }
5440
5441 use Carp;
5442 use vars qw{
5443
5444   @gnu_stack
5445   $max_gnu_stack_index
5446   $gnu_position_predictor
5447   $line_start_index_to_go
5448   $last_indentation_written
5449   $last_unadjusted_indentation
5450   $last_leading_token
5451
5452   $saw_VERSION_in_this_file
5453   $saw_END_or_DATA_
5454
5455   @gnu_item_list
5456   $max_gnu_item_index
5457   $gnu_sequence_number
5458   $last_output_indentation
5459   %last_gnu_equals
5460   %gnu_comma_count
5461   %gnu_arrow_count
5462
5463   @block_type_to_go
5464   @type_sequence_to_go
5465   @container_environment_to_go
5466   @bond_strength_to_go
5467   @forced_breakpoint_to_go
5468   @lengths_to_go
5469   @levels_to_go
5470   @leading_spaces_to_go
5471   @reduced_spaces_to_go
5472   @matching_token_to_go
5473   @mate_index_to_go
5474   @nesting_blocks_to_go
5475   @ci_levels_to_go
5476   @nesting_depth_to_go
5477   @nobreak_to_go
5478   @old_breakpoint_to_go
5479   @tokens_to_go
5480   @types_to_go
5481
5482   %saved_opening_indentation
5483
5484   $max_index_to_go
5485   $comma_count_in_batch
5486   $old_line_count_in_batch
5487   $last_nonblank_index_to_go
5488   $last_nonblank_type_to_go
5489   $last_nonblank_token_to_go
5490   $last_last_nonblank_index_to_go
5491   $last_last_nonblank_type_to_go
5492   $last_last_nonblank_token_to_go
5493   @nonblank_lines_at_depth
5494   $starting_in_quote
5495   $ending_in_quote
5496
5497   $in_format_skipping_section
5498   $format_skipping_pattern_begin
5499   $format_skipping_pattern_end
5500
5501   $forced_breakpoint_count
5502   $forced_breakpoint_undo_count
5503   @forced_breakpoint_undo_stack
5504   %postponed_breakpoint
5505
5506   $tabbing
5507   $embedded_tab_count
5508   $first_embedded_tab_at
5509   $last_embedded_tab_at
5510   $deleted_semicolon_count
5511   $first_deleted_semicolon_at
5512   $last_deleted_semicolon_at
5513   $added_semicolon_count
5514   $first_added_semicolon_at
5515   $last_added_semicolon_at
5516   $first_tabbing_disagreement
5517   $last_tabbing_disagreement
5518   $in_tabbing_disagreement
5519   $tabbing_disagreement_count
5520   $input_line_tabbing
5521
5522   $last_line_type
5523   $last_line_leading_type
5524   $last_line_leading_level
5525   $last_last_line_leading_level
5526
5527   %block_leading_text
5528   %block_opening_line_number
5529   $csc_new_statement_ok
5530   $accumulating_text_for_block
5531   $leading_block_text
5532   $rleading_block_if_elsif_text
5533   $leading_block_text_level
5534   $leading_block_text_length_exceeded
5535   $leading_block_text_line_length
5536   $leading_block_text_line_number
5537   $closing_side_comment_prefix_pattern
5538   $closing_side_comment_list_pattern
5539
5540   $last_nonblank_token
5541   $last_nonblank_type
5542   $last_last_nonblank_token
5543   $last_last_nonblank_type
5544   $last_nonblank_block_type
5545   $last_output_level
5546   %is_do_follower
5547   %is_if_brace_follower
5548   %space_after_keyword
5549   $rbrace_follower
5550   $looking_for_else
5551   %is_last_next_redo_return
5552   %is_other_brace_follower
5553   %is_else_brace_follower
5554   %is_anon_sub_brace_follower
5555   %is_anon_sub_1_brace_follower
5556   %is_sort_map_grep
5557   %is_sort_map_grep_eval
5558   %is_sort_map_grep_eval_do
5559   %is_block_without_semicolon
5560   %is_if_unless
5561   %is_and_or
5562   %is_assignment
5563   %is_chain_operator
5564   %is_if_unless_and_or_last_next_redo_return
5565   %is_until_while_for_if_elsif_else
5566
5567   @has_broken_sublist
5568   @dont_align
5569   @want_comma_break
5570
5571   $is_static_block_comment
5572   $index_start_one_line_block
5573   $semicolons_before_block_self_destruct
5574   $index_max_forced_break
5575   $input_line_number
5576   $diagnostics_object
5577   $vertical_aligner_object
5578   $logger_object
5579   $file_writer_object
5580   $formatter_self
5581   @ci_stack
5582   $last_line_had_side_comment
5583   %want_break_before
5584   %outdent_keyword
5585   $static_block_comment_pattern
5586   $static_side_comment_pattern
5587   %opening_vertical_tightness
5588   %closing_vertical_tightness
5589   %closing_token_indentation
5590
5591   %opening_token_right
5592   %stack_opening_token
5593   %stack_closing_token
5594
5595   $block_brace_vertical_tightness_pattern
5596
5597   $rOpts_add_newlines
5598   $rOpts_add_whitespace
5599   $rOpts_block_brace_tightness
5600   $rOpts_block_brace_vertical_tightness
5601   $rOpts_brace_left_and_indent
5602   $rOpts_comma_arrow_breakpoints
5603   $rOpts_break_at_old_keyword_breakpoints
5604   $rOpts_break_at_old_comma_breakpoints
5605   $rOpts_break_at_old_logical_breakpoints
5606   $rOpts_break_at_old_ternary_breakpoints
5607   $rOpts_closing_side_comment_else_flag
5608   $rOpts_closing_side_comment_maximum_text
5609   $rOpts_continuation_indentation
5610   $rOpts_cuddled_else
5611   $rOpts_delete_old_whitespace
5612   $rOpts_fuzzy_line_length
5613   $rOpts_indent_columns
5614   $rOpts_line_up_parentheses
5615   $rOpts_maximum_fields_per_table
5616   $rOpts_maximum_line_length
5617   $rOpts_short_concatenation_item_length
5618   $rOpts_keep_old_blank_lines
5619   $rOpts_ignore_old_breakpoints
5620   $rOpts_format_skipping
5621   $rOpts_space_function_paren
5622   $rOpts_space_keyword_paren
5623   $rOpts_keep_interior_semicolons
5624
5625   $half_maximum_line_length
5626
5627   %is_opening_type
5628   %is_closing_type
5629   %is_keyword_returning_list
5630   %tightness
5631   %matching_token
5632   $rOpts
5633   %right_bond_strength
5634   %left_bond_strength
5635   %binary_ws_rules
5636   %want_left_space
5637   %want_right_space
5638   %is_digraph
5639   %is_trigraph
5640   $bli_pattern
5641   $bli_list_string
5642   %is_closing_type
5643   %is_opening_type
5644   %is_closing_token
5645   %is_opening_token
5646 };
5647
5648 BEGIN {
5649
5650     # default list of block types for which -bli would apply
5651     $bli_list_string = 'if else elsif unless while for foreach do : sub';
5652
5653     @_ = qw(
5654       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5655       <= >= == =~ !~ != ++ -- /= x=
5656     );
5657     @is_digraph{@_} = (1) x scalar(@_);
5658
5659     @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5660     @is_trigraph{@_} = (1) x scalar(@_);
5661
5662     @_ = qw(
5663       = **= += *= &= <<= &&=
5664       -= /= |= >>= ||= //=
5665       .= %= ^=
5666       x=
5667     );
5668     @is_assignment{@_} = (1) x scalar(@_);
5669
5670     @_ = qw(
5671       grep
5672       keys
5673       map
5674       reverse
5675       sort
5676       split
5677     );
5678     @is_keyword_returning_list{@_} = (1) x scalar(@_);
5679
5680     @_ = qw(is if unless and or err last next redo return);
5681     @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5682
5683     # always break after a closing curly of these block types:
5684     @_ = qw(until while for if elsif else);
5685     @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
5686
5687     @_ = qw(last next redo return);
5688     @is_last_next_redo_return{@_} = (1) x scalar(@_);
5689
5690     @_ = qw(sort map grep);
5691     @is_sort_map_grep{@_} = (1) x scalar(@_);
5692
5693     @_ = qw(sort map grep eval);
5694     @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5695
5696     @_ = qw(sort map grep eval do);
5697     @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5698
5699     @_ = qw(if unless);
5700     @is_if_unless{@_} = (1) x scalar(@_);
5701
5702     @_ = qw(and or err);
5703     @is_and_or{@_} = (1) x scalar(@_);
5704
5705     # Identify certain operators which often occur in chains.
5706     # Note: the minus (-) causes a side effect of padding of the first line in
5707     # something like this (by sub set_logical_padding):
5708     #    Checkbutton => 'Transmission checked',
5709     #   -variable    => \$TRANS
5710     # This usually improves appearance so it seems ok.
5711     @_ = qw(&& || and or : ? . + - * /);
5712     @is_chain_operator{@_} = (1) x scalar(@_);
5713
5714     # We can remove semicolons after blocks preceded by these keywords
5715     @_ =
5716       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
5717       unless while until for foreach);
5718     @is_block_without_semicolon{@_} = (1) x scalar(@_);
5719
5720     # 'L' is token for opening { at hash key
5721     @_ = qw" L { ( [ ";
5722     @is_opening_type{@_} = (1) x scalar(@_);
5723
5724     # 'R' is token for closing } at hash key
5725     @_ = qw" R } ) ] ";
5726     @is_closing_type{@_} = (1) x scalar(@_);
5727
5728     @_ = qw" { ( [ ";
5729     @is_opening_token{@_} = (1) x scalar(@_);
5730
5731     @_ = qw" } ) ] ";
5732     @is_closing_token{@_} = (1) x scalar(@_);
5733 }
5734
5735 # whitespace codes
5736 use constant WS_YES      => 1;
5737 use constant WS_OPTIONAL => 0;
5738 use constant WS_NO       => -1;
5739
5740 # Token bond strengths.
5741 use constant NO_BREAK    => 10000;
5742 use constant VERY_STRONG => 100;
5743 use constant STRONG      => 2.1;
5744 use constant NOMINAL     => 1.1;
5745 use constant WEAK        => 0.8;
5746 use constant VERY_WEAK   => 0.55;
5747
5748 # values for testing indexes in output array
5749 use constant UNDEFINED_INDEX => -1;
5750
5751 # Maximum number of little messages; probably need not be changed.
5752 use constant MAX_NAG_MESSAGES => 6;
5753
5754 # increment between sequence numbers for each type
5755 # For example, ?: pairs might have numbers 7,11,15,...
5756 use constant TYPE_SEQUENCE_INCREMENT => 4;
5757
5758 {
5759
5760     # methods to count instances
5761     my $_count = 0;
5762     sub get_count        { $_count; }
5763     sub _increment_count { ++$_count }
5764     sub _decrement_count { --$_count }
5765 }
5766
5767 sub trim {
5768
5769     # trim leading and trailing whitespace from a string
5770     $_[0] =~ s/\s+$//;
5771     $_[0] =~ s/^\s+//;
5772     return $_[0];
5773 }
5774
5775 sub split_words {
5776
5777     # given a string containing words separated by whitespace,
5778     # return the list of words
5779     my ($str) = @_;
5780     return unless $str;
5781     $str =~ s/\s+$//;
5782     $str =~ s/^\s+//;
5783     return split( /\s+/, $str );
5784 }
5785
5786 # interface to Perl::Tidy::Logger routines
5787 sub warning {
5788     if ($logger_object) {
5789         $logger_object->warning(@_);
5790     }
5791 }
5792
5793 sub complain {
5794     if ($logger_object) {
5795         $logger_object->complain(@_);
5796     }
5797 }
5798
5799 sub write_logfile_entry {
5800     if ($logger_object) {
5801         $logger_object->write_logfile_entry(@_);
5802     }
5803 }
5804
5805 sub black_box {
5806     if ($logger_object) {
5807         $logger_object->black_box(@_);
5808     }
5809 }
5810
5811 sub report_definite_bug {
5812     if ($logger_object) {
5813         $logger_object->report_definite_bug();
5814     }
5815 }
5816
5817 sub get_saw_brace_error {
5818     if ($logger_object) {
5819         $logger_object->get_saw_brace_error();
5820     }
5821 }
5822
5823 sub we_are_at_the_last_line {
5824     if ($logger_object) {
5825         $logger_object->we_are_at_the_last_line();
5826     }
5827 }
5828
5829 # interface to Perl::Tidy::Diagnostics routine
5830 sub write_diagnostics {
5831
5832     if ($diagnostics_object) {
5833         $diagnostics_object->write_diagnostics(@_);
5834     }
5835 }
5836
5837 sub get_added_semicolon_count {
5838     my $self = shift;
5839     return $added_semicolon_count;
5840 }
5841
5842 sub DESTROY {
5843     $_[0]->_decrement_count();
5844 }
5845
5846 sub new {
5847
5848     my $class = shift;
5849
5850     # we are given an object with a write_line() method to take lines
5851     my %defaults = (
5852         sink_object        => undef,
5853         diagnostics_object => undef,
5854         logger_object      => undef,
5855     );
5856     my %args = ( %defaults, @_ );
5857
5858     $logger_object      = $args{logger_object};
5859     $diagnostics_object = $args{diagnostics_object};
5860
5861     # we create another object with a get_line() and peek_ahead() method
5862     my $sink_object = $args{sink_object};
5863     $file_writer_object =
5864       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5865
5866     # initialize the leading whitespace stack to negative levels
5867     # so that we can never run off the end of the stack
5868     $gnu_position_predictor = 0;    # where the current token is predicted to be
5869     $max_gnu_stack_index    = 0;
5870     $max_gnu_item_index     = -1;
5871     $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5872     @gnu_item_list               = ();
5873     $last_output_indentation     = 0;
5874     $last_indentation_written    = 0;
5875     $last_unadjusted_indentation = 0;
5876     $last_leading_token          = "";
5877
5878     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5879     $saw_END_or_DATA_         = 0;
5880
5881     @block_type_to_go            = ();
5882     @type_sequence_to_go         = ();
5883     @container_environment_to_go = ();
5884     @bond_strength_to_go         = ();
5885     @forced_breakpoint_to_go     = ();
5886     @lengths_to_go               = ();    # line length to start of ith token
5887     @levels_to_go                = ();
5888     @matching_token_to_go        = ();
5889     @mate_index_to_go            = ();
5890     @nesting_blocks_to_go        = ();
5891     @ci_levels_to_go             = ();
5892     @nesting_depth_to_go         = (0);
5893     @nobreak_to_go               = ();
5894     @old_breakpoint_to_go        = ();
5895     @tokens_to_go                = ();
5896     @types_to_go                 = ();
5897     @leading_spaces_to_go        = ();
5898     @reduced_spaces_to_go        = ();
5899
5900     @dont_align         = ();
5901     @has_broken_sublist = ();
5902     @want_comma_break   = ();
5903
5904     @ci_stack                   = ("");
5905     $first_tabbing_disagreement = 0;
5906     $last_tabbing_disagreement  = 0;
5907     $tabbing_disagreement_count = 0;
5908     $in_tabbing_disagreement    = 0;
5909     $input_line_tabbing         = undef;
5910
5911     $last_line_type               = "";
5912     $last_last_line_leading_level = 0;
5913     $last_line_leading_level      = 0;
5914     $last_line_leading_type       = '#';
5915
5916     $last_nonblank_token        = ';';
5917     $last_nonblank_type         = ';';
5918     $last_last_nonblank_token   = ';';
5919     $last_last_nonblank_type    = ';';
5920     $last_nonblank_block_type   = "";
5921     $last_output_level          = 0;
5922     $looking_for_else           = 0;
5923     $embedded_tab_count         = 0;
5924     $first_embedded_tab_at      = 0;
5925     $last_embedded_tab_at       = 0;
5926     $deleted_semicolon_count    = 0;
5927     $first_deleted_semicolon_at = 0;
5928     $last_deleted_semicolon_at  = 0;
5929     $added_semicolon_count      = 0;
5930     $first_added_semicolon_at   = 0;
5931     $last_added_semicolon_at    = 0;
5932     $last_line_had_side_comment = 0;
5933     $is_static_block_comment    = 0;
5934     %postponed_breakpoint       = ();
5935
5936     # variables for adding side comments
5937     %block_leading_text        = ();
5938     %block_opening_line_number = ();
5939     $csc_new_statement_ok      = 1;
5940
5941     %saved_opening_indentation  = ();
5942     $in_format_skipping_section = 0;
5943
5944     reset_block_text_accumulator();
5945
5946     prepare_for_new_input_lines();
5947
5948     $vertical_aligner_object =
5949       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5950         $logger_object, $diagnostics_object );
5951
5952     if ( $rOpts->{'entab-leading-whitespace'} ) {
5953         write_logfile_entry(
5954 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5955         );
5956     }
5957     elsif ( $rOpts->{'tabs'} ) {
5958         write_logfile_entry("Indentation will be with a tab character\n");
5959     }
5960     else {
5961         write_logfile_entry(
5962             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5963     }
5964
5965     # This was the start of a formatter referent, but object-oriented
5966     # coding has turned out to be too slow here.
5967     $formatter_self = {};
5968
5969     bless $formatter_self, $class;
5970
5971     # Safety check..this is not a class yet
5972     if ( _increment_count() > 1 ) {
5973         confess
5974 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
5975     }
5976     return $formatter_self;
5977 }
5978
5979 sub prepare_for_new_input_lines {
5980
5981     $gnu_sequence_number++;    # increment output batch counter
5982     %last_gnu_equals                = ();
5983     %gnu_comma_count                = ();
5984     %gnu_arrow_count                = ();
5985     $line_start_index_to_go         = 0;
5986     $max_gnu_item_index             = UNDEFINED_INDEX;
5987     $index_max_forced_break         = UNDEFINED_INDEX;
5988     $max_index_to_go                = UNDEFINED_INDEX;
5989     $last_nonblank_index_to_go      = UNDEFINED_INDEX;
5990     $last_nonblank_type_to_go       = '';
5991     $last_nonblank_token_to_go      = '';
5992     $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5993     $last_last_nonblank_type_to_go  = '';
5994     $last_last_nonblank_token_to_go = '';
5995     $forced_breakpoint_count        = 0;
5996     $forced_breakpoint_undo_count   = 0;
5997     $rbrace_follower                = undef;
5998     $lengths_to_go[0]               = 0;
5999     $old_line_count_in_batch        = 1;
6000     $comma_count_in_batch           = 0;
6001     $starting_in_quote              = 0;
6002
6003     destroy_one_line_block();
6004 }
6005
6006 sub write_line {
6007
6008     my $self = shift;
6009     my ($line_of_tokens) = @_;
6010
6011     my $line_type  = $line_of_tokens->{_line_type};
6012     my $input_line = $line_of_tokens->{_line_text};
6013
6014     # _line_type codes are:
6015     #   SYSTEM         - system-specific code before hash-bang line
6016     #   CODE           - line of perl code (including comments)
6017     #   POD_START      - line starting pod, such as '=head'
6018     #   POD            - pod documentation text
6019     #   POD_END        - last line of pod section, '=cut'
6020     #   HERE           - text of here-document
6021     #   HERE_END       - last line of here-doc (target word)
6022     #   FORMAT         - format section
6023     #   FORMAT_END     - last line of format section, '.'
6024     #   DATA_START     - __DATA__ line
6025     #   DATA           - unidentified text following __DATA__
6026     #   END_START      - __END__ line
6027     #   END            - unidentified text following __END__
6028     #   ERROR          - we are in big trouble, probably not a perl script
6029
6030     # put a blank line after an =cut which comes before __END__ and __DATA__
6031     # (required by podchecker)
6032     if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6033         $file_writer_object->reset_consecutive_blank_lines();
6034         if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6035     }
6036
6037     # handle line of code..
6038     if ( $line_type eq 'CODE' ) {
6039
6040         # let logger see all non-blank lines of code
6041         if ( $input_line !~ /^\s*$/ ) {
6042             my $output_line_number =
6043               $vertical_aligner_object->get_output_line_number();
6044             black_box( $line_of_tokens, $output_line_number );
6045         }
6046         print_line_of_tokens($line_of_tokens);
6047     }
6048
6049     # handle line of non-code..
6050     else {
6051
6052         # set special flags
6053         my $skip_line = 0;
6054         my $tee_line  = 0;
6055         if ( $line_type =~ /^POD/ ) {
6056
6057             # Pod docs should have a preceding blank line.  But be
6058             # very careful in __END__ and __DATA__ sections, because:
6059             #   1. the user may be using this section for any purpose whatsoever
6060             #   2. the blank counters are not active there
6061             # It should be safe to request a blank line between an
6062             # __END__ or __DATA__ and an immediately following '=head'
6063             # type line, (types END_START and DATA_START), but not for
6064             # any other lines of type END or DATA.
6065             if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6066             if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
6067             if (  !$skip_line
6068                 && $line_type eq 'POD_START'
6069                 && $last_line_type !~ /^(END|DATA)$/ )
6070             {
6071                 want_blank_line();
6072             }
6073         }
6074
6075         # leave the blank counters in a predictable state
6076         # after __END__ or __DATA__
6077         elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6078             $file_writer_object->reset_consecutive_blank_lines();
6079             $saw_END_or_DATA_ = 1;
6080         }
6081
6082         # write unindented non-code line
6083         if ( !$skip_line ) {
6084             if ($tee_line) { $file_writer_object->tee_on() }
6085             write_unindented_line($input_line);
6086             if ($tee_line) { $file_writer_object->tee_off() }
6087         }
6088     }
6089     $last_line_type = $line_type;
6090 }
6091
6092 sub create_one_line_block {
6093     $index_start_one_line_block            = $_[0];
6094     $semicolons_before_block_self_destruct = $_[1];
6095 }
6096
6097 sub destroy_one_line_block {
6098     $index_start_one_line_block            = UNDEFINED_INDEX;
6099     $semicolons_before_block_self_destruct = 0;
6100 }
6101
6102 sub leading_spaces_to_go {
6103
6104     # return the number of indentation spaces for a token in the output stream;
6105     # these were previously stored by 'set_leading_whitespace'.
6106
6107     return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6108
6109 }
6110
6111 sub get_SPACES {
6112
6113     # return the number of leading spaces associated with an indentation
6114     # variable $indentation is either a constant number of spaces or an object
6115     # with a get_SPACES method.
6116     my $indentation = shift;
6117     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6118 }
6119
6120 sub get_RECOVERABLE_SPACES {
6121
6122     # return the number of spaces (+ means shift right, - means shift left)
6123     # that we would like to shift a group of lines with the same indentation
6124     # to get them to line up with their opening parens
6125     my $indentation = shift;
6126     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6127 }
6128
6129 sub get_AVAILABLE_SPACES_to_go {
6130
6131     my $item = $leading_spaces_to_go[ $_[0] ];
6132
6133     # return the number of available leading spaces associated with an
6134     # indentation variable.  $indentation is either a constant number of
6135     # spaces or an object with a get_AVAILABLE_SPACES method.
6136     return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6137 }
6138
6139 sub new_lp_indentation_item {
6140
6141     # this is an interface to the IndentationItem class
6142     my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6143
6144     # A negative level implies not to store the item in the item_list
6145     my $index = 0;
6146     if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6147
6148     my $item = Perl::Tidy::IndentationItem->new(
6149         $spaces,      $level,
6150         $ci_level,    $available_spaces,
6151         $index,       $gnu_sequence_number,
6152         $align_paren, $max_gnu_stack_index,
6153         $line_start_index_to_go,
6154     );
6155
6156     if ( $level >= 0 ) {
6157         $gnu_item_list[$max_gnu_item_index] = $item;
6158     }
6159
6160     return $item;
6161 }
6162
6163 sub set_leading_whitespace {
6164
6165     # This routine defines leading whitespace
6166     # given: the level and continuation_level of a token,
6167     # define: space count of leading string which would apply if it
6168     # were the first token of a new line.
6169
6170     my ( $level, $ci_level, $in_continued_quote ) = @_;
6171
6172     # modify for -bli, which adds one continuation indentation for
6173     # opening braces
6174     if (   $rOpts_brace_left_and_indent
6175         && $max_index_to_go == 0
6176         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6177     {
6178         $ci_level++;
6179     }
6180
6181     # patch to avoid trouble when input file has negative indentation.
6182     # other logic should catch this error.
6183     if ( $level < 0 ) { $level = 0 }
6184
6185     #-------------------------------------------
6186     # handle the standard indentation scheme
6187     #-------------------------------------------
6188     unless ($rOpts_line_up_parentheses) {
6189         my $space_count =
6190           $ci_level * $rOpts_continuation_indentation +
6191           $level * $rOpts_indent_columns;
6192         my $ci_spaces =
6193           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6194
6195         if ($in_continued_quote) {
6196             $space_count = 0;
6197             $ci_spaces   = 0;
6198         }
6199         $leading_spaces_to_go[$max_index_to_go] = $space_count;
6200         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6201         return;
6202     }
6203
6204     #-------------------------------------------------------------
6205     # handle case of -lp indentation..
6206     #-------------------------------------------------------------
6207
6208     # The continued_quote flag means that this is the first token of a
6209     # line, and it is the continuation of some kind of multi-line quote
6210     # or pattern.  It requires special treatment because it must have no
6211     # added leading whitespace. So we create a special indentation item
6212     # which is not in the stack.
6213     if ($in_continued_quote) {
6214         my $space_count     = 0;
6215         my $available_space = 0;
6216         $level = -1;    # flag to prevent storing in item_list
6217         $leading_spaces_to_go[$max_index_to_go] =
6218           $reduced_spaces_to_go[$max_index_to_go] =
6219           new_lp_indentation_item( $space_count, $level, $ci_level,
6220             $available_space, 0 );
6221         return;
6222     }
6223
6224     # get the top state from the stack
6225     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6226     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6227     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6228
6229     my $type        = $types_to_go[$max_index_to_go];
6230     my $token       = $tokens_to_go[$max_index_to_go];
6231     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6232
6233     if ( $type eq '{' || $type eq '(' ) {
6234
6235         $gnu_comma_count{ $total_depth + 1 } = 0;
6236         $gnu_arrow_count{ $total_depth + 1 } = 0;
6237
6238         # If we come to an opening token after an '=' token of some type,
6239         # see if it would be helpful to 'break' after the '=' to save space
6240         my $last_equals = $last_gnu_equals{$total_depth};
6241         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6242
6243             # find the position if we break at the '='
6244             my $i_test = $last_equals;
6245             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6246
6247             # TESTING
6248             ##my $too_close = ($i_test==$max_index_to_go-1);
6249
6250             my $test_position = total_line_length( $i_test, $max_index_to_go );
6251
6252             if (
6253
6254                 # the equals is not just before an open paren (testing)
6255                 ##!$too_close &&
6256
6257                 # if we are beyond the midpoint
6258                 $gnu_position_predictor > $half_maximum_line_length
6259
6260                 # or we are beyont the 1/4 point and there was an old
6261                 # break at the equals
6262                 || (
6263                     $gnu_position_predictor > $half_maximum_line_length / 2
6264                     && (
6265                         $old_breakpoint_to_go[$last_equals]
6266                         || (   $last_equals > 0
6267                             && $old_breakpoint_to_go[ $last_equals - 1 ] )
6268                         || (   $last_equals > 1
6269                             && $types_to_go[ $last_equals - 1 ] eq 'b'
6270                             && $old_breakpoint_to_go[ $last_equals - 2 ] )
6271                     )
6272                 )
6273               )
6274             {
6275
6276                 # then make the switch -- note that we do not set a real
6277                 # breakpoint here because we may not really need one; sub
6278                 # scan_list will do that if necessary
6279                 $line_start_index_to_go = $i_test + 1;
6280                 $gnu_position_predictor = $test_position;
6281             }
6282         }
6283     }
6284
6285     # Check for decreasing depth ..
6286     # Note that one token may have both decreasing and then increasing
6287     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
6288     # in this example we would first go back to (1,0) then up to (2,0)
6289     # in a single call.
6290     if ( $level < $current_level || $ci_level < $current_ci_level ) {
6291
6292         # loop to find the first entry at or completely below this level
6293         my ( $lev, $ci_lev );
6294         while (1) {
6295             if ($max_gnu_stack_index) {
6296
6297                 # save index of token which closes this level
6298                 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6299
6300                 # Undo any extra indentation if we saw no commas
6301                 my $available_spaces =
6302                   $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6303
6304                 my $comma_count = 0;
6305                 my $arrow_count = 0;
6306                 if ( $type eq '}' || $type eq ')' ) {
6307                     $comma_count = $gnu_comma_count{$total_depth};
6308                     $arrow_count = $gnu_arrow_count{$total_depth};
6309                     $comma_count = 0 unless $comma_count;
6310                     $arrow_count = 0 unless $arrow_count;
6311                 }
6312                 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6313                 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6314
6315                 if ( $available_spaces > 0 ) {
6316
6317                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
6318
6319                         my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6320                         my $seqno =
6321                           $gnu_stack[$max_gnu_stack_index]
6322                           ->get_SEQUENCE_NUMBER();
6323
6324                         # Be sure this item was created in this batch.  This
6325                         # should be true because we delete any available
6326                         # space from open items at the end of each batch.
6327                         if (   $gnu_sequence_number != $seqno
6328                             || $i > $max_gnu_item_index )
6329                         {
6330                             warning(
6331 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6332                             );
6333                             report_definite_bug();
6334                         }
6335
6336                         else {
6337                             if ( $arrow_count == 0 ) {
6338                                 $gnu_item_list[$i]
6339                                   ->permanently_decrease_AVAILABLE_SPACES(
6340                                     $available_spaces);
6341                             }
6342                             else {
6343                                 $gnu_item_list[$i]
6344                                   ->tentatively_decrease_AVAILABLE_SPACES(
6345                                     $available_spaces);
6346                             }
6347
6348                             my $j;
6349                             for (
6350                                 $j = $i + 1 ;
6351                                 $j <= $max_gnu_item_index ;
6352                                 $j++
6353                               )
6354                             {
6355                                 $gnu_item_list[$j]
6356                                   ->decrease_SPACES($available_spaces);
6357                             }
6358                         }
6359                     }
6360                 }
6361
6362                 # go down one level
6363                 --$max_gnu_stack_index;
6364                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6365                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6366
6367                 # stop when we reach a level at or below the current level
6368                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6369                     $space_count =
6370                       $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6371                     $current_level    = $lev;
6372                     $current_ci_level = $ci_lev;
6373                     last;
6374                 }
6375             }
6376
6377             # reached bottom of stack .. should never happen because
6378             # only negative levels can get here, and $level was forced
6379             # to be positive above.
6380             else {
6381                 warning(
6382 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6383                 );
6384                 report_definite_bug();
6385                 last;
6386             }
6387         }
6388     }
6389
6390     # handle increasing depth
6391     if ( $level > $current_level || $ci_level > $current_ci_level ) {
6392
6393         # Compute the standard incremental whitespace.  This will be
6394         # the minimum incremental whitespace that will be used.  This
6395         # choice results in a smooth transition between the gnu-style
6396         # and the standard style.
6397         my $standard_increment =
6398           ( $level - $current_level ) * $rOpts_indent_columns +
6399           ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6400
6401         # Now we have to define how much extra incremental space
6402         # ("$available_space") we want.  This extra space will be
6403         # reduced as necessary when long lines are encountered or when
6404         # it becomes clear that we do not have a good list.
6405         my $available_space = 0;
6406         my $align_paren     = 0;
6407         my $excess          = 0;
6408
6409         # initialization on empty stack..
6410         if ( $max_gnu_stack_index == 0 ) {
6411             $space_count = $level * $rOpts_indent_columns;
6412         }
6413
6414         # if this is a BLOCK, add the standard increment
6415         elsif ($last_nonblank_block_type) {
6416             $space_count += $standard_increment;
6417         }
6418
6419         # if last nonblank token was not structural indentation,
6420         # just use standard increment
6421         elsif ( $last_nonblank_type ne '{' ) {
6422             $space_count += $standard_increment;
6423         }
6424
6425         # otherwise use the space to the first non-blank level change token
6426         else {
6427
6428             $space_count = $gnu_position_predictor;
6429
6430             my $min_gnu_indentation =
6431               $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6432
6433             $available_space = $space_count - $min_gnu_indentation;
6434             if ( $available_space >= $standard_increment ) {
6435                 $min_gnu_indentation += $standard_increment;
6436             }
6437             elsif ( $available_space > 1 ) {
6438                 $min_gnu_indentation += $available_space + 1;
6439             }
6440             elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6441                 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6442                     $min_gnu_indentation += 2;
6443                 }
6444                 else {
6445                     $min_gnu_indentation += 1;
6446                 }
6447             }
6448             else {
6449                 $min_gnu_indentation += $standard_increment;
6450             }
6451             $available_space = $space_count - $min_gnu_indentation;
6452
6453             if ( $available_space < 0 ) {
6454                 $space_count     = $min_gnu_indentation;
6455                 $available_space = 0;
6456             }
6457             $align_paren = 1;
6458         }
6459
6460         # update state, but not on a blank token
6461         if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6462
6463             $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6464
6465             ++$max_gnu_stack_index;
6466             $gnu_stack[$max_gnu_stack_index] =
6467               new_lp_indentation_item( $space_count, $level, $ci_level,
6468                 $available_space, $align_paren );
6469
6470             # If the opening paren is beyond the half-line length, then
6471             # we will use the minimum (standard) indentation.  This will
6472             # help avoid problems associated with running out of space
6473             # near the end of a line.  As a result, in deeply nested
6474             # lists, there will be some indentations which are limited
6475             # to this minimum standard indentation. But the most deeply
6476             # nested container will still probably be able to shift its
6477             # parameters to the right for proper alignment, so in most
6478             # cases this will not be noticable.
6479             if (   $available_space > 0
6480                 && $space_count > $half_maximum_line_length )
6481             {
6482                 $gnu_stack[$max_gnu_stack_index]
6483                   ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6484             }
6485         }
6486     }
6487
6488     # Count commas and look for non-list characters.  Once we see a
6489     # non-list character, we give up and don't look for any more commas.
6490     if ( $type eq '=>' ) {
6491         $gnu_arrow_count{$total_depth}++;
6492
6493         # tentatively treating '=>' like '=' for estimating breaks
6494         # TODO: this could use some experimentation
6495         $last_gnu_equals{$total_depth} = $max_index_to_go;
6496     }
6497
6498     elsif ( $type eq ',' ) {
6499         $gnu_comma_count{$total_depth}++;
6500     }
6501
6502     elsif ( $is_assignment{$type} ) {
6503         $last_gnu_equals{$total_depth} = $max_index_to_go;
6504     }
6505
6506     # this token might start a new line
6507     # if this is a non-blank..
6508     if ( $type ne 'b' ) {
6509
6510         # and if ..
6511         if (
6512
6513             # this is the first nonblank token of the line
6514             $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6515
6516             # or previous character was one of these:
6517             || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6518
6519             # or previous character was opening and this does not close it
6520             || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6521             || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6522
6523             # or this token is one of these:
6524             || $type =~ /^([\.]|\|\||\&\&)$/
6525
6526             # or this is a closing structure
6527             || (   $last_nonblank_type_to_go eq '}'
6528                 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6529
6530             # or previous token was keyword 'return'
6531             || ( $last_nonblank_type_to_go eq 'k'
6532                 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6533
6534             # or starting a new line at certain keywords is fine
6535             || (   $type eq 'k'
6536                 && $is_if_unless_and_or_last_next_redo_return{$token} )
6537
6538             # or this is after an assignment after a closing structure
6539             || (
6540                 $is_assignment{$last_nonblank_type_to_go}
6541                 && (
6542                     $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6543
6544                     # and it is significantly to the right
6545                     || $gnu_position_predictor > $half_maximum_line_length
6546                 )
6547             )
6548           )
6549         {
6550             check_for_long_gnu_style_lines();
6551             $line_start_index_to_go = $max_index_to_go;
6552
6553             # back up 1 token if we want to break before that type
6554             # otherwise, we may strand tokens like '?' or ':' on a line
6555             if ( $line_start_index_to_go > 0 ) {
6556                 if ( $last_nonblank_type_to_go eq 'k' ) {
6557
6558                     if ( $want_break_before{$last_nonblank_token_to_go} ) {
6559                         $line_start_index_to_go--;
6560                     }
6561                 }
6562                 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6563                     $line_start_index_to_go--;
6564                 }
6565             }
6566         }
6567     }
6568
6569     # remember the predicted position of this token on the output line
6570     if ( $max_index_to_go > $line_start_index_to_go ) {
6571         $gnu_position_predictor =
6572           total_line_length( $line_start_index_to_go, $max_index_to_go );
6573     }
6574     else {
6575         $gnu_position_predictor = $space_count +
6576           token_sequence_length( $max_index_to_go, $max_index_to_go );
6577     }
6578
6579     # store the indentation object for this token
6580     # this allows us to manipulate the leading whitespace
6581     # (in case we have to reduce indentation to fit a line) without
6582     # having to change any token values
6583     $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6584     $reduced_spaces_to_go[$max_index_to_go] =
6585       ( $max_gnu_stack_index > 0 && $ci_level )
6586       ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6587       : $gnu_stack[$max_gnu_stack_index];
6588     return;
6589 }
6590
6591 sub check_for_long_gnu_style_lines {
6592
6593     # look at the current estimated maximum line length, and
6594     # remove some whitespace if it exceeds the desired maximum
6595
6596     # this is only for the '-lp' style
6597     return unless ($rOpts_line_up_parentheses);
6598
6599     # nothing can be done if no stack items defined for this line
6600     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6601
6602     # see if we have exceeded the maximum desired line length
6603     # keep 2 extra free because they are needed in some cases
6604     # (result of trial-and-error testing)
6605     my $spaces_needed =
6606       $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6607
6608     return if ( $spaces_needed <= 0 );
6609
6610     # We are over the limit, so try to remove a requested number of
6611     # spaces from leading whitespace.  We are only allowed to remove
6612     # from whitespace items created on this batch, since others have
6613     # already been used and cannot be undone.
6614     my @candidates = ();
6615     my $i;
6616
6617     # loop over all whitespace items created for the current batch
6618     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6619         my $item = $gnu_item_list[$i];
6620
6621         # item must still be open to be a candidate (otherwise it
6622         # cannot influence the current token)
6623         next if ( $item->get_CLOSED() >= 0 );
6624
6625         my $available_spaces = $item->get_AVAILABLE_SPACES();
6626
6627         if ( $available_spaces > 0 ) {
6628             push( @candidates, [ $i, $available_spaces ] );
6629         }
6630     }
6631
6632     return unless (@candidates);
6633
6634     # sort by available whitespace so that we can remove whitespace
6635     # from the maximum available first
6636     @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6637
6638     # keep removing whitespace until we are done or have no more
6639     my $candidate;
6640     foreach $candidate (@candidates) {
6641         my ( $i, $available_spaces ) = @{$candidate};
6642         my $deleted_spaces =
6643           ( $available_spaces > $spaces_needed )
6644           ? $spaces_needed
6645           : $available_spaces;
6646
6647         # remove the incremental space from this item
6648         $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6649
6650         my $i_debug = $i;
6651
6652         # update the leading whitespace of this item and all items
6653         # that came after it
6654         for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6655
6656             my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6657             if ( $old_spaces >= $deleted_spaces ) {
6658                 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6659             }
6660
6661             # shouldn't happen except for code bug:
6662             else {
6663                 my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
6664                 my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6665                 my $old_level    = $gnu_item_list[$i]->get_LEVEL();
6666                 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6667                 warning(
6668 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
6669                 );
6670                 report_definite_bug();
6671             }
6672         }
6673         $gnu_position_predictor -= $deleted_spaces;
6674         $spaces_needed          -= $deleted_spaces;
6675         last unless ( $spaces_needed > 0 );
6676     }
6677 }
6678
6679 sub finish_lp_batch {
6680
6681     # This routine is called once after each each output stream batch is
6682     # finished to undo indentation for all incomplete -lp
6683     # indentation levels.  It is too risky to leave a level open,
6684     # because then we can't backtrack in case of a long line to follow.
6685     # This means that comments and blank lines will disrupt this
6686     # indentation style.  But the vertical aligner may be able to
6687     # get the space back if there are side comments.
6688
6689     # this is only for the 'lp' style
6690     return unless ($rOpts_line_up_parentheses);
6691
6692     # nothing can be done if no stack items defined for this line
6693     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6694
6695     # loop over all whitespace items created for the current batch
6696     my $i;
6697     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6698         my $item = $gnu_item_list[$i];
6699
6700         # only look for open items
6701         next if ( $item->get_CLOSED() >= 0 );
6702
6703         # Tentatively remove all of the available space
6704         # (The vertical aligner will try to get it back later)
6705         my $available_spaces = $item->get_AVAILABLE_SPACES();
6706         if ( $available_spaces > 0 ) {
6707
6708             # delete incremental space for this item
6709             $gnu_item_list[$i]
6710               ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6711
6712             # Reduce the total indentation space of any nodes that follow
6713             # Note that any such nodes must necessarily be dependents
6714             # of this node.
6715             foreach ( $i + 1 .. $max_gnu_item_index ) {
6716                 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6717             }
6718         }
6719     }
6720     return;
6721 }
6722
6723 sub reduce_lp_indentation {
6724
6725     # reduce the leading whitespace at token $i if possible by $spaces_needed
6726     # (a large value of $spaces_needed will remove all excess space)
6727     # NOTE: to be called from scan_list only for a sequence of tokens
6728     # contained between opening and closing parens/braces/brackets
6729
6730     my ( $i, $spaces_wanted ) = @_;
6731     my $deleted_spaces = 0;
6732
6733     my $item             = $leading_spaces_to_go[$i];
6734     my $available_spaces = $item->get_AVAILABLE_SPACES();
6735
6736     if (
6737         $available_spaces > 0
6738         && ( ( $spaces_wanted <= $available_spaces )
6739             || !$item->get_HAVE_CHILD() )
6740       )
6741     {
6742
6743         # we'll remove these spaces, but mark them as recoverable
6744         $deleted_spaces =
6745           $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6746     }
6747
6748     return $deleted_spaces;
6749 }
6750
6751 sub token_sequence_length {
6752
6753     # return length of tokens ($ifirst .. $ilast) including first & last
6754     # returns 0 if $ifirst > $ilast
6755     my $ifirst = shift;
6756     my $ilast  = shift;
6757     return 0 if ( $ilast < 0 || $ifirst > $ilast );
6758     return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6759     return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6760 }
6761
6762 sub total_line_length {
6763
6764     # return length of a line of tokens ($ifirst .. $ilast)
6765     my $ifirst = shift;
6766     my $ilast  = shift;
6767     if ( $ifirst < 0 ) { $ifirst = 0 }
6768
6769     return leading_spaces_to_go($ifirst) +
6770       token_sequence_length( $ifirst, $ilast );
6771 }
6772
6773 sub excess_line_length {
6774
6775     # return number of characters by which a line of tokens ($ifirst..$ilast)
6776     # exceeds the allowable line length.
6777     my $ifirst = shift;
6778     my $ilast  = shift;
6779     if ( $ifirst < 0 ) { $ifirst = 0 }
6780     return leading_spaces_to_go($ifirst) +
6781       token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6782 }
6783
6784 sub finish_formatting {
6785
6786     # flush buffer and write any informative messages
6787     my $self = shift;
6788
6789     flush();
6790     $file_writer_object->decrement_output_line_number()
6791       ;    # fix up line number since it was incremented
6792     we_are_at_the_last_line();
6793     if ( $added_semicolon_count > 0 ) {
6794         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6795         my $what =
6796           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6797         write_logfile_entry("$added_semicolon_count $what added:\n");
6798         write_logfile_entry(
6799             "  $first at input line $first_added_semicolon_at\n");
6800
6801         if ( $added_semicolon_count > 1 ) {
6802             write_logfile_entry(
6803                 "   Last at input line $last_added_semicolon_at\n");
6804         }
6805         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
6806         write_logfile_entry("\n");
6807     }
6808
6809     if ( $deleted_semicolon_count > 0 ) {
6810         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6811         my $what =
6812           ( $deleted_semicolon_count > 1 )
6813           ? "semicolons were"
6814           : "semicolon was";
6815         write_logfile_entry(
6816             "$deleted_semicolon_count unnecessary $what deleted:\n");
6817         write_logfile_entry(
6818             "  $first at input line $first_deleted_semicolon_at\n");
6819
6820         if ( $deleted_semicolon_count > 1 ) {
6821             write_logfile_entry(
6822                 "   Last at input line $last_deleted_semicolon_at\n");
6823         }
6824         write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
6825         write_logfile_entry("\n");
6826     }
6827
6828     if ( $embedded_tab_count > 0 ) {
6829         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6830         my $what =
6831           ( $embedded_tab_count > 1 )
6832           ? "quotes or patterns"
6833           : "quote or pattern";
6834         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6835         write_logfile_entry(
6836 "This means the display of this script could vary with device or software\n"
6837         );
6838         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
6839
6840         if ( $embedded_tab_count > 1 ) {
6841             write_logfile_entry(
6842                 "   Last at input line $last_embedded_tab_at\n");
6843         }
6844         write_logfile_entry("\n");
6845     }
6846
6847     if ($first_tabbing_disagreement) {
6848         write_logfile_entry(
6849 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
6850         );
6851     }
6852
6853     if ($in_tabbing_disagreement) {
6854         write_logfile_entry(
6855 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
6856         );
6857     }
6858     else {
6859
6860         if ($last_tabbing_disagreement) {
6861
6862             write_logfile_entry(
6863 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
6864             );
6865         }
6866         else {
6867             write_logfile_entry("No indentation disagreement seen\n");
6868         }
6869     }
6870     write_logfile_entry("\n");
6871
6872     $vertical_aligner_object->report_anything_unusual();
6873
6874     $file_writer_object->report_line_length_errors();
6875 }
6876
6877 sub check_options {
6878
6879     # This routine is called to check the Opts hash after it is defined
6880
6881     ($rOpts) = @_;
6882     my ( $tabbing_string, $tab_msg );
6883
6884     make_static_block_comment_pattern();
6885     make_static_side_comment_pattern();
6886     make_closing_side_comment_prefix();
6887     make_closing_side_comment_list_pattern();
6888     $format_skipping_pattern_begin =
6889       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
6890     $format_skipping_pattern_end =
6891       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
6892
6893     # If closing side comments ARE selected, then we can safely
6894     # delete old closing side comments unless closing side comment
6895     # warnings are requested.  This is a good idea because it will
6896     # eliminate any old csc's which fall below the line count threshold.
6897     # We cannot do this if warnings are turned on, though, because we
6898     # might delete some text which has been added.  So that must
6899     # be handled when comments are created.
6900     if ( $rOpts->{'closing-side-comments'} ) {
6901         if ( !$rOpts->{'closing-side-comment-warnings'} ) {
6902             $rOpts->{'delete-closing-side-comments'} = 1;
6903         }
6904     }
6905
6906     # If closing side comments ARE NOT selected, but warnings ARE
6907     # selected and we ARE DELETING csc's, then we will pretend to be
6908     # adding with a huge interval.  This will force the comments to be
6909     # generated for comparison with the old comments, but not added.
6910     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
6911         if ( $rOpts->{'delete-closing-side-comments'} ) {
6912             $rOpts->{'delete-closing-side-comments'}  = 0;
6913             $rOpts->{'closing-side-comments'}         = 1;
6914             $rOpts->{'closing-side-comment-interval'} = 100000000;
6915         }
6916     }
6917
6918     make_bli_pattern();
6919     make_block_brace_vertical_tightness_pattern();
6920
6921     if ( $rOpts->{'line-up-parentheses'} ) {
6922
6923         if (   $rOpts->{'indent-only'}
6924             || !$rOpts->{'add-newlines'}
6925             || !$rOpts->{'delete-old-newlines'} )
6926         {
6927             warn <<EOM;
6928 -----------------------------------------------------------------------
6929 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
6930
6931 The -lp indentation logic requires that perltidy be able to coordinate
6932 arbitrarily large numbers of line breakpoints.  This isn't possible
6933 with these flags. Sometimes an acceptable workaround is to use -wocb=3
6934 -----------------------------------------------------------------------
6935 EOM
6936             $rOpts->{'line-up-parentheses'} = 0;
6937         }
6938     }
6939
6940     # At present, tabs are not compatable with the line-up-parentheses style
6941     # (it would be possible to entab the total leading whitespace
6942     # just prior to writing the line, if desired).
6943     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
6944         warn <<EOM;
6945 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
6946 EOM
6947         $rOpts->{'tabs'} = 0;
6948     }
6949
6950     # Likewise, tabs are not compatable with outdenting..
6951     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
6952         warn <<EOM;
6953 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
6954 EOM
6955         $rOpts->{'tabs'} = 0;
6956     }
6957
6958     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
6959         warn <<EOM;
6960 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
6961 EOM
6962         $rOpts->{'tabs'} = 0;
6963     }
6964
6965     if ( !$rOpts->{'space-for-semicolon'} ) {
6966         $want_left_space{'f'} = -1;
6967     }
6968
6969     if ( $rOpts->{'space-terminal-semicolon'} ) {
6970         $want_left_space{';'} = 1;
6971     }
6972
6973     # implement outdenting preferences for keywords
6974     %outdent_keyword = ();
6975     unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
6976         @_ = qw(next last redo goto return);    # defaults
6977     }
6978
6979     # FUTURE: if not a keyword, assume that it is an identifier
6980     foreach (@_) {
6981         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
6982             $outdent_keyword{$_} = 1;
6983         }
6984         else {
6985             warn "ignoring '$_' in -okwl list; not a perl keyword";
6986         }
6987     }
6988
6989     # implement user whitespace preferences
6990     if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
6991         @want_left_space{@_} = (1) x scalar(@_);
6992     }
6993
6994     if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
6995         @want_right_space{@_} = (1) x scalar(@_);
6996     }
6997
6998     if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
6999         @want_left_space{@_} = (-1) x scalar(@_);
7000     }
7001
7002     if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7003         @want_right_space{@_} = (-1) x scalar(@_);
7004     }
7005     if ( $rOpts->{'dump-want-left-space'} ) {
7006         dump_want_left_space(*STDOUT);
7007         exit 1;
7008     }
7009
7010     if ( $rOpts->{'dump-want-right-space'} ) {
7011         dump_want_right_space(*STDOUT);
7012         exit 1;
7013     }
7014
7015     # default keywords for which space is introduced before an opening paren
7016     # (at present, including them messes up vertical alignment)
7017     @_ = qw(my local our and or err eq ne if else elsif until
7018       unless while for foreach return switch case given when);
7019     @space_after_keyword{@_} = (1) x scalar(@_);
7020
7021     # allow user to modify these defaults
7022     if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7023         @space_after_keyword{@_} = (1) x scalar(@_);
7024     }
7025
7026     if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7027         @space_after_keyword{@_} = (0) x scalar(@_);
7028     }
7029
7030     # implement user break preferences
7031     my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7032       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7033       . : ? && || and or err xor
7034     );
7035
7036     my $break_after = sub {
7037         foreach my $tok (@_) {
7038             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
7039             my $lbs = $left_bond_strength{$tok};
7040             my $rbs = $right_bond_strength{$tok};
7041             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7042                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7043                   ( $lbs, $rbs );
7044             }
7045         }
7046     };
7047
7048     my $break_before = sub {
7049         foreach my $tok (@_) {
7050             my $lbs = $left_bond_strength{$tok};
7051             my $rbs = $right_bond_strength{$tok};
7052             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7053                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7054                   ( $lbs, $rbs );
7055             }
7056         }
7057     };
7058
7059     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7060     $break_before->(@all_operators)
7061       if ( $rOpts->{'break-before-all-operators'} );
7062
7063     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7064     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7065
7066     # make note if breaks are before certain key types
7067     %want_break_before = ();
7068     foreach my $tok ( @all_operators, ',' ) {
7069         $want_break_before{$tok} =
7070           $left_bond_strength{$tok} < $right_bond_strength{$tok};
7071     }
7072
7073     # Coordinate ?/: breaks, which must be similar
7074     if ( !$want_break_before{':'} ) {
7075         $want_break_before{'?'}   = $want_break_before{':'};
7076         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7077         $left_bond_strength{'?'}  = NO_BREAK;
7078     }
7079
7080     # Define here tokens which may follow the closing brace of a do statement
7081     # on the same line, as in:
7082     #   } while ( $something);
7083     @_ = qw(until while unless if ; : );
7084     push @_, ',';
7085     @is_do_follower{@_} = (1) x scalar(@_);
7086
7087     # These tokens may follow the closing brace of an if or elsif block.
7088     # In other words, for cuddled else we want code to look like:
7089     #   } elsif ( $something) {
7090     #   } else {
7091     if ( $rOpts->{'cuddled-else'} ) {
7092         @_ = qw(else elsif);
7093         @is_if_brace_follower{@_} = (1) x scalar(@_);
7094     }
7095     else {
7096         %is_if_brace_follower = ();
7097     }
7098
7099     # nothing can follow the closing curly of an else { } block:
7100     %is_else_brace_follower = ();
7101
7102     # what can follow a multi-line anonymous sub definition closing curly:
7103     @_ = qw# ; : => or and  && || ~~ !~~ ) #;
7104     push @_, ',';
7105     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7106
7107     # what can follow a one-line anonynomous sub closing curly:
7108     # one-line anonumous subs also have ']' here...
7109     # see tk3.t and PP.pm
7110     @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
7111     push @_, ',';
7112     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7113
7114     # What can follow a closing curly of a block
7115     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7116     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7117     @_ = qw#  ; : => or and  && || ) #;
7118     push @_, ',';
7119
7120     # allow cuddled continue if cuddled else is specified
7121     if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7122
7123     @is_other_brace_follower{@_} = (1) x scalar(@_);
7124
7125     $right_bond_strength{'{'} = WEAK;
7126     $left_bond_strength{'{'}  = VERY_STRONG;
7127
7128     # make -l=0  equal to -l=infinite
7129     if ( !$rOpts->{'maximum-line-length'} ) {
7130         $rOpts->{'maximum-line-length'} = 1000000;
7131     }
7132
7133     # make -lbl=0  equal to -lbl=infinite
7134     if ( !$rOpts->{'long-block-line-count'} ) {
7135         $rOpts->{'long-block-line-count'} = 1000000;
7136     }
7137
7138     my $ole = $rOpts->{'output-line-ending'};
7139     if ($ole) {
7140         my %endings = (
7141             dos  => "\015\012",
7142             win  => "\015\012",
7143             mac  => "\015",
7144             unix => "\012",
7145         );
7146         $ole = lc $ole;
7147         unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7148             my $str = join " ", keys %endings;
7149             die <<EOM;
7150 Unrecognized line ending '$ole'; expecting one of: $str
7151 EOM
7152         }
7153         if ( $rOpts->{'preserve-line-endings'} ) {
7154             warn "Ignoring -ple; conflicts with -ole\n";
7155             $rOpts->{'preserve-line-endings'} = undef;
7156         }
7157     }
7158
7159     # hashes used to simplify setting whitespace
7160     %tightness = (
7161         '{' => $rOpts->{'brace-tightness'},
7162         '}' => $rOpts->{'brace-tightness'},
7163         '(' => $rOpts->{'paren-tightness'},
7164         ')' => $rOpts->{'paren-tightness'},
7165         '[' => $rOpts->{'square-bracket-tightness'},
7166         ']' => $rOpts->{'square-bracket-tightness'},
7167     );
7168     %matching_token = (
7169         '{' => '}',
7170         '(' => ')',
7171         '[' => ']',
7172         '?' => ':',
7173     );
7174
7175     # frequently used parameters
7176     $rOpts_add_newlines          = $rOpts->{'add-newlines'};
7177     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
7178     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7179     $rOpts_block_brace_vertical_tightness =
7180       $rOpts->{'block-brace-vertical-tightness'};
7181     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
7182     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7183     $rOpts_break_at_old_ternary_breakpoints =
7184       $rOpts->{'break-at-old-ternary-breakpoints'};
7185     $rOpts_break_at_old_comma_breakpoints =
7186       $rOpts->{'break-at-old-comma-breakpoints'};
7187     $rOpts_break_at_old_keyword_breakpoints =
7188       $rOpts->{'break-at-old-keyword-breakpoints'};
7189     $rOpts_break_at_old_logical_breakpoints =
7190       $rOpts->{'break-at-old-logical-breakpoints'};
7191     $rOpts_closing_side_comment_else_flag =
7192       $rOpts->{'closing-side-comment-else-flag'};
7193     $rOpts_closing_side_comment_maximum_text =
7194       $rOpts->{'closing-side-comment-maximum-text'};
7195     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7196     $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
7197     $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
7198     $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
7199     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
7200     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
7201     $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7202     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
7203     $rOpts_short_concatenation_item_length =
7204       $rOpts->{'short-concatenation-item-length'};
7205     $rOpts_keep_old_blank_lines     = $rOpts->{'keep-old-blank-lines'};
7206     $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
7207     $rOpts_format_skipping          = $rOpts->{'format-skipping'};
7208     $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
7209     $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
7210     $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7211     $half_maximum_line_length       = $rOpts_maximum_line_length / 2;
7212
7213     # Note that both opening and closing tokens can access the opening
7214     # and closing flags of their container types.
7215     %opening_vertical_tightness = (
7216         '(' => $rOpts->{'paren-vertical-tightness'},
7217         '{' => $rOpts->{'brace-vertical-tightness'},
7218         '[' => $rOpts->{'square-bracket-vertical-tightness'},
7219         ')' => $rOpts->{'paren-vertical-tightness'},
7220         '}' => $rOpts->{'brace-vertical-tightness'},
7221         ']' => $rOpts->{'square-bracket-vertical-tightness'},
7222     );
7223
7224     %closing_vertical_tightness = (
7225         '(' => $rOpts->{'paren-vertical-tightness-closing'},
7226         '{' => $rOpts->{'brace-vertical-tightness-closing'},
7227         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7228         ')' => $rOpts->{'paren-vertical-tightness-closing'},
7229         '}' => $rOpts->{'brace-vertical-tightness-closing'},
7230         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7231     );
7232
7233     # assume flag for '>' same as ')' for closing qw quotes
7234     %closing_token_indentation = (
7235         ')' => $rOpts->{'closing-paren-indentation'},
7236         '}' => $rOpts->{'closing-brace-indentation'},
7237         ']' => $rOpts->{'closing-square-bracket-indentation'},
7238         '>' => $rOpts->{'closing-paren-indentation'},
7239     );
7240
7241     %opening_token_right = (
7242         '(' => $rOpts->{'opening-paren-right'},
7243         '{' => $rOpts->{'opening-hash-brace-right'},
7244         '[' => $rOpts->{'opening-square-bracket-right'},
7245     );
7246
7247     %stack_opening_token = (
7248         '(' => $rOpts->{'stack-opening-paren'},
7249         '{' => $rOpts->{'stack-opening-hash-brace'},
7250         '[' => $rOpts->{'stack-opening-square-bracket'},
7251     );
7252
7253     %stack_closing_token = (
7254         ')' => $rOpts->{'stack-closing-paren'},
7255         '}' => $rOpts->{'stack-closing-hash-brace'},
7256         ']' => $rOpts->{'stack-closing-square-bracket'},
7257     );
7258 }
7259
7260 sub make_static_block_comment_pattern {
7261
7262     # create the pattern used to identify static block comments
7263     $static_block_comment_pattern = '^\s*##';
7264
7265     # allow the user to change it
7266     if ( $rOpts->{'static-block-comment-prefix'} ) {
7267         my $prefix = $rOpts->{'static-block-comment-prefix'};
7268         $prefix =~ s/^\s*//;
7269         my $pattern = $prefix;
7270
7271         # user may give leading caret to force matching left comments only
7272         if ( $prefix !~ /^\^#/ ) {
7273             if ( $prefix !~ /^#/ ) {
7274                 die
7275 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7276             }
7277             $pattern = '^\s*' . $prefix;
7278         }
7279         eval "'##'=~/$pattern/";
7280         if ($@) {
7281             die
7282 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7283         }
7284         $static_block_comment_pattern = $pattern;
7285     }
7286 }
7287
7288 sub make_format_skipping_pattern {
7289     my ( $opt_name, $default ) = @_;
7290     my $param = $rOpts->{$opt_name};
7291     unless ($param) { $param = $default }
7292     $param =~ s/^\s*//;
7293     if ( $param !~ /^#/ ) {
7294         die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7295     }
7296     my $pattern = '^' . $param . '\s';
7297     eval "'#'=~/$pattern/";
7298     if ($@) {
7299         die
7300 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7301     }
7302     return $pattern;
7303 }
7304
7305 sub make_closing_side_comment_list_pattern {
7306
7307     # turn any input list into a regex for recognizing selected block types
7308     $closing_side_comment_list_pattern = '^\w+';
7309     if ( defined( $rOpts->{'closing-side-comment-list'} )
7310         && $rOpts->{'closing-side-comment-list'} )
7311     {
7312         $closing_side_comment_list_pattern =
7313           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7314     }
7315 }
7316
7317 sub make_bli_pattern {
7318
7319     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7320         && $rOpts->{'brace-left-and-indent-list'} )
7321     {
7322         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7323     }
7324
7325     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7326 }
7327
7328 sub make_block_brace_vertical_tightness_pattern {
7329
7330     # turn any input list into a regex for recognizing selected block types
7331     $block_brace_vertical_tightness_pattern =
7332       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7333
7334     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7335         && $rOpts->{'block-brace-vertical-tightness-list'} )
7336     {
7337         $block_brace_vertical_tightness_pattern =
7338           make_block_pattern( '-bbvtl',
7339             $rOpts->{'block-brace-vertical-tightness-list'} );
7340     }
7341 }
7342
7343 sub make_block_pattern {
7344
7345     #  given a string of block-type keywords, return a regex to match them
7346     #  The only tricky part is that labels are indicated with a single ':'
7347     #  and the 'sub' token text may have additional text after it (name of
7348     #  sub).
7349     #
7350     #  Example:
7351     #
7352     #   input string: "if else elsif unless while for foreach do : sub";
7353     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7354
7355     my ( $abbrev, $string ) = @_;
7356     my @list  = split_words($string);
7357     my @words = ();
7358     my %seen;
7359     for my $i (@list) {
7360         next if $seen{$i};
7361         $seen{$i} = 1;
7362         if ( $i eq 'sub' ) {
7363         }
7364         elsif ( $i eq ':' ) {
7365             push @words, '\w+:';
7366         }
7367         elsif ( $i =~ /^\w/ ) {
7368             push @words, $i;
7369         }
7370         else {
7371             warn "unrecognized block type $i after $abbrev, ignoring\n";
7372         }
7373     }
7374     my $pattern = '(' . join( '|', @words ) . ')$';
7375     if ( $seen{'sub'} ) {
7376         $pattern = '(' . $pattern . '|sub)';
7377     }
7378     $pattern = '^' . $pattern;
7379     return $pattern;
7380 }
7381
7382 sub make_static_side_comment_pattern {
7383
7384     # create the pattern used to identify static side comments
7385     $static_side_comment_pattern = '^##';
7386
7387     # allow the user to change it
7388     if ( $rOpts->{'static-side-comment-prefix'} ) {
7389         my $prefix = $rOpts->{'static-side-comment-prefix'};
7390         $prefix =~ s/^\s*//;
7391         my $pattern = '^' . $prefix;
7392         eval "'##'=~/$pattern/";
7393         if ($@) {
7394             die
7395 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7396         }
7397         $static_side_comment_pattern = $pattern;
7398     }
7399 }
7400
7401 sub make_closing_side_comment_prefix {
7402
7403     # Be sure we have a valid closing side comment prefix
7404     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
7405     my $csc_prefix_pattern;
7406     if ( !defined($csc_prefix) ) {
7407         $csc_prefix         = '## end';
7408         $csc_prefix_pattern = '^##\s+end';
7409     }
7410     else {
7411         my $test_csc_prefix = $csc_prefix;
7412         if ( $test_csc_prefix !~ /^#/ ) {
7413             $test_csc_prefix = '#' . $test_csc_prefix;
7414         }
7415
7416         # make a regex to recognize the prefix
7417         my $test_csc_prefix_pattern = $test_csc_prefix;
7418
7419         # escape any special characters
7420         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
7421
7422         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
7423
7424         # allow exact number of intermediate spaces to vary
7425         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
7426
7427         # make sure we have a good pattern
7428         # if we fail this we probably have an error in escaping
7429         # characters.
7430         eval "'##'=~/$test_csc_prefix_pattern/";
7431         if ($@) {
7432
7433             # shouldn't happen..must have screwed up escaping, above
7434             report_definite_bug();
7435             warn
7436 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
7437
7438             # just warn and keep going with defaults
7439             warn "Please consider using a simpler -cscp prefix\n";
7440             warn "Using default -cscp instead; please check output\n";
7441         }
7442         else {
7443             $csc_prefix         = $test_csc_prefix;
7444             $csc_prefix_pattern = $test_csc_prefix_pattern;
7445         }
7446     }
7447     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
7448     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
7449 }
7450
7451 sub dump_want_left_space {
7452     my $fh = shift;
7453     local $" = "\n";
7454     print $fh <<EOM;
7455 These values are the main control of whitespace to the left of a token type;
7456 They may be altered with the -wls parameter.
7457 For a list of token types, use perltidy --dump-token-types (-dtt)
7458  1 means the token wants a space to its left
7459 -1 means the token does not want a space to its left
7460 ------------------------------------------------------------------------
7461 EOM
7462     foreach ( sort keys %want_left_space ) {
7463         print $fh "$_\t$want_left_space{$_}\n";
7464     }
7465 }
7466
7467 sub dump_want_right_space {
7468     my $fh = shift;
7469     local $" = "\n";
7470     print $fh <<EOM;
7471 These values are the main control of whitespace to the right of a token type;
7472 They may be altered with the -wrs parameter.
7473 For a list of token types, use perltidy --dump-token-types (-dtt)
7474  1 means the token wants a space to its right
7475 -1 means the token does not want a space to its right
7476 ------------------------------------------------------------------------
7477 EOM
7478     foreach ( sort keys %want_right_space ) {
7479         print $fh "$_\t$want_right_space{$_}\n";
7480     }
7481 }
7482
7483 {    # begin is_essential_whitespace
7484
7485     my %is_sort_grep_map;
7486     my %is_for_foreach;
7487
7488     BEGIN {
7489
7490         @_ = qw(sort grep map);
7491         @is_sort_grep_map{@_} = (1) x scalar(@_);
7492
7493         @_ = qw(for foreach);
7494         @is_for_foreach{@_} = (1) x scalar(@_);
7495
7496     }
7497
7498     sub is_essential_whitespace {
7499
7500         # Essential whitespace means whitespace which cannot be safely deleted
7501         # without risking the introduction of a syntax error.
7502         # We are given three tokens and their types:
7503         # ($tokenl, $typel) is the token to the left of the space in question
7504         # ($tokenr, $typer) is the token to the right of the space in question
7505         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7506         #
7507         # This is a slow routine but is not needed too often except when -mangle
7508         # is used.
7509         #
7510         # Note: This routine should almost never need to be changed.  It is
7511         # for avoiding syntax problems rather than for formatting.
7512         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7513
7514         my $result =
7515
7516           # never combine two bare words or numbers
7517           # examples:  and ::ok(1)
7518           #            return ::spw(...)
7519           #            for bla::bla:: abc
7520           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7521           #            $input eq"quit" to make $inputeq"quit"
7522           #            my $size=-s::SINK if $file;  <==OK but we won't do it
7523           # don't join something like: for bla::bla:: abc
7524           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7525           ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7526
7527           # do not combine a number with a concatination dot
7528           # example: pom.caputo:
7529           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7530           || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7531           || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7532
7533           # do not join a minus with a bare word, because you might form
7534           # a file test operator.  Example from Complex.pm:
7535           # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7536           || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7537
7538           # and something like this could become ambiguous without space
7539           # after the '-':
7540           #   use constant III=>1;
7541           #   $a = $b - III;
7542           # and even this:
7543           #   $a = - III;
7544           || ( ( $tokenl eq '-' )
7545             && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7546
7547           # '= -' should not become =- or you will get a warning
7548           # about reversed -=
7549           # || ($tokenr eq '-')
7550
7551           # keep a space between a quote and a bareword to prevent the
7552           # bareword from becomming a quote modifier.
7553           || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7554
7555           # keep a space between a token ending in '$' and any word;
7556           # this caused trouble:  "die @$ if $@"
7557           || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7558             && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7559
7560           # perl is very fussy about spaces before <<
7561           || ( $tokenr =~ /^\<\</ )
7562
7563           # avoid combining tokens to create new meanings. Example:
7564           #     $a+ +$b must not become $a++$b
7565           || ( $is_digraph{ $tokenl . $tokenr } )
7566           || ( $is_trigraph{ $tokenl . $tokenr } )
7567
7568           # another example: do not combine these two &'s:
7569           #     allow_options & &OPT_EXECCGI
7570           || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7571
7572           # don't combine $$ or $# with any alphanumeric
7573           # (testfile mangle.t with --mangle)
7574           || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7575
7576           # retain any space after possible filehandle
7577           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7578           || ( $typel eq 'Z' )
7579
7580           # Perl is sensitive to whitespace after the + here:
7581           #  $b = xvals $a + 0.1 * yvals $a;
7582           || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
7583
7584           # keep paren separate in 'use Foo::Bar ()'
7585           || ( $tokenr eq '('
7586             && $typel   eq 'w'
7587             && $typell  eq 'k'
7588             && $tokenll eq 'use' )
7589
7590           # keep any space between filehandle and paren:
7591           # file mangle.t with --mangle:
7592           || ( $typel eq 'Y' && $tokenr eq '(' )
7593
7594           # retain any space after here doc operator ( hereerr.t)
7595           || ( $typel eq 'h' )
7596
7597           # be careful with a space around ++ and --, to avoid ambiguity as to
7598           # which token it applies
7599           || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7600           || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7601
7602           # need space after foreach my; for example, this will fail in
7603           # older versions of Perl:
7604           # foreach my$ft(@filetypes)...
7605           || (
7606             $tokenl eq 'my'
7607
7608             #  /^(for|foreach)$/
7609             && $is_for_foreach{$tokenll}
7610             && $tokenr =~ /^\$/
7611           )
7612
7613           # must have space between grep and left paren; "grep(" will fail
7614           || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7615
7616           # don't stick numbers next to left parens, as in:
7617           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7618           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7619
7620           # We must be sure that a space between a ? and a quoted string
7621           # remains if the space before the ? remains.  [Loca.pm, lockarea]
7622           # ie,
7623           #    $b=join $comma ? ',' : ':', @_;  # ok
7624           #    $b=join $comma?',' : ':', @_;    # ok!
7625           #    $b=join $comma ?',' : ':', @_;   # error!
7626           # Not really required:
7627           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
7628
7629           # do not remove space between an '&' and a bare word because
7630           # it may turn into a function evaluation, like here
7631           # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
7632           #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
7633           || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7634
7635           ;    # the value of this long logic sequence is the result we want
7636         return $result;
7637     }
7638 }
7639
7640 sub set_white_space_flag {
7641
7642     #    This routine examines each pair of nonblank tokens and
7643     #    sets values for array @white_space_flag.
7644     #
7645     #    $white_space_flag[$j] is a flag indicating whether a white space
7646     #    BEFORE token $j is needed, with the following values:
7647     #
7648     #            -1 do not want a space before token $j
7649     #             0 optional space or $j is a whitespace
7650     #             1 want a space before token $j
7651     #
7652     #
7653     #   The values for the first token will be defined based
7654     #   upon the contents of the "to_go" output array.
7655     #
7656     #   Note: retain debug print statements because they are usually
7657     #   required after adding new token types.
7658
7659     BEGIN {
7660
7661         # initialize these global hashes, which control the use of
7662         # whitespace around tokens:
7663         #
7664         # %binary_ws_rules
7665         # %want_left_space
7666         # %want_right_space
7667         # %space_after_keyword
7668         #
7669         # Many token types are identical to the tokens themselves.
7670         # See the tokenizer for a complete list. Here are some special types:
7671         #   k = perl keyword
7672         #   f = semicolon in for statement
7673         #   m = unary minus
7674         #   p = unary plus
7675         # Note that :: is excluded since it should be contained in an identifier
7676         # Note that '->' is excluded because it never gets space
7677         # parentheses and brackets are excluded since they are handled specially
7678         # curly braces are included but may be overridden by logic, such as
7679         # newline logic.
7680
7681         # NEW_TOKENS: create a whitespace rule here.  This can be as
7682         # simple as adding your new letter to @spaces_both_sides, for
7683         # example.
7684
7685         @_ = qw" L { ( [ ";
7686         @is_opening_type{@_} = (1) x scalar(@_);
7687
7688         @_ = qw" R } ) ] ";
7689         @is_closing_type{@_} = (1) x scalar(@_);
7690
7691         my @spaces_both_sides = qw"
7692           + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7693           .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7694           &&= ||= //= <=> A k f w F n C Y U G v
7695           ";
7696
7697         my @spaces_left_side = qw"
7698           t ! ~ m p { \ h pp mm Z j
7699           ";
7700         push( @spaces_left_side, '#' );    # avoids warning message
7701
7702         my @spaces_right_side = qw"
7703           ; } ) ] R J ++ -- **=
7704           ";
7705         push( @spaces_right_side, ',' );    # avoids warning message
7706         @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7707         @want_right_space{@spaces_both_sides} =
7708           (1) x scalar(@spaces_both_sides);
7709         @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
7710         @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7711         @want_left_space{@spaces_right_side} =
7712           (-1) x scalar(@spaces_right_side);
7713         @want_right_space{@spaces_right_side} =
7714           (1) x scalar(@spaces_right_side);
7715         $want_left_space{'L'}   = WS_NO;
7716         $want_left_space{'->'}  = WS_NO;
7717         $want_right_space{'->'} = WS_NO;
7718         $want_left_space{'**'}  = WS_NO;
7719         $want_right_space{'**'} = WS_NO;
7720
7721         # hash type information must stay tightly bound
7722         # as in :  ${xxxx}
7723         $binary_ws_rules{'i'}{'L'} = WS_NO;
7724         $binary_ws_rules{'i'}{'{'} = WS_YES;
7725         $binary_ws_rules{'k'}{'{'} = WS_YES;
7726         $binary_ws_rules{'U'}{'{'} = WS_YES;
7727         $binary_ws_rules{'i'}{'['} = WS_NO;
7728         $binary_ws_rules{'R'}{'L'} = WS_NO;
7729         $binary_ws_rules{'R'}{'{'} = WS_NO;
7730         $binary_ws_rules{'t'}{'L'} = WS_NO;
7731         $binary_ws_rules{'t'}{'{'} = WS_NO;
7732         $binary_ws_rules{'}'}{'L'} = WS_NO;
7733         $binary_ws_rules{'}'}{'{'} = WS_NO;
7734         $binary_ws_rules{'$'}{'L'} = WS_NO;
7735         $binary_ws_rules{'$'}{'{'} = WS_NO;
7736         $binary_ws_rules{'@'}{'L'} = WS_NO;
7737         $binary_ws_rules{'@'}{'{'} = WS_NO;
7738         $binary_ws_rules{'='}{'L'} = WS_YES;
7739
7740         # the following includes ') {'
7741         # as in :    if ( xxx ) { yyy }
7742         $binary_ws_rules{']'}{'L'} = WS_NO;
7743         $binary_ws_rules{']'}{'{'} = WS_NO;
7744         $binary_ws_rules{')'}{'{'} = WS_YES;
7745         $binary_ws_rules{')'}{'['} = WS_NO;
7746         $binary_ws_rules{']'}{'['} = WS_NO;
7747         $binary_ws_rules{']'}{'{'} = WS_NO;
7748         $binary_ws_rules{'}'}{'['} = WS_NO;
7749         $binary_ws_rules{'R'}{'['} = WS_NO;
7750
7751         $binary_ws_rules{']'}{'++'} = WS_NO;
7752         $binary_ws_rules{']'}{'--'} = WS_NO;
7753         $binary_ws_rules{')'}{'++'} = WS_NO;
7754         $binary_ws_rules{')'}{'--'} = WS_NO;
7755
7756         $binary_ws_rules{'R'}{'++'} = WS_NO;
7757         $binary_ws_rules{'R'}{'--'} = WS_NO;
7758
7759         ########################################################
7760         # should no longer be necessary (see niek.pl)
7761         ##$binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
7762         ##$binary_ws_rules{'w'}{':'} = WS_NO;
7763         ########################################################
7764         $binary_ws_rules{'i'}{'Q'} = WS_YES;
7765         $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7766
7767         # FIXME: we need to split 'i' into variables and functions
7768         # and have no space for functions but space for variables.  For now,
7769         # I have a special patch in the special rules below
7770         $binary_ws_rules{'i'}{'('} = WS_NO;
7771
7772         $binary_ws_rules{'w'}{'('} = WS_NO;
7773         $binary_ws_rules{'w'}{'{'} = WS_YES;
7774     }
7775     my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7776     my ( $last_token, $last_type, $last_block_type, $token, $type,
7777         $block_type );
7778     my (@white_space_flag);
7779     my $j_tight_closing_paren = -1;
7780
7781     if ( $max_index_to_go >= 0 ) {
7782         $token      = $tokens_to_go[$max_index_to_go];
7783         $type       = $types_to_go[$max_index_to_go];
7784         $block_type = $block_type_to_go[$max_index_to_go];
7785     }
7786     else {
7787         $token      = ' ';
7788         $type       = 'b';
7789         $block_type = '';
7790     }
7791
7792     # loop over all tokens
7793     my ( $j, $ws );
7794
7795     for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7796
7797         if ( $$rtoken_type[$j] eq 'b' ) {
7798             $white_space_flag[$j] = WS_OPTIONAL;
7799             next;
7800         }
7801
7802         # set a default value, to be changed as needed
7803         $ws              = undef;
7804         $last_token      = $token;
7805         $last_type       = $type;
7806         $last_block_type = $block_type;
7807         $token           = $$rtokens[$j];
7808         $type            = $$rtoken_type[$j];
7809         $block_type      = $$rblock_type[$j];
7810
7811         #---------------------------------------------------------------
7812         # section 1:
7813         # handle space on the inside of opening braces
7814         #---------------------------------------------------------------
7815
7816         #    /^[L\{\(\[]$/
7817         if ( $is_opening_type{$last_type} ) {
7818
7819             $j_tight_closing_paren = -1;
7820
7821             # let's keep empty matched braces together: () {} []
7822             # except for BLOCKS
7823             if ( $token eq $matching_token{$last_token} ) {
7824                 if ($block_type) {
7825                     $ws = WS_YES;
7826                 }
7827                 else {
7828                     $ws = WS_NO;
7829                 }
7830             }
7831             else {
7832
7833                 # we're considering the right of an opening brace
7834                 # tightness = 0 means always pad inside with space
7835                 # tightness = 1 means pad inside if "complex"
7836                 # tightness = 2 means never pad inside with space
7837
7838                 my $tightness;
7839                 if (   $last_type eq '{'
7840                     && $last_token eq '{'
7841                     && $last_block_type )
7842                 {
7843                     $tightness = $rOpts_block_brace_tightness;
7844                 }
7845                 else { $tightness = $tightness{$last_token} }
7846
7847                 if ( $tightness <= 0 ) {
7848                     $ws = WS_YES;
7849                 }
7850                 elsif ( $tightness > 1 ) {
7851                     $ws = WS_NO;
7852                 }
7853                 else {
7854
7855                     # Patch to count '-foo' as single token so that
7856                     # each of  $a{-foo} and $a{foo} and $a{'foo'} do
7857                     # not get spaces with default formatting.
7858                     my $j_here = $j;
7859                     ++$j_here
7860                       if ( $token eq '-'
7861                         && $last_token eq '{'
7862                         && $$rtoken_type[ $j + 1 ] eq 'w' );
7863
7864                     # $j_next is where a closing token should be if
7865                     # the container has a single token
7866                     my $j_next =
7867                       ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7868                       ? $j_here + 2
7869                       : $j_here + 1;
7870                     my $tok_next  = $$rtokens[$j_next];
7871                     my $type_next = $$rtoken_type[$j_next];
7872
7873                     # for tightness = 1, if there is just one token
7874                     # within the matching pair, we will keep it tight
7875                     if (
7876                         $tok_next eq $matching_token{$last_token}
7877
7878                         # but watch out for this: [ [ ]    (misc.t)
7879                         && $last_token ne $token
7880                       )
7881                     {
7882
7883                         # remember where to put the space for the closing paren
7884                         $j_tight_closing_paren = $j_next;
7885                         $ws                    = WS_NO;
7886                     }
7887                     else {
7888                         $ws = WS_YES;
7889                     }
7890                 }
7891             }
7892         }    # done with opening braces and brackets
7893         my $ws_1 = $ws
7894           if FORMATTER_DEBUG_FLAG_WHITE;
7895
7896         #---------------------------------------------------------------
7897         # section 2:
7898         # handle space on inside of closing brace pairs
7899         #---------------------------------------------------------------
7900
7901         #   /[\}\)\]R]/
7902         if ( $is_closing_type{$type} ) {
7903
7904             if ( $j == $j_tight_closing_paren ) {
7905
7906                 $j_tight_closing_paren = -1;
7907                 $ws                    = WS_NO;
7908             }
7909             else {
7910
7911                 if ( !defined($ws) ) {
7912
7913                     my $tightness;
7914                     if ( $type eq '}' && $token eq '}' && $block_type ) {
7915                         $tightness = $rOpts_block_brace_tightness;
7916                     }
7917                     else { $tightness = $tightness{$token} }
7918
7919                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7920                 }
7921             }
7922         }
7923
7924         my $ws_2 = $ws
7925           if FORMATTER_DEBUG_FLAG_WHITE;
7926
7927         #---------------------------------------------------------------
7928         # section 3:
7929         # use the binary table
7930         #---------------------------------------------------------------
7931         if ( !defined($ws) ) {
7932             $ws = $binary_ws_rules{$last_type}{$type};
7933         }
7934         my $ws_3 = $ws
7935           if FORMATTER_DEBUG_FLAG_WHITE;
7936
7937         #---------------------------------------------------------------
7938         # section 4:
7939         # some special cases
7940         #---------------------------------------------------------------
7941         if ( $token eq '(' ) {
7942
7943             # This will have to be tweaked as tokenization changes.
7944             # We usually want a space at '} (', for example:
7945             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7946             #
7947             # But not others:
7948             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7949             # At present, the above & block is marked as type L/R so this case
7950             # won't go through here.
7951             if ( $last_type eq '}' ) { $ws = WS_YES }
7952
7953             # NOTE: some older versions of Perl had occasional problems if
7954             # spaces are introduced between keywords or functions and opening
7955             # parens.  So the default is not to do this except is certain
7956             # cases.  The current Perl seems to tolerate spaces.
7957
7958             # Space between keyword and '('
7959             elsif ( $last_type eq 'k' ) {
7960                 $ws = WS_NO
7961                   unless ( $rOpts_space_keyword_paren
7962                     || $space_after_keyword{$last_token} );
7963             }
7964
7965             # Space between function and '('
7966             # -----------------------------------------------------
7967             # 'w' and 'i' checks for something like:
7968             #   myfun(    &myfun(   ->myfun(
7969             # -----------------------------------------------------
7970             elsif (( $last_type =~ /^[wU]$/ )
7971                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7972             {
7973                 $ws = WS_NO unless ($rOpts_space_function_paren);
7974             }
7975
7976             # space between something like $i and ( in
7977             # for $i ( 0 .. 20 ) {
7978             # FIXME: eventually, type 'i' needs to be split into multiple
7979             # token types so this can be a hardwired rule.
7980             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7981                 $ws = WS_YES;
7982             }
7983
7984             # allow constant function followed by '()' to retain no space
7985             elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7986                 $ws = WS_NO;
7987             }
7988         }
7989
7990         # patch for SWITCH/CASE: make space at ']{' optional
7991         # since the '{' might begin a case or when block
7992         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
7993             $ws = WS_OPTIONAL;
7994         }
7995
7996         # keep space between 'sub' and '{' for anonymous sub definition
7997         if ( $type eq '{' ) {
7998             if ( $last_token eq 'sub' ) {
7999                 $ws = WS_YES;
8000             }
8001
8002             # this is needed to avoid no space in '){'
8003             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8004
8005             # avoid any space before the brace or bracket in something like
8006             #  @opts{'a','b',...}
8007             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8008                 $ws = WS_NO;
8009             }
8010         }
8011
8012         elsif ( $type eq 'i' ) {
8013
8014             # never a space before ->
8015             if ( $token =~ /^\-\>/ ) {
8016                 $ws = WS_NO;
8017             }
8018         }
8019
8020         # retain any space between '-' and bare word
8021         elsif ( $type eq 'w' || $type eq 'C' ) {
8022             $ws = WS_OPTIONAL if $last_type eq '-';
8023
8024             # never a space before ->
8025             if ( $token =~ /^\-\>/ ) {
8026                 $ws = WS_NO;
8027             }
8028         }
8029
8030         # retain any space between '-' and bare word
8031         # example: avoid space between 'USER' and '-' here:
8032         #   $myhash{USER-NAME}='steve';
8033         elsif ( $type eq 'm' || $type eq '-' ) {
8034             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8035         }
8036
8037         # always space before side comment
8038         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8039
8040         # always preserver whatever space was used after a possible
8041         # filehandle (except _) or here doc operator
8042         if (
8043             $type ne '#'
8044             && ( ( $last_type eq 'Z' && $last_token ne '_' )
8045                 || $last_type eq 'h' )
8046           )
8047         {
8048             $ws = WS_OPTIONAL;
8049         }
8050
8051         my $ws_4 = $ws
8052           if FORMATTER_DEBUG_FLAG_WHITE;
8053
8054         #---------------------------------------------------------------
8055         # section 5:
8056         # default rules not covered above
8057         #---------------------------------------------------------------
8058         # if we fall through to here,
8059         # look at the pre-defined hash tables for the two tokens, and
8060         # if (they are equal) use the common value
8061         # if (either is zero or undef) use the other
8062         # if (either is -1) use it
8063         # That is,
8064         # left  vs right
8065         #  1    vs    1     -->  1
8066         #  0    vs    0     -->  0
8067         # -1    vs   -1     --> -1
8068         #
8069         #  0    vs   -1     --> -1
8070         #  0    vs    1     -->  1
8071         #  1    vs    0     -->  1
8072         # -1    vs    0     --> -1
8073         #
8074         # -1    vs    1     --> -1
8075         #  1    vs   -1     --> -1
8076         if ( !defined($ws) ) {
8077             my $wl = $want_left_space{$type};
8078             my $wr = $want_right_space{$last_type};
8079             if ( !defined($wl) ) { $wl = 0 }
8080             if ( !defined($wr) ) { $wr = 0 }
8081             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8082         }
8083
8084         if ( !defined($ws) ) {
8085             $ws = 0;
8086             write_diagnostics(
8087                 "WS flag is undefined for tokens $last_token $token\n");
8088         }
8089
8090         # Treat newline as a whitespace. Otherwise, we might combine
8091         # 'Send' and '-recipients' here according to the above rules:
8092         #    my $msg = new Fax::Send
8093         #      -recipients => $to,
8094         #      -data => $data;
8095         if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8096
8097         if (   ( $ws == 0 )
8098             && $j > 0
8099             && $j < $jmax
8100             && ( $last_type !~ /^[Zh]$/ ) )
8101         {
8102
8103             # If this happens, we have a non-fatal but undesirable
8104             # hole in the above rules which should be patched.
8105             write_diagnostics(
8106                 "WS flag is zero for tokens $last_token $token\n");
8107         }
8108         $white_space_flag[$j] = $ws;
8109
8110         FORMATTER_DEBUG_FLAG_WHITE && do {
8111             my $str = substr( $last_token, 0, 15 );
8112             $str .= ' ' x ( 16 - length($str) );
8113             if ( !defined($ws_1) ) { $ws_1 = "*" }
8114             if ( !defined($ws_2) ) { $ws_2 = "*" }
8115             if ( !defined($ws_3) ) { $ws_3 = "*" }
8116             if ( !defined($ws_4) ) { $ws_4 = "*" }
8117             print
8118 "WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8119         };
8120     }
8121     return \@white_space_flag;
8122 }
8123
8124 {    # begin print_line_of_tokens
8125
8126     my $rtoken_type;
8127     my $rtokens;
8128     my $rlevels;
8129     my $rslevels;
8130     my $rblock_type;
8131     my $rcontainer_type;
8132     my $rcontainer_environment;
8133     my $rtype_sequence;
8134     my $input_line;
8135     my $rnesting_tokens;
8136     my $rci_levels;
8137     my $rnesting_blocks;
8138
8139     my $in_quote;
8140     my $python_indentation_level;
8141
8142     # These local token variables are stored by store_token_to_go:
8143     my $block_type;
8144     my $ci_level;
8145     my $container_environment;
8146     my $container_type;
8147     my $in_continued_quote;
8148     my $level;
8149     my $nesting_blocks;
8150     my $no_internal_newlines;
8151     my $slevel;
8152     my $token;
8153     my $type;
8154     my $type_sequence;
8155
8156     # routine to pull the jth token from the line of tokens
8157     sub extract_token {
8158         my $j = shift;
8159         $token                 = $$rtokens[$j];
8160         $type                  = $$rtoken_type[$j];
8161         $block_type            = $$rblock_type[$j];
8162         $container_type        = $$rcontainer_type[$j];
8163         $container_environment = $$rcontainer_environment[$j];
8164         $type_sequence         = $$rtype_sequence[$j];
8165         $level                 = $$rlevels[$j];
8166         $slevel                = $$rslevels[$j];
8167         $nesting_blocks        = $$rnesting_blocks[$j];
8168         $ci_level              = $$rci_levels[$j];
8169     }
8170
8171     {
8172         my @saved_token;
8173
8174         sub save_current_token {
8175
8176             @saved_token = (
8177                 $block_type,            $ci_level,
8178                 $container_environment, $container_type,
8179                 $in_continued_quote,    $level,
8180                 $nesting_blocks,        $no_internal_newlines,
8181                 $slevel,                $token,
8182                 $type,                  $type_sequence,
8183             );
8184         }
8185
8186         sub restore_current_token {
8187             (
8188                 $block_type,            $ci_level,
8189                 $container_environment, $container_type,
8190                 $in_continued_quote,    $level,
8191                 $nesting_blocks,        $no_internal_newlines,
8192                 $slevel,                $token,
8193                 $type,                  $type_sequence,
8194             ) = @saved_token;
8195         }
8196     }
8197
8198     # Routine to place the current token into the output stream.
8199     # Called once per output token.
8200     sub store_token_to_go {
8201
8202         my $flag = $no_internal_newlines;
8203         if ( $_[0] ) { $flag = 1 }
8204
8205         $tokens_to_go[ ++$max_index_to_go ]            = $token;
8206         $types_to_go[$max_index_to_go]                 = $type;
8207         $nobreak_to_go[$max_index_to_go]               = $flag;
8208         $old_breakpoint_to_go[$max_index_to_go]        = 0;
8209         $forced_breakpoint_to_go[$max_index_to_go]     = 0;
8210         $block_type_to_go[$max_index_to_go]            = $block_type;
8211         $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
8212         $container_environment_to_go[$max_index_to_go] = $container_environment;
8213         $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
8214         $ci_levels_to_go[$max_index_to_go]             = $ci_level;
8215         $mate_index_to_go[$max_index_to_go]            = -1;
8216         $matching_token_to_go[$max_index_to_go]        = '';
8217         $bond_strength_to_go[$max_index_to_go]         = 0;
8218
8219         # Note: negative levels are currently retained as a diagnostic so that
8220         # the 'final indentation level' is correctly reported for bad scripts.
8221         # But this means that every use of $level as an index must be checked.
8222         # If this becomes too much of a problem, we might give up and just clip
8223         # them at zero.
8224         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8225         $levels_to_go[$max_index_to_go] = $level;
8226         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8227         $lengths_to_go[ $max_index_to_go + 1 ] =
8228           $lengths_to_go[$max_index_to_go] + length($token);
8229
8230         # Define the indentation that this token would have if it started
8231         # a new line.  We have to do this now because we need to know this
8232         # when considering one-line blocks.
8233         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8234
8235         if ( $type ne 'b' ) {
8236             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8237             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
8238             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8239             $last_nonblank_index_to_go      = $max_index_to_go;
8240             $last_nonblank_type_to_go       = $type;
8241             $last_nonblank_token_to_go      = $token;
8242             if ( $type eq ',' ) {
8243                 $comma_count_in_batch++;
8244             }
8245         }
8246
8247         FORMATTER_DEBUG_FLAG_STORE && do {
8248             my ( $a, $b, $c ) = caller();
8249             print
8250 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8251         };
8252     }
8253
8254     sub insert_new_token_to_go {
8255
8256         # insert a new token into the output stream.  use same level as
8257         # previous token; assumes a character at max_index_to_go.
8258         save_current_token();
8259         ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8260
8261         if ( $max_index_to_go == UNDEFINED_INDEX ) {
8262             warning("code bug: bad call to insert_new_token_to_go\n");
8263         }
8264         $level = $levels_to_go[$max_index_to_go];
8265
8266         # FIXME: it seems to be necessary to use the next, rather than
8267         # previous, value of this variable when creating a new blank (align.t)
8268         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
8269         $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
8270         $ci_level              = $ci_levels_to_go[$max_index_to_go];
8271         $container_environment = $container_environment_to_go[$max_index_to_go];
8272         $in_continued_quote    = 0;
8273         $block_type            = "";
8274         $type_sequence         = "";
8275         store_token_to_go();
8276         restore_current_token();
8277         return;
8278     }
8279
8280     sub print_line_of_tokens {
8281
8282         my $line_of_tokens = shift;
8283
8284         # This routine is called once per input line to process all of
8285         # the tokens on that line.  This is the first stage of
8286         # beautification.
8287         #
8288         # Full-line comments and blank lines may be processed immediately.
8289         #
8290         # For normal lines of code, the tokens are stored one-by-one,
8291         # via calls to 'sub store_token_to_go', until a known line break
8292         # point is reached.  Then, the batch of collected tokens is
8293         # passed along to 'sub output_line_to_go' for further
8294         # processing.  This routine decides if there should be
8295         # whitespace between each pair of non-white tokens, so later
8296         # routines only need to decide on any additional line breaks.
8297         # Any whitespace is initally a single space character.  Later,
8298         # the vertical aligner may expand that to be multiple space
8299         # characters if necessary for alignment.
8300
8301         # extract input line number for error messages
8302         $input_line_number = $line_of_tokens->{_line_number};
8303
8304         $rtoken_type            = $line_of_tokens->{_rtoken_type};
8305         $rtokens                = $line_of_tokens->{_rtokens};
8306         $rlevels                = $line_of_tokens->{_rlevels};
8307         $rslevels               = $line_of_tokens->{_rslevels};
8308         $rblock_type            = $line_of_tokens->{_rblock_type};
8309         $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
8310         $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8311         $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
8312         $input_line             = $line_of_tokens->{_line_text};
8313         $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
8314         $rci_levels             = $line_of_tokens->{_rci_levels};
8315         $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
8316
8317         $in_continued_quote = $starting_in_quote =
8318           $line_of_tokens->{_starting_in_quote};
8319         $in_quote        = $line_of_tokens->{_ending_in_quote};
8320         $ending_in_quote = $in_quote;
8321         $python_indentation_level =
8322           $line_of_tokens->{_python_indentation_level};
8323
8324         my $j;
8325         my $j_next;
8326         my $jmax;
8327         my $next_nonblank_token;
8328         my $next_nonblank_token_type;
8329         my $rwhite_space_flag;
8330
8331         $jmax                    = @$rtokens - 1;
8332         $block_type              = "";
8333         $container_type          = "";
8334         $container_environment   = "";
8335         $type_sequence           = "";
8336         $no_internal_newlines    = 1 - $rOpts_add_newlines;
8337         $is_static_block_comment = 0;
8338
8339         # Handle a continued quote..
8340         if ($in_continued_quote) {
8341
8342             # A line which is entirely a quote or pattern must go out
8343             # verbatim.  Note: the \n is contained in $input_line.
8344             if ( $jmax <= 0 ) {
8345                 if ( ( $input_line =~ "\t" ) ) {
8346                     note_embedded_tab();
8347                 }
8348                 write_unindented_line("$input_line");
8349                 $last_line_had_side_comment = 0;
8350                 return;
8351             }
8352
8353             # prior to version 20010406, perltidy had a bug which placed
8354             # continuation indentation before the last line of some multiline
8355             # quotes and patterns -- exactly the lines passing this way.
8356             # To help find affected lines in scripts run with these
8357             # versions, run with '-chk', and it will warn of any quotes or
8358             # patterns which might have been modified by these early
8359             # versions.
8360             if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8361                 warning(
8362 "-chk: please check this line for extra leading whitespace\n"
8363                 );
8364             }
8365         }
8366
8367         # Write line verbatim if we are in a formatting skip section
8368         if ($in_format_skipping_section) {
8369             write_unindented_line("$input_line");
8370             $last_line_had_side_comment = 0;
8371
8372             # Note: extra space appended to comment simplifies pattern matching
8373             if (   $jmax == 0
8374                 && $$rtoken_type[0] eq '#'
8375                 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8376             {
8377                 $in_format_skipping_section = 0;
8378                 write_logfile_entry("Exiting formatting skip section\n");
8379             }
8380             return;
8381         }
8382
8383         # See if we are entering a formatting skip section
8384         if (   $rOpts_format_skipping
8385             && $jmax == 0
8386             && $$rtoken_type[0] eq '#'
8387             && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8388         {
8389             flush();
8390             $in_format_skipping_section = 1;
8391             write_logfile_entry("Entering formatting skip section\n");
8392             write_unindented_line("$input_line");
8393             $last_line_had_side_comment = 0;
8394             return;
8395         }
8396
8397         # delete trailing blank tokens
8398         if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8399
8400         # Handle a blank line..
8401         if ( $jmax < 0 ) {
8402
8403             # If keep-old-blank-lines is zero, we delete all
8404             # old blank lines and let the blank line rules generate any
8405             # needed blanks.
8406             if ($rOpts_keep_old_blank_lines) {
8407                 flush();
8408                 $file_writer_object->write_blank_code_line(
8409                     $rOpts_keep_old_blank_lines == 2 );
8410                 $last_line_leading_type = 'b';
8411             }
8412             $last_line_had_side_comment = 0;
8413             return;
8414         }
8415
8416         # see if this is a static block comment (starts with ## by default)
8417         my $is_static_block_comment_without_leading_space = 0;
8418         if (   $jmax == 0
8419             && $$rtoken_type[0] eq '#'
8420             && $rOpts->{'static-block-comments'}
8421             && $input_line =~ /$static_block_comment_pattern/o )
8422         {
8423             $is_static_block_comment = 1;
8424             $is_static_block_comment_without_leading_space =
8425               substr( $input_line, 0, 1 ) eq '#';
8426         }
8427
8428         # Check for comments which are line directives
8429         # Treat exactly as static block comments without leading space
8430         # reference: perlsyn, near end, section Plain Old Comments (Not!)
8431         # example: '# line 42 "new_filename.plx"'
8432         if (
8433                $jmax == 0
8434             && $$rtoken_type[0] eq '#'
8435             && $input_line =~ /^\#   \s*
8436                                line \s+ (\d+)   \s*
8437                                (?:\s("?)([^"]+)\2)? \s*
8438                                $/x
8439           )
8440         {
8441             $is_static_block_comment                       = 1;
8442             $is_static_block_comment_without_leading_space = 1;
8443         }
8444
8445         # create a hanging side comment if appropriate
8446         if (
8447                $jmax == 0
8448             && $$rtoken_type[0] eq '#'    # only token is a comment
8449             && $last_line_had_side_comment    # last line had side comment
8450             && $input_line =~ /^\s/           # there is some leading space
8451             && !$is_static_block_comment    # do not make static comment hanging
8452             && $rOpts->{'hanging-side-comments'}    # user is allowing this
8453           )
8454         {
8455
8456             # We will insert an empty qw string at the start of the token list
8457             # to force this comment to be a side comment. The vertical aligner
8458             # should then line it up with the previous side comment.
8459             unshift @$rtoken_type,            'q';
8460             unshift @$rtokens,                '';
8461             unshift @$rlevels,                $$rlevels[0];
8462             unshift @$rslevels,               $$rslevels[0];
8463             unshift @$rblock_type,            '';
8464             unshift @$rcontainer_type,        '';
8465             unshift @$rcontainer_environment, '';
8466             unshift @$rtype_sequence,         '';
8467             unshift @$rnesting_tokens,        $$rnesting_tokens[0];
8468             unshift @$rci_levels,             $$rci_levels[0];
8469             unshift @$rnesting_blocks,        $$rnesting_blocks[0];
8470             $jmax = 1;
8471         }
8472
8473         # remember if this line has a side comment
8474         $last_line_had_side_comment =
8475           ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8476
8477         # Handle a block (full-line) comment..
8478         if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8479
8480             if ( $rOpts->{'delete-block-comments'} ) { return }
8481
8482             if ( $rOpts->{'tee-block-comments'} ) {
8483                 $file_writer_object->tee_on();
8484             }
8485
8486             destroy_one_line_block();
8487             output_line_to_go();
8488
8489             # output a blank line before block comments
8490             if (
8491                    $last_line_leading_type !~ /^[#b]$/
8492                 && $rOpts->{'blanks-before-comments'}    # only if allowed
8493                 && !
8494                 $is_static_block_comment    # never before static block comments
8495               )
8496             {
8497                 flush();                    # switching to new output stream
8498                 $file_writer_object->write_blank_code_line();
8499                 $last_line_leading_type = 'b';
8500             }
8501
8502             # TRIM COMMENTS -- This could be turned off as a option
8503             $$rtokens[0] =~ s/\s*$//;       # trim right end
8504
8505             if (
8506                 $rOpts->{'indent-block-comments'}
8507                 && (  !$rOpts->{'indent-spaced-block-comments'}
8508                     || $input_line =~ /^\s+/ )
8509                 && !$is_static_block_comment_without_leading_space
8510               )
8511             {
8512                 extract_token(0);
8513                 store_token_to_go();
8514                 output_line_to_go();
8515             }
8516             else {
8517                 flush();    # switching to new output stream
8518                 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8519                 $last_line_leading_type = '#';
8520             }
8521             if ( $rOpts->{'tee-block-comments'} ) {
8522                 $file_writer_object->tee_off();
8523             }
8524             return;
8525         }
8526
8527         # compare input/output indentation except for continuation lines
8528         # (because they have an unknown amount of initial blank space)
8529         # and lines which are quotes (because they may have been outdented)
8530         # Note: this test is placed here because we know the continuation flag
8531         # at this point, which allows us to avoid non-meaningful checks.
8532         my $structural_indentation_level = $$rlevels[0];
8533         compare_indentation_levels( $python_indentation_level,
8534             $structural_indentation_level )
8535           unless ( $python_indentation_level < 0
8536             || ( $$rci_levels[0] > 0 )
8537             || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8538           );
8539
8540         #   Patch needed for MakeMaker.  Do not break a statement
8541         #   in which $VERSION may be calculated.  See MakeMaker.pm;
8542         #   this is based on the coding in it.
8543         #   The first line of a file that matches this will be eval'd:
8544         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8545         #   Examples:
8546         #     *VERSION = \'1.01';
8547         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
8548         #   We will pass such a line straight through without breaking
8549         #   it unless -npvl is used
8550
8551         my $is_VERSION_statement = 0;
8552
8553         if (
8554               !$saw_VERSION_in_this_file
8555             && $input_line =~ /VERSION/    # quick check to reject most lines
8556             && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8557           )
8558         {
8559             $saw_VERSION_in_this_file = 1;
8560             $is_VERSION_statement     = 1;
8561             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8562             $no_internal_newlines = 1;
8563         }
8564
8565         # take care of indentation-only
8566         # NOTE: In previous versions we sent all qw lines out immediately here.
8567         # No longer doing this: also write a line which is entirely a 'qw' list
8568         # to allow stacking of opening and closing tokens.  Note that interior
8569         # qw lines will still go out at the end of this routine.
8570         if ( $rOpts->{'indent-only'} ) {
8571             flush();
8572             trim($input_line);
8573
8574             extract_token(0);
8575             $token                 = $input_line;
8576             $type                  = 'q';
8577             $block_type            = "";
8578             $container_type        = "";
8579             $container_environment = "";
8580             $type_sequence         = "";
8581             store_token_to_go();
8582             output_line_to_go();
8583             return;
8584         }
8585
8586         push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
8587         push( @$rtoken_type, 'b', 'b' );
8588         ($rwhite_space_flag) =
8589           set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8590
8591         # find input tabbing to allow checks for tabbing disagreement
8592         ## not used for now
8593         ##$input_line_tabbing = "";
8594         ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8595
8596         # if the buffer hasn't been flushed, add a leading space if
8597         # necessary to keep essential whitespace. This is really only
8598         # necessary if we are squeezing out all ws.
8599         if ( $max_index_to_go >= 0 ) {
8600
8601             $old_line_count_in_batch++;
8602
8603             if (
8604                 is_essential_whitespace(
8605                     $last_last_nonblank_token,
8606                     $last_last_nonblank_type,
8607                     $tokens_to_go[$max_index_to_go],
8608                     $types_to_go[$max_index_to_go],
8609                     $$rtokens[0],
8610                     $$rtoken_type[0]
8611                 )
8612               )
8613             {
8614                 my $slevel = $$rslevels[0];
8615                 insert_new_token_to_go( ' ', 'b', $slevel,
8616                     $no_internal_newlines );
8617             }
8618         }
8619
8620         # If we just saw the end of an elsif block, write nag message
8621         # if we do not see another elseif or an else.
8622         if ($looking_for_else) {
8623
8624             unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8625                 write_logfile_entry("(No else block)\n");
8626             }
8627             $looking_for_else = 0;
8628         }
8629
8630         # This is a good place to kill incomplete one-line blocks
8631         if (   ( $semicolons_before_block_self_destruct == 0 )
8632             && ( $max_index_to_go >= 0 )
8633             && ( $types_to_go[$max_index_to_go] eq ';' )
8634             && ( $$rtokens[0] ne '}' ) )
8635         {
8636             destroy_one_line_block();
8637             output_line_to_go();
8638         }
8639
8640         # loop to process the tokens one-by-one
8641         $type  = 'b';
8642         $token = "";
8643
8644         foreach $j ( 0 .. $jmax ) {
8645
8646             # pull out the local values for this token
8647             extract_token($j);
8648
8649             if ( $type eq '#' ) {
8650
8651                 # trim trailing whitespace
8652                 # (there is no option at present to prevent this)
8653                 $token =~ s/\s*$//;
8654
8655                 if (
8656                     $rOpts->{'delete-side-comments'}
8657
8658                     # delete closing side comments if necessary
8659                     || (   $rOpts->{'delete-closing-side-comments'}
8660                         && $token =~ /$closing_side_comment_prefix_pattern/o
8661                         && $last_nonblank_block_type =~
8662                         /$closing_side_comment_list_pattern/o )
8663                   )
8664                 {
8665                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8666                         unstore_token_to_go();
8667                     }
8668                     last;
8669                 }
8670             }
8671
8672             # If we are continuing after seeing a right curly brace, flush
8673             # buffer unless we see what we are looking for, as in
8674             #   } else ...
8675             if ( $rbrace_follower && $type ne 'b' ) {
8676
8677                 unless ( $rbrace_follower->{$token} ) {
8678                     output_line_to_go();
8679                 }
8680                 $rbrace_follower = undef;
8681             }
8682
8683             $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8684             $next_nonblank_token      = $$rtokens[$j_next];
8685             $next_nonblank_token_type = $$rtoken_type[$j_next];
8686
8687             #--------------------------------------------------------
8688             # Start of section to patch token text
8689             #--------------------------------------------------------
8690
8691             # Modify certain tokens here for whitespace
8692             # The following is not yet done, but could be:
8693             #   sub (x x x)
8694             if ( $type =~ /^[wit]$/ ) {
8695
8696                 # Examples:
8697                 # change '$  var'  to '$var' etc
8698                 #        '-> new'  to '->new'
8699                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8700                     $token =~ s/\s*//g;
8701                 }
8702
8703                 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8704             }
8705
8706             # change 'LABEL   :'   to 'LABEL:'
8707             elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8708
8709             # patch to add space to something like "x10"
8710             # This avoids having to split this token in the pre-tokenizer
8711             elsif ( $type eq 'n' ) {
8712                 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8713             }
8714
8715             elsif ( $type eq 'Q' ) {
8716                 note_embedded_tab() if ( $token =~ "\t" );
8717
8718                 # make note of something like '$var = s/xxx/yyy/;'
8719                 # in case it should have been '$var =~ s/xxx/yyy/;'
8720                 if (
8721                        $token =~ /^(s|tr|y|m|\/)/
8722                     && $last_nonblank_token =~ /^(=|==|!=)$/
8723
8724                     # precededed by simple scalar
8725                     && $last_last_nonblank_type eq 'i'
8726                     && $last_last_nonblank_token =~ /^\$/
8727
8728                     # followed by some kind of termination
8729                     # (but give complaint if we can's see far enough ahead)
8730                     && $next_nonblank_token =~ /^[; \)\}]$/
8731
8732                     # scalar is not decleared
8733                     && !(
8734                            $types_to_go[0] eq 'k'
8735                         && $tokens_to_go[0] =~ /^(my|our|local)$/
8736                     )
8737                   )
8738                 {
8739                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8740                     complain(
8741 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8742                     );
8743                 }
8744             }
8745
8746            # trim blanks from right of qw quotes
8747            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8748             elsif ( $type eq 'q' ) {
8749                 $token =~ s/\s*$//;
8750                 note_embedded_tab() if ( $token =~ "\t" );
8751             }
8752
8753             #--------------------------------------------------------
8754             # End of section to patch token text
8755             #--------------------------------------------------------
8756
8757             # insert any needed whitespace
8758             if (   ( $type ne 'b' )
8759                 && ( $max_index_to_go >= 0 )
8760                 && ( $types_to_go[$max_index_to_go] ne 'b' )
8761                 && $rOpts_add_whitespace )
8762             {
8763                 my $ws = $$rwhite_space_flag[$j];
8764
8765                 if ( $ws == 1 ) {
8766                     insert_new_token_to_go( ' ', 'b', $slevel,
8767                         $no_internal_newlines );
8768                 }
8769             }
8770
8771             # Do not allow breaks which would promote a side comment to a
8772             # block comment.  In order to allow a break before an opening
8773             # or closing BLOCK, followed by a side comment, those sections
8774             # of code will handle this flag separately.
8775             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8776             my $is_opening_BLOCK =
8777               (      $type eq '{'
8778                   && $token eq '{'
8779                   && $block_type
8780                   && $block_type ne 't' );
8781             my $is_closing_BLOCK =
8782               (      $type eq '}'
8783                   && $token eq '}'
8784                   && $block_type
8785                   && $block_type ne 't' );
8786
8787             if (   $side_comment_follows
8788                 && !$is_opening_BLOCK
8789                 && !$is_closing_BLOCK )
8790             {
8791                 $no_internal_newlines = 1;
8792             }
8793
8794             # We're only going to handle breaking for code BLOCKS at this
8795             # (top) level.  Other indentation breaks will be handled by
8796             # sub scan_list, which is better suited to dealing with them.
8797             if ($is_opening_BLOCK) {
8798
8799                 # Tentatively output this token.  This is required before
8800                 # calling starting_one_line_block.  We may have to unstore
8801                 # it, though, if we have to break before it.
8802                 store_token_to_go($side_comment_follows);
8803
8804                 # Look ahead to see if we might form a one-line block
8805                 my $too_long =
8806                   starting_one_line_block( $j, $jmax, $level, $slevel,
8807                     $ci_level, $rtokens, $rtoken_type, $rblock_type );
8808                 clear_breakpoint_undo_stack();
8809
8810                 # to simplify the logic below, set a flag to indicate if
8811                 # this opening brace is far from the keyword which introduces it
8812                 my $keyword_on_same_line = 1;
8813                 if (   ( $max_index_to_go >= 0 )
8814                     && ( $last_nonblank_type eq ')' ) )
8815                 {
8816                     if (   $block_type =~ /^(if|else|elsif)$/
8817                         && ( $tokens_to_go[0] eq '}' )
8818                         && $rOpts_cuddled_else )
8819                     {
8820                         $keyword_on_same_line = 1;
8821                     }
8822                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8823                     {
8824                         $keyword_on_same_line = 0;
8825                     }
8826                 }
8827
8828                 # decide if user requested break before '{'
8829                 my $want_break =
8830
8831                   # use -bl flag if not a sub block of any type
8832                   $block_type !~ /^sub/
8833                   ? $rOpts->{'opening-brace-on-new-line'}
8834
8835                   # use -sbl flag for a named sub block
8836                   : $block_type !~ /^sub\W*$/
8837                   ? $rOpts->{'opening-sub-brace-on-new-line'}
8838
8839                   # use -asbl flag for an anonymous sub block
8840                   : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
8841
8842                 # Break before an opening '{' ...
8843                 if (
8844
8845                     # if requested
8846                     $want_break
8847
8848                     # and we were unable to start looking for a block,
8849                     && $index_start_one_line_block == UNDEFINED_INDEX
8850
8851                     # or if it will not be on same line as its keyword, so that
8852                     # it will be outdented (eval.t, overload.t), and the user
8853                     # has not insisted on keeping it on the right
8854                     || (   !$keyword_on_same_line
8855                         && !$rOpts->{'opening-brace-always-on-right'} )
8856
8857                   )
8858                 {
8859
8860                     # but only if allowed
8861                     unless ($no_internal_newlines) {
8862
8863                         # since we already stored this token, we must unstore it
8864                         unstore_token_to_go();
8865
8866                         # then output the line
8867                         output_line_to_go();
8868
8869                         # and now store this token at the start of a new line
8870                         store_token_to_go($side_comment_follows);
8871                     }
8872                 }
8873
8874                 # Now update for side comment
8875                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8876
8877                 # now output this line
8878                 unless ($no_internal_newlines) {
8879                     output_line_to_go();
8880                 }
8881             }
8882
8883             elsif ($is_closing_BLOCK) {
8884
8885                 # If there is a pending one-line block ..
8886                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8887
8888                     # we have to terminate it if..
8889                     if (
8890
8891                     # it is too long (final length may be different from
8892                     # initial estimate). note: must allow 1 space for this token
8893                         excess_line_length( $index_start_one_line_block,
8894                             $max_index_to_go ) >= 0
8895
8896                         # or if it has too many semicolons
8897                         || (   $semicolons_before_block_self_destruct == 0
8898                             && $last_nonblank_type ne ';' )
8899                       )
8900                     {
8901                         destroy_one_line_block();
8902                     }
8903                 }
8904
8905                 # put a break before this closing curly brace if appropriate
8906                 unless ( $no_internal_newlines
8907                     || $index_start_one_line_block != UNDEFINED_INDEX )
8908                 {
8909
8910                     # add missing semicolon if ...
8911                     # there are some tokens
8912                     if (
8913                         ( $max_index_to_go > 0 )
8914
8915                         # and we don't have one
8916                         && ( $last_nonblank_type ne ';' )
8917
8918                         # patch until some block type issues are fixed:
8919                         # Do not add semi-colon for block types '{',
8920                         # '}', and ';' because we cannot be sure yet
8921                         # that this is a block and not an anonomyous
8922                         # hash (blktype.t, blktype1.t)
8923                         && ( $block_type !~ /^[\{\};]$/ )
8924
8925                         # patch: and do not add semi-colons for recently
8926                         # added block types (see tmp/semicolon.t)
8927                         && ( $block_type !~ /^(switch|case|given|when|default)$/)
8928                         # it seems best not to add semicolons in these
8929                         # special block types: sort|map|grep
8930                         && ( !$is_sort_map_grep{$block_type} )
8931
8932                         # and we are allowed to do so.
8933                         && $rOpts->{'add-semicolons'}
8934                       )
8935                     {
8936
8937                         save_current_token();
8938                         $token  = ';';
8939                         $type   = ';';
8940                         $level  = $levels_to_go[$max_index_to_go];
8941                         $slevel = $nesting_depth_to_go[$max_index_to_go];
8942                         $nesting_blocks =
8943                           $nesting_blocks_to_go[$max_index_to_go];
8944                         $ci_level       = $ci_levels_to_go[$max_index_to_go];
8945                         $block_type     = "";
8946                         $container_type = "";
8947                         $container_environment = "";
8948                         $type_sequence         = "";
8949
8950                         # Note - we remove any blank AFTER extracting its
8951                         # parameters such as level, etc, above
8952                         if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8953                             unstore_token_to_go();
8954                         }
8955                         store_token_to_go();
8956
8957                         note_added_semicolon();
8958                         restore_current_token();
8959                     }
8960
8961                     # then write out everything before this closing curly brace
8962                     output_line_to_go();
8963
8964                 }
8965
8966                 # Now update for side comment
8967                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8968
8969                 # store the closing curly brace
8970                 store_token_to_go();
8971
8972                 # ok, we just stored a closing curly brace.  Often, but
8973                 # not always, we want to end the line immediately.
8974                 # So now we have to check for special cases.
8975
8976                 # if this '}' successfully ends a one-line block..
8977                 my $is_one_line_block = 0;
8978                 my $keep_going        = 0;
8979                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8980
8981                     # Remember the type of token just before the
8982                     # opening brace.  It would be more general to use
8983                     # a stack, but this will work for one-line blocks.
8984                     $is_one_line_block =
8985                       $types_to_go[$index_start_one_line_block];
8986
8987                     # we have to actually make it by removing tentative
8988                     # breaks that were set within it
8989                     undo_forced_breakpoint_stack(0);
8990                     set_nobreaks( $index_start_one_line_block,
8991                         $max_index_to_go - 1 );
8992
8993                     # then re-initialize for the next one-line block
8994                     destroy_one_line_block();
8995
8996                     # then decide if we want to break after the '}' ..
8997                     # We will keep going to allow certain brace followers as in:
8998                     #   do { $ifclosed = 1; last } unless $losing;
8999                     #
9000                     # But make a line break if the curly ends a
9001                     # significant block:
9002                     if (
9003                         $is_block_without_semicolon{$block_type}
9004
9005                         # if needless semicolon follows we handle it later
9006                         && $next_nonblank_token ne ';'
9007                       )
9008                     {
9009                         output_line_to_go() unless ($no_internal_newlines);
9010                     }
9011                 }
9012
9013                 # set string indicating what we need to look for brace follower
9014                 # tokens
9015                 if ( $block_type eq 'do' ) {
9016                     $rbrace_follower = \%is_do_follower;
9017                 }
9018                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9019                     $rbrace_follower = \%is_if_brace_follower;
9020                 }
9021                 elsif ( $block_type eq 'else' ) {
9022                     $rbrace_follower = \%is_else_brace_follower;
9023                 }
9024
9025                 # added eval for borris.t
9026                 elsif ($is_sort_map_grep_eval{$block_type}
9027                     || $is_one_line_block eq 'G' )
9028                 {
9029                     $rbrace_follower = undef;
9030                     $keep_going      = 1;
9031                 }
9032
9033                 # anonymous sub
9034                 elsif ( $block_type =~ /^sub\W*$/ ) {
9035
9036                     if ($is_one_line_block) {
9037                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
9038                     }
9039                     else {
9040                         $rbrace_follower = \%is_anon_sub_brace_follower;
9041                     }
9042                 }
9043
9044                 # None of the above: specify what can follow a closing
9045                 # brace of a block which is not an
9046                 # if/elsif/else/do/sort/map/grep/eval
9047                 # Testfiles:
9048                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9049                 else {
9050                     $rbrace_follower = \%is_other_brace_follower;
9051                 }
9052
9053                 # See if an elsif block is followed by another elsif or else;
9054                 # complain if not.
9055                 if ( $block_type eq 'elsif' ) {
9056
9057                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
9058                         $looking_for_else = 1;    # ok, check on next line
9059                     }
9060                     else {
9061
9062                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9063                             write_logfile_entry("No else block :(\n");
9064                         }
9065                     }
9066                 }
9067
9068                 # keep going after certain block types (map,sort,grep,eval)
9069                 # added eval for borris.t
9070                 if ($keep_going) {
9071
9072                     # keep going
9073                 }
9074
9075                 # if no more tokens, postpone decision until re-entring
9076                 elsif ( ( $next_nonblank_token_type eq 'b' )
9077                     && $rOpts_add_newlines )
9078                 {
9079                     unless ($rbrace_follower) {
9080                         output_line_to_go() unless ($no_internal_newlines);
9081                     }
9082                 }
9083
9084                 elsif ($rbrace_follower) {
9085
9086                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
9087                         output_line_to_go() unless ($no_internal_newlines);
9088                     }
9089                     $rbrace_follower = undef;
9090                 }
9091
9092                 else {
9093                     output_line_to_go() unless ($no_internal_newlines);
9094                 }
9095
9096             }    # end treatment of closing block token
9097
9098             # handle semicolon
9099             elsif ( $type eq ';' ) {
9100
9101                 # kill one-line blocks with too many semicolons
9102                 $semicolons_before_block_self_destruct--;
9103                 if (
9104                     ( $semicolons_before_block_self_destruct < 0 )
9105                     || (   $semicolons_before_block_self_destruct == 0
9106                         && $next_nonblank_token_type !~ /^[b\}]$/ )
9107                   )
9108                 {
9109                     destroy_one_line_block();
9110                 }
9111
9112                 # Remove unnecessary semicolons, but not after bare
9113                 # blocks, where it could be unsafe if the brace is
9114                 # mistokenized.
9115                 if (
9116                     (
9117                         $last_nonblank_token eq '}'
9118                         && (
9119                             $is_block_without_semicolon{
9120                                 $last_nonblank_block_type}
9121                             || $last_nonblank_block_type =~ /^sub\s+\w/
9122                             || $last_nonblank_block_type =~ /^\w+:$/ )
9123                     )
9124                     || $last_nonblank_type eq ';'
9125                   )
9126                 {
9127
9128                     if (
9129                         $rOpts->{'delete-semicolons'}
9130
9131                         # don't delete ; before a # because it would promote it
9132                         # to a block comment
9133                         && ( $next_nonblank_token_type ne '#' )
9134                       )
9135                     {
9136                         note_deleted_semicolon();
9137                         output_line_to_go()
9138                           unless ( $no_internal_newlines
9139                             || $index_start_one_line_block != UNDEFINED_INDEX );
9140                         next;
9141                     }
9142                     else {
9143                         write_logfile_entry("Extra ';'\n");
9144                     }
9145                 }
9146                 store_token_to_go();
9147
9148                 output_line_to_go()
9149                   unless ( $no_internal_newlines
9150                     || ( $rOpts_keep_interior_semicolons && $j < $jmax )
9151                     || ( $next_nonblank_token eq '}' ) );
9152
9153             }
9154
9155             # handle here_doc target string
9156             elsif ( $type eq 'h' ) {
9157                 $no_internal_newlines =
9158                   1;    # no newlines after seeing here-target
9159                 destroy_one_line_block();
9160                 store_token_to_go();
9161             }
9162
9163             # handle all other token types
9164             else {
9165
9166                 # if this is a blank...
9167                 if ( $type eq 'b' ) {
9168
9169                     # make it just one character
9170                     $token = ' ' if $rOpts_add_whitespace;
9171
9172                     # delete it if unwanted by whitespace rules
9173                     # or we are deleting all whitespace
9174                     my $ws = $$rwhite_space_flag[ $j + 1 ];
9175                     if ( ( defined($ws) && $ws == -1 )
9176                         || $rOpts_delete_old_whitespace )
9177                     {
9178
9179                         # unless it might make a syntax error
9180                         next
9181                           unless is_essential_whitespace(
9182                             $last_last_nonblank_token,
9183                             $last_last_nonblank_type,
9184                             $tokens_to_go[$max_index_to_go],
9185                             $types_to_go[$max_index_to_go],
9186                             $$rtokens[ $j + 1 ],
9187                             $$rtoken_type[ $j + 1 ]
9188                           );
9189                     }
9190                 }
9191                 store_token_to_go();
9192             }
9193
9194             # remember two previous nonblank OUTPUT tokens
9195             if ( $type ne '#' && $type ne 'b' ) {
9196                 $last_last_nonblank_token = $last_nonblank_token;
9197                 $last_last_nonblank_type  = $last_nonblank_type;
9198                 $last_nonblank_token      = $token;
9199                 $last_nonblank_type       = $type;
9200                 $last_nonblank_block_type = $block_type;
9201             }
9202
9203             # unset the continued-quote flag since it only applies to the
9204             # first token, and we want to resume normal formatting if
9205             # there are additional tokens on the line
9206             $in_continued_quote = 0;
9207
9208         }    # end of loop over all tokens in this 'line_of_tokens'
9209
9210         # we have to flush ..
9211         if (
9212
9213             # if there is a side comment
9214             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9215
9216             # if this line ends in a quote
9217             # NOTE: This is critically important for insuring that quoted lines
9218             # do not get processed by things like -sot and -sct
9219             || $in_quote
9220
9221             # if this is a VERSION statement
9222             || $is_VERSION_statement
9223
9224             # to keep a label on one line if that is how it is now
9225             || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9226
9227             # if we are instructed to keep all old line breaks
9228             || !$rOpts->{'delete-old-newlines'}
9229           )
9230         {
9231             destroy_one_line_block();
9232             output_line_to_go();
9233         }
9234
9235         # mark old line breakpoints in current output stream
9236         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9237             $old_breakpoint_to_go[$max_index_to_go] = 1;
9238         }
9239     }    # end sub print_line_of_tokens
9240 }    # end print_line_of_tokens
9241
9242 # sub output_line_to_go sends one logical line of tokens on down the
9243 # pipeline to the VerticalAligner package, breaking the line into continuation
9244 # lines as necessary.  The line of tokens is ready to go in the "to_go"
9245 # arrays.
9246 sub output_line_to_go {
9247
9248     # debug stuff; this routine can be called from many points
9249     FORMATTER_DEBUG_FLAG_OUTPUT && do {
9250         my ( $a, $b, $c ) = caller;
9251         write_diagnostics(
9252 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9253         );
9254         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9255         write_diagnostics("$output_str\n");
9256     };
9257
9258     # just set a tentative breakpoint if we might be in a one-line block
9259     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9260         set_forced_breakpoint($max_index_to_go);
9261         return;
9262     }
9263
9264     my $cscw_block_comment;
9265     $cscw_block_comment = add_closing_side_comment()
9266       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9267
9268     match_opening_and_closing_tokens();
9269
9270     # tell the -lp option we are outputting a batch so it can close
9271     # any unfinished items in its stack
9272     finish_lp_batch();
9273
9274     # If this line ends in a code block brace, set breaks at any
9275     # previous closing code block braces to breakup a chain of code
9276     # blocks on one line.  This is very rare but can happen for
9277     # user-defined subs.  For example we might be looking at this:
9278     #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9279     my $saw_good_break = 0;    # flag to force breaks even if short line
9280     if (
9281
9282         # looking for opening or closing block brace
9283         $block_type_to_go[$max_index_to_go]
9284
9285         # but not one of these which are never duplicated on a line:
9286         # until|while|for|if|elsif|else
9287         && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9288       )
9289     {
9290         my $lev = $nesting_depth_to_go[$max_index_to_go];
9291
9292         # Walk backwards from the end and
9293         # set break at any closing block braces at the same level.
9294         # But quit if we are not in a chain of blocks.
9295         for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9296             last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
9297             next if ( $levels_to_go[$i] > $lev );    # skip past higher level
9298
9299             if ( $block_type_to_go[$i] ) {
9300                 if ( $tokens_to_go[$i] eq '}' ) {
9301                     set_forced_breakpoint($i);
9302                     $saw_good_break = 1;
9303                 }
9304             }
9305
9306             # quit if we see anything besides words, function, blanks
9307             # at this level
9308             elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
9309         }
9310     }
9311
9312     my $imin = 0;
9313     my $imax = $max_index_to_go;
9314
9315     # trim any blank tokens
9316     if ( $max_index_to_go >= 0 ) {
9317         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9318         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9319     }
9320
9321     # anything left to write?
9322     if ( $imin <= $imax ) {
9323
9324         # add a blank line before certain key types
9325         if ( $last_line_leading_type !~ /^[#b]/ ) {
9326             my $want_blank    = 0;
9327             my $leading_token = $tokens_to_go[$imin];
9328             my $leading_type  = $types_to_go[$imin];
9329
9330             # blank lines before subs except declarations and one-liners
9331             # MCONVERSION LOCATION - for sub tokenization change
9332             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9333                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9334                   && (
9335                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9336                         $imax ) !~ /^[\;\}]$/
9337                   );
9338             }
9339
9340             # break before all package declarations
9341             # MCONVERSION LOCATION - for tokenizaton change
9342             elsif ($leading_token =~ /^(package\s)/
9343                 && $leading_type eq 'i' )
9344             {
9345                 $want_blank = ( $rOpts->{'blanks-before-subs'} );
9346             }
9347
9348             # break before certain key blocks except one-liners
9349             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9350                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
9351                   && (
9352                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9353                         $imax ) ne '}'
9354                   );
9355             }
9356
9357             # Break before certain block types if we haven't had a
9358             # break at this level for a while.  This is the
9359             # difficult decision..
9360             elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9361                 && $leading_type eq 'k' )
9362             {
9363                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9364                 if ( !defined($lc) ) { $lc = 0 }
9365
9366                 $want_blank =
9367                      $rOpts->{'blanks-before-blocks'}
9368                   && $lc >= $rOpts->{'long-block-line-count'}
9369                   && $file_writer_object->get_consecutive_nonblank_lines() >=
9370                   $rOpts->{'long-block-line-count'}
9371                   && (
9372                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9373                         $imax ) ne '}'
9374                   );
9375             }
9376
9377             if ($want_blank) {
9378
9379                 # future: send blank line down normal path to VerticalAligner
9380                 Perl::Tidy::VerticalAligner::flush();
9381                 $file_writer_object->write_blank_code_line();
9382             }
9383         }
9384
9385         # update blank line variables and count number of consecutive
9386         # non-blank, non-comment lines at this level
9387         $last_last_line_leading_level = $last_line_leading_level;
9388         $last_line_leading_level      = $levels_to_go[$imin];
9389         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9390         $last_line_leading_type = $types_to_go[$imin];
9391         if (   $last_line_leading_level == $last_last_line_leading_level
9392             && $last_line_leading_type ne 'b'
9393             && $last_line_leading_type ne '#'
9394             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9395         {
9396             $nonblank_lines_at_depth[$last_line_leading_level]++;
9397         }
9398         else {
9399             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
9400         }
9401
9402         FORMATTER_DEBUG_FLAG_FLUSH && do {
9403             my ( $package, $file, $line ) = caller;
9404             print
9405 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9406         };
9407
9408         # add a couple of extra terminal blank tokens
9409         pad_array_to_go();
9410
9411         # set all forced breakpoints for good list formatting
9412         my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9413
9414         if (
9415             $max_index_to_go > 0
9416             && (
9417                    $is_long_line
9418                 || $old_line_count_in_batch > 1
9419                 || is_unbalanced_batch()
9420                 || (
9421                     $comma_count_in_batch
9422                     && (   $rOpts_maximum_fields_per_table > 0
9423                         || $rOpts_comma_arrow_breakpoints == 0 )
9424                 )
9425             )
9426           )
9427         {
9428             $saw_good_break ||= scan_list();
9429         }
9430
9431         # let $ri_first and $ri_last be references to lists of
9432         # first and last tokens of line fragments to output..
9433         my ( $ri_first, $ri_last );
9434
9435         # write a single line if..
9436         if (
9437
9438             # we aren't allowed to add any newlines
9439             !$rOpts_add_newlines
9440
9441             # or, we don't already have an interior breakpoint
9442             # and we didn't see a good breakpoint
9443             || (
9444                    !$forced_breakpoint_count
9445                 && !$saw_good_break
9446
9447                 # and this line is 'short'
9448                 && !$is_long_line
9449             )
9450           )
9451         {
9452             @$ri_first = ($imin);
9453             @$ri_last  = ($imax);
9454         }
9455
9456         # otherwise use multiple lines
9457         else {
9458
9459             ( $ri_first, $ri_last, my $colon_count ) =
9460               set_continuation_breaks($saw_good_break);
9461
9462             break_all_chain_tokens( $ri_first, $ri_last );
9463
9464             break_equals( $ri_first, $ri_last );
9465
9466             # now we do a correction step to clean this up a bit
9467             # (The only time we would not do this is for debugging)
9468             if ( $rOpts->{'recombine'} ) {
9469                 ( $ri_first, $ri_last ) =
9470                   recombine_breakpoints( $ri_first, $ri_last );
9471             }
9472
9473             insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9474         }
9475
9476         # do corrector step if -lp option is used
9477         my $do_not_pad = 0;
9478         if ($rOpts_line_up_parentheses) {
9479             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9480         }
9481         send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9482     }
9483     prepare_for_new_input_lines();
9484
9485     # output any new -cscw block comment
9486     if ($cscw_block_comment) {
9487         flush();
9488         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9489     }
9490 }
9491
9492 sub note_added_semicolon {
9493     $last_added_semicolon_at = $input_line_number;
9494     if ( $added_semicolon_count == 0 ) {
9495         $first_added_semicolon_at = $last_added_semicolon_at;
9496     }
9497     $added_semicolon_count++;
9498     write_logfile_entry("Added ';' here\n");
9499 }
9500
9501 sub note_deleted_semicolon {
9502     $last_deleted_semicolon_at = $input_line_number;
9503     if ( $deleted_semicolon_count == 0 ) {
9504         $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9505     }
9506     $deleted_semicolon_count++;
9507     write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
9508 }
9509
9510 sub note_embedded_tab {
9511     $embedded_tab_count++;
9512     $last_embedded_tab_at = $input_line_number;
9513     if ( !$first_embedded_tab_at ) {
9514         $first_embedded_tab_at = $last_embedded_tab_at;
9515     }
9516
9517     if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
9518         write_logfile_entry("Embedded tabs in quote or pattern\n");
9519     }
9520 }
9521
9522 sub starting_one_line_block {
9523
9524     # after seeing an opening curly brace, look for the closing brace
9525     # and see if the entire block will fit on a line.  This routine is
9526     # not always right because it uses the old whitespace, so a check
9527     # is made later (at the closing brace) to make sure we really
9528     # have a one-line block.  We have to do this preliminary check,
9529     # though, because otherwise we would always break at a semicolon
9530     # within a one-line block if the block contains multiple statements.
9531
9532     my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9533         $rblock_type )
9534       = @_;
9535
9536     # kill any current block - we can only go 1 deep
9537     destroy_one_line_block();
9538
9539     # return value:
9540     #  1=distance from start of block to opening brace exceeds line length
9541     #  0=otherwise
9542
9543     my $i_start = 0;
9544
9545     # shouldn't happen: there must have been a prior call to
9546     # store_token_to_go to put the opening brace in the output stream
9547     if ( $max_index_to_go < 0 ) {
9548         warning("program bug: store_token_to_go called incorrectly\n");
9549         report_definite_bug();
9550     }
9551     else {
9552
9553         # cannot use one-line blocks with cuddled else else/elsif lines
9554         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9555             return 0;
9556         }
9557     }
9558
9559     my $block_type = $$rblock_type[$j];
9560
9561     # find the starting keyword for this block (such as 'if', 'else', ...)
9562
9563     if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9564         $i_start = $max_index_to_go;
9565     }
9566
9567     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9568
9569         # For something like "if (xxx) {", the keyword "if" will be
9570         # just after the most recent break. This will be 0 unless
9571         # we have just killed a one-line block and are starting another.
9572         # (doif.t)
9573         $i_start = $index_max_forced_break + 1;
9574         if ( $types_to_go[$i_start] eq 'b' ) {
9575             $i_start++;
9576         }
9577
9578         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9579             return 0;
9580         }
9581     }
9582
9583     # the previous nonblank token should start these block types
9584     elsif (
9585         ( $last_last_nonblank_token_to_go eq $block_type )
9586         || (   $block_type =~ /^sub/
9587             && $last_last_nonblank_token_to_go =~ /^sub/ )
9588       )
9589     {
9590         $i_start = $last_last_nonblank_index_to_go;
9591     }
9592
9593     # patch for SWITCH/CASE to retain one-line case/when blocks
9594     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9595         $i_start = $index_max_forced_break + 1;
9596         if ( $types_to_go[$i_start] eq 'b' ) {
9597             $i_start++;
9598         }
9599         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9600             return 0;
9601         }
9602     }
9603
9604     else {
9605         return 1;
9606     }
9607
9608     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9609
9610     my $i;
9611
9612     # see if length is too long to even start
9613     if ( $pos > $rOpts_maximum_line_length ) {
9614         return 1;
9615     }
9616
9617     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9618
9619         # old whitespace could be arbitrarily large, so don't use it
9620         if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9621         else                              { $pos += length( $$rtokens[$i] ) }
9622
9623         # Return false result if we exceed the maximum line length,
9624         if ( $pos > $rOpts_maximum_line_length ) {
9625             return 0;
9626         }
9627
9628         # or encounter another opening brace before finding the closing brace.
9629         elsif ($$rtokens[$i] eq '{'
9630             && $$rtoken_type[$i] eq '{'
9631             && $$rblock_type[$i] )
9632         {
9633             return 0;
9634         }
9635
9636         # if we find our closing brace..
9637         elsif ($$rtokens[$i] eq '}'
9638             && $$rtoken_type[$i] eq '}'
9639             && $$rblock_type[$i] )
9640         {
9641
9642             # be sure any trailing comment also fits on the line
9643             my $i_nonblank =
9644               ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9645
9646             if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9647                 $pos += length( $$rtokens[$i_nonblank] );
9648
9649                 if ( $i_nonblank > $i + 1 ) {
9650                     $pos += length( $$rtokens[ $i + 1 ] );
9651                 }
9652
9653                 if ( $pos > $rOpts_maximum_line_length ) {
9654                     return 0;
9655                 }
9656             }
9657
9658             # ok, it's a one-line block
9659             create_one_line_block( $i_start, 20 );
9660             return 0;
9661         }
9662
9663         # just keep going for other characters
9664         else {
9665         }
9666     }
9667
9668     # Allow certain types of new one-line blocks to form by joining
9669     # input lines.  These can be safely done, but for other block types,
9670     # we keep old one-line blocks but do not form new ones. It is not
9671     # always a good idea to make as many one-line blocks as possible,
9672     # so other types are not done.  The user can always use -mangle.
9673     if ( $is_sort_map_grep_eval{$block_type} ) {
9674         create_one_line_block( $i_start, 1 );
9675     }
9676
9677     return 0;
9678 }
9679
9680 sub unstore_token_to_go {
9681
9682     # remove most recent token from output stream
9683     if ( $max_index_to_go > 0 ) {
9684         $max_index_to_go--;
9685     }
9686     else {
9687         $max_index_to_go = UNDEFINED_INDEX;
9688     }
9689
9690 }
9691
9692 sub want_blank_line {
9693     flush();
9694     $file_writer_object->want_blank_line();
9695 }
9696
9697 sub write_unindented_line {
9698     flush();
9699     $file_writer_object->write_line( $_[0] );
9700 }
9701
9702 sub undo_ci {
9703
9704     # Undo continuation indentation in certain sequences
9705     # For example, we can undo continuation indation in sort/map/grep chains
9706     #    my $dat1 = pack( "n*",
9707     #        map { $_, $lookup->{$_} }
9708     #          sort { $a <=> $b }
9709     #          grep { $lookup->{$_} ne $default } keys %$lookup );
9710     # To align the map/sort/grep keywords like this:
9711     #    my $dat1 = pack( "n*",
9712     #        map { $_, $lookup->{$_} }
9713     #        sort { $a <=> $b }
9714     #        grep { $lookup->{$_} ne $default } keys %$lookup );
9715     my ( $ri_first, $ri_last ) = @_;
9716     my ( $line_1, $line_2, $lev_last );
9717     my $this_line_is_semicolon_terminated;
9718     my $max_line = @$ri_first - 1;
9719
9720     # looking at each line of this batch..
9721     # We are looking at leading tokens and looking for a sequence
9722     # all at the same level and higher level than enclosing lines.
9723     foreach my $line ( 0 .. $max_line ) {
9724
9725         my $ibeg = $$ri_first[$line];
9726         my $lev  = $levels_to_go[$ibeg];
9727         if ( $line > 0 ) {
9728
9729             # if we have started a chain..
9730             if ($line_1) {
9731
9732                 # see if it continues..
9733                 if ( $lev == $lev_last ) {
9734                     if (   $types_to_go[$ibeg] eq 'k'
9735                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9736                     {
9737
9738                         # chain continues...
9739                         # check for chain ending at end of a a statement
9740                         if ( $line == $max_line ) {
9741
9742                             # see of this line ends a statement
9743                             my $iend = $$ri_last[$line];
9744                             $this_line_is_semicolon_terminated =
9745                               $types_to_go[$iend] eq ';'
9746
9747                               # with possible side comment
9748                               || ( $types_to_go[$iend] eq '#'
9749                                 && $iend - $ibeg >= 2
9750                                 && $types_to_go[ $iend - 2 ] eq ';'
9751                                 && $types_to_go[ $iend - 1 ] eq 'b' );
9752                         }
9753                         $line_2 = $line if ($this_line_is_semicolon_terminated);
9754                     }
9755                     else {
9756
9757                         # kill chain
9758                         $line_1 = undef;
9759                     }
9760                 }
9761                 elsif ( $lev < $lev_last ) {
9762
9763                     # chain ends with previous line
9764                     $line_2 = $line - 1;
9765                 }
9766                 elsif ( $lev > $lev_last ) {
9767
9768                     # kill chain
9769                     $line_1 = undef;
9770                 }
9771
9772                 # undo the continuation indentation if a chain ends
9773                 if ( defined($line_2) && defined($line_1) ) {
9774                     my $continuation_line_count = $line_2 - $line_1 + 1;
9775                     @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9776                       (0) x ($continuation_line_count);
9777                     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
9778                       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
9779                     $line_1 = undef;
9780                 }
9781             }
9782
9783             # not in a chain yet..
9784             else {
9785
9786                 # look for start of a new sort/map/grep chain
9787                 if ( $lev > $lev_last ) {
9788                     if (   $types_to_go[$ibeg] eq 'k'
9789                         && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
9790                     {
9791                         $line_1 = $line;
9792                     }
9793                 }
9794             }
9795         }
9796         $lev_last = $lev;
9797     }
9798 }
9799
9800 sub undo_lp_ci {
9801
9802     # If there is a single, long parameter within parens, like this:
9803     #
9804     #  $self->command( "/msg "
9805     #        . $infoline->chan
9806     #        . " You said $1, but did you know that it's square was "
9807     #        . $1 * $1 . " ?" );
9808     #
9809     # we can remove the continuation indentation of the 2nd and higher lines
9810     # to achieve this effect, which is more pleasing:
9811     #
9812     #  $self->command("/msg "
9813     #                 . $infoline->chan
9814     #                 . " You said $1, but did you know that it's square was "
9815     #                 . $1 * $1 . " ?");
9816
9817     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9818     my $max_line = @$ri_first - 1;
9819
9820     # must be multiple lines
9821     return unless $max_line > $line_open;
9822
9823     my $lev_start     = $levels_to_go[$i_start];
9824     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9825
9826     # see if all additional lines in this container have continuation
9827     # indentation
9828     my $n;
9829     my $line_1 = 1 + $line_open;
9830     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9831         my $ibeg = $$ri_first[$n];
9832         my $iend = $$ri_last[$n];
9833         if ( $ibeg eq $closing_index ) { $n--; last }
9834         return if ( $lev_start != $levels_to_go[$ibeg] );
9835         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
9836         last   if ( $closing_index <= $iend );
9837     }
9838
9839     # we can reduce the indentation of all continuation lines
9840     my $continuation_line_count = $n - $line_open;
9841     @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9842       (0) x ($continuation_line_count);
9843     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9844       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
9845 }
9846
9847 sub set_logical_padding {
9848
9849     # Look at a batch of lines and see if extra padding can improve the
9850     # alignment when there are certain leading operators. Here is an
9851     # example, in which some extra space is introduced before
9852     # '( $year' to make it line up with the subsequent lines:
9853     #
9854     #       if (   ( $Year < 1601 )
9855     #           || ( $Year > 2899 )
9856     #           || ( $EndYear < 1601 )
9857     #           || ( $EndYear > 2899 ) )
9858     #       {
9859     #           &Error_OutOfRange;
9860     #       }
9861     #
9862     my ( $ri_first, $ri_last ) = @_;
9863     my $max_line = @$ri_first - 1;
9864
9865     my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
9866         $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
9867
9868     # looking at each line of this batch..
9869     foreach $line ( 0 .. $max_line - 1 ) {
9870
9871         # see if the next line begins with a logical operator
9872         $ibeg      = $$ri_first[$line];
9873         $iend      = $$ri_last[$line];
9874         $ibeg_next = $$ri_first[ $line + 1 ];
9875         $tok_next  = $tokens_to_go[$ibeg_next];
9876         $type_next = $types_to_go[$ibeg_next];
9877
9878         $has_leading_op_next = ( $tok_next =~ /^\w/ )
9879           ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
9880           : $is_chain_operator{$type_next};    # and, or
9881
9882         next unless ($has_leading_op_next);
9883
9884         # next line must not be at lesser depth
9885         next
9886           if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
9887
9888         # identify the token in this line to be padded on the left
9889         $ipad = undef;
9890
9891         # handle lines at same depth...
9892         if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
9893
9894             # if this is not first line of the batch ...
9895             if ( $line > 0 ) {
9896
9897                 # and we have leading operator..
9898                 next if $has_leading_op;
9899
9900                 # Introduce padding if..
9901                 # 1. the previous line is at lesser depth, or
9902                 # 2. the previous line ends in an assignment
9903                 # 3. the previous line ends in a 'return'
9904                 # 4. the previous line ends in a comma
9905                 # Example 1: previous line at lesser depth
9906                 #       if (   ( $Year < 1601 )      # <- we are here but
9907                 #           || ( $Year > 2899 )      #  list has not yet
9908                 #           || ( $EndYear < 1601 )   # collapsed vertically
9909                 #           || ( $EndYear > 2899 ) )
9910                 #       {
9911                 #
9912                 # Example 2: previous line ending in assignment:
9913                 #    $leapyear =
9914                 #        $year % 4   ? 0     # <- We are here
9915                 #      : $year % 100 ? 1
9916                 #      : $year % 400 ? 0
9917                 #      : 1;
9918                 #
9919                 # Example 3: previous line ending in comma:
9920                 #    push @expr,
9921                 #        /test/   ? undef
9922                 #      : eval($_) ? 1
9923                 #      : eval($_) ? 1
9924                 #      :            0;
9925
9926                 # be sure levels agree (do not indent after an indented 'if')
9927                 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
9928
9929                 # allow padding on first line after a comma but only if:
9930                 # (1) this is line 2 and
9931                 # (2) there are at more than three lines and
9932                 # (3) lines 3 and 4 have the same leading operator
9933                 # These rules try to prevent padding within a long
9934                 # comma-separated list.
9935                 my $ok_comma;
9936                 if (   $types_to_go[$iendm] eq ','
9937                     && $line == 1
9938                     && $max_line > 2 )
9939                 {
9940                     my $ibeg_next_next = $$ri_first[ $line + 2 ];
9941                     my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
9942                     $ok_comma = $tok_next_next eq $tok_next;
9943                 }
9944
9945                 next
9946                   unless (
9947                        $is_assignment{ $types_to_go[$iendm] }
9948                     || $ok_comma
9949                     || ( $nesting_depth_to_go[$ibegm] <
9950                         $nesting_depth_to_go[$ibeg] )
9951                     || (   $types_to_go[$iendm] eq 'k'
9952                         && $tokens_to_go[$iendm] eq 'return' )
9953                   );
9954
9955                 # we will add padding before the first token
9956                 $ipad = $ibeg;
9957             }
9958
9959             # for first line of the batch..
9960             else {
9961
9962                 # WARNING: Never indent if first line is starting in a
9963                 # continued quote, which would change the quote.
9964                 next if $starting_in_quote;
9965
9966                 # if this is text after closing '}'
9967                 # then look for an interior token to pad
9968                 if ( $types_to_go[$ibeg] eq '}' ) {
9969
9970                 }
9971
9972                 # otherwise, we might pad if it looks really good
9973                 else {
9974
9975                     # we might pad token $ibeg, so be sure that it
9976                     # is at the same depth as the next line.
9977                     next
9978                       if ( $nesting_depth_to_go[$ibeg] !=
9979                         $nesting_depth_to_go[$ibeg_next] );
9980
9981                     # We can pad on line 1 of a statement if at least 3
9982                     # lines will be aligned. Otherwise, it
9983                     # can look very confusing.
9984
9985                  # We have to be careful not to pad if there are too few
9986                  # lines.  The current rule is:
9987                  # (1) in general we require at least 3 consecutive lines
9988                  # with the same leading chain operator token,
9989                  # (2) but an exception is that we only require two lines
9990                  # with leading colons if there are no more lines.  For example,
9991                  # the first $i in the following snippet would get padding
9992                  # by the second rule:
9993                  #
9994                  #   $i == 1 ? ( "First", "Color" )
9995                  # : $i == 2 ? ( "Then",  "Rarity" )
9996                  # :           ( "Then",  "Name" );
9997
9998                     if ( $max_line > 1 ) {
9999                         my $leading_token = $tokens_to_go[$ibeg_next];
10000                         my $tokens_differ;
10001
10002                         # never indent line 1 of a '.' series because
10003                         # previous line is most likely at same level.
10004                         # TODO: we should also look at the leasing_spaces
10005                         # of the last output line and skip if it is same
10006                         # as this line.
10007                         next if ( $leading_token eq '.' );
10008
10009                         my $count = 1;
10010                         foreach my $l ( 2 .. 3 ) {
10011                             last if ( $line + $l > $max_line );
10012                             my $ibeg_next_next = $$ri_first[ $line + $l ];
10013                             if ( $tokens_to_go[$ibeg_next_next] ne
10014                                 $leading_token )
10015                             {
10016                                 $tokens_differ = 1;
10017                                 last;
10018                             }
10019                             $count++;
10020                         }
10021                         next if ($tokens_differ);
10022                         next if ( $count < 3 && $leading_token ne ':' );
10023                         $ipad = $ibeg;
10024                     }
10025                     else {
10026                         next;
10027                     }
10028                 }
10029             }
10030         }
10031
10032         # find interior token to pad if necessary
10033         if ( !defined($ipad) ) {
10034
10035             for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
10036
10037                 # find any unclosed container
10038                 next
10039                   unless ( $type_sequence_to_go[$i]
10040                     && $mate_index_to_go[$i] > $iend );
10041
10042                 # find next nonblank token to pad
10043                 $ipad = $i + 1;
10044                 if ( $types_to_go[$ipad] eq 'b' ) {
10045                     $ipad++;
10046                     last if ( $ipad > $iend );
10047                 }
10048             }
10049             last unless $ipad;
10050         }
10051
10052         # next line must not be at greater depth
10053         my $iend_next = $$ri_last[ $line + 1 ];
10054         next
10055           if ( $nesting_depth_to_go[ $iend_next + 1 ] >
10056             $nesting_depth_to_go[$ipad] );
10057
10058         # lines must be somewhat similar to be padded..
10059         my $inext_next = $ibeg_next + 1;
10060         if ( $types_to_go[$inext_next] eq 'b' ) {
10061             $inext_next++;
10062         }
10063         my $type      = $types_to_go[$ipad];
10064         my $type_next = $types_to_go[ $ipad + 1 ];
10065
10066         # see if there are multiple continuation lines
10067         my $logical_continuation_lines = 1;
10068         if ( $line + 2 <= $max_line ) {
10069             my $leading_token  = $tokens_to_go[$ibeg_next];
10070             my $ibeg_next_next = $$ri_first[ $line + 2 ];
10071             if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
10072                 && $nesting_depth_to_go[$ibeg_next] eq
10073                 $nesting_depth_to_go[$ibeg_next_next] )
10074             {
10075                 $logical_continuation_lines++;
10076             }
10077         }
10078
10079         # see if leading types match
10080         my $types_match = $types_to_go[$inext_next] eq $type;
10081         my $matches_without_bang;
10082
10083         # if first line has leading ! then compare the following token
10084         if ( !$types_match && $type eq '!' ) {
10085             $types_match = $matches_without_bang =
10086               $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
10087         }
10088
10089         if (
10090
10091             # either we have multiple continuation lines to follow
10092             # and we are not padding the first token
10093             ( $logical_continuation_lines > 1 && $ipad > 0 )
10094
10095             # or..
10096             || (
10097
10098                 # types must match
10099                 $types_match
10100
10101                 # and keywords must match if keyword
10102                 && !(
10103                        $type eq 'k'
10104                     && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
10105                 )
10106             )
10107           )
10108         {
10109
10110             #----------------------begin special checks--------------
10111             #
10112             # SPECIAL CHECK 1:
10113             # A check is needed before we can make the pad.
10114             # If we are in a list with some long items, we want each
10115             # item to stand out.  So in the following example, the
10116             # first line begining with '$casefold->' would look good
10117             # padded to align with the next line, but then it
10118             # would be indented more than the last line, so we
10119             # won't do it.
10120             #
10121             #  ok(
10122             #      $casefold->{code}         eq '0041'
10123             #        && $casefold->{status}  eq 'C'
10124             #        && $casefold->{mapping} eq '0061',
10125             #      'casefold 0x41'
10126             #  );
10127             #
10128             # Note:
10129             # It would be faster, and almost as good, to use a comma
10130             # count, and not pad if comma_count > 1 and the previous
10131             # line did not end with a comma.
10132             #
10133             my $ok_to_pad = 1;
10134
10135             my $ibg   = $$ri_first[ $line + 1 ];
10136             my $depth = $nesting_depth_to_go[ $ibg + 1 ];
10137
10138             # just use simplified formula for leading spaces to avoid
10139             # needless sub calls
10140             my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
10141
10142             # look at each line beyond the next ..
10143             my $l = $line + 1;
10144             foreach $l ( $line + 2 .. $max_line ) {
10145                 my $ibg = $$ri_first[$l];
10146
10147                 # quit looking at the end of this container
10148                 last
10149                   if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
10150                   || ( $nesting_depth_to_go[$ibg] < $depth );
10151
10152                 # cannot do the pad if a later line would be
10153                 # outdented more
10154                 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
10155                     $ok_to_pad = 0;
10156                     last;
10157                 }
10158             }
10159
10160             # don't pad if we end in a broken list
10161             if ( $l == $max_line ) {
10162                 my $i2 = $$ri_last[$l];
10163                 if ( $types_to_go[$i2] eq '#' ) {
10164                     my $i1 = $$ri_first[$l];
10165                     next
10166                       if (
10167                         terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10168                             $i2 ) eq ','
10169                       );
10170                 }
10171             }
10172
10173             # SPECIAL CHECK 2:
10174             # a minus may introduce a quoted variable, and we will
10175             # add the pad only if this line begins with a bare word,
10176             # such as for the word 'Button' here:
10177             #    [
10178             #         Button      => "Print letter \"~$_\"",
10179             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10180             #        -accelerator => "Meta+$_"
10181             #    ];
10182             #
10183             #  On the other hand, if 'Button' is quoted, it looks best
10184             #  not to pad:
10185             #    [
10186             #        'Button'     => "Print letter \"~$_\"",
10187             #        -command     => [ sub { print "$_[0]\n" }, $_ ],
10188             #        -accelerator => "Meta+$_"
10189             #    ];
10190             if ( $types_to_go[$ibeg_next] eq 'm' ) {
10191                 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10192             }
10193
10194             next unless $ok_to_pad;
10195
10196             #----------------------end special check---------------
10197
10198             my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
10199             my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10200             $pad_spaces = $length_2 - $length_1;
10201
10202             # If the first line has a leading ! and the second does
10203             # not, then remove one space to try to align the next
10204             # leading characters, which are often the same.  For example:
10205             #  if (  !$ts
10206             #      || $ts == $self->Holder
10207             #      || $self->Holder->Type eq "Arena" )
10208             #
10209             # This usually helps readability, but if there are subsequent
10210             # ! operators things will still get messed up.  For example:
10211             #
10212             #  if (  !exists $Net::DNS::typesbyname{$qtype}
10213             #      && exists $Net::DNS::classesbyname{$qtype}
10214             #      && !exists $Net::DNS::classesbyname{$qclass}
10215             #      && exists $Net::DNS::typesbyname{$qclass} )
10216             # We can't fix that.
10217             if ($matches_without_bang) { $pad_spaces-- }
10218
10219             # make sure this won't change if -lp is used
10220             my $indentation_1 = $leading_spaces_to_go[$ibeg];
10221             if ( ref($indentation_1) ) {
10222                 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10223                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10224                     unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10225                         $pad_spaces = 0;
10226                     }
10227                 }
10228             }
10229
10230             # we might be able to handle a pad of -1 by removing a blank
10231             # token
10232             if ( $pad_spaces < 0 ) {
10233
10234                 if ( $pad_spaces == -1 ) {
10235                     if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10236                         $tokens_to_go[ $ipad - 1 ] = '';
10237                     }
10238                 }
10239                 $pad_spaces = 0;
10240             }
10241
10242             # now apply any padding for alignment
10243             if ( $ipad >= 0 && $pad_spaces ) {
10244
10245                 my $length_t = total_line_length( $ibeg, $iend );
10246                 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10247                     $tokens_to_go[$ipad] =
10248                       ' ' x $pad_spaces . $tokens_to_go[$ipad];
10249                 }
10250             }
10251         }
10252     }
10253     continue {
10254         $iendm          = $iend;
10255         $ibegm          = $ibeg;
10256         $has_leading_op = $has_leading_op_next;
10257     }    # end of loop over lines
10258     return;
10259 }
10260
10261 sub correct_lp_indentation {
10262
10263     # When the -lp option is used, we need to make a last pass through
10264     # each line to correct the indentation positions in case they differ
10265     # from the predictions.  This is necessary because perltidy uses a
10266     # predictor/corrector method for aligning with opening parens.  The
10267     # predictor is usually good, but sometimes stumbles.  The corrector
10268     # tries to patch things up once the actual opening paren locations
10269     # are known.
10270     my ( $ri_first, $ri_last ) = @_;
10271     my $do_not_pad = 0;
10272
10273     #  Note on flag '$do_not_pad':
10274     #  We want to avoid a situation like this, where the aligner inserts
10275     #  whitespace before the '=' to align it with a previous '=', because
10276     #  otherwise the parens might become mis-aligned in a situation like
10277     #  this, where the '=' has become aligned with the previous line,
10278     #  pushing the opening '(' forward beyond where we want it.
10279     #
10280     #  $mkFloor::currentRoom = '';
10281     #  $mkFloor::c_entry     = $c->Entry(
10282     #                                 -width        => '10',
10283     #                                 -relief       => 'sunken',
10284     #                                 ...
10285     #                                 );
10286     #
10287     #  We leave it to the aligner to decide how to do this.
10288
10289     # first remove continuation indentation if appropriate
10290     my $max_line = @$ri_first - 1;
10291
10292     # looking at each line of this batch..
10293     my ( $ibeg, $iend );
10294     my $line;
10295     foreach $line ( 0 .. $max_line ) {
10296         $ibeg = $$ri_first[$line];
10297         $iend = $$ri_last[$line];
10298
10299         # looking at each token in this output line..
10300         my $i;
10301         foreach $i ( $ibeg .. $iend ) {
10302
10303             # How many space characters to place before this token
10304             # for special alignment.  Actual padding is done in the
10305             # continue block.
10306
10307             # looking for next unvisited indentation item
10308             my $indentation = $leading_spaces_to_go[$i];
10309             if ( !$indentation->get_MARKED() ) {
10310                 $indentation->set_MARKED(1);
10311
10312                 # looking for indentation item for which we are aligning
10313                 # with parens, braces, and brackets
10314                 next unless ( $indentation->get_ALIGN_PAREN() );
10315
10316                 # skip closed container on this line
10317                 if ( $i > $ibeg ) {
10318                     my $im = $i - 1;
10319                     if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
10320                     if (   $type_sequence_to_go[$im]
10321                         && $mate_index_to_go[$im] <= $iend )
10322                     {
10323                         next;
10324                     }
10325                 }
10326
10327                 if ( $line == 1 && $i == $ibeg ) {
10328                     $do_not_pad = 1;
10329                 }
10330
10331                 # Ok, let's see what the error is and try to fix it
10332                 my $actual_pos;
10333                 my $predicted_pos = $indentation->get_SPACES();
10334                 if ( $i > $ibeg ) {
10335
10336                     # token is mid-line - use length to previous token
10337                     $actual_pos = total_line_length( $ibeg, $i - 1 );
10338
10339                     # for mid-line token, we must check to see if all
10340                     # additional lines have continuation indentation,
10341                     # and remove it if so.  Otherwise, we do not get
10342                     # good alignment.
10343                     my $closing_index = $indentation->get_CLOSED();
10344                     if ( $closing_index > $iend ) {
10345                         my $ibeg_next = $$ri_first[ $line + 1 ];
10346                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
10347                             undo_lp_ci( $line, $i, $closing_index, $ri_first,
10348                                 $ri_last );
10349                         }
10350                     }
10351                 }
10352                 elsif ( $line > 0 ) {
10353
10354                     # handle case where token starts a new line;
10355                     # use length of previous line
10356                     my $ibegm = $$ri_first[ $line - 1 ];
10357                     my $iendm = $$ri_last[ $line - 1 ];
10358                     $actual_pos = total_line_length( $ibegm, $iendm );
10359
10360                     # follow -pt style
10361                     ++$actual_pos
10362                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
10363                 }
10364                 else {
10365
10366                     # token is first character of first line of batch
10367                     $actual_pos = $predicted_pos;
10368                 }
10369
10370                 my $move_right = $actual_pos - $predicted_pos;
10371
10372                 # done if no error to correct (gnu2.t)
10373                 if ( $move_right == 0 ) {
10374                     $indentation->set_RECOVERABLE_SPACES($move_right);
10375                     next;
10376                 }
10377
10378                 # if we have not seen closure for this indentation in
10379                 # this batch, we can only pass on a request to the
10380                 # vertical aligner
10381                 my $closing_index = $indentation->get_CLOSED();
10382
10383                 if ( $closing_index < 0 ) {
10384                     $indentation->set_RECOVERABLE_SPACES($move_right);
10385                     next;
10386                 }
10387
10388                 # If necessary, look ahead to see if there is really any
10389                 # leading whitespace dependent on this whitespace, and
10390                 # also find the longest line using this whitespace.
10391                 # Since it is always safe to move left if there are no
10392                 # dependents, we only need to do this if we may have
10393                 # dependent nodes or need to move right.
10394
10395                 my $right_margin = 0;
10396                 my $have_child   = $indentation->get_HAVE_CHILD();
10397
10398                 my %saw_indentation;
10399                 my $line_count = 1;
10400                 $saw_indentation{$indentation} = $indentation;
10401
10402                 if ( $have_child || $move_right > 0 ) {
10403                     $have_child = 0;
10404                     my $max_length = 0;
10405                     if ( $i == $ibeg ) {
10406                         $max_length = total_line_length( $ibeg, $iend );
10407                     }
10408
10409                     # look ahead at the rest of the lines of this batch..
10410                     my $line_t;
10411                     foreach $line_t ( $line + 1 .. $max_line ) {
10412                         my $ibeg_t = $$ri_first[$line_t];
10413                         my $iend_t = $$ri_last[$line_t];
10414                         last if ( $closing_index <= $ibeg_t );
10415
10416                         # remember all different indentation objects
10417                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
10418                         $saw_indentation{$indentation_t} = $indentation_t;
10419                         $line_count++;
10420
10421                         # remember longest line in the group
10422                         my $length_t = total_line_length( $ibeg_t, $iend_t );
10423                         if ( $length_t > $max_length ) {
10424                             $max_length = $length_t;
10425                         }
10426                     }
10427                     $right_margin = $rOpts_maximum_line_length - $max_length;
10428                     if ( $right_margin < 0 ) { $right_margin = 0 }
10429                 }
10430
10431                 my $first_line_comma_count =
10432                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
10433                 my $comma_count = $indentation->get_COMMA_COUNT();
10434                 my $arrow_count = $indentation->get_ARROW_COUNT();
10435
10436                 # This is a simple approximate test for vertical alignment:
10437                 # if we broke just after an opening paren, brace, bracket,
10438                 # and there are 2 or more commas in the first line,
10439                 # and there are no '=>'s,
10440                 # then we are probably vertically aligned.  We could set
10441                 # an exact flag in sub scan_list, but this is good
10442                 # enough.
10443                 my $indentation_count = keys %saw_indentation;
10444                 my $is_vertically_aligned =
10445                   (      $i == $ibeg
10446                       && $first_line_comma_count > 1
10447                       && $indentation_count == 1
10448                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
10449
10450                 # Make the move if possible ..
10451                 if (
10452
10453                     # we can always move left
10454                     $move_right < 0
10455
10456                     # but we should only move right if we are sure it will
10457                     # not spoil vertical alignment
10458                     || ( $comma_count == 0 )
10459                     || ( $comma_count > 0 && !$is_vertically_aligned )
10460                   )
10461                 {
10462                     my $move =
10463                       ( $move_right <= $right_margin )
10464                       ? $move_right
10465                       : $right_margin;
10466
10467                     foreach ( keys %saw_indentation ) {
10468                         $saw_indentation{$_}
10469                           ->permanently_decrease_AVAILABLE_SPACES( -$move );
10470                     }
10471                 }
10472
10473                 # Otherwise, record what we want and the vertical aligner
10474                 # will try to recover it.
10475                 else {
10476                     $indentation->set_RECOVERABLE_SPACES($move_right);
10477                 }
10478             }
10479         }
10480     }
10481     return $do_not_pad;
10482 }
10483
10484 # flush is called to output any tokens in the pipeline, so that
10485 # an alternate source of lines can be written in the correct order
10486
10487 sub flush {
10488     destroy_one_line_block();
10489     output_line_to_go();
10490     Perl::Tidy::VerticalAligner::flush();
10491 }
10492
10493 sub reset_block_text_accumulator {
10494
10495     # save text after 'if' and 'elsif' to append after 'else'
10496     if ($accumulating_text_for_block) {
10497
10498         if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10499             push @{$rleading_block_if_elsif_text}, $leading_block_text;
10500         }
10501     }
10502     $accumulating_text_for_block        = "";
10503     $leading_block_text                 = "";
10504     $leading_block_text_level           = 0;
10505     $leading_block_text_length_exceeded = 0;
10506     $leading_block_text_line_number     = 0;
10507     $leading_block_text_line_length     = 0;
10508 }
10509
10510 sub set_block_text_accumulator {
10511     my $i = shift;
10512     $accumulating_text_for_block = $tokens_to_go[$i];
10513     if ( $accumulating_text_for_block !~ /^els/ ) {
10514         $rleading_block_if_elsif_text = [];
10515     }
10516     $leading_block_text       = "";
10517     $leading_block_text_level = $levels_to_go[$i];
10518     $leading_block_text_line_number =
10519       $vertical_aligner_object->get_output_line_number();
10520     $leading_block_text_length_exceeded = 0;
10521
10522     # this will contain the column number of the last character
10523     # of the closing side comment
10524     $leading_block_text_line_length =
10525       length($accumulating_text_for_block) +
10526       length( $rOpts->{'closing-side-comment-prefix'} ) +
10527       $leading_block_text_level * $rOpts_indent_columns + 3;
10528 }
10529
10530 sub accumulate_block_text {
10531     my $i = shift;
10532
10533     # accumulate leading text for -csc, ignoring any side comments
10534     if (   $accumulating_text_for_block
10535         && !$leading_block_text_length_exceeded
10536         && $types_to_go[$i] ne '#' )
10537     {
10538
10539         my $added_length = length( $tokens_to_go[$i] );
10540         $added_length += 1 if $i == 0;
10541         my $new_line_length = $leading_block_text_line_length + $added_length;
10542
10543         # we can add this text if we don't exceed some limits..
10544         if (
10545
10546             # we must not have already exceeded the text length limit
10547             length($leading_block_text) <
10548             $rOpts_closing_side_comment_maximum_text
10549
10550             # and either:
10551             # the new total line length must be below the line length limit
10552             # or the new length must be below the text length limit
10553             # (ie, we may allow one token to exceed the text length limit)
10554             && ( $new_line_length < $rOpts_maximum_line_length
10555                 || length($leading_block_text) + $added_length <
10556                 $rOpts_closing_side_comment_maximum_text )
10557
10558             # UNLESS: we are adding a closing paren before the brace we seek.
10559             # This is an attempt to avoid situations where the ... to be
10560             # added are longer than the omitted right paren, as in:
10561
10562             #   foreach my $item (@a_rather_long_variable_name_here) {
10563             #      &whatever;
10564             #   } ## end foreach my $item (@a_rather_long_variable_name_here...
10565
10566             || (
10567                 $tokens_to_go[$i] eq ')'
10568                 && (
10569                     (
10570                            $i + 1 <= $max_index_to_go
10571                         && $block_type_to_go[ $i + 1 ] eq
10572                         $accumulating_text_for_block
10573                     )
10574                     || (   $i + 2 <= $max_index_to_go
10575                         && $block_type_to_go[ $i + 2 ] eq
10576                         $accumulating_text_for_block )
10577                 )
10578             )
10579           )
10580         {
10581
10582             # add an extra space at each newline
10583             if ( $i == 0 ) { $leading_block_text .= ' ' }
10584
10585             # add the token text
10586             $leading_block_text .= $tokens_to_go[$i];
10587             $leading_block_text_line_length = $new_line_length;
10588         }
10589
10590         # show that text was truncated if necessary
10591         elsif ( $types_to_go[$i] ne 'b' ) {
10592             $leading_block_text_length_exceeded = 1;
10593             $leading_block_text .= '...';
10594         }
10595     }
10596 }
10597
10598 {
10599     my %is_if_elsif_else_unless_while_until_for_foreach;
10600
10601     BEGIN {
10602
10603         # These block types may have text between the keyword and opening
10604         # curly.  Note: 'else' does not, but must be included to allow trailing
10605         # if/elsif text to be appended.
10606         # patch for SWITCH/CASE: added 'case' and 'when'
10607         @_ = qw(if elsif else unless while until for foreach case when);
10608         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10609     }
10610
10611     sub accumulate_csc_text {
10612
10613         # called once per output buffer when -csc is used. Accumulates
10614         # the text placed after certain closing block braces.
10615         # Defines and returns the following for this buffer:
10616
10617         my $block_leading_text = "";    # the leading text of the last '}'
10618         my $rblock_leading_if_elsif_text;
10619         my $i_block_leading_text =
10620           -1;    # index of token owning block_leading_text
10621         my $block_line_count    = 100;    # how many lines the block spans
10622         my $terminal_type       = 'b';    # type of last nonblank token
10623         my $i_terminal          = 0;      # index of last nonblank token
10624         my $terminal_block_type = "";
10625
10626         for my $i ( 0 .. $max_index_to_go ) {
10627             my $type       = $types_to_go[$i];
10628             my $block_type = $block_type_to_go[$i];
10629             my $token      = $tokens_to_go[$i];
10630
10631             # remember last nonblank token type
10632             if ( $type ne '#' && $type ne 'b' ) {
10633                 $terminal_type       = $type;
10634                 $terminal_block_type = $block_type;
10635                 $i_terminal          = $i;
10636             }
10637
10638             my $type_sequence = $type_sequence_to_go[$i];
10639             if ( $block_type && $type_sequence ) {
10640
10641                 if ( $token eq '}' ) {
10642
10643                     # restore any leading text saved when we entered this block
10644                     if ( defined( $block_leading_text{$type_sequence} ) ) {
10645                         ( $block_leading_text, $rblock_leading_if_elsif_text ) =
10646                           @{ $block_leading_text{$type_sequence} };
10647                         $i_block_leading_text = $i;
10648                         delete $block_leading_text{$type_sequence};
10649                         $rleading_block_if_elsif_text =
10650                           $rblock_leading_if_elsif_text;
10651                     }
10652
10653                     # if we run into a '}' then we probably started accumulating
10654                     # at something like a trailing 'if' clause..no harm done.
10655                     if (   $accumulating_text_for_block
10656                         && $levels_to_go[$i] <= $leading_block_text_level )
10657                     {
10658                         my $lev = $levels_to_go[$i];
10659                         reset_block_text_accumulator();
10660                     }
10661
10662                     if ( defined( $block_opening_line_number{$type_sequence} ) )
10663                     {
10664                         my $output_line_number =
10665                           $vertical_aligner_object->get_output_line_number();
10666                         $block_line_count =
10667                           $output_line_number -
10668                           $block_opening_line_number{$type_sequence} + 1;
10669                         delete $block_opening_line_number{$type_sequence};
10670                     }
10671                     else {
10672
10673                         # Error: block opening line undefined for this line..
10674                         # This shouldn't be possible, but it is not a
10675                         # significant problem.
10676                     }
10677                 }
10678
10679                 elsif ( $token eq '{' ) {
10680
10681                     my $line_number =
10682                       $vertical_aligner_object->get_output_line_number();
10683                     $block_opening_line_number{$type_sequence} = $line_number;
10684
10685                     if (   $accumulating_text_for_block
10686                         && $levels_to_go[$i] == $leading_block_text_level )
10687                     {
10688
10689                         if ( $accumulating_text_for_block eq $block_type ) {
10690
10691                             # save any leading text before we enter this block
10692                             $block_leading_text{$type_sequence} = [
10693                                 $leading_block_text,
10694                                 $rleading_block_if_elsif_text
10695                             ];
10696                             $block_opening_line_number{$type_sequence} =
10697                               $leading_block_text_line_number;
10698                             reset_block_text_accumulator();
10699                         }
10700                         else {
10701
10702                             # shouldn't happen, but not a serious error.
10703                             # We were accumulating -csc text for block type
10704                             # $accumulating_text_for_block and unexpectedly
10705                             # encountered a '{' for block type $block_type.
10706                         }
10707                     }
10708                 }
10709             }
10710
10711             if (   $type eq 'k'
10712                 && $csc_new_statement_ok
10713                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
10714                 && $token =~ /$closing_side_comment_list_pattern/o )
10715             {
10716                 set_block_text_accumulator($i);
10717             }
10718             else {
10719
10720                 # note: ignoring type 'q' because of tricks being played
10721                 # with 'q' for hanging side comments
10722                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10723                     $csc_new_statement_ok =
10724                       ( $block_type || $type eq 'J' || $type eq ';' );
10725                 }
10726                 if (   $type eq ';'
10727                     && $accumulating_text_for_block
10728                     && $levels_to_go[$i] == $leading_block_text_level )
10729                 {
10730                     reset_block_text_accumulator();
10731                 }
10732                 else {
10733                     accumulate_block_text($i);
10734                 }
10735             }
10736         }
10737
10738         # Treat an 'else' block specially by adding preceding 'if' and
10739         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
10740         # especially for cuddled-else formatting.
10741         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10742             $block_leading_text =
10743               make_else_csc_text( $i_terminal, $terminal_block_type,
10744                 $block_leading_text, $rblock_leading_if_elsif_text );
10745         }
10746
10747         return ( $terminal_type, $i_terminal, $i_block_leading_text,
10748             $block_leading_text, $block_line_count );
10749     }
10750 }
10751
10752 sub make_else_csc_text {
10753
10754     # create additional -csc text for an 'else' and optionally 'elsif',
10755     # depending on the value of switch
10756     # $rOpts_closing_side_comment_else_flag:
10757     #
10758     #  = 0 add 'if' text to trailing else
10759     #  = 1 same as 0 plus:
10760     #      add 'if' to 'elsif's if can fit in line length
10761     #      add last 'elsif' to trailing else if can fit in one line
10762     #  = 2 same as 1 but do not check if exceed line length
10763     #
10764     # $rif_elsif_text = a reference to a list of all previous closing
10765     # side comments created for this if block
10766     #
10767     my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10768     my $csc_text = $block_leading_text;
10769
10770     if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10771     {
10772         return $csc_text;
10773     }
10774
10775     my $count = @{$rif_elsif_text};
10776     return $csc_text unless ($count);
10777
10778     my $if_text = '[ if' . $rif_elsif_text->[0];
10779
10780     # always show the leading 'if' text on 'else'
10781     if ( $block_type eq 'else' ) {
10782         $csc_text .= $if_text;
10783     }
10784
10785     # see if that's all
10786     if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10787         return $csc_text;
10788     }
10789
10790     my $last_elsif_text = "";
10791     if ( $count > 1 ) {
10792         $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10793         if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10794     }
10795
10796     # tentatively append one more item
10797     my $saved_text = $csc_text;
10798     if ( $block_type eq 'else' ) {
10799         $csc_text .= $last_elsif_text;
10800     }
10801     else {
10802         $csc_text .= ' ' . $if_text;
10803     }
10804
10805     # all done if no length checks requested
10806     if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10807         return $csc_text;
10808     }
10809
10810     # undo it if line length exceeded
10811     my $length =
10812       length($csc_text) +
10813       length($block_type) +
10814       length( $rOpts->{'closing-side-comment-prefix'} ) +
10815       $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10816     if ( $length > $rOpts_maximum_line_length ) {
10817         $csc_text = $saved_text;
10818     }
10819     return $csc_text;
10820 }
10821
10822 {    # sub balance_csc_text
10823
10824     my %matching_char;
10825
10826     BEGIN {
10827         %matching_char = (
10828             '{' => '}',
10829             '(' => ')',
10830             '[' => ']',
10831             '}' => '{',
10832             ')' => '(',
10833             ']' => '[',
10834         );
10835     }
10836
10837     sub balance_csc_text {
10838
10839         # Append characters to balance a closing side comment so that editors
10840         # such as vim can correctly jump through code.
10841         # Simple Example:
10842         #  input  = ## end foreach my $foo ( sort { $b  ...
10843         #  output = ## end foreach my $foo ( sort { $b  ...})
10844
10845         # NOTE: This routine does not currently filter out structures within
10846         # quoted text because the bounce algorithims in text editors do not
10847         # necessarily do this either (a version of vim was checked and
10848         # did not do this).
10849
10850         # Some complex examples which will cause trouble for some editors:
10851         #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
10852         #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
10853         #  if ( $1 eq '{' ) {
10854         # test file test1/braces.pl has many such examples.
10855
10856         my ($csc) = @_;
10857
10858         # loop to examine characters one-by-one, RIGHT to LEFT and
10859         # build a balancing ending, LEFT to RIGHT.
10860         for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
10861
10862             my $char = substr( $csc, $pos, 1 );
10863
10864             # ignore everything except structural characters
10865             next unless ( $matching_char{$char} );
10866
10867             # pop most recently appended character
10868             my $top = chop($csc);
10869
10870             # push it back plus the mate to the newest character
10871             # unless they balance each other.
10872             $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
10873         }
10874
10875         # return the balanced string
10876         return $csc;
10877     }
10878 }
10879
10880 sub add_closing_side_comment {
10881
10882     # add closing side comments after closing block braces if -csc used
10883     my $cscw_block_comment;
10884
10885     #---------------------------------------------------------------
10886     # Step 1: loop through all tokens of this line to accumulate
10887     # the text needed to create the closing side comments. Also see
10888     # how the line ends.
10889     #---------------------------------------------------------------
10890
10891     my ( $terminal_type, $i_terminal, $i_block_leading_text,
10892         $block_leading_text, $block_line_count )
10893       = accumulate_csc_text();
10894
10895     #---------------------------------------------------------------
10896     # Step 2: make the closing side comment if this ends a block
10897     #---------------------------------------------------------------
10898     my $have_side_comment = $i_terminal != $max_index_to_go;
10899
10900     # if this line might end in a block closure..
10901     if (
10902         $terminal_type eq '}'
10903
10904         # ..and either
10905         && (
10906
10907             # the block is long enough
10908             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10909
10910             # or there is an existing comment to check
10911             || (   $have_side_comment
10912                 && $rOpts->{'closing-side-comment-warnings'} )
10913         )
10914
10915         # .. and if this is one of the types of interest
10916         && $block_type_to_go[$i_terminal] =~
10917         /$closing_side_comment_list_pattern/o
10918
10919         # .. but not an anonymous sub
10920         # These are not normally of interest, and their closing braces are
10921         # often followed by commas or semicolons anyway.  This also avoids
10922         # possible erratic output due to line numbering inconsistencies
10923         # in the cases where their closing braces terminate a line.
10924         && $block_type_to_go[$i_terminal] ne 'sub'
10925
10926         # ..and the corresponding opening brace must is not in this batch
10927         # (because we do not need to tag one-line blocks, although this
10928         # should also be caught with a positive -csci value)
10929         && $mate_index_to_go[$i_terminal] < 0
10930
10931         # ..and either
10932         && (
10933
10934             # this is the last token (line doesnt have a side comment)
10935             !$have_side_comment
10936
10937             # or the old side comment is a closing side comment
10938             || $tokens_to_go[$max_index_to_go] =~
10939             /$closing_side_comment_prefix_pattern/o
10940         )
10941       )
10942     {
10943
10944         # then make the closing side comment text
10945         my $token =
10946 "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10947
10948         # append any extra descriptive text collected above
10949         if ( $i_block_leading_text == $i_terminal ) {
10950             $token .= $block_leading_text;
10951         }
10952
10953         $token = balance_csc_text($token)
10954           if $rOpts->{'closing-side-comments-balanced'};
10955
10956         $token =~ s/\s*$//;    # trim any trailing whitespace
10957
10958         # handle case of existing closing side comment
10959         if ($have_side_comment) {
10960
10961             # warn if requested and tokens differ significantly
10962             if ( $rOpts->{'closing-side-comment-warnings'} ) {
10963                 my $old_csc = $tokens_to_go[$max_index_to_go];
10964                 my $new_csc = $token;
10965                 $new_csc =~ s/\s+//g;            # trim all whitespace
10966                 $old_csc =~ s/\s+//g;            # trim all whitespace
10967                 $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
10968                 $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
10969                 $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
10970                 my $new_trailing_dots = $1;
10971                 $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
10972
10973                 # Patch to handle multiple closing side comments at
10974                 # else and elsif's.  These have become too complicated
10975                 # to check, so if we see an indication of
10976                 # '[ if' or '[ # elsif', then assume they were made
10977                 # by perltidy.
10978                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10979                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10980                 }
10981                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10982                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10983                 }
10984
10985                 # if old comment is contained in new comment,
10986                 # only compare the common part.
10987                 if ( length($new_csc) > length($old_csc) ) {
10988                     $new_csc = substr( $new_csc, 0, length($old_csc) );
10989                 }
10990
10991                 # if the new comment is shorter and has been limited,
10992                 # only compare the common part.
10993                 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10994                 {
10995                     $old_csc = substr( $old_csc, 0, length($new_csc) );
10996                 }
10997
10998                 # any remaining difference?
10999                 if ( $new_csc ne $old_csc ) {
11000
11001                     # just leave the old comment if we are below the threshold
11002                     # for creating side comments
11003                     if ( $block_line_count <
11004                         $rOpts->{'closing-side-comment-interval'} )
11005                     {
11006                         $token = undef;
11007                     }
11008
11009                     # otherwise we'll make a note of it
11010                     else {
11011
11012                         warning(
11013 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
11014                         );
11015
11016                      # save the old side comment in a new trailing block comment
11017                         my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
11018                         $year  += 1900;
11019                         $month += 1;
11020                         $cscw_block_comment =
11021 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
11022                     }
11023                 }
11024                 else {
11025
11026                     # No differences.. we can safely delete old comment if we
11027                     # are below the threshold
11028                     if ( $block_line_count <
11029                         $rOpts->{'closing-side-comment-interval'} )
11030                     {
11031                         $token = undef;
11032                         unstore_token_to_go()
11033                           if ( $types_to_go[$max_index_to_go] eq '#' );
11034                         unstore_token_to_go()
11035                           if ( $types_to_go[$max_index_to_go] eq 'b' );
11036                     }
11037                 }
11038             }
11039
11040             # switch to the new csc (unless we deleted it!)
11041             $tokens_to_go[$max_index_to_go] = $token if $token;
11042         }
11043
11044         # handle case of NO existing closing side comment
11045         else {
11046
11047             # insert the new side comment into the output token stream
11048             my $type          = '#';
11049             my $block_type    = '';
11050             my $type_sequence = '';
11051             my $container_environment =
11052               $container_environment_to_go[$max_index_to_go];
11053             my $level                = $levels_to_go[$max_index_to_go];
11054             my $slevel               = $nesting_depth_to_go[$max_index_to_go];
11055             my $no_internal_newlines = 0;
11056
11057             my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
11058             my $ci_level           = $ci_levels_to_go[$max_index_to_go];
11059             my $in_continued_quote = 0;
11060
11061             # first insert a blank token
11062             insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
11063
11064             # then the side comment
11065             insert_new_token_to_go( $token, $type, $slevel,
11066                 $no_internal_newlines );
11067         }
11068     }
11069     return $cscw_block_comment;
11070 }
11071
11072 sub previous_nonblank_token {
11073     my ($i)  = @_;
11074     my $name = "";
11075     my $im   = $i - 1;
11076     return "" if ( $im < 0 );
11077     if ( $types_to_go[$im] eq 'b' ) { $im--; }
11078     return "" if ( $im < 0 );
11079     $name = $tokens_to_go[$im];
11080
11081     # prepend any sub name to an isolated -> to avoid unwanted alignments
11082     # [test case is test8/penco.pl]
11083     if ( $name eq '->' ) {
11084         $im--;
11085         if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
11086             $name = $tokens_to_go[$im] . $name;
11087         }
11088     }
11089     return $name;
11090 }
11091
11092 sub send_lines_to_vertical_aligner {
11093
11094     my ( $ri_first, $ri_last, $do_not_pad ) = @_;
11095
11096     my $rindentation_list = [0];    # ref to indentations for each line
11097
11098     # define the array @matching_token_to_go for the output tokens
11099     # which will be non-blank for each special token (such as =>)
11100     # for which alignment is required.
11101     set_vertical_alignment_markers( $ri_first, $ri_last );
11102
11103     # flush if necessary to avoid unwanted alignment
11104     my $must_flush = 0;
11105     if ( @$ri_first > 1 ) {
11106
11107         # flush before a long if statement
11108         if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
11109             $must_flush = 1;
11110         }
11111     }
11112     if ($must_flush) {
11113         Perl::Tidy::VerticalAligner::flush();
11114     }
11115
11116     undo_ci( $ri_first, $ri_last );
11117
11118     set_logical_padding( $ri_first, $ri_last );
11119
11120     # loop to prepare each line for shipment
11121     my $n_last_line = @$ri_first - 1;
11122     my $in_comma_list;
11123     for my $n ( 0 .. $n_last_line ) {
11124         my $ibeg = $$ri_first[$n];
11125         my $iend = $$ri_last[$n];
11126
11127         my ( $rtokens, $rfields, $rpatterns ) =
11128           make_alignment_patterns( $ibeg, $iend );
11129
11130         my ( $indentation, $lev, $level_end, $terminal_type,
11131             $is_semicolon_terminated, $is_outdented_line )
11132           = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
11133             $ri_first, $ri_last, $rindentation_list );
11134
11135         # we will allow outdenting of long lines..
11136         my $outdent_long_lines = (
11137
11138             # which are long quotes, if allowed
11139             ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
11140
11141             # which are long block comments, if allowed
11142               || (
11143                    $types_to_go[$ibeg] eq '#'
11144                 && $rOpts->{'outdent-long-comments'}
11145
11146                 # but not if this is a static block comment
11147                 && !$is_static_block_comment
11148               )
11149         );
11150
11151         my $level_jump =
11152           $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
11153
11154         my $rvertical_tightness_flags =
11155           set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
11156             $ri_first, $ri_last );
11157
11158         # flush an outdented line to avoid any unwanted vertical alignment
11159         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11160
11161         my $is_terminal_ternary = 0;
11162         if (   $tokens_to_go[$ibeg] eq ':'
11163             || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
11164         {
11165             if (   ( $terminal_type eq ';' && $level_end <= $lev )
11166                 || ( $level_end < $lev ) )
11167             {
11168                 $is_terminal_ternary = 1;
11169             }
11170         }
11171
11172         # send this new line down the pipe
11173         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
11174         Perl::Tidy::VerticalAligner::append_line(
11175             $lev,
11176             $level_end,
11177             $indentation,
11178             $rfields,
11179             $rtokens,
11180             $rpatterns,
11181             $forced_breakpoint_to_go[$iend] || $in_comma_list,
11182             $outdent_long_lines,
11183             $is_terminal_ternary,
11184             $is_semicolon_terminated,
11185             $do_not_pad,
11186             $rvertical_tightness_flags,
11187             $level_jump,
11188         );
11189         $in_comma_list =
11190           $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
11191
11192         # flush an outdented line to avoid any unwanted vertical alignment
11193         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11194
11195         $do_not_pad = 0;
11196
11197     }    # end of loop to output each line
11198
11199     # remember indentation of lines containing opening containers for
11200     # later use by sub set_adjusted_indentation
11201     save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
11202 }
11203
11204 {        # begin make_alignment_patterns
11205
11206     my %block_type_map;
11207     my %keyword_map;
11208
11209     BEGIN {
11210
11211         # map related block names into a common name to
11212         # allow alignment
11213         %block_type_map = (
11214             'unless'  => 'if',
11215             'else'    => 'if',
11216             'elsif'   => 'if',
11217             'when'    => 'if',
11218             'default' => 'if',
11219             'case'    => 'if',
11220             'sort'    => 'map',
11221             'grep'    => 'map',
11222         );
11223
11224         # map certain keywords to the same 'if' class to align
11225         # long if/elsif sequences. [elsif.pl]
11226         %keyword_map = (
11227             'unless'  => 'if',
11228             'else'    => 'if',
11229             'elsif'   => 'if',
11230             'when'    => 'given',
11231             'default' => 'given',
11232             'case'    => 'switch',
11233
11234             # treat an 'undef' similar to numbers and quotes
11235             'undef' => 'Q',
11236         );
11237     }
11238
11239     sub make_alignment_patterns {
11240
11241         # Here we do some important preliminary work for the
11242         # vertical aligner.  We create three arrays for one
11243         # output line. These arrays contain strings that can
11244         # be tested by the vertical aligner to see if
11245         # consecutive lines can be aligned vertically.
11246         #
11247         # The three arrays are indexed on the vertical
11248         # alignment fields and are:
11249         # @tokens - a list of any vertical alignment tokens for this line.
11250         #   These are tokens, such as '=' '&&' '#' etc which
11251         #   we want to might align vertically.  These are
11252         #   decorated with various information such as
11253         #   nesting depth to prevent unwanted vertical
11254         #   alignment matches.
11255         # @fields - the actual text of the line between the vertical alignment
11256         #   tokens.
11257         # @patterns - a modified list of token types, one for each alignment
11258         #   field.  These should normally each match before alignment is
11259         #   allowed, even when the alignment tokens match.
11260         my ( $ibeg, $iend ) = @_;
11261         my @tokens   = ();
11262         my @fields   = ();
11263         my @patterns = ();
11264         my $i_start  = $ibeg;
11265         my $i;
11266
11267         my $depth                 = 0;
11268         my @container_name        = ("");
11269         my @multiple_comma_arrows = (undef);
11270
11271         my $j = 0;    # field index
11272
11273         $patterns[0] = "";
11274         for $i ( $ibeg .. $iend ) {
11275
11276             # Keep track of containers balanced on this line only.
11277             # These are used below to prevent unwanted cross-line alignments.
11278             # Unbalanced containers already avoid aligning across
11279             # container boundaries.
11280             if ( $tokens_to_go[$i] eq '(' ) {
11281
11282                 # if container is balanced on this line...
11283                 my $i_mate = $mate_index_to_go[$i];
11284                 if ( $i_mate > $i && $i_mate <= $iend ) {
11285                     $depth++;
11286                     my $seqno = $type_sequence_to_go[$i];
11287                     my $count = comma_arrow_count($seqno);
11288                     $multiple_comma_arrows[$depth] = $count && $count > 1;
11289
11290                     # Append the previous token name to make the container name
11291                     # more unique.  This name will also be given to any commas
11292                     # within this container, and it helps avoid undesirable
11293                     # alignments of different types of containers.
11294                     my $name = previous_nonblank_token($i);
11295                     $name =~ s/^->//;
11296                     $container_name[$depth] = "+" . $name;
11297
11298                     # Make the container name even more unique if necessary.
11299                     # If we are not vertically aligning this opening paren,
11300                     # append a character count to avoid bad alignment because
11301                     # it usually looks bad to align commas within continers
11302                     # for which the opening parens do not align.  Here
11303                     # is an example very BAD alignment of commas (because
11304                     # the atan2 functions are not all aligned):
11305                     #    $XY =
11306                     #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
11307                     #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
11308                     #      $X * atan2( $X,            1 ) -
11309                     #      $Y * atan2( $Y,            1 );
11310                     #
11311                     # On the other hand, it is usually okay to align commas if
11312                     # opening parens align, such as:
11313                     #    glVertex3d( $cx + $s * $xs, $cy,            $z );
11314                     #    glVertex3d( $cx,            $cy + $s * $ys, $z );
11315                     #    glVertex3d( $cx - $s * $xs, $cy,            $z );
11316                     #    glVertex3d( $cx,            $cy - $s * $ys, $z );
11317                     #
11318                     # To distinguish between these situations, we will
11319                     # append the length of the line from the previous matching
11320                     # token, or beginning of line, to the function name.  This
11321                     # will allow the vertical aligner to reject undesirable
11322                     # matches.
11323
11324                     # if we are not aligning on this paren...
11325                     if ( $matching_token_to_go[$i] eq '' ) {
11326
11327                         # Sum length from previous alignment, or start of line.
11328                         # Note that we have to sum token lengths here because
11329                         # padding has been done and so array $lengths_to_go
11330                         # is now wrong.
11331                         my $len =
11332                           length(
11333                             join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11334                         $len += leading_spaces_to_go($i_start)
11335                           if ( $i_start == $ibeg );
11336
11337                         # tack length onto the container name to make unique
11338                         $container_name[$depth] .= "-" . $len;
11339                     }
11340                 }
11341             }
11342             elsif ( $tokens_to_go[$i] eq ')' ) {
11343                 $depth-- if $depth > 0;
11344             }
11345
11346             # if we find a new synchronization token, we are done with
11347             # a field
11348             if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
11349
11350                 my $tok = my $raw_tok = $matching_token_to_go[$i];
11351
11352                 # make separators in different nesting depths unique
11353                 # by appending the nesting depth digit.
11354                 if ( $raw_tok ne '#' ) {
11355                     $tok .= "$nesting_depth_to_go[$i]";
11356                 }
11357
11358                 # also decorate commas with any container name to avoid
11359                 # unwanted cross-line alignments.
11360                 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
11361                     if ( $container_name[$depth] ) {
11362                         $tok .= $container_name[$depth];
11363                     }
11364                 }
11365
11366                 # Patch to avoid aligning leading and trailing if, unless.
11367                 # Mark trailing if, unless statements with container names.
11368                 # This makes them different from leading if, unless which
11369                 # are not so marked at present.  If we ever need to name
11370                 # them too, we could use ci to distinguish them.
11371                 # Example problem to avoid:
11372                 #    return ( 2, "DBERROR" )
11373                 #      if ( $retval == 2 );
11374                 #    if   ( scalar @_ ) {
11375                 #        my ( $a, $b, $c, $d, $e, $f ) = @_;
11376                 #    }
11377                 if ( $raw_tok eq '(' ) {
11378                     my $ci = $ci_levels_to_go[$ibeg];
11379                     if (   $container_name[$depth] =~ /^\+(if|unless)/
11380                         && $ci )
11381                     {
11382                         $tok .= $container_name[$depth];
11383                     }
11384                 }
11385
11386                 # Decorate block braces with block types to avoid
11387                 # unwanted alignments such as the following:
11388                 # foreach ( @{$routput_array} ) { $fh->print($_) }
11389                 # eval                          { $fh->close() };
11390                 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
11391                     my $block_type = $block_type_to_go[$i];
11392
11393                     # map certain related block types to allow
11394                     # else blocks to align
11395                     $block_type = $block_type_map{$block_type}
11396                       if ( defined( $block_type_map{$block_type} ) );
11397
11398                     # remove sub names to allow one-line sub braces to align
11399                     # regardless of name
11400                     if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
11401
11402                     # allow all control-type blocks to align
11403                     if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
11404
11405                     $tok .= $block_type;
11406                 }
11407
11408                 # concatenate the text of the consecutive tokens to form
11409                 # the field
11410                 push( @fields,
11411                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11412
11413                 # store the alignment token for this field
11414                 push( @tokens, $tok );
11415
11416                 # get ready for the next batch
11417                 $i_start = $i;
11418                 $j++;
11419                 $patterns[$j] = "";
11420             }
11421
11422             # continue accumulating tokens
11423             # handle non-keywords..
11424             if ( $types_to_go[$i] ne 'k' ) {
11425                 my $type = $types_to_go[$i];
11426
11427                 # Mark most things before arrows as a quote to
11428                 # get them to line up. Testfile: mixed.pl.
11429                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
11430                     my $next_type = $types_to_go[ $i + 1 ];
11431                     my $i_next_nonblank =
11432                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
11433
11434                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
11435                         $type = 'Q';
11436
11437                         # Patch to ignore leading minus before words,
11438                         # by changing pattern 'mQ' into just 'Q',
11439                         # so that we can align things like this:
11440                         #  Button   => "Print letter \"~$_\"",
11441                         #  -command => [ sub { print "$_[0]\n" }, $_ ],
11442                         if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
11443                     }
11444                 }
11445
11446                 # patch to make numbers and quotes align
11447                 if ( $type eq 'n' ) { $type = 'Q' }
11448
11449                 # patch to ignore any ! in patterns
11450                 if ( $type eq '!' ) { $type = '' }
11451
11452                 $patterns[$j] .= $type;
11453             }
11454
11455             # for keywords we have to use the actual text
11456             else {
11457
11458                 my $tok = $tokens_to_go[$i];
11459
11460                 # but map certain keywords to a common string to allow
11461                 # alignment.
11462                 $tok = $keyword_map{$tok}
11463                   if ( defined( $keyword_map{$tok} ) );
11464                 $patterns[$j] .= $tok;
11465             }
11466         }
11467
11468         # done with this line .. join text of tokens to make the last field
11469         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
11470         return ( \@tokens, \@fields, \@patterns );
11471     }
11472
11473 }    # end make_alignment_patterns
11474
11475 {    # begin unmatched_indexes
11476
11477     # closure to keep track of unbalanced containers.
11478     # arrays shared by the routines in this block:
11479     my @unmatched_opening_indexes_in_this_batch;
11480     my @unmatched_closing_indexes_in_this_batch;
11481     my %comma_arrow_count;
11482
11483     sub is_unbalanced_batch {
11484         @unmatched_opening_indexes_in_this_batch +
11485           @unmatched_closing_indexes_in_this_batch;
11486     }
11487
11488     sub comma_arrow_count {
11489         my $seqno = $_[0];
11490         return $comma_arrow_count{$seqno};
11491     }
11492
11493     sub match_opening_and_closing_tokens {
11494
11495         # Match up indexes of opening and closing braces, etc, in this batch.
11496         # This has to be done after all tokens are stored because unstoring
11497         # of tokens would otherwise cause trouble.
11498
11499         @unmatched_opening_indexes_in_this_batch = ();
11500         @unmatched_closing_indexes_in_this_batch = ();
11501         %comma_arrow_count                       = ();
11502
11503         my ( $i, $i_mate, $token );
11504         foreach $i ( 0 .. $max_index_to_go ) {
11505             if ( $type_sequence_to_go[$i] ) {
11506                 $token = $tokens_to_go[$i];
11507                 if ( $token =~ /^[\(\[\{\?]$/ ) {
11508                     push @unmatched_opening_indexes_in_this_batch, $i;
11509                 }
11510                 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
11511
11512                     $i_mate = pop @unmatched_opening_indexes_in_this_batch;
11513                     if ( defined($i_mate) && $i_mate >= 0 ) {
11514                         if ( $type_sequence_to_go[$i_mate] ==
11515                             $type_sequence_to_go[$i] )
11516                         {
11517                             $mate_index_to_go[$i]      = $i_mate;
11518                             $mate_index_to_go[$i_mate] = $i;
11519                         }
11520                         else {
11521                             push @unmatched_opening_indexes_in_this_batch,
11522                               $i_mate;
11523                             push @unmatched_closing_indexes_in_this_batch, $i;
11524                         }
11525                     }
11526                     else {
11527                         push @unmatched_closing_indexes_in_this_batch, $i;
11528                     }
11529                 }
11530             }
11531             elsif ( $tokens_to_go[$i] eq '=>' ) {
11532                 if (@unmatched_opening_indexes_in_this_batch) {
11533                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
11534                     my $seqno = $type_sequence_to_go[$j];
11535                     $comma_arrow_count{$seqno}++;
11536                 }
11537             }
11538         }
11539     }
11540
11541     sub save_opening_indentation {
11542
11543         # This should be called after each batch of tokens is output. It
11544         # saves indentations of lines of all unmatched opening tokens.
11545         # These will be used by sub get_opening_indentation.
11546
11547         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11548
11549         # we no longer need indentations of any saved indentations which
11550         # are unmatched closing tokens in this batch, because we will
11551         # never encounter them again.  So we can delete them to keep
11552         # the hash size down.
11553         foreach (@unmatched_closing_indexes_in_this_batch) {
11554             my $seqno = $type_sequence_to_go[$_];
11555             delete $saved_opening_indentation{$seqno};
11556         }
11557
11558         # we need to save indentations of any unmatched opening tokens
11559         # in this batch because we may need them in a subsequent batch.
11560         foreach (@unmatched_opening_indexes_in_this_batch) {
11561             my $seqno = $type_sequence_to_go[$_];
11562             $saved_opening_indentation{$seqno} = [
11563                 lookup_opening_indentation(
11564                     $_, $ri_first, $ri_last, $rindentation_list
11565                 )
11566             ];
11567         }
11568     }
11569 }    # end unmatched_indexes
11570
11571 sub get_opening_indentation {
11572
11573     # get the indentation of the line which output the opening token
11574     # corresponding to a given closing token in the current output batch.
11575     #
11576     # given:
11577     # $i_closing - index in this line of a closing token ')' '}' or ']'
11578     #
11579     # $ri_first - reference to list of the first index $i for each output
11580     #               line in this batch
11581     # $ri_last - reference to list of the last index $i for each output line
11582     #              in this batch
11583     # $rindentation_list - reference to a list containing the indentation
11584     #            used for each line.
11585     #
11586     # return:
11587     #   -the indentation of the line which contained the opening token
11588     #    which matches the token at index $i_opening
11589     #   -and its offset (number of columns) from the start of the line
11590     #
11591     my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
11592
11593     # first, see if the opening token is in the current batch
11594     my $i_opening = $mate_index_to_go[$i_closing];
11595     my ( $indent, $offset, $is_leading, $exists );
11596     $exists = 1;
11597     if ( $i_opening >= 0 ) {
11598
11599         # it is..look up the indentation
11600         ( $indent, $offset, $is_leading ) =
11601           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
11602             $rindentation_list );
11603     }
11604
11605     # if not, it should have been stored in the hash by a previous batch
11606     else {
11607         my $seqno = $type_sequence_to_go[$i_closing];
11608         if ($seqno) {
11609             if ( $saved_opening_indentation{$seqno} ) {
11610                 ( $indent, $offset, $is_leading ) =
11611                   @{ $saved_opening_indentation{$seqno} };
11612             }
11613
11614             # some kind of serious error
11615             # (example is badfile.t)
11616             else {
11617                 $indent     = 0;
11618                 $offset     = 0;
11619                 $is_leading = 0;
11620                 $exists     = 0;
11621             }
11622         }
11623
11624         # if no sequence number it must be an unbalanced container
11625         else {
11626             $indent     = 0;
11627             $offset     = 0;
11628             $is_leading = 0;
11629             $exists     = 0;
11630         }
11631     }
11632     return ( $indent, $offset, $is_leading, $exists );
11633 }
11634
11635 sub lookup_opening_indentation {
11636
11637     # get the indentation of the line in the current output batch
11638     # which output a selected opening token
11639     #
11640     # given:
11641     #   $i_opening - index of an opening token in the current output batch
11642     #                whose line indentation we need
11643     #   $ri_first - reference to list of the first index $i for each output
11644     #               line in this batch
11645     #   $ri_last - reference to list of the last index $i for each output line
11646     #              in this batch
11647     #   $rindentation_list - reference to a list containing the indentation
11648     #            used for each line.  (NOTE: the first slot in
11649     #            this list is the last returned line number, and this is
11650     #            followed by the list of indentations).
11651     #
11652     # return
11653     #   -the indentation of the line which contained token $i_opening
11654     #   -and its offset (number of columns) from the start of the line
11655
11656     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11657
11658     my $nline = $rindentation_list->[0];    # line number of previous lookup
11659
11660     # reset line location if necessary
11661     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
11662
11663     # find the correct line
11664     unless ( $i_opening > $ri_last->[-1] ) {
11665         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11666     }
11667
11668     # error - token index is out of bounds - shouldn't happen
11669     else {
11670         warning(
11671 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
11672         );
11673         report_definite_bug();
11674         $nline = $#{$ri_last};
11675     }
11676
11677     $rindentation_list->[0] =
11678       $nline;    # save line number to start looking next call
11679     my $ibeg       = $ri_start->[$nline];
11680     my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
11681     my $is_leading = ( $ibeg == $i_opening );
11682     return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
11683 }
11684
11685 {
11686     my %is_if_elsif_else_unless_while_until_for_foreach;
11687
11688     BEGIN {
11689
11690         # These block types may have text between the keyword and opening
11691         # curly.  Note: 'else' does not, but must be included to allow trailing
11692         # if/elsif text to be appended.
11693         # patch for SWITCH/CASE: added 'case' and 'when'
11694         @_ = qw(if elsif else unless while until for foreach case when);
11695         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11696     }
11697
11698     sub set_adjusted_indentation {
11699
11700         # This routine has the final say regarding the actual indentation of
11701         # a line.  It starts with the basic indentation which has been
11702         # defined for the leading token, and then takes into account any
11703         # options that the user has set regarding special indenting and
11704         # outdenting.
11705
11706         my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11707             $rindentation_list )
11708           = @_;
11709
11710         # we need to know the last token of this line
11711         my ( $terminal_type, $i_terminal ) =
11712           terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11713
11714         my $is_outdented_line = 0;
11715
11716         my $is_semicolon_terminated = $terminal_type eq ';'
11717           && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11718
11719         ##########################################################
11720         # Section 1: set a flag and a default indentation
11721         #
11722         # Most lines are indented according to the initial token.
11723         # But it is common to outdent to the level just after the
11724         # terminal token in certain cases...
11725         # adjust_indentation flag:
11726         #       0 - do not adjust
11727         #       1 - outdent
11728         #       2 - vertically align with opening token
11729         #       3 - indent
11730         ##########################################################
11731         my $adjust_indentation         = 0;
11732         my $default_adjust_indentation = $adjust_indentation;
11733
11734         my (
11735             $opening_indentation, $opening_offset,
11736             $is_leading,          $opening_exists
11737         );
11738
11739         # if we are at a closing token of some type..
11740         if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11741
11742             # get the indentation of the line containing the corresponding
11743             # opening token
11744             (
11745                 $opening_indentation, $opening_offset,
11746                 $is_leading,          $opening_exists
11747               )
11748               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11749                 $rindentation_list );
11750
11751             # First set the default behavior:
11752             # default behavior is to outdent closing lines
11753             # of the form:   ");  };  ];  )->xxx;"
11754             if (
11755                 $is_semicolon_terminated
11756
11757                 # and 'cuddled parens' of the form:   ")->pack("
11758                 || (
11759                        $terminal_type eq '('
11760                     && $types_to_go[$ibeg] eq ')'
11761                     && ( $nesting_depth_to_go[$iend] + 1 ==
11762                         $nesting_depth_to_go[$ibeg] )
11763                 )
11764               )
11765             {
11766                 $adjust_indentation = 1;
11767             }
11768
11769             # TESTING: outdent something like '),'
11770             if (
11771                 $terminal_type eq ','
11772
11773                 # allow just one character before the comma
11774                 && $i_terminal == $ibeg + 1
11775
11776                 # requre LIST environment; otherwise, we may outdent too much --
11777                 # this can happen in calls without parentheses (overload.t);
11778                 && $container_environment_to_go[$i_terminal] eq 'LIST'
11779               )
11780             {
11781                 $adjust_indentation = 1;
11782             }
11783
11784             # undo continuation indentation of a terminal closing token if
11785             # it is the last token before a level decrease.  This will allow
11786             # a closing token to line up with its opening counterpart, and
11787             # avoids a indentation jump larger than 1 level.
11788             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11789                 && $i_terminal == $ibeg )
11790             {
11791                 my $ci        = $ci_levels_to_go[$ibeg];
11792                 my $lev       = $levels_to_go[$ibeg];
11793                 my $next_type = $types_to_go[ $ibeg + 1 ];
11794                 my $i_next_nonblank =
11795                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11796                 if (   $i_next_nonblank <= $max_index_to_go
11797                     && $levels_to_go[$i_next_nonblank] < $lev )
11798                 {
11799                     $adjust_indentation = 1;
11800                 }
11801             }
11802
11803             # YVES patch 1 of 2:
11804             # Undo ci of line with leading closing eval brace,
11805             # but not beyond the indention of the line with
11806             # the opening brace.
11807             if (   $block_type_to_go[$ibeg] eq 'eval'
11808                 && !$rOpts->{'line-up-parentheses'}
11809                 && !$rOpts->{'indent-closing-brace'} )
11810             {
11811                 (
11812                     $opening_indentation, $opening_offset,
11813                     $is_leading,          $opening_exists
11814                   )
11815                   = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11816                     $rindentation_list );
11817                 my $indentation = $leading_spaces_to_go[$ibeg];
11818                 if ( defined($opening_indentation)
11819                     && $indentation > $opening_indentation )
11820                 {
11821                     $adjust_indentation = 1;
11822                 }
11823             }
11824
11825             $default_adjust_indentation = $adjust_indentation;
11826
11827             # Now modify default behavior according to user request:
11828             # handle option to indent non-blocks of the form );  };  ];
11829             # But don't do special indentation to something like ')->pack('
11830             if ( !$block_type_to_go[$ibeg] ) {
11831                 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11832                 if ( $cti == 1 ) {
11833                     if (   $i_terminal <= $ibeg + 1
11834                         || $is_semicolon_terminated )
11835                     {
11836                         $adjust_indentation = 2;
11837                     }
11838                     else {
11839                         $adjust_indentation = 0;
11840                     }
11841                 }
11842                 elsif ( $cti == 2 ) {
11843                     if ($is_semicolon_terminated) {
11844                         $adjust_indentation = 3;
11845                     }
11846                     else {
11847                         $adjust_indentation = 0;
11848                     }
11849                 }
11850                 elsif ( $cti == 3 ) {
11851                     $adjust_indentation = 3;
11852                 }
11853             }
11854
11855             # handle option to indent blocks
11856             else {
11857                 if (
11858                     $rOpts->{'indent-closing-brace'}
11859                     && (
11860                         $i_terminal == $ibeg    #  isolated terminal '}'
11861                         || $is_semicolon_terminated
11862                     )
11863                   )                             #  } xxxx ;
11864                 {
11865                     $adjust_indentation = 3;
11866                 }
11867             }
11868         }
11869
11870         # if at ');', '};', '>;', and '];' of a terminal qw quote
11871         elsif ($$rpatterns[0] =~ /^qb*;$/
11872             && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
11873         {
11874             if ( $closing_token_indentation{$1} == 0 ) {
11875                 $adjust_indentation = 1;
11876             }
11877             else {
11878                 $adjust_indentation = 3;
11879             }
11880         }
11881
11882         # if line begins with a ':', align it with any
11883         # previous line leading with corresponding ?
11884         elsif ( $types_to_go[$ibeg] eq ':' ) {
11885             (
11886                 $opening_indentation, $opening_offset,
11887                 $is_leading,          $opening_exists
11888               )
11889               = get_opening_indentation( $ibeg, $ri_first, $ri_last,
11890                 $rindentation_list );
11891             if ($is_leading) { $adjust_indentation = 2; }
11892         }
11893
11894         ##########################################################
11895         # Section 2: set indentation according to flag set above
11896         #
11897         # Select the indentation object to define leading
11898         # whitespace.  If we are outdenting something like '} } );'
11899         # then we want to use one level below the last token
11900         # ($i_terminal) in order to get it to fully outdent through
11901         # all levels.
11902         ##########################################################
11903         my $indentation;
11904         my $lev;
11905         my $level_end = $levels_to_go[$iend];
11906
11907         if ( $adjust_indentation == 0 ) {
11908             $indentation = $leading_spaces_to_go[$ibeg];
11909             $lev         = $levels_to_go[$ibeg];
11910         }
11911         elsif ( $adjust_indentation == 1 ) {
11912             $indentation = $reduced_spaces_to_go[$i_terminal];
11913             $lev         = $levels_to_go[$i_terminal];
11914         }
11915
11916         # handle indented closing token which aligns with opening token
11917         elsif ( $adjust_indentation == 2 ) {
11918
11919             # handle option to align closing token with opening token
11920             $lev = $levels_to_go[$ibeg];
11921
11922             # calculate spaces needed to align with opening token
11923             my $space_count =
11924               get_SPACES($opening_indentation) + $opening_offset;
11925
11926             # Indent less than the previous line.
11927             #
11928             # Problem: For -lp we don't exactly know what it was if there
11929             # were recoverable spaces sent to the aligner.  A good solution
11930             # would be to force a flush of the vertical alignment buffer, so
11931             # that we would know.  For now, this rule is used for -lp:
11932             #
11933             # When the last line did not start with a closing token we will
11934             # be optimistic that the aligner will recover everything wanted.
11935             #
11936             # This rule will prevent us from breaking a hierarchy of closing
11937             # tokens, and in a worst case will leave a closing paren too far
11938             # indented, but this is better than frequently leaving it not
11939             # indented enough.
11940             my $last_spaces = get_SPACES($last_indentation_written);
11941             if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
11942                 $last_spaces +=
11943                   get_RECOVERABLE_SPACES($last_indentation_written);
11944             }
11945
11946             # reset the indentation to the new space count if it works
11947             # only options are all or none: nothing in-between looks good
11948             $lev = $levels_to_go[$ibeg];
11949             if ( $space_count < $last_spaces ) {
11950                 if ($rOpts_line_up_parentheses) {
11951                     my $lev = $levels_to_go[$ibeg];
11952                     $indentation =
11953                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11954                 }
11955                 else {
11956                     $indentation = $space_count;
11957                 }
11958             }
11959
11960             # revert to default if it doesnt work
11961             else {
11962                 $space_count = leading_spaces_to_go($ibeg);
11963                 if ( $default_adjust_indentation == 0 ) {
11964                     $indentation = $leading_spaces_to_go[$ibeg];
11965                 }
11966                 elsif ( $default_adjust_indentation == 1 ) {
11967                     $indentation = $reduced_spaces_to_go[$i_terminal];
11968                     $lev         = $levels_to_go[$i_terminal];
11969                 }
11970             }
11971         }
11972
11973         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
11974         else {
11975
11976             # handle -icb (indented closing code block braces)
11977             # Updated method for indented block braces: indent one full level if
11978             # there is no continuation indentation.  This will occur for major
11979             # structures such as sub, if, else, but not for things like map
11980             # blocks.
11981             #
11982             # Note: only code blocks without continuation indentation are
11983             # handled here (if, else, unless, ..). In the following snippet,
11984             # the terminal brace of the sort block will have continuation
11985             # indentation as shown so it will not be handled by the coding
11986             # here.  We would have to undo the continuation indentation to do
11987             # this, but it probably looks ok as is.  This is a possible future
11988             # update for semicolon terminated lines.
11989             #
11990             #     if ($sortby eq 'date' or $sortby eq 'size') {
11991             #         @files = sort {
11992             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
11993             #                 or $a cmp $b
11994             #                 } @files;
11995             #         }
11996             #
11997             if (   $block_type_to_go[$ibeg]
11998                 && $ci_levels_to_go[$i_terminal] == 0 )
11999             {
12000                 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
12001                 $indentation = $spaces + $rOpts_indent_columns;
12002
12003                 # NOTE: for -lp we could create a new indentation object, but
12004                 # there is probably no need to do it
12005             }
12006
12007             # handle -icp and any -icb block braces which fall through above
12008             # test such as the 'sort' block mentioned above.
12009             else {
12010
12011                 # There are currently two ways to handle -icp...
12012                 # One way is to use the indentation of the previous line:
12013                 # $indentation = $last_indentation_written;
12014
12015                 # The other way is to use the indentation that the previous line
12016                 # would have had if it hadn't been adjusted:
12017                 $indentation = $last_unadjusted_indentation;
12018
12019                 # Current method: use the minimum of the two. This avoids
12020                 # inconsistent indentation.
12021                 if ( get_SPACES($last_indentation_written) <
12022                     get_SPACES($indentation) )
12023                 {
12024                     $indentation = $last_indentation_written;
12025                 }
12026             }
12027
12028             # use previous indentation but use own level
12029             # to cause list to be flushed properly
12030             $lev = $levels_to_go[$ibeg];
12031         }
12032
12033         # remember indentation except for multi-line quotes, which get
12034         # no indentation
12035         unless ( $ibeg == 0 && $starting_in_quote ) {
12036             $last_indentation_written    = $indentation;
12037             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
12038             $last_leading_token          = $tokens_to_go[$ibeg];
12039         }
12040
12041         # be sure lines with leading closing tokens are not outdented more
12042         # than the line which contained the corresponding opening token.
12043
12044         #############################################################
12045         # updated per bug report in alex_bug.pl: we must not
12046         # mess with the indentation of closing logical braces so
12047         # we must treat something like '} else {' as if it were
12048         # an isolated brace my $is_isolated_block_brace = (
12049         # $iend == $ibeg ) && $block_type_to_go[$ibeg];
12050         #############################################################
12051         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
12052           && ( $iend == $ibeg
12053             || $is_if_elsif_else_unless_while_until_for_foreach{
12054                 $block_type_to_go[$ibeg] } );
12055
12056         # only do this for a ':; which is aligned with its leading '?'
12057         my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
12058         if (   defined($opening_indentation)
12059             && !$is_isolated_block_brace
12060             && !$is_unaligned_colon )
12061         {
12062             if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
12063                 $indentation = $opening_indentation;
12064             }
12065         }
12066
12067         # remember the indentation of each line of this batch
12068         push @{$rindentation_list}, $indentation;
12069
12070         # outdent lines with certain leading tokens...
12071         if (
12072
12073             # must be first word of this batch
12074             $ibeg == 0
12075
12076             # and ...
12077             && (
12078
12079                 # certain leading keywords if requested
12080                 (
12081                        $rOpts->{'outdent-keywords'}
12082                     && $types_to_go[$ibeg] eq 'k'
12083                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
12084                 )
12085
12086                 # or labels if requested
12087                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
12088
12089                 # or static block comments if requested
12090                 || (   $types_to_go[$ibeg] eq '#'
12091                     && $rOpts->{'outdent-static-block-comments'}
12092                     && $is_static_block_comment )
12093             )
12094           )
12095
12096         {
12097             my $space_count = leading_spaces_to_go($ibeg);
12098             if ( $space_count > 0 ) {
12099                 $space_count -= $rOpts_continuation_indentation;
12100                 $is_outdented_line = 1;
12101                 if ( $space_count < 0 ) { $space_count = 0 }
12102
12103                 # do not promote a spaced static block comment to non-spaced;
12104                 # this is not normally necessary but could be for some
12105                 # unusual user inputs (such as -ci = -i)
12106                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
12107                     $space_count = 1;
12108                 }
12109
12110                 if ($rOpts_line_up_parentheses) {
12111                     $indentation =
12112                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
12113                 }
12114                 else {
12115                     $indentation = $space_count;
12116                 }
12117             }
12118         }
12119
12120         return ( $indentation, $lev, $level_end, $terminal_type,
12121             $is_semicolon_terminated, $is_outdented_line );
12122     }
12123 }
12124
12125 sub set_vertical_tightness_flags {
12126
12127     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
12128
12129     # Define vertical tightness controls for the nth line of a batch.
12130     # We create an array of parameters which tell the vertical aligner
12131     # if we should combine this line with the next line to achieve the
12132     # desired vertical tightness.  The array of parameters contains:
12133     #
12134     #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
12135     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
12136     #             if closing: spaces of padding to use
12137     #   [2] sequence number of container
12138     #   [3] valid flag: do not append if this flag is false. Will be
12139     #       true if appropriate -vt flag is set.  Otherwise, Will be
12140     #       made true only for 2 line container in parens with -lp
12141     #
12142     # These flags are used by sub set_leading_whitespace in
12143     # the vertical aligner
12144
12145     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
12146
12147     # For non-BLOCK tokens, we will need to examine the next line
12148     # too, so we won't consider the last line.
12149     if ( $n < $n_last_line ) {
12150
12151         # see if last token is an opening token...not a BLOCK...
12152         my $ibeg_next = $$ri_first[ $n + 1 ];
12153         my $token_end = $tokens_to_go[$iend];
12154         my $iend_next = $$ri_last[ $n + 1 ];
12155         if (
12156                $type_sequence_to_go[$iend]
12157             && !$block_type_to_go[$iend]
12158             && $is_opening_token{$token_end}
12159             && (
12160                 $opening_vertical_tightness{$token_end} > 0
12161
12162                 # allow 2-line method call to be closed up
12163                 || (   $rOpts_line_up_parentheses
12164                     && $token_end eq '('
12165                     && $iend > $ibeg
12166                     && $types_to_go[ $iend - 1 ] ne 'b' )
12167             )
12168           )
12169         {
12170
12171             # avoid multiple jumps in nesting depth in one line if
12172             # requested
12173             my $ovt       = $opening_vertical_tightness{$token_end};
12174             my $iend_next = $$ri_last[ $n + 1 ];
12175             unless (
12176                 $ovt < 2
12177                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
12178                     $nesting_depth_to_go[$ibeg_next] )
12179               )
12180             {
12181
12182                 # If -vt flag has not been set, mark this as invalid
12183                 # and aligner will validate it if it sees the closing paren
12184                 # within 2 lines.
12185                 my $valid_flag = $ovt;
12186                 @{$rvertical_tightness_flags} =
12187                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
12188             }
12189         }
12190
12191         # see if first token of next line is a closing token...
12192         # ..and be sure this line does not have a side comment
12193         my $token_next = $tokens_to_go[$ibeg_next];
12194         if (   $type_sequence_to_go[$ibeg_next]
12195             && !$block_type_to_go[$ibeg_next]
12196             && $is_closing_token{$token_next}
12197             && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
12198         {
12199             my $ovt = $opening_vertical_tightness{$token_next};
12200             my $cvt = $closing_vertical_tightness{$token_next};
12201             if (
12202
12203                 # never append a trailing line like   )->pack(
12204                 # because it will throw off later alignment
12205                 (
12206                     $nesting_depth_to_go[$ibeg_next] ==
12207                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
12208                 )
12209                 && (
12210                     $cvt == 2
12211                     || (
12212                         $container_environment_to_go[$ibeg_next] ne 'LIST'
12213                         && (
12214                             $cvt == 1
12215
12216                             # allow closing up 2-line method calls
12217                             || (   $rOpts_line_up_parentheses
12218                                 && $token_next eq ')' )
12219                         )
12220                     )
12221                 )
12222               )
12223             {
12224
12225                 # decide which trailing closing tokens to append..
12226                 my $ok = 0;
12227                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
12228                 else {
12229                     my $str = join( '',
12230                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
12231
12232                     # append closing token if followed by comment or ';'
12233                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
12234                 }
12235
12236                 if ($ok) {
12237                     my $valid_flag = $cvt;
12238                     @{$rvertical_tightness_flags} = (
12239                         2,
12240                         $tightness{$token_next} == 2 ? 0 : 1,
12241                         $type_sequence_to_go[$ibeg_next], $valid_flag,
12242                     );
12243                 }
12244             }
12245         }
12246
12247         # Opening Token Right
12248         # If requested, move an isolated trailing opening token to the end of
12249         # the previous line which ended in a comma.  We could do this
12250         # in sub recombine_breakpoints but that would cause problems
12251         # with -lp formatting.  The problem is that indentation will
12252         # quickly move far to the right in nested expressions.  By
12253         # doing it after indentation has been set, we avoid changes
12254         # to the indentation.  Actual movement of the token takes place
12255         # in sub write_leader_and_string.
12256         if (
12257             $opening_token_right{ $tokens_to_go[$ibeg_next] }
12258
12259             # previous line is not opening
12260             # (use -sot to combine with it)
12261             && !$is_opening_token{$token_end}
12262
12263             # previous line ended in one of these
12264             # (add other cases if necessary; '=>' and '.' are not necessary
12265             ##&& ($is_opening_token{$token_end} || $token_end eq ',')
12266             && !$block_type_to_go[$ibeg_next]
12267
12268             # this is a line with just an opening token
12269             && (   $iend_next == $ibeg_next
12270                 || $iend_next == $ibeg_next + 2
12271                 && $types_to_go[$iend_next] eq '#' )
12272
12273             # looks bad if we align vertically with the wrong container
12274             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
12275           )
12276         {
12277             my $valid_flag = 1;
12278             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12279             @{$rvertical_tightness_flags} =
12280               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
12281         }
12282
12283         # Stacking of opening and closing tokens
12284         my $stackable;
12285         my $token_beg_next = $tokens_to_go[$ibeg_next];
12286
12287         # patch to make something like 'qw(' behave like an opening paren
12288         # (aran.t)
12289         if ( $types_to_go[$ibeg_next] eq 'q' ) {
12290             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
12291                 $token_beg_next = $1;
12292             }
12293         }
12294
12295         if (   $is_closing_token{$token_end}
12296             && $is_closing_token{$token_beg_next} )
12297         {
12298             $stackable = $stack_closing_token{$token_beg_next}
12299               unless ( $block_type_to_go[$ibeg_next] )
12300               ;    # shouldn't happen; just checking
12301         }
12302         elsif ($is_opening_token{$token_end}
12303             && $is_opening_token{$token_beg_next} )
12304         {
12305             $stackable = $stack_opening_token{$token_beg_next}
12306               unless ( $block_type_to_go[$ibeg_next] )
12307               ;    # shouldn't happen; just checking
12308         }
12309
12310         if ($stackable) {
12311
12312             my $is_semicolon_terminated;
12313             if ( $n + 1 == $n_last_line ) {
12314                 my ( $terminal_type, $i_terminal ) = terminal_type(
12315                     \@types_to_go, \@block_type_to_go,
12316                     $ibeg_next,    $iend_next
12317                 );
12318                 $is_semicolon_terminated = $terminal_type eq ';'
12319                   && $nesting_depth_to_go[$iend_next] <
12320                   $nesting_depth_to_go[$ibeg_next];
12321             }
12322
12323             # this must be a line with just an opening token
12324             # or end in a semicolon
12325             if (
12326                 $is_semicolon_terminated
12327                 || (   $iend_next == $ibeg_next
12328                     || $iend_next == $ibeg_next + 2
12329                     && $types_to_go[$iend_next] eq '#' )
12330               )
12331             {
12332                 my $valid_flag = 1;
12333                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12334                 @{$rvertical_tightness_flags} =
12335                   ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
12336                   );
12337             }
12338         }
12339     }
12340
12341     # Check for a last line with isolated opening BLOCK curly
12342     elsif ($rOpts_block_brace_vertical_tightness
12343         && $ibeg eq $iend
12344         && $types_to_go[$iend] eq '{'
12345         && $block_type_to_go[$iend] =~
12346         /$block_brace_vertical_tightness_pattern/o )
12347     {
12348         @{$rvertical_tightness_flags} =
12349           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
12350     }
12351
12352     # pack in the sequence numbers of the ends of this line
12353     $rvertical_tightness_flags->[4] = get_seqno($ibeg);
12354     $rvertical_tightness_flags->[5] = get_seqno($iend);
12355     return $rvertical_tightness_flags;
12356 }
12357
12358 sub get_seqno {
12359
12360     # get opening and closing sequence numbers of a token for the vertical
12361     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
12362     # to be treated somewhat like opening and closing tokens for stacking
12363     # tokens by the vertical aligner.
12364     my ($ii) = @_;
12365     my $seqno = $type_sequence_to_go[$ii];
12366     if ( $types_to_go[$ii] eq 'q' ) {
12367         my $SEQ_QW = -1;
12368         if ( $ii > 0 ) {
12369             $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
12370         }
12371         else {
12372             if ( !$ending_in_quote ) {
12373                 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
12374             }
12375         }
12376     }
12377     return ($seqno);
12378 }
12379
12380 {
12381     my %is_vertical_alignment_type;
12382     my %is_vertical_alignment_keyword;
12383
12384     BEGIN {
12385
12386         @_ = qw#
12387           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
12388           { ? : => =~ && || // ~~ !~~
12389           #;
12390         @is_vertical_alignment_type{@_} = (1) x scalar(@_);
12391
12392         @_ = qw(if unless and or err eq ne for foreach while until);
12393         @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
12394     }
12395
12396     sub set_vertical_alignment_markers {
12397
12398         # This routine takes the first step toward vertical alignment of the
12399         # lines of output text.  It looks for certain tokens which can serve as
12400         # vertical alignment markers (such as an '=').
12401         #
12402         # Method: We look at each token $i in this output batch and set
12403         # $matching_token_to_go[$i] equal to those tokens at which we would
12404         # accept vertical alignment.
12405
12406         # nothing to do if we aren't allowed to change whitespace
12407         if ( !$rOpts_add_whitespace ) {
12408             for my $i ( 0 .. $max_index_to_go ) {
12409                 $matching_token_to_go[$i] = '';
12410             }
12411             return;
12412         }
12413
12414         my ( $ri_first, $ri_last ) = @_;
12415
12416         # remember the index of last nonblank token before any sidecomment
12417         my $i_terminal = $max_index_to_go;
12418         if ( $types_to_go[$i_terminal] eq '#' ) {
12419             if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
12420                 if ( $i_terminal > 0 ) { --$i_terminal }
12421             }
12422         }
12423
12424         # look at each line of this batch..
12425         my $last_vertical_alignment_before_index;
12426         my $vert_last_nonblank_type;
12427         my $vert_last_nonblank_token;
12428         my $vert_last_nonblank_block_type;
12429         my $max_line = @$ri_first - 1;
12430         my ( $i, $type, $token, $block_type, $alignment_type );
12431         my ( $ibeg, $iend, $line );
12432
12433         foreach $line ( 0 .. $max_line ) {
12434             $ibeg                                 = $$ri_first[$line];
12435             $iend                                 = $$ri_last[$line];
12436             $last_vertical_alignment_before_index = -1;
12437             $vert_last_nonblank_type              = '';
12438             $vert_last_nonblank_token             = '';
12439             $vert_last_nonblank_block_type        = '';
12440
12441             # look at each token in this output line..
12442             foreach $i ( $ibeg .. $iend ) {
12443                 $alignment_type = '';
12444                 $type           = $types_to_go[$i];
12445                 $block_type     = $block_type_to_go[$i];
12446                 $token          = $tokens_to_go[$i];
12447
12448                 # check for flag indicating that we should not align
12449                 # this token
12450                 if ( $matching_token_to_go[$i] ) {
12451                     $matching_token_to_go[$i] = '';
12452                     next;
12453                 }
12454
12455                 #--------------------------------------------------------
12456                 # First see if we want to align BEFORE this token
12457                 #--------------------------------------------------------
12458
12459                 # The first possible token that we can align before
12460                 # is index 2 because: 1) it doesn't normally make sense to
12461                 # align before the first token and 2) the second
12462                 # token must be a blank if we are to align before
12463                 # the third
12464                 if ( $i < $ibeg + 2 ) { }
12465
12466                 # must follow a blank token
12467                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
12468
12469                 # align a side comment --
12470                 elsif ( $type eq '#' ) {
12471
12472                     unless (
12473
12474                         # it is a static side comment
12475                         (
12476                                $rOpts->{'static-side-comments'}
12477                             && $token =~ /$static_side_comment_pattern/o
12478                         )
12479
12480                         # or a closing side comment
12481                         || (   $vert_last_nonblank_block_type
12482                             && $token =~
12483                             /$closing_side_comment_prefix_pattern/o )
12484                       )
12485                     {
12486                         $alignment_type = $type;
12487                     }    ## Example of a static side comment
12488                 }
12489
12490                 # otherwise, do not align two in a row to create a
12491                 # blank field
12492                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
12493
12494                 # align before one of these keywords
12495                 # (within a line, since $i>1)
12496                 elsif ( $type eq 'k' ) {
12497
12498                     #  /^(if|unless|and|or|eq|ne)$/
12499                     if ( $is_vertical_alignment_keyword{$token} ) {
12500                         $alignment_type = $token;
12501                     }
12502                 }
12503
12504                 # align before one of these types..
12505                 # Note: add '.' after new vertical aligner is operational
12506                 elsif ( $is_vertical_alignment_type{$type} ) {
12507                     $alignment_type = $token;
12508
12509                     # Do not align a terminal token.  Although it might
12510                     # occasionally look ok to do this, it has been found to be
12511                     # a good general rule.  The main problems are:
12512                     # (1) that the terminal token (such as an = or :) might get
12513                     # moved far to the right where it is hard to see because
12514                     # nothing follows it, and
12515                     # (2) doing so may prevent other good alignments.
12516                     if ( $i == $iend || $i >= $i_terminal ) {
12517                         $alignment_type = "";
12518                     }
12519
12520                     # Do not align leading ': (' or '. ('.  This would prevent
12521                     # alignment in something like the following:
12522                     #   $extra_space .=
12523                     #       ( $input_line_number < 10 )  ? "  "
12524                     #     : ( $input_line_number < 100 ) ? " "
12525                     #     :                                "";
12526                     # or
12527                     #  $code =
12528                     #      ( $case_matters ? $accessor : " lc($accessor) " )
12529                     #    . ( $yesno        ? " eq "       : " ne " )
12530                     if (   $i == $ibeg + 2
12531                         && $types_to_go[$ibeg] =~ /^[\.\:]$/
12532                         && $types_to_go[ $i - 1 ] eq 'b' )
12533                     {
12534                         $alignment_type = "";
12535                     }
12536
12537                     # For a paren after keyword, only align something like this:
12538                     #    if    ( $a ) { &a }
12539                     #    elsif ( $b ) { &b }
12540                     if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
12541                         $alignment_type = ""
12542                           unless $vert_last_nonblank_token =~
12543                               /^(if|unless|elsif)$/;
12544                     }
12545
12546                     # be sure the alignment tokens are unique
12547                     # This didn't work well: reason not determined
12548                     # if ($token ne $type) {$alignment_type .= $type}
12549                 }
12550
12551                 # NOTE: This is deactivated because it causes the previous
12552                 # if/elsif alignment to fail
12553                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
12554                 #{ $alignment_type = $type; }
12555
12556                 if ($alignment_type) {
12557                     $last_vertical_alignment_before_index = $i;
12558                 }
12559
12560                 #--------------------------------------------------------
12561                 # Next see if we want to align AFTER the previous nonblank
12562                 #--------------------------------------------------------
12563
12564                 # We want to line up ',' and interior ';' tokens, with the added
12565                 # space AFTER these tokens.  (Note: interior ';' is included
12566                 # because it may occur in short blocks).
12567                 if (
12568
12569                     # we haven't already set it
12570                     !$alignment_type
12571
12572                     # and its not the first token of the line
12573                     && ( $i > $ibeg )
12574
12575                     # and it follows a blank
12576                     && $types_to_go[ $i - 1 ] eq 'b'
12577
12578                     # and previous token IS one of these:
12579                     && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12580
12581                     # and it's NOT one of these
12582                     && ( $type !~ /^[b\#\)\]\}]$/ )
12583
12584                     # then go ahead and align
12585                   )
12586
12587                 {
12588                     $alignment_type = $vert_last_nonblank_type;
12589                 }
12590
12591                 #--------------------------------------------------------
12592                 # then store the value
12593                 #--------------------------------------------------------
12594                 $matching_token_to_go[$i] = $alignment_type;
12595                 if ( $type ne 'b' ) {
12596                     $vert_last_nonblank_type       = $type;
12597                     $vert_last_nonblank_token      = $token;
12598                     $vert_last_nonblank_block_type = $block_type;
12599                 }
12600             }
12601         }
12602     }
12603 }
12604
12605 sub terminal_type {
12606
12607     #    returns type of last token on this line (terminal token), as follows:
12608     #    returns # for a full-line comment
12609     #    returns ' ' for a blank line
12610     #    otherwise returns final token type
12611
12612     my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
12613
12614     # check for full-line comment..
12615     if ( $$rtype[$ibeg] eq '#' ) {
12616         return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
12617     }
12618     else {
12619
12620         # start at end and walk bakwards..
12621         for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
12622
12623             # skip past any side comment and blanks
12624             next if ( $$rtype[$i] eq 'b' );
12625             next if ( $$rtype[$i] eq '#' );
12626
12627             # found it..make sure it is a BLOCK termination,
12628             # but hide a terminal } after sort/grep/map because it is not
12629             # necessarily the end of the line.  (terminal.t)
12630             my $terminal_type = $$rtype[$i];
12631             if (
12632                 $terminal_type eq '}'
12633                 && ( !$$rblock_type[$i]
12634                     || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
12635               )
12636             {
12637                 $terminal_type = 'b';
12638             }
12639             return wantarray ? ( $terminal_type, $i ) : $terminal_type;
12640         }
12641
12642         # empty line
12643         return wantarray ? ( ' ', $ibeg ) : ' ';
12644     }
12645 }
12646
12647 {
12648     my %is_good_keyword_breakpoint;
12649     my %is_lt_gt_le_ge;
12650
12651     sub set_bond_strengths {
12652
12653         BEGIN {
12654
12655             @_ = qw(if unless while until for foreach);
12656             @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12657
12658             @_ = qw(lt gt le ge);
12659             @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12660
12661             ###############################################################
12662             # NOTE: NO_BREAK's set here are HINTS which may not be honored;
12663             # essential NO_BREAKS's must be enforced in section 2, below.
12664             ###############################################################
12665
12666             # adding NEW_TOKENS: add a left and right bond strength by
12667             # mimmicking what is done for an existing token type.  You
12668             # can skip this step at first and take the default, then
12669             # tweak later to get desired results.
12670
12671             # The bond strengths should roughly follow precenence order where
12672             # possible.  If you make changes, please check the results very
12673             # carefully on a variety of scripts.
12674
12675             # no break around possible filehandle
12676             $left_bond_strength{'Z'}  = NO_BREAK;
12677             $right_bond_strength{'Z'} = NO_BREAK;
12678
12679             # never put a bare word on a new line:
12680             # example print (STDERR, "bla"); will fail with break after (
12681             $left_bond_strength{'w'} = NO_BREAK;
12682
12683         # blanks always have infinite strength to force breaks after real tokens
12684             $right_bond_strength{'b'} = NO_BREAK;
12685
12686             # try not to break on exponentation
12687             @_                       = qw" ** .. ... <=> ";
12688             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12689             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12690
12691             # The comma-arrow has very low precedence but not a good break point
12692             $left_bond_strength{'=>'}  = NO_BREAK;
12693             $right_bond_strength{'=>'} = NOMINAL;
12694
12695             # ok to break after label
12696             $left_bond_strength{'J'}  = NO_BREAK;
12697             $right_bond_strength{'J'} = NOMINAL;
12698             $left_bond_strength{'j'}  = STRONG;
12699             $right_bond_strength{'j'} = STRONG;
12700             $left_bond_strength{'A'}  = STRONG;
12701             $right_bond_strength{'A'} = STRONG;
12702
12703             $left_bond_strength{'->'}  = STRONG;
12704             $right_bond_strength{'->'} = VERY_STRONG;
12705
12706             # breaking AFTER modulus operator is ok:
12707             @_ = qw" % ";
12708             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12709             @right_bond_strength{@_} =
12710               ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
12711
12712             # Break AFTER math operators * and /
12713             @_                       = qw" * / x  ";
12714             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12715             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12716
12717             # Break AFTER weakest math operators + and -
12718             # Make them weaker than * but a bit stronger than '.'
12719             @_ = qw" + - ";
12720             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12721             @right_bond_strength{@_} =
12722               ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
12723
12724             # breaking BEFORE these is just ok:
12725             @_                       = qw" >> << ";
12726             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12727             @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
12728
12729             # breaking before the string concatenation operator seems best
12730             # because it can be hard to see at the end of a line
12731             $right_bond_strength{'.'} = STRONG;
12732             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
12733
12734             @_                       = qw"} ] ) ";
12735             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12736             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12737
12738             # make these a little weaker than nominal so that they get
12739             # favored for end-of-line characters
12740             @_ = qw"!= == =~ !~ ~~ !~~";
12741             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12742             @right_bond_strength{@_} =
12743               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12744
12745             # break AFTER these
12746             @_ = qw" < >  | & >= <=";
12747             @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12748             @right_bond_strength{@_} =
12749               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12750
12751             # breaking either before or after a quote is ok
12752             # but bias for breaking before a quote
12753             $left_bond_strength{'Q'}  = NOMINAL;
12754             $right_bond_strength{'Q'} = NOMINAL + 0.02;
12755             $left_bond_strength{'q'}  = NOMINAL;
12756             $right_bond_strength{'q'} = NOMINAL;
12757
12758             # starting a line with a keyword is usually ok
12759             $left_bond_strength{'k'} = NOMINAL;
12760
12761             # we usually want to bond a keyword strongly to what immediately
12762             # follows, rather than leaving it stranded at the end of a line
12763             $right_bond_strength{'k'} = STRONG;
12764
12765             $left_bond_strength{'G'}  = NOMINAL;
12766             $right_bond_strength{'G'} = STRONG;
12767
12768             # it is good to break AFTER various assignment operators
12769             @_ = qw(
12770               = **= += *= &= <<= &&=
12771               -= /= |= >>= ||= //=
12772               .= %= ^=
12773               x=
12774             );
12775             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12776             @right_bond_strength{@_} =
12777               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12778
12779             # break BEFORE '&&' and '||' and '//'
12780             # set strength of '||' to same as '=' so that chains like
12781             # $a = $b || $c || $d   will break before the first '||'
12782             $right_bond_strength{'||'} = NOMINAL;
12783             $left_bond_strength{'||'}  = $right_bond_strength{'='};
12784
12785             # same thing for '//'
12786             $right_bond_strength{'//'} = NOMINAL;
12787             $left_bond_strength{'//'}  = $right_bond_strength{'='};
12788
12789             # set strength of && a little higher than ||
12790             $right_bond_strength{'&&'} = NOMINAL;
12791             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
12792
12793             $left_bond_strength{';'}  = VERY_STRONG;
12794             $right_bond_strength{';'} = VERY_WEAK;
12795             $left_bond_strength{'f'}  = VERY_STRONG;
12796
12797             # make right strength of for ';' a little less than '='
12798             # to make for contents break after the ';' to avoid this:
12799             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12800             #     $number_of_fields )
12801             # and make it weaker than ',' and 'and' too
12802             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12803
12804             # The strengths of ?/: should be somewhere between
12805             # an '=' and a quote (NOMINAL),
12806             # make strength of ':' slightly less than '?' to help
12807             # break long chains of ? : after the colons
12808             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
12809             $right_bond_strength{':'} = NO_BREAK;
12810             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
12811             $right_bond_strength{'?'} = NO_BREAK;
12812
12813             $left_bond_strength{','}  = VERY_STRONG;
12814             $right_bond_strength{','} = VERY_WEAK;
12815
12816             # Set bond strengths of certain keywords
12817             # make 'or', 'err', 'and' slightly weaker than a ','
12818             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
12819             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
12820             $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
12821             $left_bond_strength{'xor'}  = NOMINAL;
12822             $right_bond_strength{'and'} = NOMINAL;
12823             $right_bond_strength{'or'}  = NOMINAL;
12824             $right_bond_strength{'err'} = NOMINAL;
12825             $right_bond_strength{'xor'} = STRONG;
12826         }
12827
12828         # patch-its always ok to break at end of line
12829         $nobreak_to_go[$max_index_to_go] = 0;
12830
12831         # adding a small 'bias' to strengths is a simple way to make a line
12832         # break at the first of a sequence of identical terms.  For example,
12833         # to force long string of conditional operators to break with
12834         # each line ending in a ':', we can add a small number to the bond
12835         # strength of each ':'
12836         my $colon_bias = 0;
12837         my $amp_bias   = 0;
12838         my $bar_bias   = 0;
12839         my $and_bias   = 0;
12840         my $or_bias    = 0;
12841         my $dot_bias   = 0;
12842         my $f_bias     = 0;
12843         my $code_bias  = -.01;
12844         my $type       = 'b';
12845         my $token      = ' ';
12846         my $last_type;
12847         my $last_nonblank_type  = $type;
12848         my $last_nonblank_token = $token;
12849         my $delta_bias          = 0.0001;
12850         my $list_str            = $left_bond_strength{'?'};
12851
12852         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
12853             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12854         );
12855
12856         # preliminary loop to compute bond strengths
12857         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12858             $last_type = $type;
12859             if ( $type ne 'b' ) {
12860                 $last_nonblank_type  = $type;
12861                 $last_nonblank_token = $token;
12862             }
12863             $type = $types_to_go[$i];
12864
12865             # strength on both sides of a blank is the same
12866             if ( $type eq 'b' && $last_type ne 'b' ) {
12867                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12868                 next;
12869             }
12870
12871             $token               = $tokens_to_go[$i];
12872             $block_type          = $block_type_to_go[$i];
12873             $i_next              = $i + 1;
12874             $next_type           = $types_to_go[$i_next];
12875             $next_token          = $tokens_to_go[$i_next];
12876             $total_nesting_depth = $nesting_depth_to_go[$i_next];
12877             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12878             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
12879             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12880
12881             # Some token chemistry...  The decision about where to break a
12882             # line depends upon a "bond strength" between tokens.  The LOWER
12883             # the bond strength, the MORE likely a break.  The strength
12884             # values are based on trial-and-error, and need to be tweaked
12885             # occasionally to get desired results.  Things to keep in mind
12886             # are:
12887             #   1. relative strengths are important.  small differences
12888             #      in strengths can make big formatting differences.
12889             #   2. each indentation level adds one unit of bond strength
12890             #   3. a value of NO_BREAK makes an unbreakable bond
12891             #   4. a value of VERY_WEAK is the strength of a ','
12892             #   5. values below NOMINAL are considered ok break points
12893             #   6. values above NOMINAL are considered poor break points
12894             # We are computing the strength of the bond between the current
12895             # token and the NEXT token.
12896             my $bond_str = VERY_STRONG;    # a default, high strength
12897
12898             #---------------------------------------------------------------
12899             # section 1:
12900             # use minimum of left and right bond strengths if defined;
12901             # digraphs and trigraphs like to break on their left
12902             #---------------------------------------------------------------
12903             my $bsr = $right_bond_strength{$type};
12904
12905             if ( !defined($bsr) ) {
12906
12907                 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
12908                     $bsr = STRONG;
12909                 }
12910                 else {
12911                     $bsr = VERY_STRONG;
12912                 }
12913             }
12914
12915             # define right bond strengths of certain keywords
12916             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12917                 $bsr = $right_bond_strength{$token};
12918             }
12919             elsif ( $token eq 'ne' or $token eq 'eq' ) {
12920                 $bsr = NOMINAL;
12921             }
12922             my $bsl = $left_bond_strength{$next_nonblank_type};
12923
12924             # set terminal bond strength to the nominal value
12925             # this will cause good preceding breaks to be retained
12926             if ( $i_next_nonblank > $max_index_to_go ) {
12927                 $bsl = NOMINAL;
12928             }
12929
12930             if ( !defined($bsl) ) {
12931
12932                 if (   $is_digraph{$next_nonblank_type}
12933                     || $is_trigraph{$next_nonblank_type} )
12934                 {
12935                     $bsl = WEAK;
12936                 }
12937                 else {
12938                     $bsl = VERY_STRONG;
12939                 }
12940             }
12941
12942             # define right bond strengths of certain keywords
12943             if ( $next_nonblank_type eq 'k'
12944                 && defined( $left_bond_strength{$next_nonblank_token} ) )
12945             {
12946                 $bsl = $left_bond_strength{$next_nonblank_token};
12947             }
12948             elsif ($next_nonblank_token eq 'ne'
12949                 or $next_nonblank_token eq 'eq' )
12950             {
12951                 $bsl = NOMINAL;
12952             }
12953             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12954                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12955             }
12956
12957             # Note: it might seem that we would want to keep a NO_BREAK if
12958             # either token has this value.  This didn't work, because in an
12959             # arrow list, it prevents the comma from separating from the
12960             # following bare word (which is probably quoted by its arrow).
12961             # So necessary NO_BREAK's have to be handled as special cases
12962             # in the final section.
12963             $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12964             my $bond_str_1 = $bond_str;
12965
12966             #---------------------------------------------------------------
12967             # section 2:
12968             # special cases
12969             #---------------------------------------------------------------
12970
12971             # allow long lines before final { in an if statement, as in:
12972             #    if (..........
12973             #      ..........)
12974             #    {
12975             #
12976             # Otherwise, the line before the { tends to be too short.
12977             if ( $type eq ')' ) {
12978                 if ( $next_nonblank_type eq '{' ) {
12979                     $bond_str = VERY_WEAK + 0.03;
12980                 }
12981             }
12982
12983             elsif ( $type eq '(' ) {
12984                 if ( $next_nonblank_type eq '{' ) {
12985                     $bond_str = NOMINAL;
12986                 }
12987             }
12988
12989             # break on something like '} (', but keep this stronger than a ','
12990             # example is in 'howe.pl'
12991             elsif ( $type eq 'R' or $type eq '}' ) {
12992                 if ( $next_nonblank_type eq '(' ) {
12993                     $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
12994                 }
12995             }
12996
12997             #-----------------------------------------------------------------
12998             # adjust bond strength bias
12999             #-----------------------------------------------------------------
13000
13001             # TESTING: add any bias set by sub scan_list at old comma
13002             # break points.
13003             elsif ( $type eq ',' ) {
13004                 $bond_str += $bond_strength_to_go[$i];
13005             }
13006
13007             elsif ( $type eq 'f' ) {
13008                 $bond_str += $f_bias;
13009                 $f_bias   += $delta_bias;
13010             }
13011
13012           # in long ?: conditionals, bias toward just one set per line (colon.t)
13013             elsif ( $type eq ':' ) {
13014                 if ( !$want_break_before{$type} ) {
13015                     $bond_str   += $colon_bias;
13016                     $colon_bias += $delta_bias;
13017                 }
13018             }
13019
13020             if (   $next_nonblank_type eq ':'
13021                 && $want_break_before{$next_nonblank_type} )
13022             {
13023                 $bond_str   += $colon_bias;
13024                 $colon_bias += $delta_bias;
13025             }
13026
13027             # if leading '.' is used, align all but 'short' quotes;
13028             # the idea is to not place something like "\n" on a single line.
13029             elsif ( $next_nonblank_type eq '.' ) {
13030                 if ( $want_break_before{'.'} ) {
13031                     unless (
13032                         $last_nonblank_type eq '.'
13033                         && (
13034                             length($token) <=
13035                             $rOpts_short_concatenation_item_length )
13036                         && ( $token !~ /^[\)\]\}]$/ )
13037                       )
13038                     {
13039                         $dot_bias += $delta_bias;
13040                     }
13041                     $bond_str += $dot_bias;
13042                 }
13043             }
13044             elsif ($next_nonblank_type eq '&&'
13045                 && $want_break_before{$next_nonblank_type} )
13046             {
13047                 $bond_str += $amp_bias;
13048                 $amp_bias += $delta_bias;
13049             }
13050             elsif ($next_nonblank_type eq '||'
13051                 && $want_break_before{$next_nonblank_type} )
13052             {
13053                 $bond_str += $bar_bias;
13054                 $bar_bias += $delta_bias;
13055             }
13056             elsif ( $next_nonblank_type eq 'k' ) {
13057
13058                 if (   $next_nonblank_token eq 'and'
13059                     && $want_break_before{$next_nonblank_token} )
13060                 {
13061                     $bond_str += $and_bias;
13062                     $and_bias += $delta_bias;
13063                 }
13064                 elsif ($next_nonblank_token =~ /^(or|err)$/
13065                     && $want_break_before{$next_nonblank_token} )
13066                 {
13067                     $bond_str += $or_bias;
13068                     $or_bias  += $delta_bias;
13069                 }
13070
13071                 # FIXME: needs more testing
13072                 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
13073                     $bond_str = $list_str if ( $bond_str > $list_str );
13074                 }
13075                 elsif ( $token eq 'err'
13076                     && !$want_break_before{$token} )
13077                 {
13078                     $bond_str += $or_bias;
13079                     $or_bias  += $delta_bias;
13080                 }
13081             }
13082
13083             if ( $type eq ':'
13084                 && !$want_break_before{$type} )
13085             {
13086                 $bond_str   += $colon_bias;
13087                 $colon_bias += $delta_bias;
13088             }
13089             elsif ( $type eq '&&'
13090                 && !$want_break_before{$type} )
13091             {
13092                 $bond_str += $amp_bias;
13093                 $amp_bias += $delta_bias;
13094             }
13095             elsif ( $type eq '||'
13096                 && !$want_break_before{$type} )
13097             {
13098                 $bond_str += $bar_bias;
13099                 $bar_bias += $delta_bias;
13100             }
13101             elsif ( $type eq 'k' ) {
13102
13103                 if ( $token eq 'and'
13104                     && !$want_break_before{$token} )
13105                 {
13106                     $bond_str += $and_bias;
13107                     $and_bias += $delta_bias;
13108                 }
13109                 elsif ( $token eq 'or'
13110                     && !$want_break_before{$token} )
13111                 {
13112                     $bond_str += $or_bias;
13113                     $or_bias  += $delta_bias;
13114                 }
13115             }
13116
13117             # keep matrix and hash indices together
13118             # but make them a little below STRONG to allow breaking open
13119             # something like {'some-word'}{'some-very-long-word'} at the }{
13120             # (bracebrk.t)
13121             if (   ( $type eq ']' or $type eq 'R' )
13122                 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
13123               )
13124             {
13125                 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
13126             }
13127
13128             if ( $next_nonblank_token =~ /^->/ ) {
13129
13130                 # increase strength to the point where a break in the following
13131                 # will be after the opening paren rather than at the arrow:
13132                 #    $a->$b($c);
13133                 if ( $type eq 'i' ) {
13134                     $bond_str = 1.45 * STRONG;
13135                 }
13136
13137                 elsif ( $type =~ /^[\)\]\}R]$/ ) {
13138                     $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
13139                 }
13140
13141                 # otherwise make strength before an '->' a little over a '+'
13142                 else {
13143                     if ( $bond_str <= NOMINAL ) {
13144                         $bond_str = NOMINAL + 0.01;
13145                     }
13146                 }
13147             }
13148
13149             if ( $token eq ')' && $next_nonblank_token eq '[' ) {
13150                 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
13151             }
13152
13153             # map1.t -- correct for a quirk in perl
13154             if (   $token eq '('
13155                 && $next_nonblank_type eq 'i'
13156                 && $last_nonblank_type eq 'k'
13157                 && $is_sort_map_grep{$last_nonblank_token} )
13158
13159               #     /^(sort|map|grep)$/ )
13160             {
13161                 $bond_str = NO_BREAK;
13162             }
13163
13164             # extrude.t: do not break before paren at:
13165             #    -l pid_filename(
13166             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
13167                 $bond_str = NO_BREAK;
13168             }
13169
13170             # good to break after end of code blocks
13171             if ( $type eq '}' && $block_type ) {
13172
13173                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
13174                 $code_bias += $delta_bias;
13175             }
13176
13177             if ( $type eq 'k' ) {
13178
13179                 # allow certain control keywords to stand out
13180                 if (   $next_nonblank_type eq 'k'
13181                     && $is_last_next_redo_return{$token} )
13182                 {
13183                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
13184                 }
13185
13186 # Don't break after keyword my.  This is a quick fix for a
13187 # rare problem with perl. An example is this line from file
13188 # Container.pm:
13189 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
13190
13191                 if ( $token eq 'my' ) {
13192                     $bond_str = NO_BREAK;
13193                 }
13194
13195             }
13196
13197             # good to break before 'if', 'unless', etc
13198             if ( $is_if_brace_follower{$next_nonblank_token} ) {
13199                 $bond_str = VERY_WEAK;
13200             }
13201
13202             if ( $next_nonblank_type eq 'k' ) {
13203
13204                 # keywords like 'unless', 'if', etc, within statements
13205                 # make good breaks
13206                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
13207                     $bond_str = VERY_WEAK / 1.05;
13208                 }
13209             }
13210
13211             # try not to break before a comma-arrow
13212             elsif ( $next_nonblank_type eq '=>' ) {
13213                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
13214             }
13215
13216          #----------------------------------------------------------------------
13217          # only set NO_BREAK's from here on
13218          #----------------------------------------------------------------------
13219             if ( $type eq 'C' or $type eq 'U' ) {
13220
13221                 # use strict requires that bare word and => not be separated
13222                 if ( $next_nonblank_type eq '=>' ) {
13223                     $bond_str = NO_BREAK;
13224                 }
13225
13226                 # Never break between a bareword and a following paren because
13227                 # perl may give an error.  For example, if a break is placed
13228                 # between 'to_filehandle' and its '(' the following line will
13229                 # give a syntax error [Carp.pm]: my( $no) =fileno(
13230                 # to_filehandle( $in)) ;
13231                 if ( $next_nonblank_token eq '(' ) {
13232                     $bond_str = NO_BREAK;
13233                 }
13234             }
13235
13236            # use strict requires that bare word within braces not start new line
13237             elsif ( $type eq 'L' ) {
13238
13239                 if ( $next_nonblank_type eq 'w' ) {
13240                     $bond_str = NO_BREAK;
13241                 }
13242             }
13243
13244             # in older version of perl, use strict can cause problems with
13245             # breaks before bare words following opening parens.  For example,
13246             # this will fail under older versions if a break is made between
13247             # '(' and 'MAIL':
13248             #  use strict;
13249             #  open( MAIL, "a long filename or command");
13250             #  close MAIL;
13251             elsif ( $type eq '{' ) {
13252
13253                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
13254
13255                     # but it's fine to break if the word is followed by a '=>'
13256                     # or if it is obviously a sub call
13257                     my $i_next_next_nonblank = $i_next_nonblank + 1;
13258                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
13259                     if (   $next_next_type eq 'b'
13260                         && $i_next_nonblank < $max_index_to_go )
13261                     {
13262                         $i_next_next_nonblank++;
13263                         $next_next_type = $types_to_go[$i_next_next_nonblank];
13264                     }
13265
13266                     ##if ( $next_next_type ne '=>' ) {
13267                     # these are ok: '->xxx', '=>', '('
13268
13269                     # We'll check for an old breakpoint and keep a leading
13270                     # bareword if it was that way in the input file.
13271                     # Presumably it was ok that way.  For example, the
13272                     # following would remain unchanged:
13273                     #
13274                     # @months = (
13275                     #   January,   February, March,    April,
13276                     #   May,       June,     July,     August,
13277                     #   September, October,  November, December,
13278                     # );
13279                     #
13280                     # This should be sufficient:
13281                     if ( !$old_breakpoint_to_go[$i]
13282                         && ( $next_next_type eq ',' || $next_next_type eq '}' )
13283                       )
13284                     {
13285                         $bond_str = NO_BREAK;
13286                     }
13287                 }
13288             }
13289
13290             elsif ( $type eq 'w' ) {
13291
13292                 if ( $next_nonblank_type eq 'R' ) {
13293                     $bond_str = NO_BREAK;
13294                 }
13295
13296                 # use strict requires that bare word and => not be separated
13297                 if ( $next_nonblank_type eq '=>' ) {
13298                     $bond_str = NO_BREAK;
13299                 }
13300             }
13301
13302             # in fact, use strict hates bare words on any new line.  For
13303             # example, a break before the underscore here provokes the
13304             # wrath of use strict:
13305             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
13306             elsif ( $type eq 'F' ) {
13307                 $bond_str = NO_BREAK;
13308             }
13309
13310             # use strict does not allow separating type info from trailing { }
13311             # testfile is readmail.pl
13312             elsif ( $type eq 't' or $type eq 'i' ) {
13313
13314                 if ( $next_nonblank_type eq 'L' ) {
13315                     $bond_str = NO_BREAK;
13316                 }
13317             }
13318
13319             # Do not break between a possible filehandle and a ? or / and do
13320             # not introduce a break after it if there is no blank
13321             # (extrude.t)
13322             elsif ( $type eq 'Z' ) {
13323
13324                 # dont break..
13325                 if (
13326
13327                     # if there is no blank and we do not want one. Examples:
13328                     #    print $x++    # do not break after $x
13329                     #    print HTML"HELLO"   # break ok after HTML
13330                     (
13331                            $next_type ne 'b'
13332                         && defined( $want_left_space{$next_type} )
13333                         && $want_left_space{$next_type} == WS_NO
13334                     )
13335
13336                     # or we might be followed by the start of a quote
13337                     || $next_nonblank_type =~ /^[\/\?]$/
13338                   )
13339                 {
13340                     $bond_str = NO_BREAK;
13341                 }
13342             }
13343
13344             # Do not break before a possible file handle
13345             if ( $next_nonblank_type eq 'Z' ) {
13346                 $bond_str = NO_BREAK;
13347             }
13348
13349             # As a defensive measure, do not break between a '(' and a
13350             # filehandle.  In some cases, this can cause an error.  For
13351             # example, the following program works:
13352             #    my $msg="hi!\n";
13353             #    print
13354             #    ( STDOUT
13355             #    $msg
13356             #    );
13357             #
13358             # But this program fails:
13359             #    my $msg="hi!\n";
13360             #    print
13361             #    (
13362             #    STDOUT
13363             #    $msg
13364             #    );
13365             #
13366             # This is normally only a problem with the 'extrude' option
13367             if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
13368                 $bond_str = NO_BREAK;
13369             }
13370
13371             # Breaking before a ++ can cause perl to guess wrong. For
13372             # example the following line will cause a syntax error
13373             # with -extrude if we break between '$i' and '++' [fixstyle2]
13374             #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
13375             elsif ( $next_nonblank_type eq '++' ) {
13376                 $bond_str = NO_BREAK;
13377             }
13378
13379             # Breaking before a ? before a quote can cause trouble if
13380             # they are not separated by a blank.
13381             # Example: a syntax error occurs if you break before the ? here
13382             #  my$logic=join$all?' && ':' || ',@regexps;
13383             # From: Professional_Perl_Programming_Code/multifind.pl
13384             elsif ( $next_nonblank_type eq '?' ) {
13385                 $bond_str = NO_BREAK
13386                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
13387             }
13388
13389             # Breaking before a . followed by a number
13390             # can cause trouble if there is no intervening space
13391             # Example: a syntax error occurs if you break before the .2 here
13392             #  $str .= pack($endian.2, ensurrogate($ord));
13393             # From: perl58/Unicode.pm
13394             elsif ( $next_nonblank_type eq '.' ) {
13395                 $bond_str = NO_BREAK
13396                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
13397             }
13398
13399             # patch to put cuddled elses back together when on multiple
13400             # lines, as in: } \n else \n { \n
13401             if ($rOpts_cuddled_else) {
13402
13403                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
13404                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
13405                 {
13406                     $bond_str = NO_BREAK;
13407                 }
13408             }
13409
13410             # keep '}' together with ';'
13411             if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
13412                 $bond_str = NO_BREAK;
13413             }
13414
13415             # never break between sub name and opening paren
13416             if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
13417                 $bond_str = NO_BREAK;
13418             }
13419
13420             #---------------------------------------------------------------
13421             # section 3:
13422             # now take nesting depth into account
13423             #---------------------------------------------------------------
13424             # final strength incorporates the bond strength and nesting depth
13425             my $strength;
13426
13427             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
13428                 if ( $total_nesting_depth > 0 ) {
13429                     $strength = $bond_str + $total_nesting_depth;
13430                 }
13431                 else {
13432                     $strength = $bond_str;
13433                 }
13434             }
13435             else {
13436                 $strength = NO_BREAK;
13437             }
13438
13439             # always break after side comment
13440             if ( $type eq '#' ) { $strength = 0 }
13441
13442             $bond_strength_to_go[$i] = $strength;
13443
13444             FORMATTER_DEBUG_FLAG_BOND && do {
13445                 my $str = substr( $token, 0, 15 );
13446                 $str .= ' ' x ( 16 - length($str) );
13447                 print
13448 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
13449             };
13450         }
13451     }
13452
13453 }
13454
13455 sub pad_array_to_go {
13456
13457     # to simplify coding in scan_list and set_bond_strengths, it helps
13458     # to create some extra blank tokens at the end of the arrays
13459     $tokens_to_go[ $max_index_to_go + 1 ] = '';
13460     $tokens_to_go[ $max_index_to_go + 2 ] = '';
13461     $types_to_go[ $max_index_to_go + 1 ]  = 'b';
13462     $types_to_go[ $max_index_to_go + 2 ]  = 'b';
13463     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
13464       $nesting_depth_to_go[$max_index_to_go];
13465
13466     #    /^[R\}\)\]]$/
13467     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
13468         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
13469
13470             # shouldn't happen:
13471             unless ( get_saw_brace_error() ) {
13472                 warning(
13473 "Program bug in scan_list: hit nesting error which should have been caught\n"
13474                 );
13475                 report_definite_bug();
13476             }
13477         }
13478         else {
13479             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
13480         }
13481     }
13482
13483     #       /^[L\{\(\[]$/
13484     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
13485         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
13486     }
13487 }
13488
13489 {    # begin scan_list
13490
13491     my (
13492         $block_type,                $current_depth,
13493         $depth,                     $i,
13494         $i_last_nonblank_token,     $last_colon_sequence_number,
13495         $last_nonblank_token,       $last_nonblank_type,
13496         $last_old_breakpoint_count, $minimum_depth,
13497         $next_nonblank_block_type,  $next_nonblank_token,
13498         $next_nonblank_type,        $old_breakpoint_count,
13499         $starting_breakpoint_count, $starting_depth,
13500         $token,                     $type,
13501         $type_sequence,
13502     );
13503
13504     my (
13505         @breakpoint_stack,              @breakpoint_undo_stack,
13506         @comma_index,                   @container_type,
13507         @identifier_count_stack,        @index_before_arrow,
13508         @interrupted_list,              @item_count_stack,
13509         @last_comma_index,              @last_dot_index,
13510         @last_nonblank_type,            @old_breakpoint_count_stack,
13511         @opening_structure_index_stack, @rfor_semicolon_list,
13512         @has_old_logical_breakpoints,   @rand_or_list,
13513         @i_equals,
13514     );
13515
13516     # routine to define essential variables when we go 'up' to
13517     # a new depth
13518     sub check_for_new_minimum_depth {
13519         my $depth = shift;
13520         if ( $depth < $minimum_depth ) {
13521
13522             $minimum_depth = $depth;
13523
13524             # these arrays need not retain values between calls
13525             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
13526             $container_type[$depth]                = "";
13527             $identifier_count_stack[$depth]        = 0;
13528             $index_before_arrow[$depth]            = -1;
13529             $interrupted_list[$depth]              = 1;
13530             $item_count_stack[$depth]              = 0;
13531             $last_nonblank_type[$depth]            = "";
13532             $opening_structure_index_stack[$depth] = -1;
13533
13534             $breakpoint_undo_stack[$depth]       = undef;
13535             $comma_index[$depth]                 = undef;
13536             $last_comma_index[$depth]            = undef;
13537             $last_dot_index[$depth]              = undef;
13538             $old_breakpoint_count_stack[$depth]  = undef;
13539             $has_old_logical_breakpoints[$depth] = 0;
13540             $rand_or_list[$depth]                = [];
13541             $rfor_semicolon_list[$depth]         = [];
13542             $i_equals[$depth]                    = -1;
13543
13544             # these arrays must retain values between calls
13545             if ( !defined( $has_broken_sublist[$depth] ) ) {
13546                 $dont_align[$depth]         = 0;
13547                 $has_broken_sublist[$depth] = 0;
13548                 $want_comma_break[$depth]   = 0;
13549             }
13550         }
13551     }
13552
13553     # routine to decide which commas to break at within a container;
13554     # returns:
13555     #   $bp_count = number of comma breakpoints set
13556     #   $do_not_break_apart = a flag indicating if container need not
13557     #     be broken open
13558     sub set_comma_breakpoints {
13559
13560         my $dd                 = shift;
13561         my $bp_count           = 0;
13562         my $do_not_break_apart = 0;
13563
13564         # anything to do?
13565         if ( $item_count_stack[$dd] ) {
13566
13567             # handle commas not in containers...
13568             if ( $dont_align[$dd] ) {
13569                 do_uncontained_comma_breaks($dd);
13570             }
13571
13572             # handle commas within containers...
13573             else {
13574                 my $fbc = $forced_breakpoint_count;
13575
13576                 # always open comma lists not preceded by keywords,
13577                 # barewords, identifiers (that is, anything that doesn't
13578                 # look like a function call)
13579                 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
13580
13581                 set_comma_breakpoints_do(
13582                     $dd,
13583                     $opening_structure_index_stack[$dd],
13584                     $i,
13585                     $item_count_stack[$dd],
13586                     $identifier_count_stack[$dd],
13587                     $comma_index[$dd],
13588                     $next_nonblank_type,
13589                     $container_type[$dd],
13590                     $interrupted_list[$dd],
13591                     \$do_not_break_apart,
13592                     $must_break_open,
13593                 );
13594                 $bp_count = $forced_breakpoint_count - $fbc;
13595                 $do_not_break_apart = 0 if $must_break_open;
13596             }
13597         }
13598         return ( $bp_count, $do_not_break_apart );
13599     }
13600
13601     sub do_uncontained_comma_breaks {
13602
13603         # Handle commas not in containers...
13604         # This is a catch-all routine for commas that we
13605         # don't know what to do with because the don't fall
13606         # within containers.  We will bias the bond strength
13607         # to break at commas which ended lines in the input
13608         # file.  This usually works better than just trying
13609         # to put as many items on a line as possible.  A
13610         # downside is that if the input file is garbage it
13611         # won't work very well. However, the user can always
13612         # prevent following the old breakpoints with the
13613         # -iob flag.
13614         my $dd   = shift;
13615         my $bias = -.01;
13616         foreach my $ii ( @{ $comma_index[$dd] } ) {
13617             if ( $old_breakpoint_to_go[$ii] ) {
13618                 $bond_strength_to_go[$ii] = $bias;
13619
13620                 # reduce bias magnitude to force breaks in order
13621                 $bias *= 0.99;
13622             }
13623         }
13624
13625         # Also put a break before the first comma if
13626         # (1) there was a break there in the input, and
13627         # (2) that was exactly one previous break in the input
13628         #
13629         # For example, we will follow the user and break after
13630         # 'print' in this snippet:
13631         #    print
13632         #      "conformability (Not the same dimension)\n",
13633         #      "\t", $have, " is ", text_unit($hu), "\n",
13634         #      "\t", $want, " is ", text_unit($wu), "\n",
13635         #      ;
13636         my $i_first_comma = $comma_index[$dd]->[0];
13637         if ( $old_breakpoint_to_go[$i_first_comma] ) {
13638             my $level_comma = $levels_to_go[$i_first_comma];
13639             my $ibreak      = -1;
13640             my $obp_count   = 0;
13641             for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
13642                 if ( $old_breakpoint_to_go[$ii] ) {
13643                     $obp_count++;
13644                     last if ( $obp_count > 1 );
13645                     $ibreak = $ii
13646                       if ( $levels_to_go[$ii] == $level_comma );
13647                 }
13648             }
13649             if ( $ibreak >= 0 && $obp_count == 1 ) {
13650                 set_forced_breakpoint($ibreak);
13651             }
13652         }
13653     }
13654
13655     my %is_logical_container;
13656
13657     BEGIN {
13658         @_ = qw# if elsif unless while and or err not && | || ? : ! #;
13659         @is_logical_container{@_} = (1) x scalar(@_);
13660     }
13661
13662     sub set_for_semicolon_breakpoints {
13663         my $dd = shift;
13664         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
13665             set_forced_breakpoint($_);
13666         }
13667     }
13668
13669     sub set_logical_breakpoints {
13670         my $dd = shift;
13671         if (
13672                $item_count_stack[$dd] == 0
13673             && $is_logical_container{ $container_type[$dd] }
13674
13675             # TESTING:
13676             || $has_old_logical_breakpoints[$dd]
13677           )
13678         {
13679
13680             # Look for breaks in this order:
13681             # 0   1    2   3
13682             # or  and  ||  &&
13683             foreach my $i ( 0 .. 3 ) {
13684                 if ( $rand_or_list[$dd][$i] ) {
13685                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
13686                         set_forced_breakpoint($_);
13687                     }
13688
13689                     # break at any 'if' and 'unless' too
13690                     foreach ( @{ $rand_or_list[$dd][4] } ) {
13691                         set_forced_breakpoint($_);
13692                     }
13693                     $rand_or_list[$dd] = [];
13694                     last;
13695                 }
13696             }
13697         }
13698     }
13699
13700     sub is_unbreakable_container {
13701
13702         # never break a container of one of these types
13703         # because bad things can happen (map1.t)
13704         my $dd = shift;
13705         $is_sort_map_grep{ $container_type[$dd] };
13706     }
13707
13708     sub scan_list {
13709
13710         # This routine is responsible for setting line breaks for all lists,
13711         # so that hierarchical structure can be displayed and so that list
13712         # items can be vertically aligned.  The output of this routine is
13713         # stored in the array @forced_breakpoint_to_go, which is used to set
13714         # final breakpoints.
13715
13716         $starting_depth = $nesting_depth_to_go[0];
13717
13718         $block_type                 = ' ';
13719         $current_depth              = $starting_depth;
13720         $i                          = -1;
13721         $last_colon_sequence_number = -1;
13722         $last_nonblank_token        = ';';
13723         $last_nonblank_type         = ';';
13724         $last_nonblank_block_type   = ' ';
13725         $last_old_breakpoint_count  = 0;
13726         $minimum_depth = $current_depth + 1;    # forces update in check below
13727         $old_breakpoint_count      = 0;
13728         $starting_breakpoint_count = $forced_breakpoint_count;
13729         $token                     = ';';
13730         $type                      = ';';
13731         $type_sequence             = '';
13732
13733         check_for_new_minimum_depth($current_depth);
13734
13735         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13736         my $want_previous_breakpoint = -1;
13737
13738         my $saw_good_breakpoint;
13739         my $i_line_end   = -1;
13740         my $i_line_start = -1;
13741
13742         # loop over all tokens in this batch
13743         while ( ++$i <= $max_index_to_go ) {
13744             if ( $type ne 'b' ) {
13745                 $i_last_nonblank_token    = $i - 1;
13746                 $last_nonblank_type       = $type;
13747                 $last_nonblank_token      = $token;
13748                 $last_nonblank_block_type = $block_type;
13749             }
13750             $type          = $types_to_go[$i];
13751             $block_type    = $block_type_to_go[$i];
13752             $token         = $tokens_to_go[$i];
13753             $type_sequence = $type_sequence_to_go[$i];
13754             my $next_type       = $types_to_go[ $i + 1 ];
13755             my $next_token      = $tokens_to_go[ $i + 1 ];
13756             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13757             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
13758             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
13759             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13760
13761             # set break if flag was set
13762             if ( $want_previous_breakpoint >= 0 ) {
13763                 set_forced_breakpoint($want_previous_breakpoint);
13764                 $want_previous_breakpoint = -1;
13765             }
13766
13767             $last_old_breakpoint_count = $old_breakpoint_count;
13768             if ( $old_breakpoint_to_go[$i] ) {
13769                 $i_line_end   = $i;
13770                 $i_line_start = $i_next_nonblank;
13771
13772                 $old_breakpoint_count++;
13773
13774                 # Break before certain keywords if user broke there and
13775                 # this is a 'safe' break point. The idea is to retain
13776                 # any preferred breaks for sequential list operations,
13777                 # like a schwartzian transform.
13778                 if ($rOpts_break_at_old_keyword_breakpoints) {
13779                     if (
13780                            $next_nonblank_type eq 'k'
13781                         && $is_keyword_returning_list{$next_nonblank_token}
13782                         && (   $type =~ /^[=\)\]\}Riw]$/
13783                             || $type eq 'k'
13784                             && $is_keyword_returning_list{$token} )
13785                       )
13786                     {
13787
13788                         # we actually have to set this break next time through
13789                         # the loop because if we are at a closing token (such
13790                         # as '}') which forms a one-line block, this break might
13791                         # get undone.
13792                         $want_previous_breakpoint = $i;
13793                     }
13794                 }
13795             }
13796             next if ( $type eq 'b' );
13797             $depth = $nesting_depth_to_go[ $i + 1 ];
13798
13799             # safety check - be sure we always break after a comment
13800             # Shouldn't happen .. an error here probably means that the
13801             # nobreak flag did not get turned off correctly during
13802             # formatting.
13803             if ( $type eq '#' ) {
13804                 if ( $i != $max_index_to_go ) {
13805                     warning(
13806 "Non-fatal program bug: backup logic needed to break after a comment\n"
13807                     );
13808                     report_definite_bug();
13809                     $nobreak_to_go[$i] = 0;
13810                     set_forced_breakpoint($i);
13811                 }
13812             }
13813
13814             # Force breakpoints at certain tokens in long lines.
13815             # Note that such breakpoints will be undone later if these tokens
13816             # are fully contained within parens on a line.
13817             if (
13818
13819                 # break before a keyword within a line
13820                 $type eq 'k'
13821                 && $i > 0
13822
13823                 # if one of these keywords:
13824                 && $token =~ /^(if|unless|while|until|for)$/
13825
13826                 # but do not break at something like '1 while'
13827                 && ( $last_nonblank_type ne 'n' || $i > 2 )
13828
13829                 # and let keywords follow a closing 'do' brace
13830                 && $last_nonblank_block_type ne 'do'
13831
13832                 && (
13833                     $is_long_line
13834
13835                     # or container is broken (by side-comment, etc)
13836                     || (   $next_nonblank_token eq '('
13837                         && $mate_index_to_go[$i_next_nonblank] < $i )
13838                 )
13839               )
13840             {
13841                 set_forced_breakpoint( $i - 1 );
13842             }
13843
13844             # remember locations of '||'  and '&&' for possible breaks if we
13845             # decide this is a long logical expression.
13846             if ( $type eq '||' ) {
13847                 push @{ $rand_or_list[$depth][2] }, $i;
13848                 ++$has_old_logical_breakpoints[$depth]
13849                   if ( ( $i == $i_line_start || $i == $i_line_end )
13850                     && $rOpts_break_at_old_logical_breakpoints );
13851             }
13852             elsif ( $type eq '&&' ) {
13853                 push @{ $rand_or_list[$depth][3] }, $i;
13854                 ++$has_old_logical_breakpoints[$depth]
13855                   if ( ( $i == $i_line_start || $i == $i_line_end )
13856                     && $rOpts_break_at_old_logical_breakpoints );
13857             }
13858             elsif ( $type eq 'f' ) {
13859                 push @{ $rfor_semicolon_list[$depth] }, $i;
13860             }
13861             elsif ( $type eq 'k' ) {
13862                 if ( $token eq 'and' ) {
13863                     push @{ $rand_or_list[$depth][1] }, $i;
13864                     ++$has_old_logical_breakpoints[$depth]
13865                       if ( ( $i == $i_line_start || $i == $i_line_end )
13866                         && $rOpts_break_at_old_logical_breakpoints );
13867                 }
13868
13869                 # break immediately at 'or's which are probably not in a logical
13870                 # block -- but we will break in logical breaks below so that
13871                 # they do not add to the forced_breakpoint_count
13872                 elsif ( $token eq 'or' ) {
13873                     push @{ $rand_or_list[$depth][0] }, $i;
13874                     ++$has_old_logical_breakpoints[$depth]
13875                       if ( ( $i == $i_line_start || $i == $i_line_end )
13876                         && $rOpts_break_at_old_logical_breakpoints );
13877                     if ( $is_logical_container{ $container_type[$depth] } ) {
13878                     }
13879                     else {
13880                         if ($is_long_line) { set_forced_breakpoint($i) }
13881                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
13882                             && $rOpts_break_at_old_logical_breakpoints )
13883                         {
13884                             $saw_good_breakpoint = 1;
13885                         }
13886                     }
13887                 }
13888                 elsif ( $token eq 'if' || $token eq 'unless' ) {
13889                     push @{ $rand_or_list[$depth][4] }, $i;
13890                     if ( ( $i == $i_line_start || $i == $i_line_end )
13891                         && $rOpts_break_at_old_logical_breakpoints )
13892                     {
13893                         set_forced_breakpoint($i);
13894                     }
13895                 }
13896             }
13897             elsif ( $is_assignment{$type} ) {
13898                 $i_equals[$depth] = $i;
13899             }
13900
13901             if ($type_sequence) {
13902
13903                 # handle any postponed closing breakpoints
13904                 if ( $token =~ /^[\)\]\}\:]$/ ) {
13905                     if ( $type eq ':' ) {
13906                         $last_colon_sequence_number = $type_sequence;
13907
13908                         # TESTING: retain break at a ':' line break
13909                         if ( ( $i == $i_line_start || $i == $i_line_end )
13910                             && $rOpts_break_at_old_ternary_breakpoints )
13911                         {
13912
13913                             # TESTING:
13914                             set_forced_breakpoint($i);
13915
13916                             # break at previous '='
13917                             if ( $i_equals[$depth] > 0 ) {
13918                                 set_forced_breakpoint( $i_equals[$depth] );
13919                                 $i_equals[$depth] = -1;
13920                             }
13921                         }
13922                     }
13923                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
13924                         my $inc = ( $type eq ':' ) ? 0 : 1;
13925                         set_forced_breakpoint( $i - $inc );
13926                         delete $postponed_breakpoint{$type_sequence};
13927                     }
13928                 }
13929
13930                 # set breaks at ?/: if they will get separated (and are
13931                 # not a ?/: chain), or if the '?' is at the end of the
13932                 # line
13933                 elsif ( $token eq '?' ) {
13934                     my $i_colon = $mate_index_to_go[$i];
13935                     if (
13936                         $i_colon <= 0  # the ':' is not in this batch
13937                         || $i == 0     # this '?' is the first token of the line
13938                         || $i ==
13939                         $max_index_to_go    # or this '?' is the last token
13940                       )
13941                     {
13942
13943                         # don't break at a '?' if preceded by ':' on
13944                         # this line of previous ?/: pair on this line.
13945                         # This is an attempt to preserve a chain of ?/:
13946                         # expressions (elsif2.t).  And don't break if
13947                         # this has a side comment.
13948                         set_forced_breakpoint($i)
13949                           unless (
13950                             $type_sequence == (
13951                                 $last_colon_sequence_number +
13952                                   TYPE_SEQUENCE_INCREMENT
13953                             )
13954                             || $tokens_to_go[$max_index_to_go] eq '#'
13955                           );
13956                         set_closing_breakpoint($i);
13957                     }
13958                 }
13959             }
13960
13961 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
13962
13963             #------------------------------------------------------------
13964             # Handle Increasing Depth..
13965             #
13966             # prepare for a new list when depth increases
13967             # token $i is a '(','{', or '['
13968             #------------------------------------------------------------
13969             if ( $depth > $current_depth ) {
13970
13971                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
13972                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
13973                 $has_broken_sublist[$depth]     = 0;
13974                 $identifier_count_stack[$depth] = 0;
13975                 $index_before_arrow[$depth]     = -1;
13976                 $interrupted_list[$depth]       = 0;
13977                 $item_count_stack[$depth]       = 0;
13978                 $last_comma_index[$depth]       = undef;
13979                 $last_dot_index[$depth]         = undef;
13980                 $last_nonblank_type[$depth]     = $last_nonblank_type;
13981                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
13982                 $opening_structure_index_stack[$depth] = $i;
13983                 $rand_or_list[$depth]                  = [];
13984                 $rfor_semicolon_list[$depth]           = [];
13985                 $i_equals[$depth]                      = -1;
13986                 $want_comma_break[$depth]              = 0;
13987                 $container_type[$depth] =
13988                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13989                   ? $last_nonblank_token
13990                   : "";
13991                 $has_old_logical_breakpoints[$depth] = 0;
13992
13993                 # if line ends here then signal closing token to break
13994                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13995                 {
13996                     set_closing_breakpoint($i);
13997                 }
13998
13999                 # Not all lists of values should be vertically aligned..
14000                 $dont_align[$depth] =
14001
14002                   # code BLOCKS are handled at a higher level
14003                   ( $block_type ne "" )
14004
14005                   # certain paren lists
14006                   || ( $type eq '(' ) && (
14007
14008                     # it does not usually look good to align a list of
14009                     # identifiers in a parameter list, as in:
14010                     #    my($var1, $var2, ...)
14011                     # (This test should probably be refined, for now I'm just
14012                     # testing for any keyword)
14013                     ( $last_nonblank_type eq 'k' )
14014
14015                     # a trailing '(' usually indicates a non-list
14016                     || ( $next_nonblank_type eq '(' )
14017                   );
14018
14019                 # patch to outdent opening brace of long if/for/..
14020                 # statements (like this one).  See similar coding in
14021                 # set_continuation breaks.  We have also catch it here for
14022                 # short line fragments which otherwise will not go through
14023                 # set_continuation_breaks.
14024                 if (
14025                     $block_type
14026
14027                     # if we have the ')' but not its '(' in this batch..
14028                     && ( $last_nonblank_token eq ')' )
14029                     && $mate_index_to_go[$i_last_nonblank_token] < 0
14030
14031                     # and user wants brace to left
14032                     && !$rOpts->{'opening-brace-always-on-right'}
14033
14034                     && ( $type  eq '{' )    # should be true
14035                     && ( $token eq '{' )    # should be true
14036                   )
14037                 {
14038                     set_forced_breakpoint( $i - 1 );
14039                 }
14040             }
14041
14042             #------------------------------------------------------------
14043             # Handle Decreasing Depth..
14044             #
14045             # finish off any old list when depth decreases
14046             # token $i is a ')','}', or ']'
14047             #------------------------------------------------------------
14048             elsif ( $depth < $current_depth ) {
14049
14050                 check_for_new_minimum_depth($depth);
14051
14052                 # force all outer logical containers to break after we see on
14053                 # old breakpoint
14054                 $has_old_logical_breakpoints[$depth] ||=
14055                   $has_old_logical_breakpoints[$current_depth];
14056
14057                 # Patch to break between ') {' if the paren list is broken.
14058                 # There is similar logic in set_continuation_breaks for
14059                 # non-broken lists.
14060                 if (   $token eq ')'
14061                     && $next_nonblank_block_type
14062                     && $interrupted_list[$current_depth]
14063                     && $next_nonblank_type eq '{'
14064                     && !$rOpts->{'opening-brace-always-on-right'} )
14065                 {
14066                     set_forced_breakpoint($i);
14067                 }
14068
14069 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
14070
14071                 # set breaks at commas if necessary
14072                 my ( $bp_count, $do_not_break_apart ) =
14073                   set_comma_breakpoints($current_depth);
14074
14075                 my $i_opening = $opening_structure_index_stack[$current_depth];
14076                 my $saw_opening_structure = ( $i_opening >= 0 );
14077
14078                 # this term is long if we had to break at interior commas..
14079                 my $is_long_term = $bp_count > 0;
14080
14081                 # ..or if the length between opening and closing parens exceeds
14082                 # allowed line length
14083                 if ( !$is_long_term && $saw_opening_structure ) {
14084                     my $i_opening_minus = find_token_starting_list($i_opening);
14085
14086                     # Note: we have to allow for one extra space after a
14087                     # closing token so that we do not strand a comma or
14088                     # semicolon, hence the '>=' here (oneline.t)
14089                     $is_long_term =
14090                       excess_line_length( $i_opening_minus, $i ) >= 0;
14091                 }
14092
14093                 # We've set breaks after all comma-arrows.  Now we have to
14094                 # undo them if this can be a one-line block
14095                 # (the only breakpoints set will be due to comma-arrows)
14096                 if (
14097
14098                     # user doesn't require breaking after all comma-arrows
14099                     ( $rOpts_comma_arrow_breakpoints != 0 )
14100
14101                     # and if the opening structure is in this batch
14102                     && $saw_opening_structure
14103
14104                     # and either on the same old line
14105                     && (
14106                         $old_breakpoint_count_stack[$current_depth] ==
14107                         $last_old_breakpoint_count
14108
14109                         # or user wants to form long blocks with arrows
14110                         || $rOpts_comma_arrow_breakpoints == 2
14111                     )
14112
14113                   # and we made some breakpoints between the opening and closing
14114                     && ( $breakpoint_undo_stack[$current_depth] <
14115                         $forced_breakpoint_undo_count )
14116
14117                     # and this block is short enough to fit on one line
14118                     # Note: use < because need 1 more space for possible comma
14119                     && !$is_long_term
14120
14121                   )
14122                 {
14123                     undo_forced_breakpoint_stack(
14124                         $breakpoint_undo_stack[$current_depth] );
14125                 }
14126
14127                 # now see if we have any comma breakpoints left
14128                 my $has_comma_breakpoints =
14129                   ( $breakpoint_stack[$current_depth] !=
14130                       $forced_breakpoint_count );
14131
14132                 # update broken-sublist flag of the outer container
14133                 $has_broken_sublist[$depth] =
14134                      $has_broken_sublist[$depth]
14135                   || $has_broken_sublist[$current_depth]
14136                   || $is_long_term
14137                   || $has_comma_breakpoints;
14138
14139 # Having come to the closing ')', '}', or ']', now we have to decide if we
14140 # should 'open up' the structure by placing breaks at the opening and
14141 # closing containers.  This is a tricky decision.  Here are some of the
14142 # basic considerations:
14143 #
14144 # -If this is a BLOCK container, then any breakpoints will have already
14145 # been set (and according to user preferences), so we need do nothing here.
14146 #
14147 # -If we have a comma-separated list for which we can align the list items,
14148 # then we need to do so because otherwise the vertical aligner cannot
14149 # currently do the alignment.
14150 #
14151 # -If this container does itself contain a container which has been broken
14152 # open, then it should be broken open to properly show the structure.
14153 #
14154 # -If there is nothing to align, and no other reason to break apart,
14155 # then do not do it.
14156 #
14157 # We will not break open the parens of a long but 'simple' logical expression.
14158 # For example:
14159 #
14160 # This is an example of a simple logical expression and its formatting:
14161 #
14162 #     if ( $bigwasteofspace1 && $bigwasteofspace2
14163 #         || $bigwasteofspace3 && $bigwasteofspace4 )
14164 #
14165 # Most people would prefer this than the 'spacey' version:
14166 #
14167 #     if (
14168 #         $bigwasteofspace1 && $bigwasteofspace2
14169 #         || $bigwasteofspace3 && $bigwasteofspace4
14170 #     )
14171 #
14172 # To illustrate the rules for breaking logical expressions, consider:
14173 #
14174 #             FULLY DENSE:
14175 #             if ( $opt_excl
14176 #                 and ( exists $ids_excl_uc{$id_uc}
14177 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
14178 #
14179 # This is on the verge of being difficult to read.  The current default is to
14180 # open it up like this:
14181 #
14182 #             DEFAULT:
14183 #             if (
14184 #                 $opt_excl
14185 #                 and ( exists $ids_excl_uc{$id_uc}
14186 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
14187 #               )
14188 #
14189 # This is a compromise which tries to avoid being too dense and to spacey.
14190 # A more spaced version would be:
14191 #
14192 #             SPACEY:
14193 #             if (
14194 #                 $opt_excl
14195 #                 and (
14196 #                     exists $ids_excl_uc{$id_uc}
14197 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
14198 #                 )
14199 #               )
14200 #
14201 # Some people might prefer the spacey version -- an option could be added.  The
14202 # innermost expression contains a long block '( exists $ids_...  ')'.
14203 #
14204 # Here is how the logic goes: We will force a break at the 'or' that the
14205 # innermost expression contains, but we will not break apart its opening and
14206 # closing containers because (1) it contains no multi-line sub-containers itself,
14207 # and (2) there is no alignment to be gained by breaking it open like this
14208 #
14209 #             and (
14210 #                 exists $ids_excl_uc{$id_uc}
14211 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
14212 #             )
14213 #
14214 # (although this looks perfectly ok and might be good for long expressions).  The
14215 # outer 'if' container, though, contains a broken sub-container, so it will be
14216 # broken open to avoid too much density.  Also, since it contains no 'or's, there
14217 # will be a forced break at its 'and'.
14218
14219                 # set some flags telling something about this container..
14220                 my $is_simple_logical_expression = 0;
14221                 if (   $item_count_stack[$current_depth] == 0
14222                     && $saw_opening_structure
14223                     && $tokens_to_go[$i_opening] eq '('
14224                     && $is_logical_container{ $container_type[$current_depth] }
14225                   )
14226                 {
14227
14228                     # This seems to be a simple logical expression with
14229                     # no existing breakpoints.  Set a flag to prevent
14230                     # opening it up.
14231                     if ( !$has_comma_breakpoints ) {
14232                         $is_simple_logical_expression = 1;
14233                     }
14234
14235                     # This seems to be a simple logical expression with
14236                     # breakpoints (broken sublists, for example).  Break
14237                     # at all 'or's and '||'s.
14238                     else {
14239                         set_logical_breakpoints($current_depth);
14240                     }
14241                 }
14242
14243                 if ( $is_long_term
14244                     && @{ $rfor_semicolon_list[$current_depth] } )
14245                 {
14246                     set_for_semicolon_breakpoints($current_depth);
14247
14248                     # open up a long 'for' or 'foreach' container to allow
14249                     # leading term alignment unless -lp is used.
14250                     $has_comma_breakpoints = 1
14251                       unless $rOpts_line_up_parentheses;
14252                 }
14253
14254                 if (
14255
14256                     # breaks for code BLOCKS are handled at a higher level
14257                     !$block_type
14258
14259                     # we do not need to break at the top level of an 'if'
14260                     # type expression
14261                     && !$is_simple_logical_expression
14262
14263                     ## modification to keep ': (' containers vertically tight;
14264                     ## but probably better to let user set -vt=1 to avoid
14265                     ## inconsistency with other paren types
14266                     ## && ($container_type[$current_depth] ne ':')
14267
14268                     # otherwise, we require one of these reasons for breaking:
14269                     && (
14270
14271                         # - this term has forced line breaks
14272                         $has_comma_breakpoints
14273
14274                        # - the opening container is separated from this batch
14275                        #   for some reason (comment, blank line, code block)
14276                        # - this is a non-paren container spanning multiple lines
14277                         || !$saw_opening_structure
14278
14279                         # - this is a long block contained in another breakable
14280                         #   container
14281                         || (   $is_long_term
14282                             && $container_environment_to_go[$i_opening] ne
14283                             'BLOCK' )
14284                     )
14285                   )
14286                 {
14287
14288                     # For -lp option, we must put a breakpoint before
14289                     # the token which has been identified as starting
14290                     # this indentation level.  This is necessary for
14291                     # proper alignment.
14292                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
14293                     {
14294                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
14295                         if (   $i_opening + 1 < $max_index_to_go
14296                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
14297                         {
14298                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
14299                         }
14300                         if ( defined($item) ) {
14301                             my $i_start_2 = $item->get_STARTING_INDEX();
14302                             if (
14303                                 defined($i_start_2)
14304
14305                                 # we are breaking after an opening brace, paren,
14306                                 # so don't break before it too
14307                                 && $i_start_2 ne $i_opening
14308                               )
14309                             {
14310
14311                                 # Only break for breakpoints at the same
14312                                 # indentation level as the opening paren
14313                                 my $test1 = $nesting_depth_to_go[$i_opening];
14314                                 my $test2 = $nesting_depth_to_go[$i_start_2];
14315                                 if ( $test2 == $test1 ) {
14316                                     set_forced_breakpoint( $i_start_2 - 1 );
14317                                 }
14318                             }
14319                         }
14320                     }
14321
14322                     # break after opening structure.
14323                     # note: break before closing structure will be automatic
14324                     if ( $minimum_depth <= $current_depth ) {
14325
14326                         set_forced_breakpoint($i_opening)
14327                           unless ( $do_not_break_apart
14328                             || is_unbreakable_container($current_depth) );
14329
14330                         # break at '.' of lower depth level before opening token
14331                         if ( $last_dot_index[$depth] ) {
14332                             set_forced_breakpoint( $last_dot_index[$depth] );
14333                         }
14334
14335                         # break before opening structure if preeced by another
14336                         # closing structure and a comma.  This is normally
14337                         # done by the previous closing brace, but not
14338                         # if it was a one-line block.
14339                         if ( $i_opening > 2 ) {
14340                             my $i_prev =
14341                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
14342                               ? $i_opening - 2
14343                               : $i_opening - 1;
14344
14345                             if (   $types_to_go[$i_prev] eq ','
14346                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14347                             {
14348                                 set_forced_breakpoint($i_prev);
14349                             }
14350
14351                             # also break before something like ':('  or '?('
14352                             # if appropriate.
14353                             elsif (
14354                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14355                             {
14356                                 my $token_prev = $tokens_to_go[$i_prev];
14357                                 if ( $want_break_before{$token_prev} ) {
14358                                     set_forced_breakpoint($i_prev);
14359                                 }
14360                             }
14361                         }
14362                     }
14363
14364                     # break after comma following closing structure
14365                     if ( $next_type eq ',' ) {
14366                         set_forced_breakpoint( $i + 1 );
14367                     }
14368
14369                     # break before an '=' following closing structure
14370                     if (
14371                         $is_assignment{$next_nonblank_type}
14372                         && ( $breakpoint_stack[$current_depth] !=
14373                             $forced_breakpoint_count )
14374                       )
14375                     {
14376                         set_forced_breakpoint($i);
14377                     }
14378
14379                     # break at any comma before the opening structure Added
14380                     # for -lp, but seems to be good in general.  It isn't
14381                     # obvious how far back to look; the '5' below seems to
14382                     # work well and will catch the comma in something like
14383                     #  push @list, myfunc( $param, $param, ..
14384
14385                     my $icomma = $last_comma_index[$depth];
14386                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
14387                         unless ( $forced_breakpoint_to_go[$icomma] ) {
14388                             set_forced_breakpoint($icomma);
14389                         }
14390                     }
14391                 }    # end logic to open up a container
14392
14393                 # Break open a logical container open if it was already open
14394                 elsif ($is_simple_logical_expression
14395                     && $has_old_logical_breakpoints[$current_depth] )
14396                 {
14397                     set_logical_breakpoints($current_depth);
14398                 }
14399
14400                 # Handle long container which does not get opened up
14401                 elsif ($is_long_term) {
14402
14403                     # must set fake breakpoint to alert outer containers that
14404                     # they are complex
14405                     set_fake_breakpoint();
14406                 }
14407             }
14408
14409             #------------------------------------------------------------
14410             # Handle this token
14411             #------------------------------------------------------------
14412
14413             $current_depth = $depth;
14414
14415             # handle comma-arrow
14416             if ( $type eq '=>' ) {
14417                 next if ( $last_nonblank_type eq '=>' );
14418                 next if $rOpts_break_at_old_comma_breakpoints;
14419                 next if $rOpts_comma_arrow_breakpoints == 3;
14420                 $want_comma_break[$depth]   = 1;
14421                 $index_before_arrow[$depth] = $i_last_nonblank_token;
14422                 next;
14423             }
14424
14425             elsif ( $type eq '.' ) {
14426                 $last_dot_index[$depth] = $i;
14427             }
14428
14429             # Turn off alignment if we are sure that this is not a list
14430             # environment.  To be safe, we will do this if we see certain
14431             # non-list tokens, such as ';', and also the environment is
14432             # not a list.  Note that '=' could be in any of the = operators
14433             # (lextest.t). We can't just use the reported environment
14434             # because it can be incorrect in some cases.
14435             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
14436                 && $container_environment_to_go[$i] ne 'LIST' )
14437             {
14438                 $dont_align[$depth]         = 1;
14439                 $want_comma_break[$depth]   = 0;
14440                 $index_before_arrow[$depth] = -1;
14441             }
14442
14443             # now just handle any commas
14444             next unless ( $type eq ',' );
14445
14446             $last_dot_index[$depth]   = undef;
14447             $last_comma_index[$depth] = $i;
14448
14449             # break here if this comma follows a '=>'
14450             # but not if there is a side comment after the comma
14451             if ( $want_comma_break[$depth] ) {
14452
14453                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
14454                     $want_comma_break[$depth]   = 0;
14455                     $index_before_arrow[$depth] = -1;
14456                     next;
14457                 }
14458
14459                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14460
14461                 # break before the previous token if it looks safe
14462                 # Example of something that we will not try to break before:
14463                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
14464                 # Also we don't want to break at a binary operator (like +):
14465                 # $c->createOval(
14466                 #    $x + $R, $y +
14467                 #    $R => $x - $R,
14468                 #    $y - $R, -fill   => 'black',
14469                 # );
14470                 my $ibreak = $index_before_arrow[$depth] - 1;
14471                 if (   $ibreak > 0
14472                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
14473                 {
14474                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
14475                     if ( $types_to_go[$ibreak]  eq 'b' ) { $ibreak-- }
14476                     if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
14477
14478                         # don't break pointer calls, such as the following:
14479                         #  File::Spec->curdir  => 1,
14480                         # (This is tokenized as adjacent 'w' tokens)
14481                         if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
14482                             set_forced_breakpoint($ibreak);
14483                         }
14484                     }
14485                 }
14486
14487                 $want_comma_break[$depth]   = 0;
14488                 $index_before_arrow[$depth] = -1;
14489
14490                 # handle list which mixes '=>'s and ','s:
14491                 # treat any list items so far as an interrupted list
14492                 $interrupted_list[$depth] = 1;
14493                 next;
14494             }
14495
14496             # break after all commas above starting depth
14497             if ( $depth < $starting_depth && !$dont_align[$depth] ) {
14498                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14499                 next;
14500             }
14501
14502             # add this comma to the list..
14503             my $item_count = $item_count_stack[$depth];
14504             if ( $item_count == 0 ) {
14505
14506                 # but do not form a list with no opening structure
14507                 # for example:
14508
14509                 #            open INFILE_COPY, ">$input_file_copy"
14510                 #              or die ("very long message");
14511
14512                 if ( ( $opening_structure_index_stack[$depth] < 0 )
14513                     && $container_environment_to_go[$i] eq 'BLOCK' )
14514                 {
14515                     $dont_align[$depth] = 1;
14516                 }
14517             }
14518
14519             $comma_index[$depth][$item_count] = $i;
14520             ++$item_count_stack[$depth];
14521             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
14522                 $identifier_count_stack[$depth]++;
14523             }
14524         }
14525
14526         #-------------------------------------------
14527         # end of loop over all tokens in this batch
14528         #-------------------------------------------
14529
14530         # set breaks for any unfinished lists ..
14531         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
14532
14533             $interrupted_list[$dd] = 1;
14534             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
14535             set_comma_breakpoints($dd);
14536             set_logical_breakpoints($dd)
14537               if ( $has_old_logical_breakpoints[$dd] );
14538             set_for_semicolon_breakpoints($dd);
14539
14540             # break open container...
14541             my $i_opening = $opening_structure_index_stack[$dd];
14542             set_forced_breakpoint($i_opening)
14543               unless (
14544                 is_unbreakable_container($dd)
14545
14546                 # Avoid a break which would place an isolated ' or "
14547                 # on a line
14548                 || (   $type eq 'Q'
14549                     && $i_opening >= $max_index_to_go - 2
14550                     && $token =~ /^['"]$/ )
14551               );
14552         }
14553
14554         # Return a flag indicating if the input file had some good breakpoints.
14555         # This flag will be used to force a break in a line shorter than the
14556         # allowed line length.
14557         if ( $has_old_logical_breakpoints[$current_depth] ) {
14558             $saw_good_breakpoint = 1;
14559         }
14560         return $saw_good_breakpoint;
14561     }
14562 }    # end scan_list
14563
14564 sub find_token_starting_list {
14565
14566     # When testing to see if a block will fit on one line, some
14567     # previous token(s) may also need to be on the line; particularly
14568     # if this is a sub call.  So we will look back at least one
14569     # token. NOTE: This isn't perfect, but not critical, because
14570     # if we mis-identify a block, it will be wrapped and therefore
14571     # fixed the next time it is formatted.
14572     my $i_opening_paren = shift;
14573     my $i_opening_minus = $i_opening_paren;
14574     my $im1             = $i_opening_paren - 1;
14575     my $im2             = $i_opening_paren - 2;
14576     my $im3             = $i_opening_paren - 3;
14577     my $typem1          = $types_to_go[$im1];
14578     my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
14579     if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
14580         $i_opening_minus = $i_opening_paren;
14581     }
14582     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
14583         $i_opening_minus = $im1 if $im1 >= 0;
14584
14585         # walk back to improve length estimate
14586         for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
14587             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
14588             $i_opening_minus = $j;
14589         }
14590         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
14591     }
14592     elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
14593     elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
14594         $i_opening_minus = $im2;
14595     }
14596     return $i_opening_minus;
14597 }
14598
14599 {    # begin set_comma_breakpoints_do
14600
14601     my %is_keyword_with_special_leading_term;
14602
14603     BEGIN {
14604
14605         # These keywords have prototypes which allow a special leading item
14606         # followed by a list
14607         @_ =
14608           qw(formline grep kill map printf sprintf push chmod join pack unshift);
14609         @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
14610     }
14611
14612     sub set_comma_breakpoints_do {
14613
14614         # Given a list with some commas, set breakpoints at some of the
14615         # commas, if necessary, to make it easy to read.  This list is
14616         # an example:
14617         my (
14618             $depth,               $i_opening_paren,  $i_closing_paren,
14619             $item_count,          $identifier_count, $rcomma_index,
14620             $next_nonblank_type,  $list_type,        $interrupted,
14621             $rdo_not_break_apart, $must_break_open,
14622         ) = @_;
14623
14624         # nothing to do if no commas seen
14625         return if ( $item_count < 1 );
14626         my $i_first_comma     = $$rcomma_index[0];
14627         my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
14628         my $i_last_comma      = $i_true_last_comma;
14629         if ( $i_last_comma >= $max_index_to_go ) {
14630             $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
14631             return if ( $item_count < 1 );
14632         }
14633
14634         #---------------------------------------------------------------
14635         # find lengths of all items in the list to calculate page layout
14636         #---------------------------------------------------------------
14637         my $comma_count = $item_count;
14638         my @item_lengths;
14639         my @i_term_begin;
14640         my @i_term_end;
14641         my @i_term_comma;
14642         my $i_prev_plus;
14643         my @max_length = ( 0, 0 );
14644         my $first_term_length;
14645         my $i      = $i_opening_paren;
14646         my $is_odd = 1;
14647
14648         for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
14649             $is_odd      = 1 - $is_odd;
14650             $i_prev_plus = $i + 1;
14651             $i           = $$rcomma_index[$j];
14652
14653             my $i_term_end =
14654               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
14655             my $i_term_begin =
14656               ( $types_to_go[$i_prev_plus] eq 'b' )
14657               ? $i_prev_plus + 1
14658               : $i_prev_plus;
14659             push @i_term_begin, $i_term_begin;
14660             push @i_term_end,   $i_term_end;
14661             push @i_term_comma, $i;
14662
14663             # note: currently adding 2 to all lengths (for comma and space)
14664             my $length =
14665               2 + token_sequence_length( $i_term_begin, $i_term_end );
14666             push @item_lengths, $length;
14667
14668             if ( $j == 0 ) {
14669                 $first_term_length = $length;
14670             }
14671             else {
14672
14673                 if ( $length > $max_length[$is_odd] ) {
14674                     $max_length[$is_odd] = $length;
14675                 }
14676             }
14677         }
14678
14679         # now we have to make a distinction between the comma count and item
14680         # count, because the item count will be one greater than the comma
14681         # count if the last item is not terminated with a comma
14682         my $i_b =
14683           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
14684           ? $i_last_comma + 1
14685           : $i_last_comma;
14686         my $i_e =
14687           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
14688           ? $i_closing_paren - 2
14689           : $i_closing_paren - 1;
14690         my $i_effective_last_comma = $i_last_comma;
14691
14692         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
14693
14694         if ( $last_item_length > 0 ) {
14695
14696             # add 2 to length because other lengths include a comma and a blank
14697             $last_item_length += 2;
14698             push @item_lengths, $last_item_length;
14699             push @i_term_begin, $i_b + 1;
14700             push @i_term_end,   $i_e;
14701             push @i_term_comma, undef;
14702
14703             my $i_odd = $item_count % 2;
14704
14705             if ( $last_item_length > $max_length[$i_odd] ) {
14706                 $max_length[$i_odd] = $last_item_length;
14707             }
14708
14709             $item_count++;
14710             $i_effective_last_comma = $i_e + 1;
14711
14712             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
14713                 $identifier_count++;
14714             }
14715         }
14716
14717         #---------------------------------------------------------------
14718         # End of length calculations
14719         #---------------------------------------------------------------
14720
14721         #---------------------------------------------------------------
14722         # Compound List Rule 1:
14723         # Break at (almost) every comma for a list containing a broken
14724         # sublist.  This has higher priority than the Interrupted List
14725         # Rule.
14726         #---------------------------------------------------------------
14727         if ( $has_broken_sublist[$depth] ) {
14728
14729             # Break at every comma except for a comma between two
14730             # simple, small terms.  This prevents long vertical
14731             # columns of, say, just 0's.
14732             my $small_length = 10;    # 2 + actual maximum length wanted
14733
14734             # We'll insert a break in long runs of small terms to
14735             # allow alignment in uniform tables.
14736             my $skipped_count = 0;
14737             my $columns       = table_columns_available($i_first_comma);
14738             my $fields        = int( $columns / $small_length );
14739             if (   $rOpts_maximum_fields_per_table
14740                 && $fields > $rOpts_maximum_fields_per_table )
14741             {
14742                 $fields = $rOpts_maximum_fields_per_table;
14743             }
14744             my $max_skipped_count = $fields - 1;
14745
14746             my $is_simple_last_term = 0;
14747             my $is_simple_next_term = 0;
14748             foreach my $j ( 0 .. $item_count ) {
14749                 $is_simple_last_term = $is_simple_next_term;
14750                 $is_simple_next_term = 0;
14751                 if (   $j < $item_count
14752                     && $i_term_end[$j] == $i_term_begin[$j]
14753                     && $item_lengths[$j] <= $small_length )
14754                 {
14755                     $is_simple_next_term = 1;
14756                 }
14757                 next if $j == 0;
14758                 if (   $is_simple_last_term
14759                     && $is_simple_next_term
14760                     && $skipped_count < $max_skipped_count )
14761                 {
14762                     $skipped_count++;
14763                 }
14764                 else {
14765                     $skipped_count = 0;
14766                     my $i = $i_term_comma[ $j - 1 ];
14767                     last unless defined $i;
14768                     set_forced_breakpoint($i);
14769                 }
14770             }
14771
14772             # always break at the last comma if this list is
14773             # interrupted; we wouldn't want to leave a terminal '{', for
14774             # example.
14775             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14776             return;
14777         }
14778
14779 #my ( $a, $b, $c ) = caller();
14780 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14781 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
14782 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14783
14784         #---------------------------------------------------------------
14785         # Interrupted List Rule:
14786         # A list is is forced to use old breakpoints if it was interrupted
14787         # by side comments or blank lines, or requested by user.
14788         #---------------------------------------------------------------
14789         if (   $rOpts_break_at_old_comma_breakpoints
14790             || $interrupted
14791             || $i_opening_paren < 0 )
14792         {
14793             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14794             return;
14795         }
14796
14797         #---------------------------------------------------------------
14798         # Looks like a list of items.  We have to look at it and size it up.
14799         #---------------------------------------------------------------
14800
14801         my $opening_token = $tokens_to_go[$i_opening_paren];
14802         my $opening_environment =
14803           $container_environment_to_go[$i_opening_paren];
14804
14805         #-------------------------------------------------------------------
14806         # Return if this will fit on one line
14807         #-------------------------------------------------------------------
14808
14809         my $i_opening_minus = find_token_starting_list($i_opening_paren);
14810         return
14811           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14812
14813         #-------------------------------------------------------------------
14814         # Now we know that this block spans multiple lines; we have to set
14815         # at least one breakpoint -- real or fake -- as a signal to break
14816         # open any outer containers.
14817         #-------------------------------------------------------------------
14818         set_fake_breakpoint();
14819
14820         # be sure we do not extend beyond the current list length
14821         if ( $i_effective_last_comma >= $max_index_to_go ) {
14822             $i_effective_last_comma = $max_index_to_go - 1;
14823         }
14824
14825         # Set a flag indicating if we need to break open to keep -lp
14826         # items aligned.  This is necessary if any of the list terms
14827         # exceeds the available space after the '('.
14828         my $need_lp_break_open = $must_break_open;
14829         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14830             my $columns_if_unbroken = $rOpts_maximum_line_length -
14831               total_line_length( $i_opening_minus, $i_opening_paren );
14832             $need_lp_break_open =
14833                  ( $max_length[0] > $columns_if_unbroken )
14834               || ( $max_length[1] > $columns_if_unbroken )
14835               || ( $first_term_length > $columns_if_unbroken );
14836         }
14837
14838         # Specify if the list must have an even number of fields or not.
14839         # It is generally safest to assume an even number, because the
14840         # list items might be a hash list.  But if we can be sure that
14841         # it is not a hash, then we can allow an odd number for more
14842         # flexibility.
14843         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
14844
14845         if (   $identifier_count >= $item_count - 1
14846             || $is_assignment{$next_nonblank_type}
14847             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
14848           )
14849         {
14850             $odd_or_even = 1;
14851         }
14852
14853         # do we have a long first term which should be
14854         # left on a line by itself?
14855         my $use_separate_first_term = (
14856             $odd_or_even == 1       # only if we can use 1 field/line
14857               && $item_count > 3    # need several items
14858               && $first_term_length >
14859               2 * $max_length[0] - 2    # need long first term
14860               && $first_term_length >
14861               2 * $max_length[1] - 2    # need long first term
14862         );
14863
14864         # or do we know from the type of list that the first term should
14865         # be placed alone?
14866         if ( !$use_separate_first_term ) {
14867             if ( $is_keyword_with_special_leading_term{$list_type} ) {
14868                 $use_separate_first_term = 1;
14869
14870                 # should the container be broken open?
14871                 if ( $item_count < 3 ) {
14872                     if ( $i_first_comma - $i_opening_paren < 4 ) {
14873                         $$rdo_not_break_apart = 1;
14874                     }
14875                 }
14876                 elsif ($first_term_length < 20
14877                     && $i_first_comma - $i_opening_paren < 4 )
14878                 {
14879                     my $columns = table_columns_available($i_first_comma);
14880                     if ( $first_term_length < $columns ) {
14881                         $$rdo_not_break_apart = 1;
14882                     }
14883                 }
14884             }
14885         }
14886
14887         # if so,
14888         if ($use_separate_first_term) {
14889
14890             # ..set a break and update starting values
14891             $use_separate_first_term = 1;
14892             set_forced_breakpoint($i_first_comma);
14893             $i_opening_paren = $i_first_comma;
14894             $i_first_comma   = $$rcomma_index[1];
14895             $item_count--;
14896             return if $comma_count == 1;
14897             shift @item_lengths;
14898             shift @i_term_begin;
14899             shift @i_term_end;
14900             shift @i_term_comma;
14901         }
14902
14903         # if not, update the metrics to include the first term
14904         else {
14905             if ( $first_term_length > $max_length[0] ) {
14906                 $max_length[0] = $first_term_length;
14907             }
14908         }
14909
14910         # Field width parameters
14911         my $pair_width = ( $max_length[0] + $max_length[1] );
14912         my $max_width =
14913           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14914
14915         # Number of free columns across the page width for laying out tables
14916         my $columns = table_columns_available($i_first_comma);
14917
14918         # Estimated maximum number of fields which fit this space
14919         # This will be our first guess
14920         my $number_of_fields_max =
14921           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14922             $pair_width );
14923         my $number_of_fields = $number_of_fields_max;
14924
14925         # Find the best-looking number of fields
14926         # and make this our second guess if possible
14927         my ( $number_of_fields_best, $ri_ragged_break_list,
14928             $new_identifier_count )
14929           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14930             $max_width );
14931
14932         if (   $number_of_fields_best != 0
14933             && $number_of_fields_best < $number_of_fields_max )
14934         {
14935             $number_of_fields = $number_of_fields_best;
14936         }
14937
14938         # ----------------------------------------------------------------------
14939         # If we are crowded and the -lp option is being used, try to
14940         # undo some indentation
14941         # ----------------------------------------------------------------------
14942         if (
14943             $rOpts_line_up_parentheses
14944             && (
14945                 $number_of_fields == 0
14946                 || (   $number_of_fields == 1
14947                     && $number_of_fields != $number_of_fields_best )
14948             )
14949           )
14950         {
14951             my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
14952             if ( $available_spaces > 0 ) {
14953
14954                 my $spaces_wanted = $max_width - $columns;    # for 1 field
14955
14956                 if ( $number_of_fields_best == 0 ) {
14957                     $number_of_fields_best =
14958                       get_maximum_fields_wanted( \@item_lengths );
14959                 }
14960
14961                 if ( $number_of_fields_best != 1 ) {
14962                     my $spaces_wanted_2 =
14963                       1 + $pair_width - $columns;             # for 2 fields
14964                     if ( $available_spaces > $spaces_wanted_2 ) {
14965                         $spaces_wanted = $spaces_wanted_2;
14966                     }
14967                 }
14968
14969                 if ( $spaces_wanted > 0 ) {
14970                     my $deleted_spaces =
14971                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14972
14973                     # redo the math
14974                     if ( $deleted_spaces > 0 ) {
14975                         $columns = table_columns_available($i_first_comma);
14976                         $number_of_fields_max =
14977                           maximum_number_of_fields( $columns, $odd_or_even,
14978                             $max_width, $pair_width );
14979                         $number_of_fields = $number_of_fields_max;
14980
14981                         if (   $number_of_fields_best == 1
14982                             && $number_of_fields >= 1 )
14983                         {
14984                             $number_of_fields = $number_of_fields_best;
14985                         }
14986                     }
14987                 }
14988             }
14989         }
14990
14991         # try for one column if two won't work
14992         if ( $number_of_fields <= 0 ) {
14993             $number_of_fields = int( $columns / $max_width );
14994         }
14995
14996         # The user can place an upper bound on the number of fields,
14997         # which can be useful for doing maintenance on tables
14998         if (   $rOpts_maximum_fields_per_table
14999             && $number_of_fields > $rOpts_maximum_fields_per_table )
15000         {
15001             $number_of_fields = $rOpts_maximum_fields_per_table;
15002         }
15003
15004         # How many columns (characters) and lines would this container take
15005         # if no additional whitespace were added?
15006         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
15007             $i_effective_last_comma + 1 );
15008         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
15009         my $packed_lines = 1 + int( $packed_columns / $columns );
15010
15011         # are we an item contained in an outer list?
15012         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
15013
15014         if ( $number_of_fields <= 0 ) {
15015
15016 #         #---------------------------------------------------------------
15017 #         # We're in trouble.  We can't find a single field width that works.
15018 #         # There is no simple answer here; we may have a single long list
15019 #         # item, or many.
15020 #         #---------------------------------------------------------------
15021 #
15022 #         In many cases, it may be best to not force a break if there is just one
15023 #         comma, because the standard continuation break logic will do a better
15024 #         job without it.
15025 #
15026 #         In the common case that all but one of the terms can fit
15027 #         on a single line, it may look better not to break open the
15028 #         containing parens.  Consider, for example
15029 #
15030 #             $color =
15031 #               join ( '/',
15032 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
15033 #                 keys %colors );
15034 #
15035 #         which will look like this with the container broken:
15036 #
15037 #             $color = join (
15038 #                 '/',
15039 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
15040 #             );
15041 #
15042 #         Here is an example of this rule for a long last term:
15043 #
15044 #             log_message( 0, 256, 128,
15045 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
15046 #
15047 #         And here is an example with a long first term:
15048 #
15049 #         $s = sprintf(
15050 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
15051 #             $r, $pu, $ps, $cu, $cs, $tt
15052 #           )
15053 #           if $style eq 'all';
15054
15055             my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
15056             my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
15057             my $long_first_term =
15058               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
15059
15060             # break at every comma ...
15061             if (
15062
15063                 # if requested by user or is best looking
15064                 $number_of_fields_best == 1
15065
15066                 # or if this is a sublist of a larger list
15067                 || $in_hierarchical_list
15068
15069                 # or if multiple commas and we dont have a long first or last
15070                 # term
15071                 || ( $comma_count > 1
15072                     && !( $long_last_term || $long_first_term ) )
15073               )
15074             {
15075                 foreach ( 0 .. $comma_count - 1 ) {
15076                     set_forced_breakpoint( $$rcomma_index[$_] );
15077                 }
15078             }
15079             elsif ($long_last_term) {
15080
15081                 set_forced_breakpoint($i_last_comma);
15082                 $$rdo_not_break_apart = 1 unless $must_break_open;
15083             }
15084             elsif ($long_first_term) {
15085
15086                 set_forced_breakpoint($i_first_comma);
15087             }
15088             else {
15089
15090                 # let breaks be defined by default bond strength logic
15091             }
15092             return;
15093         }
15094
15095         # --------------------------------------------------------
15096         # We have a tentative field count that seems to work.
15097         # How many lines will this require?
15098         # --------------------------------------------------------
15099         my $formatted_lines = $item_count / ($number_of_fields);
15100         if ( $formatted_lines != int $formatted_lines ) {
15101             $formatted_lines = 1 + int $formatted_lines;
15102         }
15103
15104         # So far we've been trying to fill out to the right margin.  But
15105         # compact tables are easier to read, so let's see if we can use fewer
15106         # fields without increasing the number of lines.
15107         $number_of_fields =
15108           compactify_table( $item_count, $number_of_fields, $formatted_lines,
15109             $odd_or_even );
15110
15111         # How many spaces across the page will we fill?
15112         my $columns_per_line =
15113           ( int $number_of_fields / 2 ) * $pair_width +
15114           ( $number_of_fields % 2 ) * $max_width;
15115
15116         my $formatted_columns;
15117
15118         if ( $number_of_fields > 1 ) {
15119             $formatted_columns =
15120               ( $pair_width * ( int( $item_count / 2 ) ) +
15121                   ( $item_count % 2 ) * $max_width );
15122         }
15123         else {
15124             $formatted_columns = $max_width * $item_count;
15125         }
15126         if ( $formatted_columns < $packed_columns ) {
15127             $formatted_columns = $packed_columns;
15128         }
15129
15130         my $unused_columns = $formatted_columns - $packed_columns;
15131
15132         # set some empirical parameters to help decide if we should try to
15133         # align; high sparsity does not look good, especially with few lines
15134         my $sparsity = ($unused_columns) / ($formatted_columns);
15135         my $max_allowed_sparsity =
15136             ( $item_count < 3 )    ? 0.1
15137           : ( $packed_lines == 1 ) ? 0.15
15138           : ( $packed_lines == 2 ) ? 0.4
15139           :                          0.7;
15140
15141         # Begin check for shortcut methods, which avoid treating a list
15142         # as a table for relatively small parenthesized lists.  These
15143         # are usually easier to read if not formatted as tables.
15144         if (
15145             $packed_lines <= 2    # probably can fit in 2 lines
15146             && $item_count < 9    # doesn't have too many items
15147             && $opening_environment eq 'BLOCK'    # not a sub-container
15148             && $opening_token       eq '('        # is paren list
15149           )
15150         {
15151
15152             # Shortcut method 1: for -lp and just one comma:
15153             # This is a no-brainer, just break at the comma.
15154             if (
15155                 $rOpts_line_up_parentheses        # -lp
15156                 && $item_count == 2               # two items, one comma
15157                 && !$must_break_open
15158               )
15159             {
15160                 my $i_break = $$rcomma_index[0];
15161                 set_forced_breakpoint($i_break);
15162                 $$rdo_not_break_apart = 1;
15163                 set_non_alignment_flags( $comma_count, $rcomma_index );
15164                 return;
15165
15166             }
15167
15168             # method 2 is for most small ragged lists which might look
15169             # best if not displayed as a table.
15170             if (
15171                 ( $number_of_fields == 2 && $item_count == 3 )
15172                 || (
15173                     $new_identifier_count > 0    # isn't all quotes
15174                     && $sparsity > 0.15
15175                 )    # would be fairly spaced gaps if aligned
15176               )
15177             {
15178
15179                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15180                     $ri_ragged_break_list );
15181                 ++$break_count if ($use_separate_first_term);
15182
15183                 # NOTE: we should really use the true break count here,
15184                 # which can be greater if there are large terms and
15185                 # little space, but usually this will work well enough.
15186                 unless ($must_break_open) {
15187
15188                     if ( $break_count <= 1 ) {
15189                         $$rdo_not_break_apart = 1;
15190                     }
15191                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15192                     {
15193                         $$rdo_not_break_apart = 1;
15194                     }
15195                 }
15196                 set_non_alignment_flags( $comma_count, $rcomma_index );
15197                 return;
15198             }
15199
15200         }    # end shortcut methods
15201
15202         # debug stuff
15203
15204         FORMATTER_DEBUG_FLAG_SPARSE && do {
15205             print
15206 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
15207
15208         };
15209
15210         #---------------------------------------------------------------
15211         # Compound List Rule 2:
15212         # If this list is too long for one line, and it is an item of a
15213         # larger list, then we must format it, regardless of sparsity
15214         # (ian.t).  One reason that we have to do this is to trigger
15215         # Compound List Rule 1, above, which causes breaks at all commas of
15216         # all outer lists.  In this way, the structure will be properly
15217         # displayed.
15218         #---------------------------------------------------------------
15219
15220         # Decide if this list is too long for one line unless broken
15221         my $total_columns = table_columns_available($i_opening_paren);
15222         my $too_long      = $packed_columns > $total_columns;
15223
15224         # For a paren list, include the length of the token just before the
15225         # '(' because this is likely a sub call, and we would have to
15226         # include the sub name on the same line as the list.  This is still
15227         # imprecise, but not too bad.  (steve.t)
15228         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
15229
15230             $too_long = excess_line_length( $i_opening_minus,
15231                 $i_effective_last_comma + 1 ) > 0;
15232         }
15233
15234         # FIXME: For an item after a '=>', try to include the length of the
15235         # thing before the '=>'.  This is crude and should be improved by
15236         # actually looking back token by token.
15237         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
15238             my $i_opening_minus = $i_opening_paren - 4;
15239             if ( $i_opening_minus >= 0 ) {
15240                 $too_long = excess_line_length( $i_opening_minus,
15241                     $i_effective_last_comma + 1 ) > 0;
15242             }
15243         }
15244
15245         # Always break lists contained in '[' and '{' if too long for 1 line,
15246         # and always break lists which are too long and part of a more complex
15247         # structure.
15248         my $must_break_open_container = $must_break_open
15249           || ( $too_long
15250             && ( $in_hierarchical_list || $opening_token ne '(' ) );
15251
15252 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
15253
15254         #---------------------------------------------------------------
15255         # The main decision:
15256         # Now decide if we will align the data into aligned columns.  Do not
15257         # attempt to align columns if this is a tiny table or it would be
15258         # too spaced.  It seems that the more packed lines we have, the
15259         # sparser the list that can be allowed and still look ok.
15260         #---------------------------------------------------------------
15261
15262         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
15263             || ( $formatted_lines < 2 )
15264             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
15265           )
15266         {
15267
15268             #---------------------------------------------------------------
15269             # too sparse: would look ugly if aligned in a table;
15270             #---------------------------------------------------------------
15271
15272             # use old breakpoints if this is a 'big' list
15273             # FIXME: goal is to improve set_ragged_breakpoints so that
15274             # this is not necessary.
15275             if ( $packed_lines > 2 && $item_count > 10 ) {
15276                 write_logfile_entry("List sparse: using old breakpoints\n");
15277                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
15278             }
15279
15280             # let the continuation logic handle it if 2 lines
15281             else {
15282
15283                 my $break_count = set_ragged_breakpoints( \@i_term_comma,
15284                     $ri_ragged_break_list );
15285                 ++$break_count if ($use_separate_first_term);
15286
15287                 unless ($must_break_open_container) {
15288                     if ( $break_count <= 1 ) {
15289                         $$rdo_not_break_apart = 1;
15290                     }
15291                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15292                     {
15293                         $$rdo_not_break_apart = 1;
15294                     }
15295                 }
15296                 set_non_alignment_flags( $comma_count, $rcomma_index );
15297             }
15298             return;
15299         }
15300
15301         #---------------------------------------------------------------
15302         # go ahead and format as a table
15303         #---------------------------------------------------------------
15304         write_logfile_entry(
15305             "List: auto formatting with $number_of_fields fields/row\n");
15306
15307         my $j_first_break =
15308           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15309
15310         for (
15311             my $j = $j_first_break ;
15312             $j < $comma_count ;
15313             $j += $number_of_fields
15314           )
15315         {
15316             my $i = $$rcomma_index[$j];
15317             set_forced_breakpoint($i);
15318         }
15319         return;
15320     }
15321 }
15322
15323 sub set_non_alignment_flags {
15324
15325     # set flag which indicates that these commas should not be
15326     # aligned
15327     my ( $comma_count, $rcomma_index ) = @_;
15328     foreach ( 0 .. $comma_count - 1 ) {
15329         $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
15330     }
15331 }
15332
15333 sub study_list_complexity {
15334
15335     # Look for complex tables which should be formatted with one term per line.
15336     # Returns the following:
15337     #
15338     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
15339     #    which are hard to read
15340     #  $number_of_fields_best = suggested number of fields based on
15341     #    complexity; = 0 if any number may be used.
15342     #
15343     my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
15344     my $item_count            = @{$ri_term_begin};
15345     my $complex_item_count    = 0;
15346     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
15347     my $i_max                 = @{$ritem_lengths} - 1;
15348     ##my @item_complexity;
15349
15350     my $i_last_last_break = -3;
15351     my $i_last_break      = -2;
15352     my @i_ragged_break_list;
15353
15354     my $definitely_complex = 30;
15355     my $definitely_simple  = 12;
15356     my $quote_count        = 0;
15357
15358     for my $i ( 0 .. $i_max ) {
15359         my $ib = $ri_term_begin->[$i];
15360         my $ie = $ri_term_end->[$i];
15361
15362         # define complexity: start with the actual term length
15363         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
15364
15365         ##TBD: join types here and check for variations
15366         ##my $str=join "", @tokens_to_go[$ib..$ie];
15367
15368         my $is_quote = 0;
15369         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
15370             $is_quote = 1;
15371             $quote_count++;
15372         }
15373         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
15374             $quote_count++;
15375         }
15376
15377         if ( $ib eq $ie ) {
15378             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
15379                 $complex_item_count++;
15380                 $weighted_length *= 2;
15381             }
15382             else {
15383             }
15384         }
15385         else {
15386             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
15387                 $complex_item_count++;
15388                 $weighted_length *= 2;
15389             }
15390             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
15391                 $weighted_length += 4;
15392             }
15393         }
15394
15395         # add weight for extra tokens.
15396         $weighted_length += 2 * ( $ie - $ib );
15397
15398 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
15399 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
15400
15401 ##push @item_complexity, $weighted_length;
15402
15403         # now mark a ragged break after this item it if it is 'long and
15404         # complex':
15405         if ( $weighted_length >= $definitely_complex ) {
15406
15407             # if we broke after the previous term
15408             # then break before it too
15409             if (   $i_last_break == $i - 1
15410                 && $i > 1
15411                 && $i_last_last_break != $i - 2 )
15412             {
15413
15414                 ## FIXME: don't strand a small term
15415                 pop @i_ragged_break_list;
15416                 push @i_ragged_break_list, $i - 2;
15417                 push @i_ragged_break_list, $i - 1;
15418             }
15419
15420             push @i_ragged_break_list, $i;
15421             $i_last_last_break = $i_last_break;
15422             $i_last_break      = $i;
15423         }
15424
15425         # don't break before a small last term -- it will
15426         # not look good on a line by itself.
15427         elsif ($i == $i_max
15428             && $i_last_break == $i - 1
15429             && $weighted_length <= $definitely_simple )
15430         {
15431             pop @i_ragged_break_list;
15432         }
15433     }
15434
15435     my $identifier_count = $i_max + 1 - $quote_count;
15436
15437     # Need more tuning here..
15438     if (   $max_width > 12
15439         && $complex_item_count > $item_count / 2
15440         && $number_of_fields_best != 2 )
15441     {
15442         $number_of_fields_best = 1;
15443     }
15444
15445     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
15446 }
15447
15448 sub get_maximum_fields_wanted {
15449
15450     # Not all tables look good with more than one field of items.
15451     # This routine looks at a table and decides if it should be
15452     # formatted with just one field or not.
15453     # This coding is still under development.
15454     my ($ritem_lengths) = @_;
15455
15456     my $number_of_fields_best = 0;
15457
15458     # For just a few items, we tentatively assume just 1 field.
15459     my $item_count = @{$ritem_lengths};
15460     if ( $item_count <= 5 ) {
15461         $number_of_fields_best = 1;
15462     }
15463
15464     # For larger tables, look at it both ways and see what looks best
15465     else {
15466
15467         my $is_odd            = 1;
15468         my @max_length        = ( 0, 0 );
15469         my @last_length_2     = ( undef, undef );
15470         my @first_length_2    = ( undef, undef );
15471         my $last_length       = undef;
15472         my $total_variation_1 = 0;
15473         my $total_variation_2 = 0;
15474         my @total_variation_2 = ( 0, 0 );
15475         for ( my $j = 0 ; $j < $item_count ; $j++ ) {
15476
15477             $is_odd = 1 - $is_odd;
15478             my $length = $ritem_lengths->[$j];
15479             if ( $length > $max_length[$is_odd] ) {
15480                 $max_length[$is_odd] = $length;
15481             }
15482
15483             if ( defined($last_length) ) {
15484                 my $dl = abs( $length - $last_length );
15485                 $total_variation_1 += $dl;
15486             }
15487             $last_length = $length;
15488
15489             my $ll = $last_length_2[$is_odd];
15490             if ( defined($ll) ) {
15491                 my $dl = abs( $length - $ll );
15492                 $total_variation_2[$is_odd] += $dl;
15493             }
15494             else {
15495                 $first_length_2[$is_odd] = $length;
15496             }
15497             $last_length_2[$is_odd] = $length;
15498         }
15499         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
15500
15501         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
15502         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
15503             $number_of_fields_best = 1;
15504         }
15505     }
15506     return ($number_of_fields_best);
15507 }
15508
15509 sub table_columns_available {
15510     my $i_first_comma = shift;
15511     my $columns =
15512       $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
15513
15514     # Patch: the vertical formatter does not line up lines whose lengths
15515     # exactly equal the available line length because of allowances
15516     # that must be made for side comments.  Therefore, the number of
15517     # available columns is reduced by 1 character.
15518     $columns -= 1;
15519     return $columns;
15520 }
15521
15522 sub maximum_number_of_fields {
15523
15524     # how many fields will fit in the available space?
15525     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
15526     my $max_pairs        = int( $columns / $pair_width );
15527     my $number_of_fields = $max_pairs * 2;
15528     if (   $odd_or_even == 1
15529         && $max_pairs * $pair_width + $max_width <= $columns )
15530     {
15531         $number_of_fields++;
15532     }
15533     return $number_of_fields;
15534 }
15535
15536 sub compactify_table {
15537
15538     # given a table with a certain number of fields and a certain number
15539     # of lines, see if reducing the number of fields will make it look
15540     # better.
15541     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
15542     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
15543         my $min_fields;
15544
15545         for (
15546             $min_fields = $number_of_fields ;
15547             $min_fields >= $odd_or_even
15548             && $min_fields * $formatted_lines >= $item_count ;
15549             $min_fields -= $odd_or_even
15550           )
15551         {
15552             $number_of_fields = $min_fields;
15553         }
15554     }
15555     return $number_of_fields;
15556 }
15557
15558 sub set_ragged_breakpoints {
15559
15560     # Set breakpoints in a list that cannot be formatted nicely as a
15561     # table.
15562     my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
15563
15564     my $break_count = 0;
15565     foreach (@$ri_ragged_break_list) {
15566         my $j = $ri_term_comma->[$_];
15567         if ($j) {
15568             set_forced_breakpoint($j);
15569             $break_count++;
15570         }
15571     }
15572     return $break_count;
15573 }
15574
15575 sub copy_old_breakpoints {
15576     my ( $i_first_comma, $i_last_comma ) = @_;
15577     for my $i ( $i_first_comma .. $i_last_comma ) {
15578         if ( $old_breakpoint_to_go[$i] ) {
15579             set_forced_breakpoint($i);
15580         }
15581     }
15582 }
15583
15584 sub set_nobreaks {
15585     my ( $i, $j ) = @_;
15586     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
15587
15588         FORMATTER_DEBUG_FLAG_NOBREAK && do {
15589             my ( $a, $b, $c ) = caller();
15590             print(
15591 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
15592             );
15593         };
15594
15595         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
15596     }
15597
15598     # shouldn't happen; non-critical error
15599     else {
15600         FORMATTER_DEBUG_FLAG_NOBREAK && do {
15601             my ( $a, $b, $c ) = caller();
15602             print(
15603 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
15604             );
15605         };
15606     }
15607 }
15608
15609 sub set_fake_breakpoint {
15610
15611     # Just bump up the breakpoint count as a signal that there are breaks.
15612     # This is useful if we have breaks but may want to postpone deciding where
15613     # to make them.
15614     $forced_breakpoint_count++;
15615 }
15616
15617 sub set_forced_breakpoint {
15618     my $i = shift;
15619
15620     return unless defined $i && $i >= 0;
15621
15622     # when called with certain tokens, use bond strengths to decide
15623     # if we break before or after it
15624     my $token = $tokens_to_go[$i];
15625
15626     if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
15627         if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
15628     }
15629
15630     # breaks are forced before 'if' and 'unless'
15631     elsif ( $is_if_unless{$token} ) { $i-- }
15632
15633     if ( $i >= 0 && $i <= $max_index_to_go ) {
15634         my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
15635
15636         FORMATTER_DEBUG_FLAG_FORCE && do {
15637             my ( $a, $b, $c ) = caller();
15638             print
15639 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
15640         };
15641
15642         if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
15643             $forced_breakpoint_to_go[$i_nonblank] = 1;
15644
15645             if ( $i_nonblank > $index_max_forced_break ) {
15646                 $index_max_forced_break = $i_nonblank;
15647             }
15648             $forced_breakpoint_count++;
15649             $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
15650               $i_nonblank;
15651
15652             # if we break at an opening container..break at the closing
15653             if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
15654                 set_closing_breakpoint($i_nonblank);
15655             }
15656         }
15657     }
15658 }
15659
15660 sub clear_breakpoint_undo_stack {
15661     $forced_breakpoint_undo_count = 0;
15662 }
15663
15664 sub undo_forced_breakpoint_stack {
15665
15666     my $i_start = shift;
15667     if ( $i_start < 0 ) {
15668         $i_start = 0;
15669         my ( $a, $b, $c ) = caller();
15670         warning(
15671 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
15672         );
15673     }
15674
15675     while ( $forced_breakpoint_undo_count > $i_start ) {
15676         my $i =
15677           $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
15678         if ( $i >= 0 && $i <= $max_index_to_go ) {
15679             $forced_breakpoint_to_go[$i] = 0;
15680             $forced_breakpoint_count--;
15681
15682             FORMATTER_DEBUG_FLAG_UNDOBP && do {
15683                 my ( $a, $b, $c ) = caller();
15684                 print(
15685 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
15686                 );
15687             };
15688         }
15689
15690         # shouldn't happen, but not a critical error
15691         else {
15692             FORMATTER_DEBUG_FLAG_UNDOBP && do {
15693                 my ( $a, $b, $c ) = caller();
15694                 print(
15695 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
15696                 );
15697             };
15698         }
15699     }
15700 }
15701
15702 {    # begin recombine_breakpoints
15703
15704     my %is_amp_amp;
15705     my %is_ternary;
15706     my %is_math_op;
15707
15708     BEGIN {
15709
15710         @_ = qw( && || );
15711         @is_amp_amp{@_} = (1) x scalar(@_);
15712
15713         @_ = qw( ? : );
15714         @is_ternary{@_} = (1) x scalar(@_);
15715
15716         @_ = qw( + - * / );
15717         @is_math_op{@_} = (1) x scalar(@_);
15718     }
15719
15720     sub recombine_breakpoints {
15721
15722         # sub set_continuation_breaks is very liberal in setting line breaks
15723         # for long lines, always setting breaks at good breakpoints, even
15724         # when that creates small lines.  Occasionally small line fragments
15725         # are produced which would look better if they were combined.
15726         # That's the task of this routine, recombine_breakpoints.
15727         #
15728         # $ri_beg = ref to array of BEGinning indexes of each line
15729         # $ri_end = ref to array of ENDing indexes of each line
15730         my ( $ri_beg, $ri_end ) = @_;
15731
15732         my $more_to_do = 1;
15733
15734         # We keep looping over all of the lines of this batch
15735         # until there are no more possible recombinations
15736         my $nmax_last = @$ri_end;
15737         while ($more_to_do) {
15738             my $n_best = 0;
15739             my $bs_best;
15740             my $n;
15741             my $nmax = @$ri_end - 1;
15742
15743             # safety check for infinite loop
15744             unless ( $nmax < $nmax_last ) {
15745
15746             # shouldn't happen because splice below decreases nmax on each pass:
15747             # but i get paranoid sometimes
15748                 die "Program bug-infinite loop in recombine breakpoints\n";
15749             }
15750             $nmax_last  = $nmax;
15751             $more_to_do = 0;
15752             my $previous_outdentable_closing_paren;
15753             my $leading_amp_count = 0;
15754             my $this_line_is_semicolon_terminated;
15755
15756             # loop over all remaining lines in this batch
15757             for $n ( 1 .. $nmax ) {
15758
15759                 #----------------------------------------------------------
15760                 # If we join the current pair of lines,
15761                 # line $n-1 will become the left part of the joined line
15762                 # line $n will become the right part of the joined line
15763                 #
15764                 # Here are Indexes of the endpoint tokens of the two lines:
15765                 #
15766                 #  -----line $n-1--- | -----line $n-----
15767                 #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
15768                 #                    ^
15769                 #                    |
15770                 # We want to decide if we should remove the line break
15771                 # betwen the tokens at $iend_1 and $ibeg_2
15772                 #
15773                 # We will apply a number of ad-hoc tests to see if joining
15774                 # here will look ok.  The code will just issue a 'next'
15775                 # command if the join doesn't look good.  If we get through
15776                 # the gauntlet of tests, the lines will be recombined.
15777                 #----------------------------------------------------------
15778                 #
15779                 # beginning and ending tokens of the lines we are working on
15780                 my $ibeg_1 = $$ri_beg[ $n - 1 ];
15781                 my $iend_1 = $$ri_end[ $n - 1 ];
15782                 my $iend_2 = $$ri_end[$n];
15783                 my $ibeg_2 = $$ri_beg[$n];
15784
15785                 my $ibeg_nmax = $$ri_beg[$nmax];
15786
15787                 # some beginning indexes of other lines, which may not exist
15788                 my $ibeg_0 = $n > 1          ? $$ri_beg[ $n - 2 ] : -1;
15789                 my $ibeg_3 = $n < $nmax      ? $$ri_beg[ $n + 1 ] : -1;
15790                 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
15791
15792                 my $bs_tweak = 0;
15793
15794                 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15795                 #        $nesting_depth_to_go[$ibeg_1] );
15796
15797 ##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
15798
15799                 # If line $n is the last line, we set some flags and
15800                 # do any special checks for it
15801                 if ( $n == $nmax ) {
15802
15803                     # a terminal '{' should stay where it is
15804                     next if $types_to_go[$ibeg_2] eq '{';
15805
15806                     # set flag if statement $n ends in ';'
15807                     $this_line_is_semicolon_terminated =
15808                       $types_to_go[$iend_2] eq ';'
15809
15810                       # with possible side comment
15811                       || ( $types_to_go[$iend_2] eq '#'
15812                         && $iend_2 - $ibeg_2 >= 2
15813                         && $types_to_go[ $iend_2 - 2 ] eq ';'
15814                         && $types_to_go[ $iend_2 - 1 ] eq 'b' );
15815                 }
15816
15817                 #----------------------------------------------------------
15818                 # Section 1: examine token at $iend_1 (right end of first line
15819                 # of pair)
15820                 #----------------------------------------------------------
15821
15822                 # an isolated '}' may join with a ';' terminated segment
15823                 if ( $types_to_go[$iend_1] eq '}' ) {
15824
15825                     # Check for cases where combining a semicolon terminated
15826                     # statement with a previous isolated closing paren will
15827                     # allow the combined line to be outdented.  This is
15828                     # generally a good move.  For example, we can join up
15829                     # the last two lines here:
15830                     #  (
15831                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15832                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
15833                     #    )
15834                     #    = stat($file);
15835                     #
15836                     # to get:
15837                     #  (
15838                     #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15839                     #      $size, $atime, $mtime, $ctime, $blksize, $blocks
15840                     #  ) = stat($file);
15841                     #
15842                     # which makes the parens line up.
15843                     #
15844                     # Another example, from Joe Matarazzo, probably looks best
15845                     # with the 'or' clause appended to the trailing paren:
15846                     #  $self->some_method(
15847                     #      PARAM1 => 'foo',
15848                     #      PARAM2 => 'bar'
15849                     #  ) or die "Some_method didn't work";
15850                     #
15851                     $previous_outdentable_closing_paren =
15852                       $this_line_is_semicolon_terminated    # ends in ';'
15853                       && $ibeg_1 == $iend_1    # only one token on last line
15854                       && $tokens_to_go[$iend_1] eq
15855                       ')'                      # must be structural paren
15856
15857                       # only &&, ||, and : if no others seen
15858                       # (but note: our count made below could be wrong
15859                       # due to intervening comments)
15860                       && ( $leading_amp_count == 0
15861                         || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
15862
15863                       # but leading colons probably line up with with a
15864                       # previous colon or question (count could be wrong).
15865                       && $types_to_go[$ibeg_2] ne ':'
15866
15867                       # only one step in depth allowed.  this line must not
15868                       # begin with a ')' itself.
15869                       && ( $nesting_depth_to_go[$iend_1] ==
15870                         $nesting_depth_to_go[$iend_2] + 1 );
15871
15872                     # YVES patch 2 of 2:
15873                     # Allow cuddled eval chains, like this:
15874                     #   eval {
15875                     #       #STUFF;
15876                     #       1; # return true
15877                     #   } or do {
15878                     #       #handle error
15879                     #   };
15880                     # This patch works together with a patch in
15881                     # setting adjusted indentation (where the closing eval
15882                     # brace is outdented if possible).
15883                     # The problem is that an 'eval' block has continuation
15884                     # indentation and it looks better to undo it in some
15885                     # cases.  If we do not use this patch we would get:
15886                     #   eval {
15887                     #       #STUFF;
15888                     #       1; # return true
15889                     #       }
15890                     #       or do {
15891                     #       #handle error
15892                     #     };
15893                     # The alternative, for uncuddled style, is to create
15894                     # a patch in set_adjusted_indentation which undoes
15895                     # the indentation of a leading line like 'or do {'.
15896                     # This doesn't work well with -icb through
15897                     if (
15898                            $block_type_to_go[$iend_1] eq 'eval'
15899                         && !$rOpts->{'line-up-parentheses'}
15900                         && !$rOpts->{'indent-closing-brace'}
15901                         && $tokens_to_go[$iend_2] eq '{'
15902                         && (
15903                             ( $types_to_go[$ibeg_2] =~ /^(|\&\&|\|\|)$/ )
15904                             || (   $types_to_go[$ibeg_2] eq 'k'
15905                                 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
15906                             || $is_if_unless{ $tokens_to_go[$ibeg_2] }
15907                         )
15908                       )
15909                     {
15910                         $previous_outdentable_closing_paren ||= 1;
15911                     }
15912
15913                     next
15914                       unless (
15915                         $previous_outdentable_closing_paren
15916
15917                         # handle '.' and '?' specially below
15918                         || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
15919                       );
15920                 }
15921
15922                 # YVES
15923                 # honor breaks at opening brace
15924                 # Added to prevent recombining something like this:
15925                 #  } || eval { package main;
15926                 elsif ( $types_to_go[$iend_1] eq '{' ) {
15927                     next if $forced_breakpoint_to_go[$iend_1];
15928                 }
15929
15930                 # do not recombine lines with ending &&, ||,
15931                 elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
15932                     next unless $want_break_before{ $types_to_go[$iend_1] };
15933                 }
15934
15935                 # keep a terminal colon
15936                 elsif ( $types_to_go[$iend_1] eq ':' ) {
15937                     next unless $want_break_before{ $types_to_go[$iend_1] };
15938                 }
15939
15940                 # Identify and recombine a broken ?/: chain
15941                 elsif ( $types_to_go[$iend_1] eq '?' ) {
15942
15943                     # Do not recombine different levels
15944                     next
15945                       if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
15946
15947                     # do not recombine unless next line ends in :
15948                     next unless $types_to_go[$iend_2] eq ':';
15949                 }
15950
15951                 # for lines ending in a comma...
15952                 elsif ( $types_to_go[$iend_1] eq ',' ) {
15953
15954                     # Do not recombine at comma which is following the
15955                     # input bias.
15956                     # TODO: might be best to make a special flag
15957                     next if ( $old_breakpoint_to_go[$iend_1] );
15958
15959                  # an isolated '},' may join with an identifier + ';'
15960                  # this is useful for the class of a 'bless' statement (bless.t)
15961                     if (   $types_to_go[$ibeg_1] eq '}'
15962                         && $types_to_go[$ibeg_2] eq 'i' )
15963                     {
15964                         next
15965                           unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
15966                             && ( $iend_2 == ( $ibeg_2 + 1 ) )
15967                             && $this_line_is_semicolon_terminated );
15968
15969                         # override breakpoint
15970                         $forced_breakpoint_to_go[$iend_1] = 0;
15971                     }
15972
15973                     # but otherwise ..
15974                     else {
15975
15976                         # do not recombine after a comma unless this will leave
15977                         # just 1 more line
15978                         next unless ( $n + 1 >= $nmax );
15979
15980                     # do not recombine if there is a change in indentation depth
15981                         next
15982                           if (
15983                             $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
15984
15985                         # do not recombine a "complex expression" after a
15986                         # comma.  "complex" means no parens.
15987                         my $saw_paren;
15988                         foreach my $ii ( $ibeg_2 .. $iend_2 ) {
15989                             if ( $tokens_to_go[$ii] eq '(' ) {
15990                                 $saw_paren = 1;
15991                                 last;
15992                             }
15993                         }
15994                         next if $saw_paren;
15995                     }
15996                 }
15997
15998                 # opening paren..
15999                 elsif ( $types_to_go[$iend_1] eq '(' ) {
16000
16001                     # No longer doing this
16002                 }
16003
16004                 elsif ( $types_to_go[$iend_1] eq ')' ) {
16005
16006                     # No longer doing this
16007                 }
16008
16009                 # keep a terminal for-semicolon
16010                 elsif ( $types_to_go[$iend_1] eq 'f' ) {
16011                     next;
16012                 }
16013
16014                 # if '=' at end of line ...
16015                 elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
16016
16017                     my $is_short_quote =
16018                       (      $types_to_go[$ibeg_2] eq 'Q'
16019                           && $ibeg_2 == $iend_2
16020                           && length( $tokens_to_go[$ibeg_2] ) <
16021                           $rOpts_short_concatenation_item_length );
16022                     my $is_ternary =
16023                       ( $types_to_go[$ibeg_1] eq '?'
16024                           && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
16025
16026                     # always join an isolated '=', a short quote, or if this
16027                     # will put ?/: at start of adjacent lines
16028                     if (   $ibeg_1 != $iend_1
16029                         && !$is_short_quote
16030                         && !$is_ternary )
16031                     {
16032                         next
16033                           unless (
16034                             (
16035
16036                                 # unless we can reduce this to two lines
16037                                 $nmax < $n + 2
16038
16039                              # or three lines, the last with a leading semicolon
16040                                 || (   $nmax == $n + 2
16041                                     && $types_to_go[$ibeg_nmax] eq ';' )
16042
16043                                 # or the next line ends with a here doc
16044                                 || $types_to_go[$iend_2] eq 'h'
16045
16046                                # or the next line ends in an open paren or brace
16047                                # and the break hasn't been forced [dima.t]
16048                                 || (  !$forced_breakpoint_to_go[$iend_1]
16049                                     && $types_to_go[$iend_2] eq '{' )
16050                             )
16051
16052                             # do not recombine if the two lines might align well
16053                             # this is a very approximate test for this
16054                             && (   $ibeg_3 >= 0
16055                                 && $types_to_go[$ibeg_2] ne
16056                                 $types_to_go[$ibeg_3] )
16057                           );
16058
16059                         # -lp users often prefer this:
16060                         #  my $title = function($env, $env, $sysarea,
16061                         #                       "bubba Borrower Entry");
16062                         #  so we will recombine if -lp is used we have ending
16063                         #  comma
16064                         if (  !$rOpts_line_up_parentheses
16065                             || $types_to_go[$iend_2] ne ',' )
16066                         {
16067
16068                            # otherwise, scan the rhs line up to last token for
16069                            # complexity.  Note that we are not counting the last
16070                            # token in case it is an opening paren.
16071                             my $tv    = 0;
16072                             my $depth = $nesting_depth_to_go[$ibeg_2];
16073                             for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
16074                                 if ( $nesting_depth_to_go[$i] != $depth ) {
16075                                     $tv++;
16076                                     last if ( $tv > 1 );
16077                                 }
16078                                 $depth = $nesting_depth_to_go[$i];
16079                             }
16080
16081                          # ok to recombine if no level changes before last token
16082                             if ( $tv > 0 ) {
16083
16084                                 # otherwise, do not recombine if more than two
16085                                 # level changes.
16086                                 next if ( $tv > 1 );
16087
16088                               # check total complexity of the two adjacent lines
16089                               # that will occur if we do this join
16090                                 my $istop =
16091                                   ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
16092                                 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
16093                                     if ( $nesting_depth_to_go[$i] != $depth ) {
16094                                         $tv++;
16095                                         last if ( $tv > 2 );
16096                                     }
16097                                     $depth = $nesting_depth_to_go[$i];
16098                                 }
16099
16100                         # do not recombine if total is more than 2 level changes
16101                                 next if ( $tv > 2 );
16102                             }
16103                         }
16104                     }
16105
16106                     unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
16107                         $forced_breakpoint_to_go[$iend_1] = 0;
16108                     }
16109                 }
16110
16111                 # for keywords..
16112                 elsif ( $types_to_go[$iend_1] eq 'k' ) {
16113
16114                     # make major control keywords stand out
16115                     # (recombine.t)
16116                     next
16117                       if (
16118
16119                         #/^(last|next|redo|return)$/
16120                         $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
16121
16122                         # but only if followed by multiple lines
16123                         && $n < $nmax
16124                       );
16125
16126                     if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
16127                         next
16128                           unless $want_break_before{ $tokens_to_go[$iend_1] };
16129                     }
16130                 }
16131
16132                 # handle trailing + - * /
16133                 elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
16134
16135                     # combine lines if next line has single number
16136                     # or a short term followed by same operator
16137                     my $i_next_nonblank = $ibeg_2;
16138                     my $i_next_next     = $i_next_nonblank + 1;
16139                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16140                     my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
16141                       && (
16142                         $i_next_nonblank == $iend_2
16143                         || (   $i_next_next == $iend_2
16144                             && $is_math_op{ $types_to_go[$i_next_next] } )
16145                         || $types_to_go[$i_next_next] eq ';'
16146                       );
16147
16148                     # find token before last operator of previous line
16149                     my $iend_1_minus = $iend_1;
16150                     $iend_1_minus--
16151                       if ( $iend_1_minus > $ibeg_1 );
16152                     $iend_1_minus--
16153                       if ( $types_to_go[$iend_1_minus] eq 'b'
16154                         && $iend_1_minus > $ibeg_1 );
16155
16156                     my $short_term_follows =
16157                       (      $types_to_go[$iend_2] eq $types_to_go[$iend_1]
16158                           && $types_to_go[$iend_1_minus] =~ /^[in]$/
16159                           && $iend_2 <= $ibeg_2 + 2
16160                           && length( $tokens_to_go[$ibeg_2] ) <
16161                           $rOpts_short_concatenation_item_length );
16162
16163                     next
16164                       unless ( $number_follows || $short_term_follows );
16165                 }
16166
16167                 #----------------------------------------------------------
16168                 # Section 2: Now examine token at $ibeg_2 (left end of second
16169                 # line of pair)
16170                 #----------------------------------------------------------
16171
16172                 # join lines identified above as capable of
16173                 # causing an outdented line with leading closing paren
16174                 if ($previous_outdentable_closing_paren) {
16175                     $forced_breakpoint_to_go[$iend_1] = 0;
16176                 }
16177
16178                 # do not recombine lines with leading :
16179                 elsif ( $types_to_go[$ibeg_2] eq ':' ) {
16180                     $leading_amp_count++;
16181                     next if $want_break_before{ $types_to_go[$ibeg_2] };
16182                 }
16183
16184                 # handle lines with leading &&, ||
16185                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16186
16187                     $leading_amp_count++;
16188
16189                     # ok to recombine if it follows a ? or :
16190                     # and is followed by an open paren..
16191                     my $ok =
16192                       (      $is_ternary{ $types_to_go[$ibeg_1] }
16193                           && $tokens_to_go[$iend_2] eq '(' )
16194
16195                     # or is followed by a ? or : at same depth
16196                     #
16197                     # We are looking for something like this. We can
16198                     # recombine the && line with the line above to make the
16199                     # structure more clear:
16200                     #  return
16201                     #    exists $G->{Attr}->{V}
16202                     #    && exists $G->{Attr}->{V}->{$u}
16203                     #    ? %{ $G->{Attr}->{V}->{$u} }
16204                     #    : ();
16205                     #
16206                     # We should probably leave something like this alone:
16207                     #  return
16208                     #       exists $G->{Attr}->{E}
16209                     #    && exists $G->{Attr}->{E}->{$u}
16210                     #    && exists $G->{Attr}->{E}->{$u}->{$v}
16211                     #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
16212                     #    : ();
16213                     # so that we either have all of the &&'s (or ||'s)
16214                     # on one line, as in the first example, or break at
16215                     # each one as in the second example.  However, it
16216                     # sometimes makes things worse to check for this because
16217                     # it prevents multiple recombinations.  So this is not done.
16218                       || ( $ibeg_3 >= 0
16219                         && $is_ternary{ $types_to_go[$ibeg_3] }
16220                         && $nesting_depth_to_go[$ibeg_3] ==
16221                         $nesting_depth_to_go[$ibeg_2] );
16222
16223                     next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
16224                     $forced_breakpoint_to_go[$iend_1] = 0;
16225
16226                     # tweak the bond strength to give this joint priority
16227                     # over ? and :
16228                     $bs_tweak = 0.25;
16229                 }
16230
16231                 # Identify and recombine a broken ?/: chain
16232                 elsif ( $types_to_go[$ibeg_2] eq '?' ) {
16233
16234                     # Do not recombine different levels
16235                     my $lev = $levels_to_go[$ibeg_2];
16236                     next if ( $lev ne $levels_to_go[$ibeg_1] );
16237
16238                     # Do not recombine a '?' if either next line or
16239                     # previous line does not start with a ':'.  The reasons
16240                     # are that (1) no alignment of the ? will be possible
16241                     # and (2) the expression is somewhat complex, so the
16242                     # '?' is harder to see in the interior of the line.
16243                     my $follows_colon =
16244                       $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
16245                     my $precedes_colon =
16246                       $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
16247                     next unless ( $follows_colon || $precedes_colon );
16248
16249                     # we will always combining a ? line following a : line
16250                     if ( !$follows_colon ) {
16251
16252                         # ...otherwise recombine only if it looks like a chain.
16253                         # we will just look at a few nearby lines to see if
16254                         # this looks like a chain.
16255                         my $local_count = 0;
16256                         foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
16257                             $local_count++
16258                               if $ii >= 0
16259                                   && $types_to_go[$ii] eq ':'
16260                                   && $levels_to_go[$ii] == $lev;
16261                         }
16262                         next unless ( $local_count > 1 );
16263                     }
16264                     $forced_breakpoint_to_go[$iend_1] = 0;
16265                 }
16266
16267                 # do not recombine lines with leading '.'
16268                 elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
16269                     my $i_next_nonblank = $ibeg_2 + 1;
16270                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16271                         $i_next_nonblank++;
16272                     }
16273
16274                     next
16275                       unless (
16276
16277                    # ... unless there is just one and we can reduce
16278                    # this to two lines if we do.  For example, this
16279                    #
16280                    #
16281                    #  $bodyA .=
16282                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16283                    #
16284                    #  looks better than this:
16285                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16286                    #    . '$args .= $pat;'
16287
16288                         (
16289                                $n == 2
16290                             && $n == $nmax
16291                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
16292                         )
16293
16294                         #  ... or this would strand a short quote , like this
16295                         #                . "some long qoute"
16296                         #                . "\n";
16297                         || (   $types_to_go[$i_next_nonblank] eq 'Q'
16298                             && $i_next_nonblank >= $iend_2 - 1
16299                             && length( $tokens_to_go[$i_next_nonblank] ) <
16300                             $rOpts_short_concatenation_item_length )
16301                       );
16302                 }
16303
16304                 # handle leading keyword..
16305                 elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
16306
16307                     # handle leading "or"
16308                     if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16309                         next
16310                           unless (
16311                             $this_line_is_semicolon_terminated
16312                             && (
16313
16314                                 # following 'if' or 'unless' or 'or'
16315                                 $types_to_go[$ibeg_1] eq 'k'
16316                                 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16317
16318                                 # important: only combine a very simple or
16319                                 # statement because the step below may have
16320                                 # combined a trailing 'and' with this or,
16321                                 # and we do not want to then combine
16322                                 # everything together
16323                                 && ( $iend_2 - $ibeg_2 <= 7 )
16324                             )
16325                           );
16326                     }
16327
16328                     # handle leading 'and'
16329                     elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16330
16331                         # Decide if we will combine a single terminal 'and'
16332                         # after an 'if' or 'unless'.
16333
16334                         #     This looks best with the 'and' on the same
16335                         #     line as the 'if':
16336                         #
16337                         #         $a = 1
16338                         #           if $seconds and $nu < 2;
16339                         #
16340                         #     But this looks better as shown:
16341                         #
16342                         #         $a = 1
16343                         #           if !$this->{Parents}{$_}
16344                         #           or $this->{Parents}{$_} eq $_;
16345                         #
16346                         next
16347                           unless (
16348                             $this_line_is_semicolon_terminated
16349                             && (
16350
16351                                 # following 'if' or 'unless' or 'or'
16352                                 $types_to_go[$ibeg_1] eq 'k'
16353                                 && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
16354                                     || $tokens_to_go[$ibeg_1] eq 'or' )
16355                             )
16356                           );
16357                     }
16358
16359                     # handle leading "if" and "unless"
16360                     elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16361
16362                       # FIXME: This is still experimental..may not be too useful
16363                         next
16364                           unless (
16365                             $this_line_is_semicolon_terminated
16366
16367                             #  previous line begins with 'and' or 'or'
16368                             && $types_to_go[$ibeg_1] eq 'k'
16369                             && $is_and_or{ $tokens_to_go[$ibeg_1] }
16370
16371                           );
16372                     }
16373
16374                     # handle all other leading keywords
16375                     else {
16376
16377                         # keywords look best at start of lines,
16378                         # but combine things like "1 while"
16379                         unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
16380                             next
16381                               if ( ( $types_to_go[$iend_1] ne 'k' )
16382                                 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16383                         }
16384                     }
16385                 }
16386
16387                 # similar treatment of && and || as above for 'and' and 'or':
16388                 # NOTE: This block of code is currently bypassed because
16389                 # of a previous block but is retained for possible future use.
16390                 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16391
16392                     # maybe looking at something like:
16393                     # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16394
16395                     next
16396                       unless (
16397                         $this_line_is_semicolon_terminated
16398
16399                         # previous line begins with an 'if' or 'unless' keyword
16400                         && $types_to_go[$ibeg_1] eq 'k'
16401                         && $is_if_unless{ $tokens_to_go[$ibeg_1] }
16402
16403                       );
16404                 }
16405
16406                 # handle leading + - * /
16407                 elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
16408                     my $i_next_nonblank = $ibeg_2 + 1;
16409                     if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16410                         $i_next_nonblank++;
16411                     }
16412
16413                     my $i_next_next = $i_next_nonblank + 1;
16414                     $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16415
16416                     my $is_number = (
16417                         $types_to_go[$i_next_nonblank] eq 'n'
16418                           && ( $i_next_nonblank >= $iend_2 - 1
16419                             || $types_to_go[$i_next_next] eq ';' )
16420                     );
16421
16422                     my $iend_1_nonblank =
16423                       $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
16424                     my $iend_2_nonblank =
16425                       $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
16426
16427                     my $is_short_term =
16428                       (      $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
16429                           && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
16430                           && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
16431                           && $iend_2_nonblank <= $ibeg_2 + 2
16432                           && length( $tokens_to_go[$iend_2_nonblank] ) <
16433                           $rOpts_short_concatenation_item_length );
16434
16435                     # Combine these lines if this line is a single
16436                     # number, or if it is a short term with same
16437                     # operator as the previous line.  For example, in
16438                     # the following code we will combine all of the
16439                     # short terms $A, $B, $C, $D, $E, $F, together
16440                     # instead of leaving them one per line:
16441                     #  my $time =
16442                     #    $A * $B * $C * $D * $E * $F *
16443                     #    ( 2. * $eps * $sigma * $area ) *
16444                     #    ( 1. / $tcold**3 - 1. / $thot**3 );
16445                     # This can be important in math-intensive code.
16446                     next
16447                       unless (
16448                            $is_number
16449                         || $is_short_term
16450
16451                         # or if we can reduce this to two lines if we do.
16452                         || (   $n == 2
16453                             && $n == $nmax
16454                             && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
16455                       );
16456                 }
16457
16458                 # handle line with leading = or similar
16459                 elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
16460                     next unless $n == 1;
16461                     next
16462                       unless (
16463
16464                         # unless we can reduce this to two lines
16465                         $nmax == 2
16466
16467                         # or three lines, the last with a leading semicolon
16468                         || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16469
16470                         # or the next line ends with a here doc
16471                         || $types_to_go[$iend_2] eq 'h'
16472                       );
16473                 }
16474
16475                 #----------------------------------------------------------
16476                 # Section 3:
16477                 # Combine the lines if we arrive here and it is possible
16478                 #----------------------------------------------------------
16479
16480                 # honor hard breakpoints
16481                 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16482
16483                 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16484
16485                 # combined line cannot be too long
16486                 next
16487                   if excess_line_length( $ibeg_1, $iend_2 ) > 0;
16488
16489                 # do not recombine if we would skip in indentation levels
16490                 if ( $n < $nmax ) {
16491                     my $if_next = $$ri_beg[ $n + 1 ];
16492                     next
16493                       if (
16494                            $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16495                         && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16496
16497                         # but an isolated 'if (' is undesirable
16498                         && !(
16499                                $n == 1
16500                             && $iend_1 - $ibeg_1 <= 2
16501                             && $types_to_go[$ibeg_1]  eq 'k'
16502                             && $tokens_to_go[$ibeg_1] eq 'if'
16503                             && $tokens_to_go[$iend_1] ne '('
16504                         )
16505                       );
16506                 }
16507
16508                 # honor no-break's
16509                 next if ( $bs == NO_BREAK );
16510
16511                 # remember the pair with the greatest bond strength
16512                 if ( !$n_best ) {
16513                     $n_best  = $n;
16514                     $bs_best = $bs;
16515                 }
16516                 else {
16517
16518                     if ( $bs > $bs_best ) {
16519                         $n_best  = $n;
16520                         $bs_best = $bs;
16521                     }
16522                 }
16523             }
16524
16525             # recombine the pair with the greatest bond strength
16526             if ($n_best) {
16527                 splice @$ri_beg, $n_best, 1;
16528                 splice @$ri_end, $n_best - 1, 1;
16529
16530                 # keep going if we are still making progress
16531                 $more_to_do++;
16532             }
16533         }
16534         return ( $ri_beg, $ri_end );
16535     }
16536 }    # end recombine_breakpoints
16537
16538 sub break_all_chain_tokens {
16539
16540     # scan the current breakpoints looking for breaks at certain "chain
16541     # operators" (. : && || + etc) which often occur repeatedly in a long
16542     # statement.  If we see a break at any one, break at all similar tokens
16543     # within the same container.
16544     #
16545     my ( $ri_left, $ri_right ) = @_;
16546
16547     my %saw_chain_type;
16548     my %left_chain_type;
16549     my %right_chain_type;
16550     my %interior_chain_type;
16551     my $nmax = @$ri_right - 1;
16552
16553     # scan the left and right end tokens of all lines
16554     my $count = 0;
16555     for my $n ( 0 .. $nmax ) {
16556         my $il    = $$ri_left[$n];
16557         my $ir    = $$ri_right[$n];
16558         my $typel = $types_to_go[$il];
16559         my $typer = $types_to_go[$ir];
16560         $typel = '+' if ( $typel eq '-' );    # treat + and - the same
16561         $typer = '+' if ( $typer eq '-' );
16562         $typel = '*' if ( $typel eq '/' );    # treat * and / the same
16563         $typer = '*' if ( $typer eq '/' );
16564         my $tokenl = $tokens_to_go[$il];
16565         my $tokenr = $tokens_to_go[$ir];
16566
16567         if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
16568             next if ( $typel eq '?' );
16569             push @{ $left_chain_type{$typel} }, $il;
16570             $saw_chain_type{$typel} = 1;
16571             $count++;
16572         }
16573         if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
16574             next if ( $typer eq '?' );
16575             push @{ $right_chain_type{$typer} }, $ir;
16576             $saw_chain_type{$typer} = 1;
16577             $count++;
16578         }
16579     }
16580     return unless $count;
16581
16582     # now look for any interior tokens of the same types
16583     $count = 0;
16584     for my $n ( 0 .. $nmax ) {
16585         my $il = $$ri_left[$n];
16586         my $ir = $$ri_right[$n];
16587         for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
16588             my $type = $types_to_go[$i];
16589             $type = '+' if ( $type eq '-' );
16590             $type = '*' if ( $type eq '/' );
16591             if ( $saw_chain_type{$type} ) {
16592                 push @{ $interior_chain_type{$type} }, $i;
16593                 $count++;
16594             }
16595         }
16596     }
16597     return unless $count;
16598
16599     # now make a list of all new break points
16600     my @insert_list;
16601
16602     # loop over all chain types
16603     foreach my $type ( keys %saw_chain_type ) {
16604
16605         # quit if just ONE continuation line with leading .  For example--
16606         # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
16607         #  . $contents;
16608         last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
16609
16610         # loop over all interior chain tokens
16611         foreach my $itest ( @{ $interior_chain_type{$type} } ) {
16612
16613             # loop over all left end tokens of same type
16614             if ( $left_chain_type{$type} ) {
16615                 next if $nobreak_to_go[ $itest - 1 ];
16616                 foreach my $i ( @{ $left_chain_type{$type} } ) {
16617                     next unless in_same_container( $i, $itest );
16618                     push @insert_list, $itest - 1;
16619
16620                     # Break at matching ? if this : is at a different level.
16621                     # For example, the ? before $THRf_DEAD in the following
16622                     # should get a break if its : gets a break.
16623                     #
16624                     # my $flags =
16625                     #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
16626                     #   : ( $_ & 4 ) ? $THRf_R_DETACHED
16627                     #   :              $THRf_R_JOINABLE;
16628                     if (   $type eq ':'
16629                         && $levels_to_go[$i] != $levels_to_go[$itest] )
16630                     {
16631                         my $i_question = $mate_index_to_go[$itest];
16632                         if ( $i_question > 0 ) {
16633                             push @insert_list, $i_question - 1;
16634                         }
16635                     }
16636                     last;
16637                 }
16638             }
16639
16640             # loop over all right end tokens of same type
16641             if ( $right_chain_type{$type} ) {
16642                 next if $nobreak_to_go[$itest];
16643                 foreach my $i ( @{ $right_chain_type{$type} } ) {
16644                     next unless in_same_container( $i, $itest );
16645                     push @insert_list, $itest;
16646
16647                     # break at matching ? if this : is at a different level
16648                     if (   $type eq ':'
16649                         && $levels_to_go[$i] != $levels_to_go[$itest] )
16650                     {
16651                         my $i_question = $mate_index_to_go[$itest];
16652                         if ( $i_question >= 0 ) {
16653                             push @insert_list, $i_question;
16654                         }
16655                     }
16656                     last;
16657                 }
16658             }
16659         }
16660     }
16661
16662     # insert any new break points
16663     if (@insert_list) {
16664         insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16665     }
16666 }
16667
16668 sub break_equals {
16669
16670     # Look for assignment operators that could use a breakpoint.
16671     # For example, in the following snippet
16672     #
16673     #    $HOME = $ENV{HOME}
16674     #      || $ENV{LOGDIR}
16675     #      || $pw[7]
16676     #      || die "no home directory for user $<";
16677     #
16678     # we could break at the = to get this, which is a little nicer:
16679     #    $HOME =
16680     #         $ENV{HOME}
16681     #      || $ENV{LOGDIR}
16682     #      || $pw[7]
16683     #      || die "no home directory for user $<";
16684     #
16685     # The logic here follows the logic in set_logical_padding, which
16686     # will add the padding in the second line to improve alignment.
16687     #
16688     my ( $ri_left, $ri_right ) = @_;
16689     my $nmax = @$ri_right - 1;
16690     return unless ( $nmax >= 2 );
16691
16692     # scan the left ends of first two lines
16693     my $tokbeg = "";
16694     my $depth_beg;
16695     for my $n ( 1 .. 2 ) {
16696         my $il     = $$ri_left[$n];
16697         my $typel  = $types_to_go[$il];
16698         my $tokenl = $tokens_to_go[$il];
16699
16700         my $has_leading_op = ( $tokenl =~ /^\w/ )
16701           ? $is_chain_operator{$tokenl}    # + - * / : ? && ||
16702           : $is_chain_operator{$typel};    # and, or
16703         return unless ($has_leading_op);
16704         if ( $n > 1 ) {
16705             return
16706               unless ( $tokenl eq $tokbeg
16707                 && $nesting_depth_to_go[$il] eq $depth_beg );
16708         }
16709         $tokbeg    = $tokenl;
16710         $depth_beg = $nesting_depth_to_go[$il];
16711     }
16712
16713     # now look for any interior tokens of the same types
16714     my $il = $$ri_left[0];
16715     my $ir = $$ri_right[0];
16716
16717     # now make a list of all new break points
16718     my @insert_list;
16719     for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
16720         my $type = $types_to_go[$i];
16721         if (   $is_assignment{$type}
16722             && $nesting_depth_to_go[$i] eq $depth_beg )
16723         {
16724             if ( $want_break_before{$type} ) {
16725                 push @insert_list, $i - 1;
16726             }
16727             else {
16728                 push @insert_list, $i;
16729             }
16730         }
16731     }
16732
16733     # Break after a 'return' followed by a chain of operators
16734     #  return ( $^O !~ /win32|dos/i )
16735     #    && ( $^O ne 'VMS' )
16736     #    && ( $^O ne 'OS2' )
16737     #    && ( $^O ne 'MacOS' );
16738     # To give:
16739     #  return
16740     #       ( $^O !~ /win32|dos/i )
16741     #    && ( $^O ne 'VMS' )
16742     #    && ( $^O ne 'OS2' )
16743     #    && ( $^O ne 'MacOS' );
16744     my $i = 0;
16745     if (   $types_to_go[$i] eq 'k'
16746         && $tokens_to_go[$i] eq 'return'
16747         && $ir > $il
16748         && $nesting_depth_to_go[$i] eq $depth_beg )
16749     {
16750         push @insert_list, $i;
16751     }
16752
16753     return unless (@insert_list);
16754
16755     # One final check...
16756     # scan second and thrid lines and be sure there are no assignments
16757     # we want to avoid breaking at an = to make something like this:
16758     #    unless ( $icon =
16759     #           $html_icons{"$type-$state"}
16760     #        or $icon = $html_icons{$type}
16761     #        or $icon = $html_icons{$state} )
16762     for my $n ( 1 .. 2 ) {
16763         my $il = $$ri_left[$n];
16764         my $ir = $$ri_right[$n];
16765         for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
16766             my $type = $types_to_go[$i];
16767             return
16768               if ( $is_assignment{$type}
16769                 && $nesting_depth_to_go[$i] eq $depth_beg );
16770         }
16771     }
16772
16773     # ok, insert any new break point
16774     if (@insert_list) {
16775         insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16776     }
16777 }
16778
16779 sub insert_final_breaks {
16780
16781     my ( $ri_left, $ri_right ) = @_;
16782
16783     my $nmax = @$ri_right - 1;
16784
16785     # scan the left and right end tokens of all lines
16786     my $count         = 0;
16787     my $i_first_colon = -1;
16788     for my $n ( 0 .. $nmax ) {
16789         my $il    = $$ri_left[$n];
16790         my $ir    = $$ri_right[$n];
16791         my $typel = $types_to_go[$il];
16792         my $typer = $types_to_go[$ir];
16793         return if ( $typel eq '?' );
16794         return if ( $typer eq '?' );
16795         if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
16796         elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16797     }
16798
16799     # For long ternary chains,
16800     # if the first : we see has its # ? is in the interior
16801     # of a preceding line, then see if there are any good
16802     # breakpoints before the ?.
16803     if ( $i_first_colon > 0 ) {
16804         my $i_question = $mate_index_to_go[$i_first_colon];
16805         if ( $i_question > 0 ) {
16806             my @insert_list;
16807             for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16808                 my $token = $tokens_to_go[$ii];
16809                 my $type  = $types_to_go[$ii];
16810
16811                 # For now, a good break is either a comma or a 'return'.
16812                 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
16813                     && in_same_container( $ii, $i_question ) )
16814                 {
16815                     push @insert_list, $ii;
16816                     last;
16817                 }
16818             }
16819
16820             # insert any new break points
16821             if (@insert_list) {
16822                 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16823             }
16824         }
16825     }
16826 }
16827
16828 sub in_same_container {
16829
16830     # check to see if tokens at i1 and i2 are in the
16831     # same container, and not separated by a comma, ? or :
16832     my ( $i1, $i2 ) = @_;
16833     my $type  = $types_to_go[$i1];
16834     my $depth = $nesting_depth_to_go[$i1];
16835     return unless ( $nesting_depth_to_go[$i2] == $depth );
16836     if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
16837
16838     ###########################################################
16839     # This is potentially a very slow routine and not critical.
16840     # For safety just give up for large differences.
16841     # See test file 'infinite_loop.txt'
16842     # TODO: replace this loop with a data structure
16843     ###########################################################
16844     return if ( $i2 - $i1 > 200 );
16845
16846     for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
16847         next   if ( $nesting_depth_to_go[$i] > $depth );
16848         return if ( $nesting_depth_to_go[$i] < $depth );
16849
16850         my $tok = $tokens_to_go[$i];
16851         $tok = ',' if $tok eq '=>';    # treat => same as ,
16852
16853         # Example: we would not want to break at any of these .'s
16854         #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
16855         if ( $type ne ':' ) {
16856             return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
16857         }
16858         else {
16859             return if ( $tok =~ /^[\,]$/ );
16860         }
16861     }
16862     return 1;
16863 }
16864
16865 sub set_continuation_breaks {
16866
16867     # Define an array of indexes for inserting newline characters to
16868     # keep the line lengths below the maximum desired length.  There is
16869     # an implied break after the last token, so it need not be included.
16870
16871     # Method:
16872     # This routine is part of series of routines which adjust line
16873     # lengths.  It is only called if a statement is longer than the
16874     # maximum line length, or if a preliminary scanning located
16875     # desirable break points.   Sub scan_list has already looked at
16876     # these tokens and set breakpoints (in array
16877     # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
16878     # after commas, after opening parens, and before closing parens).
16879     # This routine will honor these breakpoints and also add additional
16880     # breakpoints as necessary to keep the line length below the maximum
16881     # requested.  It bases its decision on where the 'bond strength' is
16882     # lowest.
16883
16884     # Output: returns references to the arrays:
16885     #  @i_first
16886     #  @i_last
16887     # which contain the indexes $i of the first and last tokens on each
16888     # line.
16889
16890     # In addition, the array:
16891     #   $forced_breakpoint_to_go[$i]
16892     # may be updated to be =1 for any index $i after which there must be
16893     # a break.  This signals later routines not to undo the breakpoint.
16894
16895     my $saw_good_break = shift;
16896     my @i_first        = ();      # the first index to output
16897     my @i_last         = ();      # the last index to output
16898     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
16899     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
16900
16901     set_bond_strengths();
16902
16903     my $imin = 0;
16904     my $imax = $max_index_to_go;
16905     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
16906     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
16907     my $i_begin = $imin;          # index for starting next iteration
16908
16909     my $leading_spaces          = leading_spaces_to_go($imin);
16910     my $line_count              = 0;
16911     my $last_break_strength     = NO_BREAK;
16912     my $i_last_break            = -1;
16913     my $max_bias                = 0.001;
16914     my $tiny_bias               = 0.0001;
16915     my $leading_alignment_token = "";
16916     my $leading_alignment_type  = "";
16917
16918     # see if any ?/:'s are in order
16919     my $colons_in_order = 1;
16920     my $last_tok        = "";
16921     my @colon_list  = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
16922     my $colon_count = @colon_list;
16923     foreach (@colon_list) {
16924         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
16925         $last_tok = $_;
16926     }
16927
16928     # This is a sufficient but not necessary condition for colon chain
16929     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
16930
16931     #-------------------------------------------------------
16932     # BEGINNING of main loop to set continuation breakpoints
16933     # Keep iterating until we reach the end
16934     #-------------------------------------------------------
16935     while ( $i_begin <= $imax ) {
16936         my $lowest_strength        = NO_BREAK;
16937         my $starting_sum           = $lengths_to_go[$i_begin];
16938         my $i_lowest               = -1;
16939         my $i_test                 = -1;
16940         my $lowest_next_token      = '';
16941         my $lowest_next_type       = 'b';
16942         my $i_lowest_next_nonblank = -1;
16943
16944         #-------------------------------------------------------
16945         # BEGINNING of inner loop to find the best next breakpoint
16946         #-------------------------------------------------------
16947         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
16948             my $type       = $types_to_go[$i_test];
16949             my $token      = $tokens_to_go[$i_test];
16950             my $next_type  = $types_to_go[ $i_test + 1 ];
16951             my $next_token = $tokens_to_go[ $i_test + 1 ];
16952             my $i_next_nonblank =
16953               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
16954             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
16955             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
16956             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
16957             my $strength                 = $bond_strength_to_go[$i_test];
16958             my $must_break               = 0;
16959
16960             # FIXME: TESTING: Might want to be able to break after these
16961             # force an immediate break at certain operators
16962             # with lower level than the start of the line
16963             if (
16964                 (
16965                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
16966                     || (   $next_nonblank_type eq 'k'
16967                         && $next_nonblank_token =~ /^(and|or)$/ )
16968                 )
16969                 && ( $nesting_depth_to_go[$i_begin] >
16970                     $nesting_depth_to_go[$i_next_nonblank] )
16971               )
16972             {
16973                 set_forced_breakpoint($i_next_nonblank);
16974             }
16975
16976             if (
16977
16978                 # Try to put a break where requested by scan_list
16979                 $forced_breakpoint_to_go[$i_test]
16980
16981                 # break between ) { in a continued line so that the '{' can
16982                 # be outdented
16983                 # See similar logic in scan_list which catches instances
16984                 # where a line is just something like ') {'
16985                 || (   $line_count
16986                     && ( $token              eq ')' )
16987                     && ( $next_nonblank_type eq '{' )
16988                     && ($next_nonblank_block_type)
16989                     && !$rOpts->{'opening-brace-always-on-right'} )
16990
16991                 # There is an implied forced break at a terminal opening brace
16992                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
16993               )
16994             {
16995
16996                 # Forced breakpoints must sometimes be overridden, for example
16997                 # because of a side comment causing a NO_BREAK.  It is easier
16998                 # to catch this here than when they are set.
16999                 if ( $strength < NO_BREAK ) {
17000                     $strength   = $lowest_strength - $tiny_bias;
17001                     $must_break = 1;
17002                 }
17003             }
17004
17005             # quit if a break here would put a good terminal token on
17006             # the next line and we already have a possible break
17007             if (
17008                    !$must_break
17009                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
17010                 && (
17011                     (
17012                         $leading_spaces +
17013                         $lengths_to_go[ $i_next_nonblank + 1 ] -
17014                         $starting_sum
17015                     ) > $rOpts_maximum_line_length
17016                 )
17017               )
17018             {
17019                 last if ( $i_lowest >= 0 );
17020             }
17021
17022             # Avoid a break which would strand a single punctuation
17023             # token.  For example, we do not want to strand a leading
17024             # '.' which is followed by a long quoted string.
17025             if (
17026                    !$must_break
17027                 && ( $i_test == $i_begin )
17028                 && ( $i_test < $imax )
17029                 && ( $token eq $type )
17030                 && (
17031                     (
17032                         $leading_spaces +
17033                         $lengths_to_go[ $i_test + 1 ] -
17034                         $starting_sum
17035                     ) <= $rOpts_maximum_line_length
17036                 )
17037               )
17038             {
17039                 $i_test++;
17040
17041                 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
17042                     $i_test++;
17043                 }
17044                 redo;
17045             }
17046
17047             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
17048             {
17049
17050                 # break at previous best break if it would have produced
17051                 # a leading alignment of certain common tokens, and it
17052                 # is different from the latest candidate break
17053                 last
17054                   if ($leading_alignment_type);
17055
17056                 # Force at least one breakpoint if old code had good
17057                 # break It is only called if a breakpoint is required or
17058                 # desired.  This will probably need some adjustments
17059                 # over time.  A goal is to try to be sure that, if a new
17060                 # side comment is introduced into formated text, then
17061                 # the same breakpoints will occur.  scbreak.t
17062                 last
17063                   if (
17064                     $i_test == $imax                # we are at the end
17065                     && !$forced_breakpoint_count    #
17066                     && $saw_good_break              # old line had good break
17067                     && $type =~ /^[#;\{]$/          # and this line ends in
17068                                                     # ';' or side comment
17069                     && $i_last_break < 0        # and we haven't made a break
17070                     && $i_lowest > 0            # and we saw a possible break
17071                     && $i_lowest < $imax - 1    # (but not just before this ;)
17072                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
17073                   );
17074
17075                 $lowest_strength        = $strength;
17076                 $i_lowest               = $i_test;
17077                 $lowest_next_token      = $next_nonblank_token;
17078                 $lowest_next_type       = $next_nonblank_type;
17079                 $i_lowest_next_nonblank = $i_next_nonblank;
17080                 last if $must_break;
17081
17082                 # set flags to remember if a break here will produce a
17083                 # leading alignment of certain common tokens
17084                 if (   $line_count > 0
17085                     && $i_test < $imax
17086                     && ( $lowest_strength - $last_break_strength <= $max_bias )
17087                   )
17088                 {
17089                     my $i_last_end = $i_begin - 1;
17090                     if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
17091                     my $tok_beg  = $tokens_to_go[$i_begin];
17092                     my $type_beg = $types_to_go[$i_begin];
17093                     if (
17094
17095                         # check for leading alignment of certain tokens
17096                         (
17097                                $tok_beg eq $next_nonblank_token
17098                             && $is_chain_operator{$tok_beg}
17099                             && (   $type_beg eq 'k'
17100                                 || $type_beg eq $tok_beg )
17101                             && $nesting_depth_to_go[$i_begin] >=
17102                             $nesting_depth_to_go[$i_next_nonblank]
17103                         )
17104
17105                         || (   $tokens_to_go[$i_last_end] eq $token
17106                             && $is_chain_operator{$token}
17107                             && ( $type eq 'k' || $type eq $token )
17108                             && $nesting_depth_to_go[$i_last_end] >=
17109                             $nesting_depth_to_go[$i_test] )
17110                       )
17111                     {
17112                         $leading_alignment_token = $next_nonblank_token;
17113                         $leading_alignment_type  = $next_nonblank_type;
17114                     }
17115                 }
17116             }
17117
17118             my $too_long =
17119               ( $i_test >= $imax )
17120               ? 1
17121               : (
17122                 (
17123                     $leading_spaces +
17124                       $lengths_to_go[ $i_test + 2 ] -
17125                       $starting_sum
17126                 ) > $rOpts_maximum_line_length
17127               );
17128
17129             FORMATTER_DEBUG_FLAG_BREAK
17130               && print
17131 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
17132
17133             # allow one extra terminal token after exceeding line length
17134             # if it would strand this token.
17135             if (   $rOpts_fuzzy_line_length
17136                 && $too_long
17137                 && ( $i_lowest == $i_test )
17138                 && ( length($token) > 1 )
17139                 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
17140             {
17141                 $too_long = 0;
17142             }
17143
17144             last
17145               if (
17146                 ( $i_test == $imax )    # we're done if no more tokens,
17147                 || (
17148                     ( $i_lowest >= 0 )    # or no more space and we have a break
17149                     && $too_long
17150                 )
17151               );
17152         }
17153
17154         #-------------------------------------------------------
17155         # END of inner loop to find the best next breakpoint
17156         # Now decide exactly where to put the breakpoint
17157         #-------------------------------------------------------
17158
17159         # it's always ok to break at imax if no other break was found
17160         if ( $i_lowest < 0 ) { $i_lowest = $imax }
17161
17162         # semi-final index calculation
17163         my $i_next_nonblank = (
17164             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17165             ? $i_lowest + 2
17166             : $i_lowest + 1
17167         );
17168         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17169         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17170
17171         #-------------------------------------------------------
17172         # ?/: rule 1 : if a break here will separate a '?' on this
17173         # line from its closing ':', then break at the '?' instead.
17174         #-------------------------------------------------------
17175         my $i;
17176         foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
17177             next unless ( $tokens_to_go[$i] eq '?' );
17178
17179             # do not break if probable sequence of ?/: statements
17180             next if ($is_colon_chain);
17181
17182             # do not break if statement is broken by side comment
17183             next
17184               if (
17185                 $tokens_to_go[$max_index_to_go] eq '#'
17186                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
17187                     $max_index_to_go ) !~ /^[\;\}]$/
17188               );
17189
17190             # no break needed if matching : is also on the line
17191             next
17192               if ( $mate_index_to_go[$i] >= 0
17193                 && $mate_index_to_go[$i] <= $i_next_nonblank );
17194
17195             $i_lowest = $i;
17196             if ( $want_break_before{'?'} ) { $i_lowest-- }
17197             last;
17198         }
17199
17200         #-------------------------------------------------------
17201         # END of inner loop to find the best next breakpoint:
17202         # Break the line after the token with index i=$i_lowest
17203         #-------------------------------------------------------
17204
17205         # final index calculation
17206         $i_next_nonblank = (
17207             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
17208             ? $i_lowest + 2
17209             : $i_lowest + 1
17210         );
17211         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
17212         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
17213
17214         FORMATTER_DEBUG_FLAG_BREAK
17215           && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
17216
17217         #-------------------------------------------------------
17218         # ?/: rule 2 : if we break at a '?', then break at its ':'
17219         #
17220         # Note: this rule is also in sub scan_list to handle a break
17221         # at the start and end of a line (in case breaks are dictated
17222         # by side comments).
17223         #-------------------------------------------------------
17224         if ( $next_nonblank_type eq '?' ) {
17225             set_closing_breakpoint($i_next_nonblank);
17226         }
17227         elsif ( $types_to_go[$i_lowest] eq '?' ) {
17228             set_closing_breakpoint($i_lowest);
17229         }
17230
17231         #-------------------------------------------------------
17232         # ?/: rule 3 : if we break at a ':' then we save
17233         # its location for further work below.  We may need to go
17234         # back and break at its '?'.
17235         #-------------------------------------------------------
17236         if ( $next_nonblank_type eq ':' ) {
17237             push @i_colon_breaks, $i_next_nonblank;
17238         }
17239         elsif ( $types_to_go[$i_lowest] eq ':' ) {
17240             push @i_colon_breaks, $i_lowest;
17241         }
17242
17243         # here we should set breaks for all '?'/':' pairs which are
17244         # separated by this line
17245
17246         $line_count++;
17247
17248         # save this line segment, after trimming blanks at the ends
17249         push( @i_first,
17250             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
17251         push( @i_last,
17252             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
17253
17254         # set a forced breakpoint at a container opening, if necessary, to
17255         # signal a break at a closing container.  Excepting '(' for now.
17256         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
17257             && !$forced_breakpoint_to_go[$i_lowest] )
17258         {
17259             set_closing_breakpoint($i_lowest);
17260         }
17261
17262         # get ready to go again
17263         $i_begin                 = $i_lowest + 1;
17264         $last_break_strength     = $lowest_strength;
17265         $i_last_break            = $i_lowest;
17266         $leading_alignment_token = "";
17267         $leading_alignment_type  = "";
17268         $lowest_next_token       = '';
17269         $lowest_next_type        = 'b';
17270
17271         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17272             $i_begin++;
17273         }
17274
17275         # update indentation size
17276         if ( $i_begin <= $imax ) {
17277             $leading_spaces = leading_spaces_to_go($i_begin);
17278         }
17279     }
17280
17281     #-------------------------------------------------------
17282     # END of main loop to set continuation breakpoints
17283     # Now go back and make any necessary corrections
17284     #-------------------------------------------------------
17285
17286     #-------------------------------------------------------
17287     # ?/: rule 4 -- if we broke at a ':', then break at
17288     # corresponding '?' unless this is a chain of ?: expressions
17289     #-------------------------------------------------------
17290     if (@i_colon_breaks) {
17291
17292         # using a simple method for deciding if we are in a ?/: chain --
17293         # this is a chain if it has multiple ?/: pairs all in order;
17294         # otherwise not.
17295         # Note that if line starts in a ':' we count that above as a break
17296         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17297
17298         unless ($is_chain) {
17299             my @insert_list = ();
17300             foreach (@i_colon_breaks) {
17301                 my $i_question = $mate_index_to_go[$_];
17302                 if ( $i_question >= 0 ) {
17303                     if ( $want_break_before{'?'} ) {
17304                         $i_question--;
17305                         if (   $i_question > 0
17306                             && $types_to_go[$i_question] eq 'b' )
17307                         {
17308                             $i_question--;
17309                         }
17310                     }
17311
17312                     if ( $i_question >= 0 ) {
17313                         push @insert_list, $i_question;
17314                     }
17315                 }
17316                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
17317             }
17318         }
17319     }
17320     return ( \@i_first, \@i_last, $colon_count );
17321 }
17322
17323 sub insert_additional_breaks {
17324
17325     # this routine will add line breaks at requested locations after
17326     # sub set_continuation_breaks has made preliminary breaks.
17327
17328     my ( $ri_break_list, $ri_first, $ri_last ) = @_;
17329     my $i_f;
17330     my $i_l;
17331     my $line_number = 0;
17332     my $i_break_left;
17333     foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
17334
17335         $i_f = $$ri_first[$line_number];
17336         $i_l = $$ri_last[$line_number];
17337         while ( $i_break_left >= $i_l ) {
17338             $line_number++;
17339
17340             # shouldn't happen unless caller passes bad indexes
17341             if ( $line_number >= @$ri_last ) {
17342                 warning(
17343 "Non-fatal program bug: couldn't set break at $i_break_left\n"
17344                 );
17345                 report_definite_bug();
17346                 return;
17347             }
17348             $i_f = $$ri_first[$line_number];
17349             $i_l = $$ri_last[$line_number];
17350         }
17351
17352         my $i_break_right = $i_break_left + 1;
17353         if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
17354
17355         if (   $i_break_left >= $i_f
17356             && $i_break_left < $i_l
17357             && $i_break_right > $i_f
17358             && $i_break_right <= $i_l )
17359         {
17360             splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
17361             splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
17362         }
17363     }
17364 }
17365
17366 sub set_closing_breakpoint {
17367
17368     # set a breakpoint at a matching closing token
17369     # at present, this is only used to break at a ':' which matches a '?'
17370     my $i_break = shift;
17371
17372     if ( $mate_index_to_go[$i_break] >= 0 ) {
17373
17374         # CAUTION: infinite recursion possible here:
17375         #   set_closing_breakpoint calls set_forced_breakpoint, and
17376         #   set_forced_breakpoint call set_closing_breakpoint
17377         #   ( test files attrib.t, BasicLyx.pm.html).
17378         # Don't reduce the '2' in the statement below
17379         if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
17380
17381             # break before } ] and ), but sub set_forced_breakpoint will decide
17382             # to break before or after a ? and :
17383             my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
17384             set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
17385         }
17386     }
17387     else {
17388         my $type_sequence = $type_sequence_to_go[$i_break];
17389         if ($type_sequence) {
17390             my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
17391             $postponed_breakpoint{$type_sequence} = 1;
17392         }
17393     }
17394 }
17395
17396 # check to see if output line tabbing agrees with input line
17397 # this can be very useful for debugging a script which has an extra
17398 # or missing brace
17399 sub compare_indentation_levels {
17400
17401     my ( $python_indentation_level, $structural_indentation_level ) = @_;
17402     if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
17403         $last_tabbing_disagreement = $input_line_number;
17404
17405         if ($in_tabbing_disagreement) {
17406         }
17407         else {
17408             $tabbing_disagreement_count++;
17409
17410             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17411                 write_logfile_entry(
17412 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
17413                 );
17414             }
17415             $in_tabbing_disagreement    = $input_line_number;
17416             $first_tabbing_disagreement = $in_tabbing_disagreement
17417               unless ($first_tabbing_disagreement);
17418         }
17419     }
17420     else {
17421
17422         if ($in_tabbing_disagreement) {
17423
17424             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17425                 write_logfile_entry(
17426 "End indentation disagreement from input line $in_tabbing_disagreement\n"
17427                 );
17428
17429                 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
17430                     write_logfile_entry(
17431                         "No further tabbing disagreements will be noted\n");
17432                 }
17433             }
17434             $in_tabbing_disagreement = 0;
17435         }
17436     }
17437 }
17438
17439 #####################################################################
17440 #
17441 # the Perl::Tidy::IndentationItem class supplies items which contain
17442 # how much whitespace should be used at the start of a line
17443 #
17444 #####################################################################
17445
17446 package Perl::Tidy::IndentationItem;
17447
17448 # Indexes for indentation items
17449 use constant SPACES             => 0;     # total leading white spaces
17450 use constant LEVEL              => 1;     # the indentation 'level'
17451 use constant CI_LEVEL           => 2;     # the 'continuation level'
17452 use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
17453                                           # for this level
17454 use constant CLOSED             => 4;     # index where we saw closing '}'
17455 use constant COMMA_COUNT        => 5;     # how many commas at this level?
17456 use constant SEQUENCE_NUMBER    => 6;     # output batch number
17457 use constant INDEX              => 7;     # index in output batch list
17458 use constant HAVE_CHILD         => 8;     # any dependents?
17459 use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
17460                                           # we would like to move to get
17461                                           # alignment (negative if left)
17462 use constant ALIGN_PAREN        => 10;    # do we want to try to align
17463                                           # with an opening structure?
17464 use constant MARKED             => 11;    # if visited by corrector logic
17465 use constant STACK_DEPTH        => 12;    # indentation nesting depth
17466 use constant STARTING_INDEX     => 13;    # first token index of this level
17467 use constant ARROW_COUNT        => 14;    # how many =>'s
17468
17469 sub new {
17470
17471     # Create an 'indentation_item' which describes one level of leading
17472     # whitespace when the '-lp' indentation is used.  We return
17473     # a reference to an anonymous array of associated variables.
17474     # See above constants for storage scheme.
17475     my (
17476         $class,               $spaces,           $level,
17477         $ci_level,            $available_spaces, $index,
17478         $gnu_sequence_number, $align_paren,      $stack_depth,
17479         $starting_index,
17480     ) = @_;
17481     my $closed            = -1;
17482     my $arrow_count       = 0;
17483     my $comma_count       = 0;
17484     my $have_child        = 0;
17485     my $want_right_spaces = 0;
17486     my $marked            = 0;
17487     bless [
17488         $spaces,              $level,          $ci_level,
17489         $available_spaces,    $closed,         $comma_count,
17490         $gnu_sequence_number, $index,          $have_child,
17491         $want_right_spaces,   $align_paren,    $marked,
17492         $stack_depth,         $starting_index, $arrow_count,
17493     ], $class;
17494 }
17495
17496 sub permanently_decrease_AVAILABLE_SPACES {
17497
17498     # make a permanent reduction in the available indentation spaces
17499     # at one indentation item.  NOTE: if there are child nodes, their
17500     # total SPACES must be reduced by the caller.
17501
17502     my ( $item, $spaces_needed ) = @_;
17503     my $available_spaces = $item->get_AVAILABLE_SPACES();
17504     my $deleted_spaces =
17505       ( $available_spaces > $spaces_needed )
17506       ? $spaces_needed
17507       : $available_spaces;
17508     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
17509     $item->decrease_SPACES($deleted_spaces);
17510     $item->set_RECOVERABLE_SPACES(0);
17511
17512     return $deleted_spaces;
17513 }
17514
17515 sub tentatively_decrease_AVAILABLE_SPACES {
17516
17517     # We are asked to tentatively delete $spaces_needed of indentation
17518     # for a indentation item.  We may want to undo this later.  NOTE: if
17519     # there are child nodes, their total SPACES must be reduced by the
17520     # caller.
17521     my ( $item, $spaces_needed ) = @_;
17522     my $available_spaces = $item->get_AVAILABLE_SPACES();
17523     my $deleted_spaces =
17524       ( $available_spaces > $spaces_needed )
17525       ? $spaces_needed
17526       : $available_spaces;
17527     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
17528     $item->decrease_SPACES($deleted_spaces);
17529     $item->increase_RECOVERABLE_SPACES($deleted_spaces);
17530     return $deleted_spaces;
17531 }
17532
17533 sub get_STACK_DEPTH {
17534     my $self = shift;
17535     return $self->[STACK_DEPTH];
17536 }
17537
17538 sub get_SPACES {
17539     my $self = shift;
17540     return $self->[SPACES];
17541 }
17542
17543 sub get_MARKED {
17544     my $self = shift;
17545     return $self->[MARKED];
17546 }
17547
17548 sub set_MARKED {
17549     my ( $self, $value ) = @_;
17550     if ( defined($value) ) {
17551         $self->[MARKED] = $value;
17552     }
17553     return $self->[MARKED];
17554 }
17555
17556 sub get_AVAILABLE_SPACES {
17557     my $self = shift;
17558     return $self->[AVAILABLE_SPACES];
17559 }
17560
17561 sub decrease_SPACES {
17562     my ( $self, $value ) = @_;
17563     if ( defined($value) ) {
17564         $self->[SPACES] -= $value;
17565     }
17566     return $self->[SPACES];
17567 }
17568
17569 sub decrease_AVAILABLE_SPACES {
17570     my ( $self, $value ) = @_;
17571     if ( defined($value) ) {
17572         $self->[AVAILABLE_SPACES] -= $value;
17573     }
17574     return $self->[AVAILABLE_SPACES];
17575 }
17576
17577 sub get_ALIGN_PAREN {
17578     my $self = shift;
17579     return $self->[ALIGN_PAREN];
17580 }
17581
17582 sub get_RECOVERABLE_SPACES {
17583     my $self = shift;
17584     return $self->[RECOVERABLE_SPACES];
17585 }
17586
17587 sub set_RECOVERABLE_SPACES {
17588     my ( $self, $value ) = @_;
17589     if ( defined($value) ) {
17590         $self->[RECOVERABLE_SPACES] = $value;
17591     }
17592     return $self->[RECOVERABLE_SPACES];
17593 }
17594
17595 sub increase_RECOVERABLE_SPACES {
17596     my ( $self, $value ) = @_;
17597     if ( defined($value) ) {
17598         $self->[RECOVERABLE_SPACES] += $value;
17599     }
17600     return $self->[RECOVERABLE_SPACES];
17601 }
17602
17603 sub get_CI_LEVEL {
17604     my $self = shift;
17605     return $self->[CI_LEVEL];
17606 }
17607
17608 sub get_LEVEL {
17609     my $self = shift;
17610     return $self->[LEVEL];
17611 }
17612
17613 sub get_SEQUENCE_NUMBER {
17614     my $self = shift;
17615     return $self->[SEQUENCE_NUMBER];
17616 }
17617
17618 sub get_INDEX {
17619     my $self = shift;
17620     return $self->[INDEX];
17621 }
17622
17623 sub get_STARTING_INDEX {
17624     my $self = shift;
17625     return $self->[STARTING_INDEX];
17626 }
17627
17628 sub set_HAVE_CHILD {
17629     my ( $self, $value ) = @_;
17630     if ( defined($value) ) {
17631         $self->[HAVE_CHILD] = $value;
17632     }
17633     return $self->[HAVE_CHILD];
17634 }
17635
17636 sub get_HAVE_CHILD {
17637     my $self = shift;
17638     return $self->[HAVE_CHILD];
17639 }
17640
17641 sub set_ARROW_COUNT {
17642     my ( $self, $value ) = @_;
17643     if ( defined($value) ) {
17644         $self->[ARROW_COUNT] = $value;
17645     }
17646     return $self->[ARROW_COUNT];
17647 }
17648
17649 sub get_ARROW_COUNT {
17650     my $self = shift;
17651     return $self->[ARROW_COUNT];
17652 }
17653
17654 sub set_COMMA_COUNT {
17655     my ( $self, $value ) = @_;
17656     if ( defined($value) ) {
17657         $self->[COMMA_COUNT] = $value;
17658     }
17659     return $self->[COMMA_COUNT];
17660 }
17661
17662 sub get_COMMA_COUNT {
17663     my $self = shift;
17664     return $self->[COMMA_COUNT];
17665 }
17666
17667 sub set_CLOSED {
17668     my ( $self, $value ) = @_;
17669     if ( defined($value) ) {
17670         $self->[CLOSED] = $value;
17671     }
17672     return $self->[CLOSED];
17673 }
17674
17675 sub get_CLOSED {
17676     my $self = shift;
17677     return $self->[CLOSED];
17678 }
17679
17680 #####################################################################
17681 #
17682 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
17683 # contain a single output line
17684 #
17685 #####################################################################
17686
17687 package Perl::Tidy::VerticalAligner::Line;
17688
17689 {
17690
17691     use strict;
17692     use Carp;
17693
17694     use constant JMAX                      => 0;
17695     use constant JMAX_ORIGINAL_LINE        => 1;
17696     use constant RTOKENS                   => 2;
17697     use constant RFIELDS                   => 3;
17698     use constant RPATTERNS                 => 4;
17699     use constant INDENTATION               => 5;
17700     use constant LEADING_SPACE_COUNT       => 6;
17701     use constant OUTDENT_LONG_LINES        => 7;
17702     use constant LIST_TYPE                 => 8;
17703     use constant IS_HANGING_SIDE_COMMENT   => 9;
17704     use constant RALIGNMENTS               => 10;
17705     use constant MAXIMUM_LINE_LENGTH       => 11;
17706     use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
17707
17708     my %_index_map;
17709     $_index_map{jmax}                      = JMAX;
17710     $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
17711     $_index_map{rtokens}                   = RTOKENS;
17712     $_index_map{rfields}                   = RFIELDS;
17713     $_index_map{rpatterns}                 = RPATTERNS;
17714     $_index_map{indentation}               = INDENTATION;
17715     $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
17716     $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
17717     $_index_map{list_type}                 = LIST_TYPE;
17718     $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
17719     $_index_map{ralignments}               = RALIGNMENTS;
17720     $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
17721     $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
17722
17723     my @_default_data = ();
17724     $_default_data[JMAX]                      = undef;
17725     $_default_data[JMAX_ORIGINAL_LINE]        = undef;
17726     $_default_data[RTOKENS]                   = undef;
17727     $_default_data[RFIELDS]                   = undef;
17728     $_default_data[RPATTERNS]                 = undef;
17729     $_default_data[INDENTATION]               = undef;
17730     $_default_data[LEADING_SPACE_COUNT]       = undef;
17731     $_default_data[OUTDENT_LONG_LINES]        = undef;
17732     $_default_data[LIST_TYPE]                 = undef;
17733     $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
17734     $_default_data[RALIGNMENTS]               = [];
17735     $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
17736     $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
17737
17738     {
17739
17740         # methods to count object population
17741         my $_count = 0;
17742         sub get_count        { $_count; }
17743         sub _increment_count { ++$_count }
17744         sub _decrement_count { --$_count }
17745     }
17746
17747     # Constructor may be called as a class method
17748     sub new {
17749         my ( $caller, %arg ) = @_;
17750         my $caller_is_obj = ref($caller);
17751         my $class = $caller_is_obj || $caller;
17752         no strict "refs";
17753         my $self = bless [], $class;
17754
17755         $self->[RALIGNMENTS] = [];
17756
17757         my $index;
17758         foreach ( keys %_index_map ) {
17759             $index = $_index_map{$_};
17760             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
17761             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
17762             else { $self->[$index] = $_default_data[$index] }
17763         }
17764
17765         $self->_increment_count();
17766         return $self;
17767     }
17768
17769     sub DESTROY {
17770         $_[0]->_decrement_count();
17771     }
17772
17773     sub get_jmax                      { $_[0]->[JMAX] }
17774     sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
17775     sub get_rtokens                   { $_[0]->[RTOKENS] }
17776     sub get_rfields                   { $_[0]->[RFIELDS] }
17777     sub get_rpatterns                 { $_[0]->[RPATTERNS] }
17778     sub get_indentation               { $_[0]->[INDENTATION] }
17779     sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
17780     sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
17781     sub get_list_type                 { $_[0]->[LIST_TYPE] }
17782     sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
17783     sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
17784
17785     sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
17786     sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
17787     sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
17788     sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
17789
17790     sub get_starting_column {
17791         $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
17792     }
17793
17794     sub increment_column {
17795         $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
17796     }
17797     sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
17798
17799     sub current_field_width {
17800         my $self = shift;
17801         my ($j) = @_;
17802         if ( $j == 0 ) {
17803             return $self->get_column($j);
17804         }
17805         else {
17806             return $self->get_column($j) - $self->get_column( $j - 1 );
17807         }
17808     }
17809
17810     sub field_width_growth {
17811         my $self = shift;
17812         my $j    = shift;
17813         return $self->get_column($j) - $self->get_starting_column($j);
17814     }
17815
17816     sub starting_field_width {
17817         my $self = shift;
17818         my $j    = shift;
17819         if ( $j == 0 ) {
17820             return $self->get_starting_column($j);
17821         }
17822         else {
17823             return $self->get_starting_column($j) -
17824               $self->get_starting_column( $j - 1 );
17825         }
17826     }
17827
17828     sub increase_field_width {
17829
17830         my $self = shift;
17831         my ( $j, $pad ) = @_;
17832         my $jmax = $self->get_jmax();
17833         for my $k ( $j .. $jmax ) {
17834             $self->increment_column( $k, $pad );
17835         }
17836     }
17837
17838     sub get_available_space_on_right {
17839         my $self = shift;
17840         my $jmax = $self->get_jmax();
17841         return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
17842     }
17843
17844     sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
17845     sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
17846     sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
17847     sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
17848     sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
17849     sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
17850     sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
17851     sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
17852     sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
17853     sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
17854     sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }
17855
17856 }
17857
17858 #####################################################################
17859 #
17860 # the Perl::Tidy::VerticalAligner::Alignment class holds information
17861 # on a single column being aligned
17862 #
17863 #####################################################################
17864 package Perl::Tidy::VerticalAligner::Alignment;
17865
17866 {
17867
17868     use strict;
17869
17870     #use Carp;
17871
17872     # Symbolic array indexes
17873     use constant COLUMN          => 0;    # the current column number
17874     use constant STARTING_COLUMN => 1;    # column number when created
17875     use constant MATCHING_TOKEN  => 2;    # what token we are matching
17876     use constant STARTING_LINE   => 3;    # the line index of creation
17877     use constant ENDING_LINE     => 4;    # the most recent line to use it
17878     use constant SAVED_COLUMN    => 5;    # the most recent line to use it
17879     use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
17880                                           # (just its index in an array)
17881
17882     # Correspondence between variables and array indexes
17883     my %_index_map;
17884     $_index_map{column}          = COLUMN;
17885     $_index_map{starting_column} = STARTING_COLUMN;
17886     $_index_map{matching_token}  = MATCHING_TOKEN;
17887     $_index_map{starting_line}   = STARTING_LINE;
17888     $_index_map{ending_line}     = ENDING_LINE;
17889     $_index_map{saved_column}    = SAVED_COLUMN;
17890     $_index_map{serial_number}   = SERIAL_NUMBER;
17891
17892     my @_default_data = ();
17893     $_default_data[COLUMN]          = undef;
17894     $_default_data[STARTING_COLUMN] = undef;
17895     $_default_data[MATCHING_TOKEN]  = undef;
17896     $_default_data[STARTING_LINE]   = undef;
17897     $_default_data[ENDING_LINE]     = undef;
17898     $_default_data[SAVED_COLUMN]    = undef;
17899     $_default_data[SERIAL_NUMBER]   = undef;
17900
17901     # class population count
17902     {
17903         my $_count = 0;
17904         sub get_count        { $_count; }
17905         sub _increment_count { ++$_count }
17906         sub _decrement_count { --$_count }
17907     }
17908
17909     # constructor
17910     sub new {
17911         my ( $caller, %arg ) = @_;
17912         my $caller_is_obj = ref($caller);
17913         my $class = $caller_is_obj || $caller;
17914         no strict "refs";
17915         my $self = bless [], $class;
17916
17917         foreach ( keys %_index_map ) {
17918             my $index = $_index_map{$_};
17919             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
17920             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
17921             else { $self->[$index] = $_default_data[$index] }
17922         }
17923         $self->_increment_count();
17924         return $self;
17925     }
17926
17927     sub DESTROY {
17928         $_[0]->_decrement_count();
17929     }
17930
17931     sub get_column          { return $_[0]->[COLUMN] }
17932     sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
17933     sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
17934     sub get_starting_line   { return $_[0]->[STARTING_LINE] }
17935     sub get_ending_line     { return $_[0]->[ENDING_LINE] }
17936     sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
17937
17938     sub set_column          { $_[0]->[COLUMN]          = $_[1] }
17939     sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
17940     sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
17941     sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
17942     sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
17943     sub increment_column { $_[0]->[COLUMN] += $_[1] }
17944
17945     sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
17946     sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
17947
17948 }
17949
17950 package Perl::Tidy::VerticalAligner;
17951
17952 # The Perl::Tidy::VerticalAligner package collects output lines and
17953 # attempts to line up certain common tokens, such as => and #, which are
17954 # identified by the calling routine.
17955 #
17956 # There are two main routines: append_line and flush.  Append acts as a
17957 # storage buffer, collecting lines into a group which can be vertically
17958 # aligned.  When alignment is no longer possible or desirable, it dumps
17959 # the group to flush.
17960 #
17961 #     append_line -----> flush
17962 #
17963 #     collects          writes
17964 #     vertical          one
17965 #     groups            group
17966
17967 BEGIN {
17968
17969     # Caution: these debug flags produce a lot of output
17970     # They should all be 0 except when debugging small scripts
17971
17972     use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
17973     use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
17974     use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
17975
17976     my $debug_warning = sub {
17977         print "VALIGN_DEBUGGING with key $_[0]\n";
17978     };
17979
17980     VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
17981     VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
17982
17983 }
17984
17985 use vars qw(
17986   $vertical_aligner_self
17987   $current_line
17988   $maximum_alignment_index
17989   $ralignment_list
17990   $maximum_jmax_seen
17991   $minimum_jmax_seen
17992   $previous_minimum_jmax_seen
17993   $previous_maximum_jmax_seen
17994   $maximum_line_index
17995   $group_level
17996   $group_type
17997   $group_maximum_gap
17998   $marginal_match
17999   $last_group_level_written
18000   $last_leading_space_count
18001   $extra_indent_ok
18002   $zero_count
18003   @group_lines
18004   $last_comment_column
18005   $last_side_comment_line_number
18006   $last_side_comment_length
18007   $last_side_comment_level
18008   $outdented_line_count
18009   $first_outdented_line_at
18010   $last_outdented_line_at
18011   $diagnostics_object
18012   $logger_object
18013   $file_writer_object
18014   @side_comment_history
18015   $comment_leading_space_count
18016   $is_matching_terminal_line
18017
18018   $cached_line_text
18019   $cached_line_type
18020   $cached_line_flag
18021   $cached_seqno
18022   $cached_line_valid
18023   $cached_line_leading_space_count
18024   $cached_seqno_string
18025
18026   $seqno_string
18027   $last_nonblank_seqno_string
18028
18029   $rOpts
18030
18031   $rOpts_maximum_line_length
18032   $rOpts_continuation_indentation
18033   $rOpts_indent_columns
18034   $rOpts_tabs
18035   $rOpts_entab_leading_whitespace
18036   $rOpts_valign
18037
18038   $rOpts_fixed_position_side_comment
18039   $rOpts_minimum_space_to_comment
18040
18041 );
18042
18043 sub initialize {
18044
18045     my $class;
18046
18047     ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
18048       = @_;
18049
18050     # variables describing the entire space group:
18051     $ralignment_list            = [];
18052     $group_level                = 0;
18053     $last_group_level_written   = -1;
18054     $extra_indent_ok            = 0;    # can we move all lines to the right?
18055     $last_side_comment_length   = 0;
18056     $maximum_jmax_seen          = 0;
18057     $minimum_jmax_seen          = 0;
18058     $previous_minimum_jmax_seen = 0;
18059     $previous_maximum_jmax_seen = 0;
18060
18061     # variables describing each line of the group
18062     @group_lines = ();                  # list of all lines in group
18063
18064     $outdented_line_count          = 0;
18065     $first_outdented_line_at       = 0;
18066     $last_outdented_line_at        = 0;
18067     $last_side_comment_line_number = 0;
18068     $last_side_comment_level       = -1;
18069     $is_matching_terminal_line     = 0;
18070
18071     # most recent 3 side comments; [ line number, column ]
18072     $side_comment_history[0] = [ -300, 0 ];
18073     $side_comment_history[1] = [ -200, 0 ];
18074     $side_comment_history[2] = [ -100, 0 ];
18075
18076     # write_leader_and_string cache:
18077     $cached_line_text                = "";
18078     $cached_line_type                = 0;
18079     $cached_line_flag                = 0;
18080     $cached_seqno                    = 0;
18081     $cached_line_valid               = 0;
18082     $cached_line_leading_space_count = 0;
18083     $cached_seqno_string             = "";
18084
18085     # string of sequence numbers joined together
18086     $seqno_string               = "";
18087     $last_nonblank_seqno_string = "";
18088
18089     # frequently used parameters
18090     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
18091     $rOpts_tabs                     = $rOpts->{'tabs'};
18092     $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
18093     $rOpts_fixed_position_side_comment =
18094       $rOpts->{'fixed-position-side-comment'};
18095     $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
18096     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
18097     $rOpts_valign                   = $rOpts->{'valign'};
18098
18099     forget_side_comment();
18100
18101     initialize_for_new_group();
18102
18103     $vertical_aligner_self = {};
18104     bless $vertical_aligner_self, $class;
18105     return $vertical_aligner_self;
18106 }
18107
18108 sub initialize_for_new_group {
18109     $maximum_line_index      = -1;      # lines in the current group
18110     $maximum_alignment_index = -1;      # alignments in current group
18111     $zero_count              = 0;       # count consecutive lines without tokens
18112     $current_line            = undef;   # line being matched for alignment
18113     $group_maximum_gap       = 0;       # largest gap introduced
18114     $group_type              = "";
18115     $marginal_match          = 0;
18116     $comment_leading_space_count = 0;
18117     $last_leading_space_count    = 0;
18118 }
18119
18120 # interface to Perl::Tidy::Diagnostics routines
18121 sub write_diagnostics {
18122     if ($diagnostics_object) {
18123         $diagnostics_object->write_diagnostics(@_);
18124     }
18125 }
18126
18127 # interface to Perl::Tidy::Logger routines
18128 sub warning {
18129     if ($logger_object) {
18130         $logger_object->warning(@_);
18131     }
18132 }
18133
18134 sub write_logfile_entry {
18135     if ($logger_object) {
18136         $logger_object->write_logfile_entry(@_);
18137     }
18138 }
18139
18140 sub report_definite_bug {
18141     if ($logger_object) {
18142         $logger_object->report_definite_bug();
18143     }
18144 }
18145
18146 sub get_SPACES {
18147
18148     # return the number of leading spaces associated with an indentation
18149     # variable $indentation is either a constant number of spaces or an
18150     # object with a get_SPACES method.
18151     my $indentation = shift;
18152     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
18153 }
18154
18155 sub get_RECOVERABLE_SPACES {
18156
18157     # return the number of spaces (+ means shift right, - means shift left)
18158     # that we would like to shift a group of lines with the same indentation
18159     # to get them to line up with their opening parens
18160     my $indentation = shift;
18161     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
18162 }
18163
18164 sub get_STACK_DEPTH {
18165
18166     my $indentation = shift;
18167     return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
18168 }
18169
18170 sub make_alignment {
18171     my ( $col, $token ) = @_;
18172
18173     # make one new alignment at column $col which aligns token $token
18174     ++$maximum_alignment_index;
18175     my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
18176         column          => $col,
18177         starting_column => $col,
18178         matching_token  => $token,
18179         starting_line   => $maximum_line_index,
18180         ending_line     => $maximum_line_index,
18181         serial_number   => $maximum_alignment_index,
18182     );
18183     $ralignment_list->[$maximum_alignment_index] = $alignment;
18184     return $alignment;
18185 }
18186
18187 sub dump_alignments {
18188     print
18189 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
18190     for my $i ( 0 .. $maximum_alignment_index ) {
18191         my $column          = $ralignment_list->[$i]->get_column();
18192         my $starting_column = $ralignment_list->[$i]->get_starting_column();
18193         my $matching_token  = $ralignment_list->[$i]->get_matching_token();
18194         my $starting_line   = $ralignment_list->[$i]->get_starting_line();
18195         my $ending_line     = $ralignment_list->[$i]->get_ending_line();
18196         print
18197 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
18198     }
18199 }
18200
18201 sub save_alignment_columns {
18202     for my $i ( 0 .. $maximum_alignment_index ) {
18203         $ralignment_list->[$i]->save_column();
18204     }
18205 }
18206
18207 sub restore_alignment_columns {
18208     for my $i ( 0 .. $maximum_alignment_index ) {
18209         $ralignment_list->[$i]->restore_column();
18210     }
18211 }
18212
18213 sub forget_side_comment {
18214     $last_comment_column = 0;
18215 }
18216
18217 sub append_line {
18218
18219     # sub append is called to place one line in the current vertical group.
18220     #
18221     # The input parameters are:
18222     #     $level = indentation level of this line
18223     #     $rfields = reference to array of fields
18224     #     $rpatterns = reference to array of patterns, one per field
18225     #     $rtokens   = reference to array of tokens starting fields 1,2,..
18226     #
18227     # Here is an example of what this package does.  In this example,
18228     # we are trying to line up both the '=>' and the '#'.
18229     #
18230     #         '18' => 'grave',    #   \`
18231     #         '19' => 'acute',    #   `'
18232     #         '20' => 'caron',    #   \v
18233     # <-tabs-><f1-><--field 2 ---><-f3->
18234     # |            |              |    |
18235     # |            |              |    |
18236     # col1        col2         col3 col4
18237     #
18238     # The calling routine has already broken the entire line into 3 fields as
18239     # indicated.  (So the work of identifying promising common tokens has
18240     # already been done).
18241     #
18242     # In this example, there will be 2 tokens being matched: '=>' and '#'.
18243     # They are the leading parts of fields 2 and 3, but we do need to know
18244     # what they are so that we can dump a group of lines when these tokens
18245     # change.
18246     #
18247     # The fields contain the actual characters of each field.  The patterns
18248     # are like the fields, but they contain mainly token types instead
18249     # of tokens, so they have fewer characters.  They are used to be
18250     # sure we are matching fields of similar type.
18251     #
18252     # In this example, there will be 4 column indexes being adjusted.  The
18253     # first one is always at zero.  The interior columns are at the start of
18254     # the matching tokens, and the last one tracks the maximum line length.
18255     #
18256     # Basically, each time a new line comes in, it joins the current vertical
18257     # group if possible.  Otherwise it causes the current group to be dumped
18258     # and a new group is started.
18259     #
18260     # For each new group member, the column locations are increased, as
18261     # necessary, to make room for the new fields.  When the group is finally
18262     # output, these column numbers are used to compute the amount of spaces of
18263     # padding needed for each field.
18264     #
18265     # Programming note: the fields are assumed not to have any tab characters.
18266     # Tabs have been previously removed except for tabs in quoted strings and
18267     # side comments.  Tabs in these fields can mess up the column counting.
18268     # The log file warns the user if there are any such tabs.
18269
18270     my (
18271         $level,               $level_end,
18272         $indentation,         $rfields,
18273         $rtokens,             $rpatterns,
18274         $is_forced_break,     $outdent_long_lines,
18275         $is_terminal_ternary, $is_terminal_statement,
18276         $do_not_pad,          $rvertical_tightness_flags,
18277         $level_jump,
18278     ) = @_;
18279
18280     # number of fields is $jmax
18281     # number of tokens between fields is $jmax-1
18282     my $jmax = $#{$rfields};
18283
18284     my $leading_space_count = get_SPACES($indentation);
18285
18286     # set outdented flag to be sure we either align within statements or
18287     # across statement boundaries, but not both.
18288     my $is_outdented = $last_leading_space_count > $leading_space_count;
18289     $last_leading_space_count = $leading_space_count;
18290
18291     # Patch: undo for hanging side comment
18292     my $is_hanging_side_comment =
18293       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
18294     $is_outdented = 0 if $is_hanging_side_comment;
18295
18296     VALIGN_DEBUG_FLAG_APPEND0 && do {
18297         print
18298 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
18299     };
18300
18301     # Validate cached line if necessary: If we can produce a container
18302     # with just 2 lines total by combining an existing cached opening
18303     # token with the closing token to follow, then we will mark both
18304     # cached flags as valid.
18305     if ($rvertical_tightness_flags) {
18306         if (   $maximum_line_index <= 0
18307             && $cached_line_type
18308             && $cached_seqno
18309             && $rvertical_tightness_flags->[2]
18310             && $rvertical_tightness_flags->[2] == $cached_seqno )
18311         {
18312             $rvertical_tightness_flags->[3] ||= 1;
18313             $cached_line_valid ||= 1;
18314         }
18315     }
18316
18317     # do not join an opening block brace with an unbalanced line
18318     # unless requested with a flag value of 2
18319     if (   $cached_line_type == 3
18320         && $maximum_line_index < 0
18321         && $cached_line_flag < 2
18322         && $level_jump != 0 )
18323     {
18324         $cached_line_valid = 0;
18325     }
18326
18327     # patch until new aligner is finished
18328     if ($do_not_pad) { my_flush() }
18329
18330     # shouldn't happen:
18331     if ( $level < 0 ) { $level = 0 }
18332
18333     # do not align code across indentation level changes
18334     # or if vertical alignment is turned off for debugging
18335     if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
18336
18337         # we are allowed to shift a group of lines to the right if its
18338         # level is greater than the previous and next group
18339         $extra_indent_ok =
18340           ( $level < $group_level && $last_group_level_written < $group_level );
18341
18342         my_flush();
18343
18344         # If we know that this line will get flushed out by itself because
18345         # of level changes, we can leave the extra_indent_ok flag set.
18346         # That way, if we get an external flush call, we will still be
18347         # able to do some -lp alignment if necessary.
18348         $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
18349
18350         $group_level = $level;
18351
18352         # wait until after the above flush to get the leading space
18353         # count because it may have been changed if the -icp flag is in
18354         # effect
18355         $leading_space_count = get_SPACES($indentation);
18356
18357     }
18358
18359     # --------------------------------------------------------------------
18360     # Patch to collect outdentable block COMMENTS
18361     # --------------------------------------------------------------------
18362     my $is_blank_line = "";
18363     my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
18364     if ( $group_type eq 'COMMENT' ) {
18365         if (
18366             (
18367                    $is_block_comment
18368                 && $outdent_long_lines
18369                 && $leading_space_count == $comment_leading_space_count
18370             )
18371             || $is_blank_line
18372           )
18373         {
18374             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
18375             return;
18376         }
18377         else {
18378             my_flush();
18379         }
18380     }
18381
18382     # --------------------------------------------------------------------
18383     # add dummy fields for terminal ternary
18384     # --------------------------------------------------------------------
18385     my $j_terminal_match;
18386     if ( $is_terminal_ternary && $current_line ) {
18387         $j_terminal_match =
18388           fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
18389         $jmax = @{$rfields} - 1;
18390     }
18391
18392     # --------------------------------------------------------------------
18393     # add dummy fields for else statement
18394     # --------------------------------------------------------------------
18395     if (   $rfields->[0] =~ /^else\s*$/
18396         && $current_line
18397         && $level_jump == 0 )
18398     {
18399         $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
18400         $jmax = @{$rfields} - 1;
18401     }
18402
18403     # --------------------------------------------------------------------
18404     # Step 1. Handle simple line of code with no fields to match.
18405     # --------------------------------------------------------------------
18406     if ( $jmax <= 0 ) {
18407         $zero_count++;
18408
18409         if ( $maximum_line_index >= 0
18410             && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
18411         {
18412
18413             # flush the current group if it has some aligned columns..
18414             if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
18415
18416             # flush current group if we are just collecting side comments..
18417             elsif (
18418
18419                 # ...and we haven't seen a comment lately
18420                 ( $zero_count > 3 )
18421
18422                 # ..or if this new line doesn't fit to the left of the comments
18423                 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
18424                     $group_lines[0]->get_column(0) )
18425               )
18426             {
18427                 my_flush();
18428             }
18429         }
18430
18431         # patch to start new COMMENT group if this comment may be outdented
18432         if (   $is_block_comment
18433             && $outdent_long_lines
18434             && $maximum_line_index < 0 )
18435         {
18436             $group_type                           = 'COMMENT';
18437             $comment_leading_space_count          = $leading_space_count;
18438             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
18439             return;
18440         }
18441
18442         # just write this line directly if no current group, no side comment,
18443         # and no space recovery is needed.
18444         if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
18445         {
18446             write_leader_and_string( $leading_space_count, $$rfields[0], 0,
18447                 $outdent_long_lines, $rvertical_tightness_flags );
18448             return;
18449         }
18450     }
18451     else {
18452         $zero_count = 0;
18453     }
18454
18455     # programming check: (shouldn't happen)
18456     # an error here implies an incorrect call was made
18457     if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
18458         warning(
18459 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
18460         );
18461         report_definite_bug();
18462     }
18463
18464     # --------------------------------------------------------------------
18465     # create an object to hold this line
18466     # --------------------------------------------------------------------
18467     my $new_line = new Perl::Tidy::VerticalAligner::Line(
18468         jmax                      => $jmax,
18469         jmax_original_line        => $jmax,
18470         rtokens                   => $rtokens,
18471         rfields                   => $rfields,
18472         rpatterns                 => $rpatterns,
18473         indentation               => $indentation,
18474         leading_space_count       => $leading_space_count,
18475         outdent_long_lines        => $outdent_long_lines,
18476         list_type                 => "",
18477         is_hanging_side_comment   => $is_hanging_side_comment,
18478         maximum_line_length       => $rOpts->{'maximum-line-length'},
18479         rvertical_tightness_flags => $rvertical_tightness_flags,
18480     );
18481
18482     # Initialize a global flag saying if the last line of the group should
18483     # match end of group and also terminate the group.  There should be no
18484     # returns between here and where the flag is handled at the bottom.
18485     my $col_matching_terminal = 0;
18486     if ( defined($j_terminal_match) ) {
18487
18488         # remember the column of the terminal ? or { to match with
18489         $col_matching_terminal = $current_line->get_column($j_terminal_match);
18490
18491         # set global flag for sub decide_if_aligned
18492         $is_matching_terminal_line = 1;
18493     }
18494
18495     # --------------------------------------------------------------------
18496     # It simplifies things to create a zero length side comment
18497     # if none exists.
18498     # --------------------------------------------------------------------
18499     make_side_comment( $new_line, $level_end );
18500
18501     # --------------------------------------------------------------------
18502     # Decide if this is a simple list of items.
18503     # There are 3 list types: none, comma, comma-arrow.
18504     # We use this below to be less restrictive in deciding what to align.
18505     # --------------------------------------------------------------------
18506     if ($is_forced_break) {
18507         decide_if_list($new_line);
18508     }
18509
18510     if ($current_line) {
18511
18512         # --------------------------------------------------------------------
18513         # Allow hanging side comment to join current group, if any
18514         # This will help keep side comments aligned, because otherwise we
18515         # will have to start a new group, making alignment less likely.
18516         # --------------------------------------------------------------------
18517         join_hanging_comment( $new_line, $current_line )
18518           if $is_hanging_side_comment;
18519
18520         # --------------------------------------------------------------------
18521         # If there is just one previous line, and it has more fields
18522         # than the new line, try to join fields together to get a match with
18523         # the new line.  At the present time, only a single leading '=' is
18524         # allowed to be compressed out.  This is useful in rare cases where
18525         # a table is forced to use old breakpoints because of side comments,
18526         # and the table starts out something like this:
18527         #   my %MonthChars = ('0', 'Jan',   # side comment
18528         #                     '1', 'Feb',
18529         #                     '2', 'Mar',
18530         # Eliminating the '=' field will allow the remaining fields to line up.
18531         # This situation does not occur if there are no side comments
18532         # because scan_list would put a break after the opening '('.
18533         # --------------------------------------------------------------------
18534         eliminate_old_fields( $new_line, $current_line );
18535
18536         # --------------------------------------------------------------------
18537         # If the new line has more fields than the current group,
18538         # see if we can match the first fields and combine the remaining
18539         # fields of the new line.
18540         # --------------------------------------------------------------------
18541         eliminate_new_fields( $new_line, $current_line );
18542
18543         # --------------------------------------------------------------------
18544         # Flush previous group unless all common tokens and patterns match..
18545         # --------------------------------------------------------------------
18546         check_match( $new_line, $current_line );
18547
18548         # --------------------------------------------------------------------
18549         # See if there is space for this line in the current group (if any)
18550         # --------------------------------------------------------------------
18551         if ($current_line) {
18552             check_fit( $new_line, $current_line );
18553         }
18554     }
18555
18556     # --------------------------------------------------------------------
18557     # Append this line to the current group (or start new group)
18558     # --------------------------------------------------------------------
18559     accept_line($new_line);
18560
18561     # Future update to allow this to vary:
18562     $current_line = $new_line if ( $maximum_line_index == 0 );
18563
18564     # output this group if it ends in a terminal else or ternary line
18565     if ( defined($j_terminal_match) ) {
18566
18567         # if there is only one line in the group (maybe due to failure to match
18568         # perfectly with previous lines), then align the ? or { of this
18569         # terminal line with the previous one unless that would make the line
18570         # too long
18571         if ( $maximum_line_index == 0 ) {
18572             my $col_now = $current_line->get_column($j_terminal_match);
18573             my $pad     = $col_matching_terminal - $col_now;
18574             my $padding_available =
18575               $current_line->get_available_space_on_right();
18576             if ( $pad > 0 && $pad <= $padding_available ) {
18577                 $current_line->increase_field_width( $j_terminal_match, $pad );
18578             }
18579         }
18580         my_flush();
18581         $is_matching_terminal_line = 0;
18582     }
18583
18584     # --------------------------------------------------------------------
18585     # Step 8. Some old debugging stuff
18586     # --------------------------------------------------------------------
18587     VALIGN_DEBUG_FLAG_APPEND && do {
18588         print "APPEND fields:";
18589         dump_array(@$rfields);
18590         print "APPEND tokens:";
18591         dump_array(@$rtokens);
18592         print "APPEND patterns:";
18593         dump_array(@$rpatterns);
18594         dump_alignments();
18595     };
18596
18597     return;
18598 }
18599
18600 sub join_hanging_comment {
18601
18602     my $line = shift;
18603     my $jmax = $line->get_jmax();
18604     return 0 unless $jmax == 1;    # must be 2 fields
18605     my $rtokens = $line->get_rtokens();
18606     return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
18607     my $rfields = $line->get_rfields();
18608     return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
18609     my $old_line            = shift;
18610     my $maximum_field_index = $old_line->get_jmax();
18611     return 0
18612       unless $maximum_field_index > $jmax;    # the current line has more fields
18613     my $rpatterns = $line->get_rpatterns();
18614
18615     $line->set_is_hanging_side_comment(1);
18616     $jmax = $maximum_field_index;
18617     $line->set_jmax($jmax);
18618     $$rfields[$jmax]         = $$rfields[1];
18619     $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
18620     $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
18621     for ( my $j = 1 ; $j < $jmax ; $j++ ) {
18622         $$rfields[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
18623         $$rtokens[ $j - 1 ]   = "";
18624         $$rpatterns[ $j - 1 ] = "";
18625     }
18626     return 1;
18627 }
18628
18629 sub eliminate_old_fields {
18630
18631     my $new_line = shift;
18632     my $jmax     = $new_line->get_jmax();
18633     if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
18634     if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
18635
18636     # there must be one previous line
18637     return unless ( $maximum_line_index == 0 );
18638
18639     my $old_line            = shift;
18640     my $maximum_field_index = $old_line->get_jmax();
18641
18642     ###############################################
18643     # this line must have fewer fields
18644     return unless $maximum_field_index > $jmax;
18645     ###############################################
18646
18647     # Identify specific cases where field elimination is allowed:
18648     # case=1: both lines have comma-separated lists, and the first
18649     #         line has an equals
18650     # case=2: both lines have leading equals
18651
18652     # case 1 is the default
18653     my $case = 1;
18654
18655     # See if case 2: both lines have leading '='
18656     # We'll require smiliar leading patterns in this case
18657     my $old_rtokens   = $old_line->get_rtokens();
18658     my $rtokens       = $new_line->get_rtokens();
18659     my $rpatterns     = $new_line->get_rpatterns();
18660     my $old_rpatterns = $old_line->get_rpatterns();
18661     if (   $rtokens->[0] =~ /^=\d*$/
18662         && $old_rtokens->[0]   eq $rtokens->[0]
18663         && $old_rpatterns->[0] eq $rpatterns->[0] )
18664     {
18665         $case = 2;
18666     }
18667
18668     # not too many fewer fields in new line for case 1
18669     return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
18670
18671     # case 1 must have side comment
18672     my $old_rfields = $old_line->get_rfields();
18673     return
18674       if ( $case == 1
18675         && length( $$old_rfields[$maximum_field_index] ) == 0 );
18676
18677     my $rfields = $new_line->get_rfields();
18678
18679     my $hid_equals = 0;
18680
18681     my @new_alignments        = ();
18682     my @new_fields            = ();
18683     my @new_matching_patterns = ();
18684     my @new_matching_tokens   = ();
18685
18686     my $j = 0;
18687     my $k;
18688     my $current_field   = '';
18689     my $current_pattern = '';
18690
18691     # loop over all old tokens
18692     my $in_match = 0;
18693     for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
18694         $current_field   .= $$old_rfields[$k];
18695         $current_pattern .= $$old_rpatterns[$k];
18696         last if ( $j > $jmax - 1 );
18697
18698         if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
18699             $in_match                  = 1;
18700             $new_fields[$j]            = $current_field;
18701             $new_matching_patterns[$j] = $current_pattern;
18702             $current_field             = '';
18703             $current_pattern           = '';
18704             $new_matching_tokens[$j]   = $$old_rtokens[$k];
18705             $new_alignments[$j]        = $old_line->get_alignment($k);
18706             $j++;
18707         }
18708         else {
18709
18710             if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
18711                 last if ( $case == 2 );    # avoid problems with stuff
18712                                            # like:   $a=$b=$c=$d;
18713                 $hid_equals = 1;
18714             }
18715             last
18716               if ( $in_match && $case == 1 )
18717               ;    # disallow gaps in matching field types in case 1
18718         }
18719     }
18720
18721     # Modify the current state if we are successful.
18722     # We must exactly reach the ends of both lists for success.
18723     if (   ( $j == $jmax )
18724         && ( $current_field eq '' )
18725         && ( $case != 1 || $hid_equals ) )
18726     {
18727         $k = $maximum_field_index;
18728         $current_field   .= $$old_rfields[$k];
18729         $current_pattern .= $$old_rpatterns[$k];
18730         $new_fields[$j]            = $current_field;
18731         $new_matching_patterns[$j] = $current_pattern;
18732
18733         $new_alignments[$j] = $old_line->get_alignment($k);
18734         $maximum_field_index = $j;
18735
18736         $old_line->set_alignments(@new_alignments);
18737         $old_line->set_jmax($jmax);
18738         $old_line->set_rtokens( \@new_matching_tokens );
18739         $old_line->set_rfields( \@new_fields );
18740         $old_line->set_rpatterns( \@$rpatterns );
18741     }
18742 }
18743
18744 # create an empty side comment if none exists
18745 sub make_side_comment {
18746     my $new_line  = shift;
18747     my $level_end = shift;
18748     my $jmax      = $new_line->get_jmax();
18749     my $rtokens   = $new_line->get_rtokens();
18750
18751     # if line does not have a side comment...
18752     if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
18753         my $rfields   = $new_line->get_rfields();
18754         my $rpatterns = $new_line->get_rpatterns();
18755         $$rtokens[$jmax]     = '#';
18756         $$rfields[ ++$jmax ] = '';
18757         $$rpatterns[$jmax]   = '#';
18758         $new_line->set_jmax($jmax);
18759         $new_line->set_jmax_original_line($jmax);
18760     }
18761
18762     # line has a side comment..
18763     else {
18764
18765         # don't remember old side comment location for very long
18766         my $line_number = $vertical_aligner_self->get_output_line_number();
18767         my $rfields     = $new_line->get_rfields();
18768         if (
18769             $line_number - $last_side_comment_line_number > 12
18770
18771             # and don't remember comment location across block level changes
18772             || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
18773           )
18774         {
18775             forget_side_comment();
18776         }
18777         $last_side_comment_line_number = $line_number;
18778         $last_side_comment_level       = $level_end;
18779     }
18780 }
18781
18782 sub decide_if_list {
18783
18784     my $line = shift;
18785
18786     # A list will be taken to be a line with a forced break in which all
18787     # of the field separators are commas or comma-arrows (except for the
18788     # trailing #)
18789
18790     # List separator tokens are things like ',3'   or '=>2',
18791     # where the trailing digit is the nesting depth.  Allow braces
18792     # to allow nested list items.
18793     my $rtokens    = $line->get_rtokens();
18794     my $test_token = $$rtokens[0];
18795     if ( $test_token =~ /^(\,|=>)/ ) {
18796         my $list_type = $test_token;
18797         my $jmax      = $line->get_jmax();
18798
18799         foreach ( 1 .. $jmax - 2 ) {
18800             if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
18801                 $list_type = "";
18802                 last;
18803             }
18804         }
18805         $line->set_list_type($list_type);
18806     }
18807 }
18808
18809 sub eliminate_new_fields {
18810
18811     return unless ( $maximum_line_index >= 0 );
18812     my ( $new_line, $old_line ) = @_;
18813     my $jmax = $new_line->get_jmax();
18814
18815     my $old_rtokens = $old_line->get_rtokens();
18816     my $rtokens     = $new_line->get_rtokens();
18817     my $is_assignment =
18818       ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
18819
18820     # must be monotonic variation
18821     return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
18822
18823     # must be more fields in the new line
18824     my $maximum_field_index = $old_line->get_jmax();
18825     return unless ( $maximum_field_index < $jmax );
18826
18827     unless ($is_assignment) {
18828         return
18829           unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
18830           ;    # only if monotonic
18831
18832         # never combine fields of a comma list
18833         return
18834           unless ( $maximum_field_index > 1 )
18835           && ( $new_line->get_list_type() !~ /^,/ );
18836     }
18837
18838     my $rfields       = $new_line->get_rfields();
18839     my $rpatterns     = $new_line->get_rpatterns();
18840     my $old_rpatterns = $old_line->get_rpatterns();
18841
18842     # loop over all OLD tokens except comment and check match
18843     my $match = 1;
18844     my $k;
18845     for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
18846         if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
18847             || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
18848         {
18849             $match = 0;
18850             last;
18851         }
18852     }
18853
18854     # first tokens agree, so combine extra new tokens
18855     if ($match) {
18856         for $k ( $maximum_field_index .. $jmax - 1 ) {
18857
18858             $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
18859             $$rfields[$k] = "";
18860             $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
18861             $$rpatterns[$k] = "";
18862         }
18863
18864         $$rtokens[ $maximum_field_index - 1 ] = '#';
18865         $$rfields[$maximum_field_index]       = $$rfields[$jmax];
18866         $$rpatterns[$maximum_field_index]     = $$rpatterns[$jmax];
18867         $jmax                                 = $maximum_field_index;
18868     }
18869     $new_line->set_jmax($jmax);
18870 }
18871
18872 sub fix_terminal_ternary {
18873
18874     # Add empty fields as necessary to align a ternary term
18875     # like this:
18876     #
18877     #  my $leapyear =
18878     #      $year % 4   ? 0
18879     #    : $year % 100 ? 1
18880     #    : $year % 400 ? 0
18881     #    :               1;
18882     #
18883     # returns 1 if the terminal item should be indented
18884
18885     my ( $rfields, $rtokens, $rpatterns ) = @_;
18886
18887     my $jmax        = @{$rfields} - 1;
18888     my $old_line    = $group_lines[$maximum_line_index];
18889     my $rfields_old = $old_line->get_rfields();
18890
18891     my $rpatterns_old       = $old_line->get_rpatterns();
18892     my $rtokens_old         = $old_line->get_rtokens();
18893     my $maximum_field_index = $old_line->get_jmax();
18894
18895     # look for the question mark after the :
18896     my ($jquestion);
18897     my $depth_question;
18898     my $pad = "";
18899     for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
18900         my $tok = $rtokens_old->[$j];
18901         if ( $tok =~ /^\?(\d+)$/ ) {
18902             $depth_question = $1;
18903
18904             # depth must be correct
18905             next unless ( $depth_question eq $group_level );
18906
18907             $jquestion = $j;
18908             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
18909                 $pad = " " x length($1);
18910             }
18911             else {
18912                 return;    # shouldn't happen
18913             }
18914             last;
18915         }
18916     }
18917     return unless ( defined($jquestion) );    # shouldn't happen
18918
18919     # Now splice the tokens and patterns of the previous line
18920     # into the else line to insure a match.  Add empty fields
18921     # as necessary.
18922     my $jadd = $jquestion;
18923
18924     # Work on copies of the actual arrays in case we have
18925     # to return due to an error
18926     my @fields   = @{$rfields};
18927     my @patterns = @{$rpatterns};
18928     my @tokens   = @{$rtokens};
18929
18930     VALIGN_DEBUG_FLAG_TERNARY && do {
18931         local $" = '><';
18932         print "CURRENT FIELDS=<@{$rfields_old}>\n";
18933         print "CURRENT TOKENS=<@{$rtokens_old}>\n";
18934         print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
18935         print "UNMODIFIED FIELDS=<@{$rfields}>\n";
18936         print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
18937         print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
18938     };
18939
18940     # handle cases of leading colon on this line
18941     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
18942
18943         my ( $colon, $therest ) = ( $1, $2 );
18944
18945         # Handle sub-case of first field with leading colon plus additional code
18946         # This is the usual situation as at the '1' below:
18947         #  ...
18948         #  : $year % 400 ? 0
18949         #  :               1;
18950         if ($therest) {
18951
18952             # Split the first field after the leading colon and insert padding.
18953             # Note that this padding will remain even if the terminal value goes
18954             # out on a separate line.  This does not seem to look to bad, so no
18955             # mechanism has been included to undo it.
18956             my $field1 = shift @fields;
18957             unshift @fields, ( $colon, $pad . $therest );
18958
18959             # change the leading pattern from : to ?
18960             return unless ( $patterns[0] =~ s/^\:/?/ );
18961
18962             # install leading tokens and patterns of existing line
18963             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
18964             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
18965
18966             # insert appropriate number of empty fields
18967             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
18968         }
18969
18970         # handle sub-case of first field just equal to leading colon.
18971         # This can happen for example in the example below where
18972         # the leading '(' would create a new alignment token
18973         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
18974         # :                        ( $mname = $name . '->' );
18975         else {
18976
18977             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
18978
18979             # prepend a leading ? onto the second pattern
18980             $patterns[1] = "?b" . $patterns[1];
18981
18982             # pad the second field
18983             $fields[1] = $pad . $fields[1];
18984
18985             # install leading tokens and patterns of existing line, replacing
18986             # leading token and inserting appropriate number of empty fields
18987             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
18988             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
18989             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
18990         }
18991     }
18992
18993     # Handle case of no leading colon on this line.  This will
18994     # be the case when -wba=':' is used.  For example,
18995     #  $year % 400 ? 0 :
18996     #                1;
18997     else {
18998
18999         # install leading tokens and patterns of existing line
19000         $patterns[0] = '?' . 'b' . $patterns[0];
19001         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
19002         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
19003
19004         # insert appropriate number of empty fields
19005         $jadd = $jquestion + 1;
19006         $fields[0] = $pad . $fields[0];
19007         splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
19008     }
19009
19010     VALIGN_DEBUG_FLAG_TERNARY && do {
19011         local $" = '><';
19012         print "MODIFIED TOKENS=<@tokens>\n";
19013         print "MODIFIED PATTERNS=<@patterns>\n";
19014         print "MODIFIED FIELDS=<@fields>\n";
19015     };
19016
19017     # all ok .. update the arrays
19018     @{$rfields}   = @fields;
19019     @{$rtokens}   = @tokens;
19020     @{$rpatterns} = @patterns;
19021
19022     # force a flush after this line
19023     return $jquestion;
19024 }
19025
19026 sub fix_terminal_else {
19027
19028     # Add empty fields as necessary to align a balanced terminal
19029     # else block to a previous if/elsif/unless block,
19030     # like this:
19031     #
19032     #  if   ( 1 || $x ) { print "ok 13\n"; }
19033     #  else             { print "not ok 13\n"; }
19034     #
19035     # returns 1 if the else block should be indented
19036     #
19037     my ( $rfields, $rtokens, $rpatterns ) = @_;
19038     my $jmax = @{$rfields} - 1;
19039     return unless ( $jmax > 0 );
19040
19041     # check for balanced else block following if/elsif/unless
19042     my $rfields_old = $current_line->get_rfields();
19043
19044     # TBD: add handling for 'case'
19045     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
19046
19047     # look for the opening brace after the else, and extrace the depth
19048     my $tok_brace = $rtokens->[0];
19049     my $depth_brace;
19050     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
19051
19052     # probably:  "else # side_comment"
19053     else { return }
19054
19055     my $rpatterns_old       = $current_line->get_rpatterns();
19056     my $rtokens_old         = $current_line->get_rtokens();
19057     my $maximum_field_index = $current_line->get_jmax();
19058
19059     # be sure the previous if/elsif is followed by an opening paren
19060     my $jparen    = 0;
19061     my $tok_paren = '(' . $depth_brace;
19062     my $tok_test  = $rtokens_old->[$jparen];
19063     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
19064
19065     # Now find the opening block brace
19066     my ($jbrace);
19067     for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
19068         my $tok = $rtokens_old->[$j];
19069         if ( $tok eq $tok_brace ) {
19070             $jbrace = $j;
19071             last;
19072         }
19073     }
19074     return unless ( defined($jbrace) );           # shouldn't happen
19075
19076     # Now splice the tokens and patterns of the previous line
19077     # into the else line to insure a match.  Add empty fields
19078     # as necessary.
19079     my $jadd = $jbrace - $jparen;
19080     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
19081     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
19082     splice( @{$rfields}, 1, 0, ('') x $jadd );
19083
19084     # force a flush after this line if it does not follow a case
19085     return $jbrace
19086       unless ( $rfields_old->[0] =~ /^case\s*$/ );
19087 }
19088
19089 {    # sub check_match
19090     my %is_good_alignment;
19091
19092     BEGIN {
19093
19094         # Vertically aligning on certain "good" tokens is usually okay
19095         # so we can be less restrictive in marginal cases.
19096         @_ = qw( { ? => = );
19097         push @_, (',');
19098         @is_good_alignment{@_} = (1) x scalar(@_);
19099     }
19100
19101     sub check_match {
19102
19103         # See if the current line matches the current vertical alignment group.
19104         # If not, flush the current group.
19105         my $new_line = shift;
19106         my $old_line = shift;
19107
19108         # uses global variables:
19109         #  $previous_minimum_jmax_seen
19110         #  $maximum_jmax_seen
19111         #  $maximum_line_index
19112         #  $marginal_match
19113         my $jmax                = $new_line->get_jmax();
19114         my $maximum_field_index = $old_line->get_jmax();
19115
19116         # flush if this line has too many fields
19117         if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
19118
19119         # flush if adding this line would make a non-monotonic field count
19120         if (
19121             ( $maximum_field_index > $jmax )    # this has too few fields
19122             && (
19123                 ( $previous_minimum_jmax_seen <
19124                     $jmax )                     # and wouldn't be monotonic
19125                 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
19126             )
19127           )
19128         {
19129             goto NO_MATCH;
19130         }
19131
19132         # otherwise see if this line matches the current group
19133         my $jmax_original_line      = $new_line->get_jmax_original_line();
19134         my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19135         my $rtokens                 = $new_line->get_rtokens();
19136         my $rfields                 = $new_line->get_rfields();
19137         my $rpatterns               = $new_line->get_rpatterns();
19138         my $list_type               = $new_line->get_list_type();
19139
19140         my $group_list_type = $old_line->get_list_type();
19141         my $old_rpatterns   = $old_line->get_rpatterns();
19142         my $old_rtokens     = $old_line->get_rtokens();
19143
19144         my $jlimit = $jmax - 1;
19145         if ( $maximum_field_index > $jmax ) {
19146             $jlimit = $jmax_original_line;
19147             --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
19148         }
19149
19150         # handle comma-separated lists ..
19151         if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
19152             for my $j ( 0 .. $jlimit ) {
19153                 my $old_tok = $$old_rtokens[$j];
19154                 next unless $old_tok;
19155                 my $new_tok = $$rtokens[$j];
19156                 next unless $new_tok;
19157
19158                 # lists always match ...
19159                 # unless they would align any '=>'s with ','s
19160                 goto NO_MATCH
19161                   if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
19162                     || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
19163             }
19164         }
19165
19166         # do detailed check for everything else except hanging side comments
19167         elsif ( !$is_hanging_side_comment ) {
19168
19169             my $leading_space_count = $new_line->get_leading_space_count();
19170
19171             my $max_pad = 0;
19172             my $min_pad = 0;
19173             my $saw_good_alignment;
19174
19175             for my $j ( 0 .. $jlimit ) {
19176
19177                 my $old_tok = $$old_rtokens[$j];
19178                 my $new_tok = $$rtokens[$j];
19179
19180                 # Note on encoding used for alignment tokens:
19181                 # -------------------------------------------
19182                 # Tokens are "decorated" with information which can help
19183                 # prevent unwanted alignments.  Consider for example the
19184                 # following two lines:
19185                 #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
19186                 #   local ( $i, $f ) = &'bdiv( $xn, $xd );
19187                 # There are three alignment tokens in each line, a comma,
19188                 # an =, and a comma.  In the first line these three tokens
19189                 # are encoded as:
19190                 #    ,4+local-18     =3      ,4+split-7
19191                 # and in the second line they are encoded as
19192                 #    ,4+local-18     =3      ,4+&'bdiv-8
19193                 # Tokens always at least have token name and nesting
19194                 # depth.  So in this example the ='s are at depth 3 and
19195                 # the ,'s are at depth 4.  This prevents aligning tokens
19196                 # of different depths.  Commas contain additional
19197                 # information, as follows:
19198                 # ,  {depth} + {container name} - {spaces to opening paren}
19199                 # This allows us to reject matching the rightmost commas
19200                 # in the above two lines, since they are for different
19201                 # function calls.  This encoding is done in
19202                 # 'sub send_lines_to_vertical_aligner'.
19203
19204                 # Pick off actual token.
19205                 # Everything up to the first digit is the actual token.
19206                 my $alignment_token = $new_tok;
19207                 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
19208
19209                 # see if the decorated tokens match
19210                 my $tokens_match = $new_tok eq $old_tok
19211
19212                   # Exception for matching terminal : of ternary statement..
19213                   # consider containers prefixed by ? and : a match
19214                   || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
19215
19216                 # No match if the alignment tokens differ...
19217                 if ( !$tokens_match ) {
19218
19219                     # ...Unless this is a side comment
19220                     if (
19221                         $j == $jlimit
19222
19223                         # and there is either at least one alignment token
19224                         # or this is a single item following a list.  This
19225                         # latter rule is required for 'December' to join
19226                         # the following list:
19227                         # my (@months) = (
19228                         #     '',       'January',   'February', 'March',
19229                         #     'April',  'May',       'June',     'July',
19230                         #     'August', 'September', 'October',  'November',
19231                         #     'December'
19232                         # );
19233                         # If it doesn't then the -lp formatting will fail.
19234                         && ( $j > 0 || $old_tok =~ /^,/ )
19235                       )
19236                     {
19237                         $marginal_match = 1
19238                           if ( $marginal_match == 0
19239                             && $maximum_line_index == 0 );
19240                         last;
19241                     }
19242
19243                     goto NO_MATCH;
19244                 }
19245
19246                 # Calculate amount of padding required to fit this in.
19247                 # $pad is the number of spaces by which we must increase
19248                 # the current field to squeeze in this field.
19249                 my $pad =
19250                   length( $$rfields[$j] ) - $old_line->current_field_width($j);
19251                 if ( $j == 0 ) { $pad += $leading_space_count; }
19252
19253                 # remember max pads to limit marginal cases
19254                 if ( $alignment_token ne '#' ) {
19255                     if ( $pad > $max_pad ) { $max_pad = $pad }
19256                     if ( $pad < $min_pad ) { $min_pad = $pad }
19257                 }
19258                 if ( $is_good_alignment{$alignment_token} ) {
19259                     $saw_good_alignment = 1;
19260                 }
19261
19262                 # If patterns don't match, we have to be careful...
19263                 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
19264
19265                     # flag this as a marginal match since patterns differ
19266                     $marginal_match = 1
19267                       if ( $marginal_match == 0 && $maximum_line_index == 0 );
19268
19269                     # We have to be very careful about aligning commas
19270                     # when the pattern's don't match, because it can be
19271                     # worse to create an alignment where none is needed
19272                     # than to omit one.  Here's an example where the ','s
19273                     # are not in named continers.  The first line below
19274                     # should not match the next two:
19275                     #   ( $a, $b ) = ( $b, $r );
19276                     #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
19277                     #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
19278                     if ( $alignment_token eq ',' ) {
19279
19280                        # do not align commas unless they are in named containers
19281                         goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
19282                     }
19283
19284                     # do not align parens unless patterns match;
19285                     # large ugly spaces can occur in math expressions.
19286                     elsif ( $alignment_token eq '(' ) {
19287
19288                         # But we can allow a match if the parens don't
19289                         # require any padding.
19290                         if ( $pad != 0 ) { goto NO_MATCH }
19291                     }
19292
19293                     # Handle an '=' alignment with different patterns to
19294                     # the left.
19295                     elsif ( $alignment_token eq '=' ) {
19296
19297                         # It is best to be a little restrictive when
19298                         # aligning '=' tokens.  Here is an example of
19299                         # two lines that we will not align:
19300                         #       my $variable=6;
19301                         #       $bb=4;
19302                         # The problem is that one is a 'my' declaration,
19303                         # and the other isn't, so they're not very similar.
19304                         # We will filter these out by comparing the first
19305                         # letter of the pattern.  This is crude, but works
19306                         # well enough.
19307                         if (
19308                             substr( $$old_rpatterns[$j], 0, 1 ) ne
19309                             substr( $$rpatterns[$j], 0, 1 ) )
19310                         {
19311                             goto NO_MATCH;
19312                         }
19313
19314                         # If we pass that test, we'll call it a marginal match.
19315                         # Here is an example of a marginal match:
19316                         #       $done{$$op} = 1;
19317                         #       $op         = compile_bblock($op);
19318                         # The left tokens are both identifiers, but
19319                         # one accesses a hash and the other doesn't.
19320                         # We'll let this be a tentative match and undo
19321                         # it later if we don't find more than 2 lines
19322                         # in the group.
19323                         elsif ( $maximum_line_index == 0 ) {
19324                             $marginal_match =
19325                               2;    # =2 prevents being undone below
19326                         }
19327                     }
19328                 }
19329
19330                 # Don't let line with fewer fields increase column widths
19331                 # ( align3.t )
19332                 if ( $maximum_field_index > $jmax ) {
19333
19334                     # Exception: suspend this rule to allow last lines to join
19335                     if ( $pad > 0 ) { goto NO_MATCH; }
19336                 }
19337             } ## end for my $j ( 0 .. $jlimit)
19338
19339             # Turn off the "marginal match" flag in some cases...
19340             # A "marginal match" occurs when the alignment tokens agree
19341             # but there are differences in the other tokens (patterns).
19342             # If we leave the marginal match flag set, then the rule is that we
19343             # will align only if there are more than two lines in the group.
19344             # We will turn of the flag if we almost have a match
19345             # and either we have seen a good alignment token or we
19346             # just need a small pad (2 spaces) to fit.  These rules are
19347             # the result of experimentation.  Tokens which misaligned by just
19348             # one or two characters are annoying.  On the other hand,
19349             # large gaps to less important alignment tokens are also annoying.
19350             if (   $marginal_match == 1
19351                 && $jmax == $maximum_field_index
19352                 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
19353               )
19354             {
19355                 $marginal_match = 0;
19356             }
19357             ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
19358         }
19359
19360         # We have a match (even if marginal).
19361         # If the current line has fewer fields than the current group
19362         # but otherwise matches, copy the remaining group fields to
19363         # make it a perfect match.
19364         if ( $maximum_field_index > $jmax ) {
19365             my $comment = $$rfields[$jmax];
19366             for $jmax ( $jlimit .. $maximum_field_index ) {
19367                 $$rtokens[$jmax]     = $$old_rtokens[$jmax];
19368                 $$rfields[ ++$jmax ] = '';
19369                 $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
19370             }
19371             $$rfields[$jmax] = $comment;
19372             $new_line->set_jmax($jmax);
19373         }
19374         return;
19375
19376       NO_MATCH:
19377         ##print "BUBBA: no match jmax=$jmax  max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
19378         my_flush();
19379         return;
19380     }
19381 }
19382
19383 sub check_fit {
19384
19385     return unless ( $maximum_line_index >= 0 );
19386     my $new_line = shift;
19387     my $old_line = shift;
19388
19389     my $jmax                    = $new_line->get_jmax();
19390     my $leading_space_count     = $new_line->get_leading_space_count();
19391     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19392     my $rtokens                 = $new_line->get_rtokens();
19393     my $rfields                 = $new_line->get_rfields();
19394     my $rpatterns               = $new_line->get_rpatterns();
19395
19396     my $group_list_type = $group_lines[0]->get_list_type();
19397
19398     my $padding_so_far    = 0;
19399     my $padding_available = $old_line->get_available_space_on_right();
19400
19401     # save current columns in case this doesn't work
19402     save_alignment_columns();
19403
19404     my ( $j, $pad, $eight );
19405     my $maximum_field_index = $old_line->get_jmax();
19406     for $j ( 0 .. $jmax ) {
19407
19408         $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
19409
19410         if ( $j == 0 ) {
19411             $pad += $leading_space_count;
19412         }
19413
19414         # remember largest gap of the group, excluding gap to side comment
19415         if (   $pad < 0
19416             && $group_maximum_gap < -$pad
19417             && $j > 0
19418             && $j < $jmax - 1 )
19419         {
19420             $group_maximum_gap = -$pad;
19421         }
19422
19423         next if $pad < 0;
19424
19425         ## This patch helps sometimes, but it doesn't check to see if
19426         ## the line is too long even without the side comment.  It needs
19427         ## to be reworked.
19428         ##don't let a long token with no trailing side comment push
19429         ##side comments out, or end a group.  (sidecmt1.t)
19430         ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
19431
19432         # This line will need space; lets see if we want to accept it..
19433         if (
19434
19435             # not if this won't fit
19436             ( $pad > $padding_available )
19437
19438             # previously, there were upper bounds placed on padding here
19439             # (maximum_whitespace_columns), but they were not really helpful
19440
19441           )
19442         {
19443
19444             # revert to starting state then flush; things didn't work out
19445             restore_alignment_columns();
19446             my_flush();
19447             last;
19448         }
19449
19450         # patch to avoid excessive gaps in previous lines,
19451         # due to a line of fewer fields.
19452         #   return join( ".",
19453         #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
19454         #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
19455         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
19456
19457         # looks ok, squeeze this field in
19458         $old_line->increase_field_width( $j, $pad );
19459         $padding_available -= $pad;
19460
19461         # remember largest gap of the group, excluding gap to side comment
19462         if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
19463             $group_maximum_gap = $pad;
19464         }
19465     }
19466 }
19467
19468 sub accept_line {
19469
19470     # The current line either starts a new alignment group or is
19471     # accepted into the current alignment group.
19472     my $new_line = shift;
19473     $group_lines[ ++$maximum_line_index ] = $new_line;
19474
19475     # initialize field lengths if starting new group
19476     if ( $maximum_line_index == 0 ) {
19477
19478         my $jmax    = $new_line->get_jmax();
19479         my $rfields = $new_line->get_rfields();
19480         my $rtokens = $new_line->get_rtokens();
19481         my $j;
19482         my $col = $new_line->get_leading_space_count();
19483
19484         for $j ( 0 .. $jmax ) {
19485             $col += length( $$rfields[$j] );
19486
19487             # create initial alignments for the new group
19488             my $token = "";
19489             if ( $j < $jmax ) { $token = $$rtokens[$j] }
19490             my $alignment = make_alignment( $col, $token );
19491             $new_line->set_alignment( $j, $alignment );
19492         }
19493
19494         $maximum_jmax_seen = $jmax;
19495         $minimum_jmax_seen = $jmax;
19496     }
19497
19498     # use previous alignments otherwise
19499     else {
19500         my @new_alignments =
19501           $group_lines[ $maximum_line_index - 1 ]->get_alignments();
19502         $new_line->set_alignments(@new_alignments);
19503     }
19504
19505     # remember group jmax extremes for next call to append_line
19506     $previous_minimum_jmax_seen = $minimum_jmax_seen;
19507     $previous_maximum_jmax_seen = $maximum_jmax_seen;
19508 }
19509
19510 sub dump_array {
19511
19512     # debug routine to dump array contents
19513     local $" = ')(';
19514     print "(@_)\n";
19515 }
19516
19517 # flush() sends the current Perl::Tidy::VerticalAligner group down the
19518 # pipeline to Perl::Tidy::FileWriter.
19519
19520 # This is the external flush, which also empties the cache
19521 sub flush {
19522
19523     if ( $maximum_line_index < 0 ) {
19524         if ($cached_line_type) {
19525             $seqno_string = $cached_seqno_string;
19526             entab_and_output( $cached_line_text,
19527                 $cached_line_leading_space_count,
19528                 $last_group_level_written );
19529             $cached_line_type    = 0;
19530             $cached_line_text    = "";
19531             $cached_seqno_string = "";
19532         }
19533     }
19534     else {
19535         my_flush();
19536     }
19537 }
19538
19539 # This is the internal flush, which leaves the cache intact
19540 sub my_flush {
19541
19542     return if ( $maximum_line_index < 0 );
19543
19544     # handle a group of comment lines
19545     if ( $group_type eq 'COMMENT' ) {
19546
19547         VALIGN_DEBUG_FLAG_APPEND0 && do {
19548             my ( $a, $b, $c ) = caller();
19549             print
19550 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
19551
19552         };
19553         my $leading_space_count = $comment_leading_space_count;
19554         my $leading_string      = get_leading_string($leading_space_count);
19555
19556         # zero leading space count if any lines are too long
19557         my $max_excess = 0;
19558         for my $i ( 0 .. $maximum_line_index ) {
19559             my $str = $group_lines[$i];
19560             my $excess =
19561               length($str) + $leading_space_count - $rOpts_maximum_line_length;
19562             if ( $excess > $max_excess ) {
19563                 $max_excess = $excess;
19564             }
19565         }
19566
19567         if ( $max_excess > 0 ) {
19568             $leading_space_count -= $max_excess;
19569             if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
19570             $last_outdented_line_at =
19571               $file_writer_object->get_output_line_number();
19572             unless ($outdented_line_count) {
19573                 $first_outdented_line_at = $last_outdented_line_at;
19574             }
19575             $outdented_line_count += ( $maximum_line_index + 1 );
19576         }
19577
19578         # write the group of lines
19579         my $outdent_long_lines = 0;
19580         for my $i ( 0 .. $maximum_line_index ) {
19581             write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
19582                 $outdent_long_lines, "" );
19583         }
19584     }
19585
19586     # handle a group of code lines
19587     else {
19588
19589         VALIGN_DEBUG_FLAG_APPEND0 && do {
19590             my $group_list_type = $group_lines[0]->get_list_type();
19591             my ( $a, $b, $c ) = caller();
19592             my $maximum_field_index = $group_lines[0]->get_jmax();
19593             print
19594 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
19595
19596         };
19597
19598         # some small groups are best left unaligned
19599         my $do_not_align = decide_if_aligned();
19600
19601         # optimize side comment location
19602         $do_not_align = adjust_side_comment($do_not_align);
19603
19604         # recover spaces for -lp option if possible
19605         my $extra_leading_spaces = get_extra_leading_spaces();
19606
19607         # all lines of this group have the same basic leading spacing
19608         my $group_leader_length = $group_lines[0]->get_leading_space_count();
19609
19610         # add extra leading spaces if helpful
19611         my $min_ci_gap = improve_continuation_indentation( $do_not_align,
19612             $group_leader_length );
19613
19614         # loop to output all lines
19615         for my $i ( 0 .. $maximum_line_index ) {
19616             my $line = $group_lines[$i];
19617             write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
19618                 $group_leader_length, $extra_leading_spaces );
19619         }
19620     }
19621     initialize_for_new_group();
19622 }
19623
19624 sub decide_if_aligned {
19625
19626     # Do not try to align two lines which are not really similar
19627     return unless $maximum_line_index == 1;
19628     return if ($is_matching_terminal_line);
19629
19630     my $group_list_type = $group_lines[0]->get_list_type();
19631
19632     my $do_not_align = (
19633
19634         # always align lists
19635         !$group_list_type
19636
19637           && (
19638
19639             # don't align if it was just a marginal match
19640             $marginal_match
19641
19642             # don't align two lines with big gap
19643             || $group_maximum_gap > 12
19644
19645             # or lines with differing number of alignment tokens
19646             # TODO: this could be improved.  It occasionally rejects
19647             # good matches.
19648             || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
19649           )
19650     );
19651
19652     # But try to convert them into a simple comment group if the first line
19653     # a has side comment
19654     my $rfields             = $group_lines[0]->get_rfields();
19655     my $maximum_field_index = $group_lines[0]->get_jmax();
19656     if (   $do_not_align
19657         && ( $maximum_line_index > 0 )
19658         && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
19659     {
19660         combine_fields();
19661         $do_not_align = 0;
19662     }
19663     return $do_not_align;
19664 }
19665
19666 sub adjust_side_comment {
19667
19668     my $do_not_align = shift;
19669
19670     # let's see if we can move the side comment field out a little
19671     # to improve readability (the last field is always a side comment field)
19672     my $have_side_comment       = 0;
19673     my $first_side_comment_line = -1;
19674     my $maximum_field_index     = $group_lines[0]->get_jmax();
19675     for my $i ( 0 .. $maximum_line_index ) {
19676         my $line = $group_lines[$i];
19677
19678         if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
19679             $have_side_comment       = 1;
19680             $first_side_comment_line = $i;
19681             last;
19682         }
19683     }
19684
19685     my $kmax = $maximum_field_index + 1;
19686
19687     if ($have_side_comment) {
19688
19689         my $line = $group_lines[0];
19690
19691         # the maximum space without exceeding the line length:
19692         my $avail = $line->get_available_space_on_right();
19693
19694         # try to use the previous comment column
19695         my $side_comment_column = $line->get_column( $kmax - 2 );
19696         my $move                = $last_comment_column - $side_comment_column;
19697
19698 ##        my $sc_line0 = $side_comment_history[0]->[0];
19699 ##        my $sc_col0  = $side_comment_history[0]->[1];
19700 ##        my $sc_line1 = $side_comment_history[1]->[0];
19701 ##        my $sc_col1  = $side_comment_history[1]->[1];
19702 ##        my $sc_line2 = $side_comment_history[2]->[0];
19703 ##        my $sc_col2  = $side_comment_history[2]->[1];
19704 ##
19705 ##        # FUTURE UPDATES:
19706 ##        # Be sure to ignore 'do not align' and  '} # end comments'
19707 ##        # Find first $move > 0 and $move <= $avail as follows:
19708 ##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
19709 ##        # 2. try sc_col2 if (line-sc_line2) < 12
19710 ##        # 3. try min possible space, plus up to 8,
19711 ##        # 4. try min possible space
19712
19713         if ( $kmax > 0 && !$do_not_align ) {
19714
19715             # but if this doesn't work, give up and use the minimum space
19716             if ( $move > $avail ) {
19717                 $move = $rOpts_minimum_space_to_comment - 1;
19718             }
19719
19720             # but we want some minimum space to the comment
19721             my $min_move = $rOpts_minimum_space_to_comment - 1;
19722             if (   $move >= 0
19723                 && $last_side_comment_length > 0
19724                 && ( $first_side_comment_line == 0 )
19725                 && $group_level == $last_group_level_written )
19726             {
19727                 $min_move = 0;
19728             }
19729
19730             if ( $move < $min_move ) {
19731                 $move = $min_move;
19732             }
19733
19734             # prevously, an upper bound was placed on $move here,
19735             # (maximum_space_to_comment), but it was not helpful
19736
19737             # don't exceed the available space
19738             if ( $move > $avail ) { $move = $avail }
19739
19740             # we can only increase space, never decrease
19741             if ( $move > 0 ) {
19742                 $line->increase_field_width( $maximum_field_index - 1, $move );
19743             }
19744
19745             # remember this column for the next group
19746             $last_comment_column = $line->get_column( $kmax - 2 );
19747         }
19748         else {
19749
19750             # try to at least line up the existing side comment location
19751             if ( $kmax > 0 && $move > 0 && $move < $avail ) {
19752                 $line->increase_field_width( $maximum_field_index - 1, $move );
19753                 $do_not_align = 0;
19754             }
19755
19756             # reset side comment column if we can't align
19757             else {
19758                 forget_side_comment();
19759             }
19760         }
19761     }
19762     return $do_not_align;
19763 }
19764
19765 sub improve_continuation_indentation {
19766     my ( $do_not_align, $group_leader_length ) = @_;
19767
19768     # See if we can increase the continuation indentation
19769     # to move all continuation lines closer to the next field
19770     # (unless it is a comment).
19771     #
19772     # '$min_ci_gap'is the extra indentation that we may need to introduce.
19773     # We will only introduce this to fields which already have some ci.
19774     # Without this variable, we would occasionally get something like this
19775     # (Complex.pm):
19776     #
19777     # use overload '+' => \&plus,
19778     #   '-'            => \&minus,
19779     #   '*'            => \&multiply,
19780     #   ...
19781     #   'tan'          => \&tan,
19782     #   'atan2'        => \&atan2,
19783     #
19784     # Whereas with this variable, we can shift variables over to get this:
19785     #
19786     # use overload '+' => \&plus,
19787     #          '-'     => \&minus,
19788     #          '*'     => \&multiply,
19789     #          ...
19790     #          'tan'   => \&tan,
19791     #          'atan2' => \&atan2,
19792
19793     ## BUB: Deactivated####################
19794     # The trouble with this patch is that it may, for example,
19795     # move in some 'or's  or ':'s, and leave some out, so that the
19796     # left edge alignment suffers.
19797     return 0;
19798     ###########################################
19799
19800     my $maximum_field_index = $group_lines[0]->get_jmax();
19801
19802     my $min_ci_gap = $rOpts_maximum_line_length;
19803     if ( $maximum_field_index > 1 && !$do_not_align ) {
19804
19805         for my $i ( 0 .. $maximum_line_index ) {
19806             my $line                = $group_lines[$i];
19807             my $leading_space_count = $line->get_leading_space_count();
19808             my $rfields             = $line->get_rfields();
19809
19810             my $gap =
19811               $line->get_column(0) -
19812               $leading_space_count -
19813               length( $$rfields[0] );
19814
19815             if ( $leading_space_count > $group_leader_length ) {
19816                 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
19817             }
19818         }
19819
19820         if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
19821             $min_ci_gap = 0;
19822         }
19823     }
19824     else {
19825         $min_ci_gap = 0;
19826     }
19827     return $min_ci_gap;
19828 }
19829
19830 sub write_vertically_aligned_line {
19831
19832     my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
19833         $extra_leading_spaces )
19834       = @_;
19835     my $rfields                   = $line->get_rfields();
19836     my $leading_space_count       = $line->get_leading_space_count();
19837     my $outdent_long_lines        = $line->get_outdent_long_lines();
19838     my $maximum_field_index       = $line->get_jmax();
19839     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
19840
19841     # add any extra spaces
19842     if ( $leading_space_count > $group_leader_length ) {
19843         $leading_space_count += $min_ci_gap;
19844     }
19845
19846     my $str = $$rfields[0];
19847
19848     # loop to concatenate all fields of this line and needed padding
19849     my $total_pad_count = 0;
19850     my ( $j, $pad );
19851     for $j ( 1 .. $maximum_field_index ) {
19852
19853         # skip zero-length side comments
19854         last
19855           if ( ( $j == $maximum_field_index )
19856             && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
19857           );
19858
19859         # compute spaces of padding before this field
19860         my $col = $line->get_column( $j - 1 );
19861         $pad = $col - ( length($str) + $leading_space_count );
19862
19863         if ($do_not_align) {
19864             $pad =
19865               ( $j < $maximum_field_index )
19866               ? 0
19867               : $rOpts_minimum_space_to_comment - 1;
19868         }
19869
19870         # if the -fpsc flag is set, move the side comment to the selected
19871         # column if and only if it is possible, ignoring constraints on
19872         # line length and minimum space to comment
19873         if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
19874         {
19875             my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
19876             if ( $newpad >= 0 ) { $pad = $newpad; }
19877         }
19878
19879         # accumulate the padding
19880         if ( $pad > 0 ) { $total_pad_count += $pad; }
19881
19882         # add this field
19883         if ( !defined $$rfields[$j] ) {
19884             write_diagnostics("UNDEFined field at j=$j\n");
19885         }
19886
19887         # only add padding when we have a finite field;
19888         # this avoids extra terminal spaces if we have empty fields
19889         if ( length( $$rfields[$j] ) > 0 ) {
19890             $str .= ' ' x $total_pad_count;
19891             $total_pad_count = 0;
19892             $str .= $$rfields[$j];
19893         }
19894         else {
19895             $total_pad_count = 0;
19896         }
19897
19898         # update side comment history buffer
19899         if ( $j == $maximum_field_index ) {
19900             my $lineno = $file_writer_object->get_output_line_number();
19901             shift @side_comment_history;
19902             push @side_comment_history, [ $lineno, $col ];
19903         }
19904     }
19905
19906     my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
19907
19908     # ship this line off
19909     write_leader_and_string( $leading_space_count + $extra_leading_spaces,
19910         $str, $side_comment_length, $outdent_long_lines,
19911         $rvertical_tightness_flags );
19912 }
19913
19914 sub get_extra_leading_spaces {
19915
19916     #----------------------------------------------------------
19917     # Define any extra indentation space (for the -lp option).
19918     # Here is why:
19919     # If a list has side comments, sub scan_list must dump the
19920     # list before it sees everything.  When this happens, it sets
19921     # the indentation to the standard scheme, but notes how
19922     # many spaces it would have liked to use.  We may be able
19923     # to recover that space here in the event that that all of the
19924     # lines of a list are back together again.
19925     #----------------------------------------------------------
19926
19927     my $extra_leading_spaces = 0;
19928     if ($extra_indent_ok) {
19929         my $object = $group_lines[0]->get_indentation();
19930         if ( ref($object) ) {
19931             my $extra_indentation_spaces_wanted =
19932               get_RECOVERABLE_SPACES($object);
19933
19934             # all indentation objects must be the same
19935             my $i;
19936             for $i ( 1 .. $maximum_line_index ) {
19937                 if ( $object != $group_lines[$i]->get_indentation() ) {
19938                     $extra_indentation_spaces_wanted = 0;
19939                     last;
19940                 }
19941             }
19942
19943             if ($extra_indentation_spaces_wanted) {
19944
19945                 # the maximum space without exceeding the line length:
19946                 my $avail = $group_lines[0]->get_available_space_on_right();
19947                 $extra_leading_spaces =
19948                   ( $avail > $extra_indentation_spaces_wanted )
19949                   ? $extra_indentation_spaces_wanted
19950                   : $avail;
19951
19952                 # update the indentation object because with -icp the terminal
19953                 # ');' will use the same adjustment.
19954                 $object->permanently_decrease_AVAILABLE_SPACES(
19955                     -$extra_leading_spaces );
19956             }
19957         }
19958     }
19959     return $extra_leading_spaces;
19960 }
19961
19962 sub combine_fields {
19963
19964     # combine all fields except for the comment field  ( sidecmt.t )
19965     # Uses global variables:
19966     #  @group_lines
19967     #  $maximum_line_index
19968     my ( $j, $k );
19969     my $maximum_field_index = $group_lines[0]->get_jmax();
19970     for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
19971         my $line    = $group_lines[$j];
19972         my $rfields = $line->get_rfields();
19973         foreach ( 1 .. $maximum_field_index - 1 ) {
19974             $$rfields[0] .= $$rfields[$_];
19975         }
19976         $$rfields[1] = $$rfields[$maximum_field_index];
19977
19978         $line->set_jmax(1);
19979         $line->set_column( 0, 0 );
19980         $line->set_column( 1, 0 );
19981
19982     }
19983     $maximum_field_index = 1;
19984
19985     for $j ( 0 .. $maximum_line_index ) {
19986         my $line    = $group_lines[$j];
19987         my $rfields = $line->get_rfields();
19988         for $k ( 0 .. $maximum_field_index ) {
19989             my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
19990             if ( $k == 0 ) {
19991                 $pad += $group_lines[$j]->get_leading_space_count();
19992             }
19993
19994             if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
19995
19996         }
19997     }
19998 }
19999
20000 sub get_output_line_number {
20001
20002     # the output line number reported to a caller is the number of items
20003     # written plus the number of items in the buffer
20004     my $self = shift;
20005     1 + $maximum_line_index + $file_writer_object->get_output_line_number();
20006 }
20007
20008 sub write_leader_and_string {
20009
20010     my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
20011         $rvertical_tightness_flags )
20012       = @_;
20013
20014     # handle outdenting of long lines:
20015     if ($outdent_long_lines) {
20016         my $excess =
20017           length($str) -
20018           $side_comment_length +
20019           $leading_space_count -
20020           $rOpts_maximum_line_length;
20021         if ( $excess > 0 ) {
20022             $leading_space_count = 0;
20023             $last_outdented_line_at =
20024               $file_writer_object->get_output_line_number();
20025
20026             unless ($outdented_line_count) {
20027                 $first_outdented_line_at = $last_outdented_line_at;
20028             }
20029             $outdented_line_count++;
20030         }
20031     }
20032
20033     # Make preliminary leading whitespace.  It could get changed
20034     # later by entabbing, so we have to keep track of any changes
20035     # to the leading_space_count from here on.
20036     my $leading_string =
20037       $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
20038
20039     # Unpack any recombination data; it was packed by
20040     # sub send_lines_to_vertical_aligner. Contents:
20041     #
20042     #   [0] type: 1=opening  2=closing  3=opening block brace
20043     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
20044     #             if closing: spaces of padding to use
20045     #   [2] sequence number of container
20046     #   [3] valid flag: do not append if this flag is false
20047     #
20048     my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20049         $seqno_end );
20050     if ($rvertical_tightness_flags) {
20051         (
20052             $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
20053             $seqno_end
20054         ) = @{$rvertical_tightness_flags};
20055     }
20056
20057     $seqno_string = $seqno_end;
20058
20059     # handle any cached line ..
20060     # either append this line to it or write it out
20061     if ( length($cached_line_text) ) {
20062
20063         if ( !$cached_line_valid ) {
20064             entab_and_output( $cached_line_text,
20065                 $cached_line_leading_space_count,
20066                 $last_group_level_written );
20067         }
20068
20069         # handle cached line with opening container token
20070         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
20071
20072             my $gap = $leading_space_count - length($cached_line_text);
20073
20074             # handle option of just one tight opening per line:
20075             if ( $cached_line_flag == 1 ) {
20076                 if ( defined($open_or_close) && $open_or_close == 1 ) {
20077                     $gap = -1;
20078                 }
20079             }
20080
20081             if ( $gap >= 0 ) {
20082                 $leading_string      = $cached_line_text . ' ' x $gap;
20083                 $leading_space_count = $cached_line_leading_space_count;
20084                 $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
20085             }
20086             else {
20087                 entab_and_output( $cached_line_text,
20088                     $cached_line_leading_space_count,
20089                     $last_group_level_written );
20090             }
20091         }
20092
20093         # handle cached line to place before this closing container token
20094         else {
20095             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
20096
20097             if ( length($test_line) <= $rOpts_maximum_line_length ) {
20098
20099                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
20100
20101                 # Patch to outdent closing tokens ending # in ');'
20102                 # If we are joining a line like ');' to a previous stacked
20103                 # set of closing tokens, then decide if we may outdent the
20104                 # combined stack to the indentation of the ');'.  Since we
20105                 # should not normally outdent any of the other tokens more than
20106                 # the indentation of the lines that contained them, we will
20107                 # only do this if all of the corresponding opening
20108                 # tokens were on the same line.  This can happen with
20109                 # -sot and -sct.  For example, it is ok here:
20110                 #   __PACKAGE__->load_components( qw(
20111                 #         PK::Auto
20112                 #         Core
20113                 #   ));
20114                 #
20115                 #   But, for example, we do not outdent in this example because
20116                 #   that would put the closing sub brace out farther than the
20117                 #   opening sub brace:
20118                 #
20119                 #   perltidy -sot -sct
20120                 #   $c->Tk::bind(
20121                 #       '<Control-f>' => sub {
20122                 #           my ($c) = @_;
20123                 #           my $e = $c->XEvent;
20124                 #           itemsUnderArea $c;
20125                 #       } );
20126                 #
20127                 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
20128
20129                     # The way to tell this is if the stacked sequence numbers
20130                     # of this output line are the reverse of the stacked
20131                     # sequence numbers of the previous non-blank line of
20132                     # sequence numbers.  So we can join if the previous
20133                     # nonblank string of tokens is the mirror image.  For
20134                     # example if stack )}] is 13:8:6 then we are looking for a
20135                     # leading stack like [{( which is 6:8:13 We only need to
20136                     # check the two ends, because the intermediate tokens must
20137                     # fall in order.  Note on speed: having to split on colons
20138                     # and eliminate multiple colons might appear to be slow,
20139                     # but it's not an issue because we almost never come
20140                     # through here.  In a typical file we don't.
20141                     $seqno_string               =~ s/^:+//;
20142                     $last_nonblank_seqno_string =~ s/^:+//;
20143                     $seqno_string               =~ s/:+/:/g;
20144                     $last_nonblank_seqno_string =~ s/:+/:/g;
20145
20146                     # how many spaces can we outdent?
20147                     my $diff =
20148                       $cached_line_leading_space_count - $leading_space_count;
20149                     if (   $diff > 0
20150                         && length($seqno_string)
20151                         && length($last_nonblank_seqno_string) ==
20152                         length($seqno_string) )
20153                     {
20154                         my @seqno_last =
20155                           ( split ':', $last_nonblank_seqno_string );
20156                         my @seqno_now = ( split ':', $seqno_string );
20157                         if (   $seqno_now[-1] == $seqno_last[0]
20158                             && $seqno_now[0] == $seqno_last[-1] )
20159                         {
20160
20161                             # OK to outdent ..
20162                             # for absolute safety, be sure we only remove
20163                             # whitespace
20164                             my $ws = substr( $test_line, 0, $diff );
20165                             if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
20166
20167                                 $test_line = substr( $test_line, $diff );
20168                                 $cached_line_leading_space_count -= $diff;
20169                             }
20170
20171                             # shouldn't happen, but not critical:
20172                             ##else {
20173                             ## ERROR transferring indentation here
20174                             ##}
20175                         }
20176                     }
20177                 }
20178
20179                 $str                 = $test_line;
20180                 $leading_string      = "";
20181                 $leading_space_count = $cached_line_leading_space_count;
20182             }
20183             else {
20184                 entab_and_output( $cached_line_text,
20185                     $cached_line_leading_space_count,
20186                     $last_group_level_written );
20187             }
20188         }
20189     }
20190     $cached_line_type = 0;
20191     $cached_line_text = "";
20192
20193     # make the line to be written
20194     my $line = $leading_string . $str;
20195
20196     # write or cache this line
20197     if ( !$open_or_close || $side_comment_length > 0 ) {
20198         entab_and_output( $line, $leading_space_count, $group_level );
20199     }
20200     else {
20201         $cached_line_text                = $line;
20202         $cached_line_type                = $open_or_close;
20203         $cached_line_flag                = $tightness_flag;
20204         $cached_seqno                    = $seqno;
20205         $cached_line_valid               = $valid;
20206         $cached_line_leading_space_count = $leading_space_count;
20207         $cached_seqno_string             = $seqno_string;
20208     }
20209
20210     $last_group_level_written = $group_level;
20211     $last_side_comment_length = $side_comment_length;
20212     $extra_indent_ok          = 0;
20213 }
20214
20215 sub entab_and_output {
20216     my ( $line, $leading_space_count, $level ) = @_;
20217
20218     # The line is currently correct if there is no tabbing (recommended!)
20219     # We may have to lop off some leading spaces and replace with tabs.
20220     if ( $leading_space_count > 0 ) {
20221
20222         # Nothing to do if no tabs
20223         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20224             || $rOpts_indent_columns <= 0 )
20225         {
20226
20227             # nothing to do
20228         }
20229
20230         # Handle entab option
20231         elsif ($rOpts_entab_leading_whitespace) {
20232             my $space_count =
20233               $leading_space_count % $rOpts_entab_leading_whitespace;
20234             my $tab_count =
20235               int( $leading_space_count / $rOpts_entab_leading_whitespace );
20236             my $leading_string = "\t" x $tab_count . ' ' x $space_count;
20237             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20238                 substr( $line, 0, $leading_space_count ) = $leading_string;
20239             }
20240             else {
20241
20242                 # REMOVE AFTER TESTING
20243                 # shouldn't happen - program error counting whitespace
20244                 # we'll skip entabbing
20245                 warning(
20246 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20247                 );
20248             }
20249         }
20250
20251         # Handle option of one tab per level
20252         else {
20253             my $leading_string = ( "\t" x $level );
20254             my $space_count =
20255               $leading_space_count - $level * $rOpts_indent_columns;
20256
20257             # shouldn't happen:
20258             if ( $space_count < 0 ) {
20259                 warning(
20260 "Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
20261                 );
20262                 $leading_string = ( ' ' x $leading_space_count );
20263             }
20264             else {
20265                 $leading_string .= ( ' ' x $space_count );
20266             }
20267             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20268                 substr( $line, 0, $leading_space_count ) = $leading_string;
20269             }
20270             else {
20271
20272                 # REMOVE AFTER TESTING
20273                 # shouldn't happen - program error counting whitespace
20274                 # we'll skip entabbing
20275                 warning(
20276 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20277                 );
20278             }
20279         }
20280     }
20281     $file_writer_object->write_code_line( $line . "\n" );
20282     if ($seqno_string) {
20283         $last_nonblank_seqno_string = $seqno_string;
20284     }
20285 }
20286
20287 {    # begin get_leading_string
20288
20289     my @leading_string_cache;
20290
20291     sub get_leading_string {
20292
20293         # define the leading whitespace string for this line..
20294         my $leading_whitespace_count = shift;
20295
20296         # Handle case of zero whitespace, which includes multi-line quotes
20297         # (which may have a finite level; this prevents tab problems)
20298         if ( $leading_whitespace_count <= 0 ) {
20299             return "";
20300         }
20301
20302         # look for previous result
20303         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
20304             return $leading_string_cache[$leading_whitespace_count];
20305         }
20306
20307         # must compute a string for this number of spaces
20308         my $leading_string;
20309
20310         # Handle simple case of no tabs
20311         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20312             || $rOpts_indent_columns <= 0 )
20313         {
20314             $leading_string = ( ' ' x $leading_whitespace_count );
20315         }
20316
20317         # Handle entab option
20318         elsif ($rOpts_entab_leading_whitespace) {
20319             my $space_count =
20320               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
20321             my $tab_count = int(
20322                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
20323             $leading_string = "\t" x $tab_count . ' ' x $space_count;
20324         }
20325
20326         # Handle option of one tab per level
20327         else {
20328             $leading_string = ( "\t" x $group_level );
20329             my $space_count =
20330               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
20331
20332             # shouldn't happen:
20333             if ( $space_count < 0 ) {
20334                 warning(
20335 "Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
20336                 );
20337                 $leading_string = ( ' ' x $leading_whitespace_count );
20338             }
20339             else {
20340                 $leading_string .= ( ' ' x $space_count );
20341             }
20342         }
20343         $leading_string_cache[$leading_whitespace_count] = $leading_string;
20344         return $leading_string;
20345     }
20346 }    # end get_leading_string
20347
20348 sub report_anything_unusual {
20349     my $self = shift;
20350     if ( $outdented_line_count > 0 ) {
20351         write_logfile_entry(
20352             "$outdented_line_count long lines were outdented:\n");
20353         write_logfile_entry(
20354             "  First at output line $first_outdented_line_at\n");
20355
20356         if ( $outdented_line_count > 1 ) {
20357             write_logfile_entry(
20358                 "   Last at output line $last_outdented_line_at\n");
20359         }
20360         write_logfile_entry(
20361             "  use -noll to prevent outdenting, -l=n to increase line length\n"
20362         );
20363         write_logfile_entry("\n");
20364     }
20365 }
20366
20367 #####################################################################
20368 #
20369 # the Perl::Tidy::FileWriter class writes the output file
20370 #
20371 #####################################################################
20372
20373 package Perl::Tidy::FileWriter;
20374
20375 # Maximum number of little messages; probably need not be changed.
20376 use constant MAX_NAG_MESSAGES => 6;
20377
20378 sub write_logfile_entry {
20379     my $self          = shift;
20380     my $logger_object = $self->{_logger_object};
20381     if ($logger_object) {
20382         $logger_object->write_logfile_entry(@_);
20383     }
20384 }
20385
20386 sub new {
20387     my $class = shift;
20388     my ( $line_sink_object, $rOpts, $logger_object ) = @_;
20389
20390     bless {
20391         _line_sink_object           => $line_sink_object,
20392         _logger_object              => $logger_object,
20393         _rOpts                      => $rOpts,
20394         _output_line_number         => 1,
20395         _consecutive_blank_lines    => 0,
20396         _consecutive_nonblank_lines => 0,
20397         _first_line_length_error    => 0,
20398         _max_line_length_error      => 0,
20399         _last_line_length_error     => 0,
20400         _first_line_length_error_at => 0,
20401         _max_line_length_error_at   => 0,
20402         _last_line_length_error_at  => 0,
20403         _line_length_error_count    => 0,
20404         _max_output_line_length     => 0,
20405         _max_output_line_length_at  => 0,
20406     }, $class;
20407 }
20408
20409 sub tee_on {
20410     my $self = shift;
20411     $self->{_line_sink_object}->tee_on();
20412 }
20413
20414 sub tee_off {
20415     my $self = shift;
20416     $self->{_line_sink_object}->tee_off();
20417 }
20418
20419 sub get_output_line_number {
20420     my $self = shift;
20421     return $self->{_output_line_number};
20422 }
20423
20424 sub decrement_output_line_number {
20425     my $self = shift;
20426     $self->{_output_line_number}--;
20427 }
20428
20429 sub get_consecutive_nonblank_lines {
20430     my $self = shift;
20431     return $self->{_consecutive_nonblank_lines};
20432 }
20433
20434 sub reset_consecutive_blank_lines {
20435     my $self = shift;
20436     $self->{_consecutive_blank_lines} = 0;
20437 }
20438
20439 sub want_blank_line {
20440     my $self = shift;
20441     unless ( $self->{_consecutive_blank_lines} ) {
20442         $self->write_blank_code_line();
20443     }
20444 }
20445
20446 sub write_blank_code_line {
20447     my $self   = shift;
20448     my $forced = shift;
20449     my $rOpts  = $self->{_rOpts};
20450     return
20451       if (!$forced
20452         && $self->{_consecutive_blank_lines} >=
20453         $rOpts->{'maximum-consecutive-blank-lines'} );
20454     $self->{_consecutive_blank_lines}++;
20455     $self->{_consecutive_nonblank_lines} = 0;
20456     $self->write_line("\n");
20457 }
20458
20459 sub write_code_line {
20460     my $self = shift;
20461     my $a    = shift;
20462
20463     if ( $a =~ /^\s*$/ ) {
20464         my $rOpts = $self->{_rOpts};
20465         return
20466           if ( $self->{_consecutive_blank_lines} >=
20467             $rOpts->{'maximum-consecutive-blank-lines'} );
20468         $self->{_consecutive_blank_lines}++;
20469         $self->{_consecutive_nonblank_lines} = 0;
20470     }
20471     else {
20472         $self->{_consecutive_blank_lines} = 0;
20473         $self->{_consecutive_nonblank_lines}++;
20474     }
20475     $self->write_line($a);
20476 }
20477
20478 sub write_line {
20479     my $self = shift;
20480     my $a    = shift;
20481
20482     # TODO: go through and see if the test is necessary here
20483     if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
20484
20485     $self->{_line_sink_object}->write_line($a);
20486
20487     # This calculation of excess line length ignores any internal tabs
20488     my $rOpts  = $self->{_rOpts};
20489     my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
20490     if ( $a =~ /^\t+/g ) {
20491         $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
20492     }
20493
20494     # Note that we just incremented output line number to future value
20495     # so we must subtract 1 for current line number
20496     if ( length($a) > 1 + $self->{_max_output_line_length} ) {
20497         $self->{_max_output_line_length}    = length($a) - 1;
20498         $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
20499     }
20500
20501     if ( $exceed > 0 ) {
20502         my $output_line_number = $self->{_output_line_number};
20503         $self->{_last_line_length_error}    = $exceed;
20504         $self->{_last_line_length_error_at} = $output_line_number - 1;
20505         if ( $self->{_line_length_error_count} == 0 ) {
20506             $self->{_first_line_length_error}    = $exceed;
20507             $self->{_first_line_length_error_at} = $output_line_number - 1;
20508         }
20509
20510         if (
20511             $self->{_last_line_length_error} > $self->{_max_line_length_error} )
20512         {
20513             $self->{_max_line_length_error}    = $exceed;
20514             $self->{_max_line_length_error_at} = $output_line_number - 1;
20515         }
20516
20517         if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
20518             $self->write_logfile_entry(
20519                 "Line length exceeded by $exceed characters\n");
20520         }
20521         $self->{_line_length_error_count}++;
20522     }
20523
20524 }
20525
20526 sub report_line_length_errors {
20527     my $self                    = shift;
20528     my $rOpts                   = $self->{_rOpts};
20529     my $line_length_error_count = $self->{_line_length_error_count};
20530     if ( $line_length_error_count == 0 ) {
20531         $self->write_logfile_entry(
20532             "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
20533         my $max_output_line_length    = $self->{_max_output_line_length};
20534         my $max_output_line_length_at = $self->{_max_output_line_length_at};
20535         $self->write_logfile_entry(
20536 "  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
20537         );
20538
20539     }
20540     else {
20541
20542         my $word = ( $line_length_error_count > 1 ) ? "s" : "";
20543         $self->write_logfile_entry(
20544 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
20545         );
20546
20547         $word = ( $line_length_error_count > 1 ) ? "First" : "";
20548         my $first_line_length_error    = $self->{_first_line_length_error};
20549         my $first_line_length_error_at = $self->{_first_line_length_error_at};
20550         $self->write_logfile_entry(
20551 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
20552         );
20553
20554         if ( $line_length_error_count > 1 ) {
20555             my $max_line_length_error     = $self->{_max_line_length_error};
20556             my $max_line_length_error_at  = $self->{_max_line_length_error_at};
20557             my $last_line_length_error    = $self->{_last_line_length_error};
20558             my $last_line_length_error_at = $self->{_last_line_length_error_at};
20559             $self->write_logfile_entry(
20560 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
20561             );
20562             $self->write_logfile_entry(
20563 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
20564             );
20565         }
20566     }
20567 }
20568
20569 #####################################################################
20570 #
20571 # The Perl::Tidy::Debugger class shows line tokenization
20572 #
20573 #####################################################################
20574
20575 package Perl::Tidy::Debugger;
20576
20577 sub new {
20578
20579     my ( $class, $filename ) = @_;
20580
20581     bless {
20582         _debug_file        => $filename,
20583         _debug_file_opened => 0,
20584         _fh                => undef,
20585     }, $class;
20586 }
20587
20588 sub really_open_debug_file {
20589
20590     my $self       = shift;
20591     my $debug_file = $self->{_debug_file};
20592     my $fh;
20593     unless ( $fh = IO::File->new("> $debug_file") ) {
20594         warn("can't open $debug_file: $!\n");
20595     }
20596     $self->{_debug_file_opened} = 1;
20597     $self->{_fh}                = $fh;
20598     print $fh
20599       "Use -dump-token-types (-dtt) to get a list of token type codes\n";
20600 }
20601
20602 sub close_debug_file {
20603
20604     my $self = shift;
20605     my $fh   = $self->{_fh};
20606     if ( $self->{_debug_file_opened} ) {
20607
20608         eval { $self->{_fh}->close() };
20609     }
20610 }
20611
20612 sub write_debug_entry {
20613
20614     # This is a debug dump routine which may be modified as necessary
20615     # to dump tokens on a line-by-line basis.  The output will be written
20616     # to the .DEBUG file when the -D flag is entered.
20617     my $self           = shift;
20618     my $line_of_tokens = shift;
20619
20620     my $input_line        = $line_of_tokens->{_line_text};
20621     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
20622     my $rtokens           = $line_of_tokens->{_rtokens};
20623     my $rlevels           = $line_of_tokens->{_rlevels};
20624     my $rslevels          = $line_of_tokens->{_rslevels};
20625     my $rblock_type       = $line_of_tokens->{_rblock_type};
20626     my $input_line_number = $line_of_tokens->{_line_number};
20627     my $line_type         = $line_of_tokens->{_line_type};
20628
20629     my ( $j, $num );
20630
20631     my $token_str              = "$input_line_number: ";
20632     my $reconstructed_original = "$input_line_number: ";
20633     my $block_str              = "$input_line_number: ";
20634
20635     #$token_str .= "$line_type: ";
20636     #$reconstructed_original .= "$line_type: ";
20637
20638     my $pattern   = "";
20639     my @next_char = ( '"', '"' );
20640     my $i_next    = 0;
20641     unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
20642     my $fh = $self->{_fh};
20643
20644     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
20645
20646         # testing patterns
20647         if ( $$rtoken_type[$j] eq 'k' ) {
20648             $pattern .= $$rtokens[$j];
20649         }
20650         else {
20651             $pattern .= $$rtoken_type[$j];
20652         }
20653         $reconstructed_original .= $$rtokens[$j];
20654         $block_str .= "($$rblock_type[$j])";
20655         $num = length( $$rtokens[$j] );
20656         my $type_str = $$rtoken_type[$j];
20657
20658         # be sure there are no blank tokens (shouldn't happen)
20659         # This can only happen if a programming error has been made
20660         # because all valid tokens are non-blank
20661         if ( $type_str eq ' ' ) {
20662             print $fh "BLANK TOKEN on the next line\n";
20663             $type_str = $next_char[$i_next];
20664             $i_next   = 1 - $i_next;
20665         }
20666
20667         if ( length($type_str) == 1 ) {
20668             $type_str = $type_str x $num;
20669         }
20670         $token_str .= $type_str;
20671     }
20672
20673     # Write what you want here ...
20674     # print $fh "$input_line\n";
20675     # print $fh "$pattern\n";
20676     print $fh "$reconstructed_original\n";
20677     print $fh "$token_str\n";
20678
20679     #print $fh "$block_str\n";
20680 }
20681
20682 #####################################################################
20683 #
20684 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
20685 # method for returning the next line to be parsed, as well as a
20686 # 'peek_ahead()' method
20687 #
20688 # The input parameter is an object with a 'get_line()' method
20689 # which returns the next line to be parsed
20690 #
20691 #####################################################################
20692
20693 package Perl::Tidy::LineBuffer;
20694
20695 sub new {
20696
20697     my $class              = shift;
20698     my $line_source_object = shift;
20699
20700     return bless {
20701         _line_source_object => $line_source_object,
20702         _rlookahead_buffer  => [],
20703     }, $class;
20704 }
20705
20706 sub peek_ahead {
20707     my $self               = shift;
20708     my $buffer_index       = shift;
20709     my $line               = undef;
20710     my $line_source_object = $self->{_line_source_object};
20711     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
20712     if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
20713         $line = $$rlookahead_buffer[$buffer_index];
20714     }
20715     else {
20716         $line = $line_source_object->get_line();
20717         push( @$rlookahead_buffer, $line );
20718     }
20719     return $line;
20720 }
20721
20722 sub get_line {
20723     my $self               = shift;
20724     my $line               = undef;
20725     my $line_source_object = $self->{_line_source_object};
20726     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
20727
20728     if ( scalar(@$rlookahead_buffer) ) {
20729         $line = shift @$rlookahead_buffer;
20730     }
20731     else {
20732         $line = $line_source_object->get_line();
20733     }
20734     return $line;
20735 }
20736
20737 ########################################################################
20738 #
20739 # the Perl::Tidy::Tokenizer package is essentially a filter which
20740 # reads lines of perl source code from a source object and provides
20741 # corresponding tokenized lines through its get_line() method.  Lines
20742 # flow from the source_object to the caller like this:
20743 #
20744 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
20745 #   get_line()         get_line()           get_line()     line_of_tokens
20746 #
20747 # The source object can be any object with a get_line() method which
20748 # supplies one line (a character string) perl call.
20749 # The LineBuffer object is created by the Tokenizer.
20750 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
20751 # containing one tokenized line for each call to its get_line() method.
20752 #
20753 # WARNING: This is not a real class yet.  Only one tokenizer my be used.
20754 #
20755 ########################################################################
20756
20757 package Perl::Tidy::Tokenizer;
20758
20759 BEGIN {
20760
20761     # Caution: these debug flags produce a lot of output
20762     # They should all be 0 except when debugging small scripts
20763
20764     use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
20765     use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
20766     use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
20767     use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
20768     use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
20769
20770     my $debug_warning = sub {
20771         print "TOKENIZER_DEBUGGING with key $_[0]\n";
20772     };
20773
20774     TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
20775     TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
20776     TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
20777     TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
20778     TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
20779
20780 }
20781
20782 use Carp;
20783
20784 # PACKAGE VARIABLES for for processing an entire FILE.
20785 use vars qw{
20786   $tokenizer_self
20787
20788   $last_nonblank_token
20789   $last_nonblank_type
20790   $last_nonblank_block_type
20791   $statement_type
20792   $in_attribute_list
20793   $current_package
20794   $context
20795
20796   %is_constant
20797   %is_user_function
20798   %user_function_prototype
20799   %is_block_function
20800   %is_block_list_function
20801   %saw_function_definition
20802
20803   $brace_depth
20804   $paren_depth
20805   $square_bracket_depth
20806
20807   @current_depth
20808   @total_depth
20809   $total_depth
20810   @nesting_sequence_number
20811   @current_sequence_number
20812   @paren_type
20813   @paren_semicolon_count
20814   @paren_structural_type
20815   @brace_type
20816   @brace_structural_type
20817   @brace_statement_type
20818   @brace_context
20819   @brace_package
20820   @square_bracket_type
20821   @square_bracket_structural_type
20822   @depth_array
20823   @nested_ternary_flag
20824   @starting_line_of_current_depth
20825 };
20826
20827 # GLOBAL CONSTANTS for routines in this package
20828 use vars qw{
20829   %is_indirect_object_taker
20830   %is_block_operator
20831   %expecting_operator_token
20832   %expecting_operator_types
20833   %expecting_term_types
20834   %expecting_term_token
20835   %is_digraph
20836   %is_file_test_operator
20837   %is_trigraph
20838   %is_valid_token_type
20839   %is_keyword
20840   %is_code_block_token
20841   %really_want_term
20842   @opening_brace_names
20843   @closing_brace_names
20844   %is_keyword_taking_list
20845   %is_q_qq_qw_qx_qr_s_y_tr_m
20846 };
20847
20848 # possible values of operator_expected()
20849 use constant TERM     => -1;
20850 use constant UNKNOWN  => 0;
20851 use constant OPERATOR => 1;
20852
20853 # possible values of context
20854 use constant SCALAR_CONTEXT  => -1;
20855 use constant UNKNOWN_CONTEXT => 0;
20856 use constant LIST_CONTEXT    => 1;
20857
20858 # Maximum number of little messages; probably need not be changed.
20859 use constant MAX_NAG_MESSAGES => 6;
20860
20861 {
20862
20863     # methods to count instances
20864     my $_count = 0;
20865     sub get_count        { $_count; }
20866     sub _increment_count { ++$_count }
20867     sub _decrement_count { --$_count }
20868 }
20869
20870 sub DESTROY {
20871     $_[0]->_decrement_count();
20872 }
20873
20874 sub new {
20875
20876     my $class = shift;
20877
20878     # Note: 'tabs' and 'indent_columns' are temporary and should be
20879     # removed asap
20880     my %defaults = (
20881         source_object        => undef,
20882         debugger_object      => undef,
20883         diagnostics_object   => undef,
20884         logger_object        => undef,
20885         starting_level       => undef,
20886         indent_columns       => 4,
20887         tabs                 => 0,
20888         look_for_hash_bang   => 0,
20889         trim_qw              => 1,
20890         look_for_autoloader  => 1,
20891         look_for_selfloader  => 1,
20892         starting_line_number => 1,
20893     );
20894     my %args = ( %defaults, @_ );
20895
20896     # we are given an object with a get_line() method to supply source lines
20897     my $source_object = $args{source_object};
20898
20899     # we create another object with a get_line() and peek_ahead() method
20900     my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
20901
20902     # Tokenizer state data is as follows:
20903     # _rhere_target_list    reference to list of here-doc targets
20904     # _here_doc_target      the target string for a here document
20905     # _here_quote_character the type of here-doc quoting (" ' ` or none)
20906     #                       to determine if interpolation is done
20907     # _quote_target         character we seek if chasing a quote
20908     # _line_start_quote     line where we started looking for a long quote
20909     # _in_here_doc          flag indicating if we are in a here-doc
20910     # _in_pod               flag set if we are in pod documentation
20911     # _in_error             flag set if we saw severe error (binary in script)
20912     # _in_data              flag set if we are in __DATA__ section
20913     # _in_end               flag set if we are in __END__ section
20914     # _in_format            flag set if we are in a format description
20915     # _in_attribute_list    flag telling if we are looking for attributes
20916     # _in_quote             flag telling if we are chasing a quote
20917     # _starting_level       indentation level of first line
20918     # _input_tabstr         string denoting one indentation level of input file
20919     # _know_input_tabstr    flag indicating if we know _input_tabstr
20920     # _line_buffer_object   object with get_line() method to supply source code
20921     # _diagnostics_object   place to write debugging information
20922     # _unexpected_error_count  error count used to limit output
20923     # _lower_case_labels_at  line numbers where lower case labels seen
20924     $tokenizer_self = {
20925         _rhere_target_list                  => [],
20926         _in_here_doc                        => 0,
20927         _here_doc_target                    => "",
20928         _here_quote_character               => "",
20929         _in_data                            => 0,
20930         _in_end                             => 0,
20931         _in_format                          => 0,
20932         _in_error                           => 0,
20933         _in_pod                             => 0,
20934         _in_attribute_list                  => 0,
20935         _in_quote                           => 0,
20936         _quote_target                       => "",
20937         _line_start_quote                   => -1,
20938         _starting_level                     => $args{starting_level},
20939         _know_starting_level                => defined( $args{starting_level} ),
20940         _tabs                               => $args{tabs},
20941         _indent_columns                     => $args{indent_columns},
20942         _look_for_hash_bang                 => $args{look_for_hash_bang},
20943         _trim_qw                            => $args{trim_qw},
20944         _input_tabstr                       => "",
20945         _know_input_tabstr                  => -1,
20946         _last_line_number                   => $args{starting_line_number} - 1,
20947         _saw_perl_dash_P                    => 0,
20948         _saw_perl_dash_w                    => 0,
20949         _saw_use_strict                     => 0,
20950         _saw_v_string                       => 0,
20951         _look_for_autoloader                => $args{look_for_autoloader},
20952         _look_for_selfloader                => $args{look_for_selfloader},
20953         _saw_autoloader                     => 0,
20954         _saw_selfloader                     => 0,
20955         _saw_hash_bang                      => 0,
20956         _saw_end                            => 0,
20957         _saw_data                           => 0,
20958         _saw_negative_indentation           => 0,
20959         _started_tokenizing                 => 0,
20960         _line_buffer_object                 => $line_buffer_object,
20961         _debugger_object                    => $args{debugger_object},
20962         _diagnostics_object                 => $args{diagnostics_object},
20963         _logger_object                      => $args{logger_object},
20964         _unexpected_error_count             => 0,
20965         _started_looking_for_here_target_at => 0,
20966         _nearly_matched_here_target_at      => undef,
20967         _line_text                          => "",
20968         _rlower_case_labels_at              => undef,
20969     };
20970
20971     prepare_for_a_new_file();
20972     find_starting_indentation_level();
20973
20974     bless $tokenizer_self, $class;
20975
20976     # This is not a full class yet, so die if an attempt is made to
20977     # create more than one object.
20978
20979     if ( _increment_count() > 1 ) {
20980         confess
20981 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
20982     }
20983
20984     return $tokenizer_self;
20985
20986 }
20987
20988 # interface to Perl::Tidy::Logger routines
20989 sub warning {
20990     my $logger_object = $tokenizer_self->{_logger_object};
20991     if ($logger_object) {
20992         $logger_object->warning(@_);
20993     }
20994 }
20995
20996 sub complain {
20997     my $logger_object = $tokenizer_self->{_logger_object};
20998     if ($logger_object) {
20999         $logger_object->complain(@_);
21000     }
21001 }
21002
21003 sub write_logfile_entry {
21004     my $logger_object = $tokenizer_self->{_logger_object};
21005     if ($logger_object) {
21006         $logger_object->write_logfile_entry(@_);
21007     }
21008 }
21009
21010 sub interrupt_logfile {
21011     my $logger_object = $tokenizer_self->{_logger_object};
21012     if ($logger_object) {
21013         $logger_object->interrupt_logfile();
21014     }
21015 }
21016
21017 sub resume_logfile {
21018     my $logger_object = $tokenizer_self->{_logger_object};
21019     if ($logger_object) {
21020         $logger_object->resume_logfile();
21021     }
21022 }
21023
21024 sub increment_brace_error {
21025     my $logger_object = $tokenizer_self->{_logger_object};
21026     if ($logger_object) {
21027         $logger_object->increment_brace_error();
21028     }
21029 }
21030
21031 sub report_definite_bug {
21032     my $logger_object = $tokenizer_self->{_logger_object};
21033     if ($logger_object) {
21034         $logger_object->report_definite_bug();
21035     }
21036 }
21037
21038 sub brace_warning {
21039     my $logger_object = $tokenizer_self->{_logger_object};
21040     if ($logger_object) {
21041         $logger_object->brace_warning(@_);
21042     }
21043 }
21044
21045 sub get_saw_brace_error {
21046     my $logger_object = $tokenizer_self->{_logger_object};
21047     if ($logger_object) {
21048         $logger_object->get_saw_brace_error();
21049     }
21050     else {
21051         0;
21052     }
21053 }
21054
21055 # interface to Perl::Tidy::Diagnostics routines
21056 sub write_diagnostics {
21057     if ( $tokenizer_self->{_diagnostics_object} ) {
21058         $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
21059     }
21060 }
21061
21062 sub report_tokenization_errors {
21063
21064     my $self = shift;
21065
21066     my $level = get_indentation_level();
21067     if ( $level != $tokenizer_self->{_starting_level} ) {
21068         warning("final indentation level: $level\n");
21069     }
21070
21071     check_final_nesting_depths();
21072
21073     if ( $tokenizer_self->{_look_for_hash_bang}
21074         && !$tokenizer_self->{_saw_hash_bang} )
21075     {
21076         warning(
21077             "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
21078     }
21079
21080     if ( $tokenizer_self->{_in_format} ) {
21081         warning("hit EOF while in format description\n");
21082     }
21083
21084     if ( $tokenizer_self->{_in_pod} ) {
21085
21086         # Just write log entry if this is after __END__ or __DATA__
21087         # because this happens to often, and it is not likely to be
21088         # a parsing error.
21089         if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
21090             write_logfile_entry(
21091 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
21092             );
21093         }
21094
21095         else {
21096             complain(
21097 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
21098             );
21099         }
21100
21101     }
21102
21103     if ( $tokenizer_self->{_in_here_doc} ) {
21104         my $here_doc_target = $tokenizer_self->{_here_doc_target};
21105         my $started_looking_for_here_target_at =
21106           $tokenizer_self->{_started_looking_for_here_target_at};
21107         if ($here_doc_target) {
21108             warning(
21109 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
21110             );
21111         }
21112         else {
21113             warning(
21114 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
21115             );
21116         }
21117         my $nearly_matched_here_target_at =
21118           $tokenizer_self->{_nearly_matched_here_target_at};
21119         if ($nearly_matched_here_target_at) {
21120             warning(
21121 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
21122             );
21123         }
21124     }
21125
21126     if ( $tokenizer_self->{_in_quote} ) {
21127         my $line_start_quote = $tokenizer_self->{_line_start_quote};
21128         my $quote_target     = $tokenizer_self->{_quote_target};
21129         my $what =
21130           ( $tokenizer_self->{_in_attribute_list} )
21131           ? "attribute list"
21132           : "quote/pattern";
21133         warning(
21134 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
21135         );
21136     }
21137
21138     unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
21139         if ( $] < 5.006 ) {
21140             write_logfile_entry("Suggest including '-w parameter'\n");
21141         }
21142         else {
21143             write_logfile_entry("Suggest including 'use warnings;'\n");
21144         }
21145     }
21146
21147     if ( $tokenizer_self->{_saw_perl_dash_P} ) {
21148         write_logfile_entry("Use of -P parameter for defines is discouraged\n");
21149     }
21150
21151     unless ( $tokenizer_self->{_saw_use_strict} ) {
21152         write_logfile_entry("Suggest including 'use strict;'\n");
21153     }
21154
21155     # it is suggested that lables have at least one upper case character
21156     # for legibility and to avoid code breakage as new keywords are introduced
21157     if ( $tokenizer_self->{_rlower_case_labels_at} ) {
21158         my @lower_case_labels_at =
21159           @{ $tokenizer_self->{_rlower_case_labels_at} };
21160         write_logfile_entry(
21161             "Suggest using upper case characters in label(s)\n");
21162         local $" = ')(';
21163         write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
21164     }
21165 }
21166
21167 sub report_v_string {
21168
21169     # warn if this version can't handle v-strings
21170     my $tok = shift;
21171     unless ( $tokenizer_self->{_saw_v_string} ) {
21172         $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
21173     }
21174     if ( $] < 5.006 ) {
21175         warning(
21176 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
21177         );
21178     }
21179 }
21180
21181 sub get_input_line_number {
21182     return $tokenizer_self->{_last_line_number};
21183 }
21184
21185 # returns the next tokenized line
21186 sub get_line {
21187
21188     my $self = shift;
21189
21190     # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
21191     # $square_bracket_depth, $paren_depth
21192
21193     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
21194     $tokenizer_self->{_line_text} = $input_line;
21195
21196     return undef unless ($input_line);
21197
21198     my $input_line_number = ++$tokenizer_self->{_last_line_number};
21199
21200     # Find and remove what characters terminate this line, including any
21201     # control r
21202     my $input_line_separator = "";
21203     if ( chomp($input_line) ) { $input_line_separator = $/ }
21204
21205     # TODO: what other characters should be included here?
21206     if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
21207         $input_line_separator = $2 . $input_line_separator;
21208     }
21209
21210     # for backwards compatability we keep the line text terminated with
21211     # a newline character
21212     $input_line .= "\n";
21213     $tokenizer_self->{_line_text} = $input_line;    # update
21214
21215     # create a data structure describing this line which will be
21216     # returned to the caller.
21217
21218     # _line_type codes are:
21219     #   SYSTEM         - system-specific code before hash-bang line
21220     #   CODE           - line of perl code (including comments)
21221     #   POD_START      - line starting pod, such as '=head'
21222     #   POD            - pod documentation text
21223     #   POD_END        - last line of pod section, '=cut'
21224     #   HERE           - text of here-document
21225     #   HERE_END       - last line of here-doc (target word)
21226     #   FORMAT         - format section
21227     #   FORMAT_END     - last line of format section, '.'
21228     #   DATA_START     - __DATA__ line
21229     #   DATA           - unidentified text following __DATA__
21230     #   END_START      - __END__ line
21231     #   END            - unidentified text following __END__
21232     #   ERROR          - we are in big trouble, probably not a perl script
21233
21234     # Other variables:
21235     #   _curly_brace_depth     - depth of curly braces at start of line
21236     #   _square_bracket_depth  - depth of square brackets at start of line
21237     #   _paren_depth           - depth of parens at start of line
21238     #   _starting_in_quote     - this line continues a multi-line quote
21239     #                            (so don't trim leading blanks!)
21240     #   _ending_in_quote       - this line ends in a multi-line quote
21241     #                            (so don't trim trailing blanks!)
21242     my $line_of_tokens = {
21243         _line_type                => 'EOF',
21244         _line_text                => $input_line,
21245         _line_number              => $input_line_number,
21246         _rtoken_type              => undef,
21247         _rtokens                  => undef,
21248         _rlevels                  => undef,
21249         _rslevels                 => undef,
21250         _rblock_type              => undef,
21251         _rcontainer_type          => undef,
21252         _rcontainer_environment   => undef,
21253         _rtype_sequence           => undef,
21254         _rnesting_tokens          => undef,
21255         _rci_levels               => undef,
21256         _rnesting_blocks          => undef,
21257         _python_indentation_level => -1,                   ## 0,
21258         _starting_in_quote    => 0,                    # to be set by subroutine
21259         _ending_in_quote      => 0,
21260         _curly_brace_depth    => $brace_depth,
21261         _square_bracket_depth => $square_bracket_depth,
21262         _paren_depth          => $paren_depth,
21263         _quote_character      => '',
21264     };
21265
21266     # must print line unchanged if we are in a here document
21267     if ( $tokenizer_self->{_in_here_doc} ) {
21268
21269         $line_of_tokens->{_line_type} = 'HERE';
21270         my $here_doc_target      = $tokenizer_self->{_here_doc_target};
21271         my $here_quote_character = $tokenizer_self->{_here_quote_character};
21272         my $candidate_target     = $input_line;
21273         chomp $candidate_target;
21274         if ( $candidate_target eq $here_doc_target ) {
21275             $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21276             $line_of_tokens->{_line_type}                     = 'HERE_END';
21277             write_logfile_entry("Exiting HERE document $here_doc_target\n");
21278
21279             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21280             if (@$rhere_target_list) {    # there can be multiple here targets
21281                 ( $here_doc_target, $here_quote_character ) =
21282                   @{ shift @$rhere_target_list };
21283                 $tokenizer_self->{_here_doc_target} = $here_doc_target;
21284                 $tokenizer_self->{_here_quote_character} =
21285                   $here_quote_character;
21286                 write_logfile_entry(
21287                     "Entering HERE document $here_doc_target\n");
21288                 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
21289                 $tokenizer_self->{_started_looking_for_here_target_at} =
21290                   $input_line_number;
21291             }
21292             else {
21293                 $tokenizer_self->{_in_here_doc}          = 0;
21294                 $tokenizer_self->{_here_doc_target}      = "";
21295                 $tokenizer_self->{_here_quote_character} = "";
21296             }
21297         }
21298
21299         # check for error of extra whitespace
21300         # note for PERL6: leading whitespace is allowed
21301         else {
21302             $candidate_target =~ s/\s*$//;
21303             $candidate_target =~ s/^\s*//;
21304             if ( $candidate_target eq $here_doc_target ) {
21305                 $tokenizer_self->{_nearly_matched_here_target_at} =
21306                   $input_line_number;
21307             }
21308         }
21309         return $line_of_tokens;
21310     }
21311
21312     # must print line unchanged if we are in a format section
21313     elsif ( $tokenizer_self->{_in_format} ) {
21314
21315         if ( $input_line =~ /^\.[\s#]*$/ ) {
21316             write_logfile_entry("Exiting format section\n");
21317             $tokenizer_self->{_in_format} = 0;
21318             $line_of_tokens->{_line_type} = 'FORMAT_END';
21319         }
21320         else {
21321             $line_of_tokens->{_line_type} = 'FORMAT';
21322         }
21323         return $line_of_tokens;
21324     }
21325
21326     # must print line unchanged if we are in pod documentation
21327     elsif ( $tokenizer_self->{_in_pod} ) {
21328
21329         $line_of_tokens->{_line_type} = 'POD';
21330         if ( $input_line =~ /^=cut/ ) {
21331             $line_of_tokens->{_line_type} = 'POD_END';
21332             write_logfile_entry("Exiting POD section\n");
21333             $tokenizer_self->{_in_pod} = 0;
21334         }
21335         if ( $input_line =~ /^\#\!.*perl\b/ ) {
21336             warning(
21337                 "Hash-bang in pod can cause older versions of perl to fail! \n"
21338             );
21339         }
21340
21341         return $line_of_tokens;
21342     }
21343
21344     # must print line unchanged if we have seen a severe error (i.e., we
21345     # are seeing illegal tokens and connot continue.  Syntax errors do
21346     # not pass this route).  Calling routine can decide what to do, but
21347     # the default can be to just pass all lines as if they were after __END__
21348     elsif ( $tokenizer_self->{_in_error} ) {
21349         $line_of_tokens->{_line_type} = 'ERROR';
21350         return $line_of_tokens;
21351     }
21352
21353     # print line unchanged if we are __DATA__ section
21354     elsif ( $tokenizer_self->{_in_data} ) {
21355
21356         # ...but look for POD
21357         # Note that the _in_data and _in_end flags remain set
21358         # so that we return to that state after seeing the
21359         # end of a pod section
21360         if ( $input_line =~ /^=(?!cut)/ ) {
21361             $line_of_tokens->{_line_type} = 'POD_START';
21362             write_logfile_entry("Entering POD section\n");
21363             $tokenizer_self->{_in_pod} = 1;
21364             return $line_of_tokens;
21365         }
21366         else {
21367             $line_of_tokens->{_line_type} = 'DATA';
21368             return $line_of_tokens;
21369         }
21370     }
21371
21372     # print line unchanged if we are in __END__ section
21373     elsif ( $tokenizer_self->{_in_end} ) {
21374
21375         # ...but look for POD
21376         # Note that the _in_data and _in_end flags remain set
21377         # so that we return to that state after seeing the
21378         # end of a pod section
21379         if ( $input_line =~ /^=(?!cut)/ ) {
21380             $line_of_tokens->{_line_type} = 'POD_START';
21381             write_logfile_entry("Entering POD section\n");
21382             $tokenizer_self->{_in_pod} = 1;
21383             return $line_of_tokens;
21384         }
21385         else {
21386             $line_of_tokens->{_line_type} = 'END';
21387             return $line_of_tokens;
21388         }
21389     }
21390
21391     # check for a hash-bang line if we haven't seen one
21392     if ( !$tokenizer_self->{_saw_hash_bang} ) {
21393         if ( $input_line =~ /^\#\!.*perl\b/ ) {
21394             $tokenizer_self->{_saw_hash_bang} = $input_line_number;
21395
21396             # check for -w and -P flags
21397             if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
21398                 $tokenizer_self->{_saw_perl_dash_P} = 1;
21399             }
21400
21401             if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
21402                 $tokenizer_self->{_saw_perl_dash_w} = 1;
21403             }
21404
21405             if (   ( $input_line_number > 1 )
21406                 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
21407             {
21408
21409                 # this is helpful for VMS systems; we may have accidentally
21410                 # tokenized some DCL commands
21411                 if ( $tokenizer_self->{_started_tokenizing} ) {
21412                     warning(
21413 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
21414                     );
21415                 }
21416                 else {
21417                     complain("Useless hash-bang after line 1\n");
21418                 }
21419             }
21420
21421             # Report the leading hash-bang as a system line
21422             # This will prevent -dac from deleting it
21423             else {
21424                 $line_of_tokens->{_line_type} = 'SYSTEM';
21425                 return $line_of_tokens;
21426             }
21427         }
21428     }
21429
21430     # wait for a hash-bang before parsing if the user invoked us with -x
21431     if ( $tokenizer_self->{_look_for_hash_bang}
21432         && !$tokenizer_self->{_saw_hash_bang} )
21433     {
21434         $line_of_tokens->{_line_type} = 'SYSTEM';
21435         return $line_of_tokens;
21436     }
21437
21438     # a first line of the form ': #' will be marked as SYSTEM
21439     # since lines of this form may be used by tcsh
21440     if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
21441         $line_of_tokens->{_line_type} = 'SYSTEM';
21442         return $line_of_tokens;
21443     }
21444
21445     # now we know that it is ok to tokenize the line...
21446     # the line tokenizer will modify any of these private variables:
21447     #        _rhere_target_list
21448     #        _in_data
21449     #        _in_end
21450     #        _in_format
21451     #        _in_error
21452     #        _in_pod
21453     #        _in_quote
21454     my $ending_in_quote_last = $tokenizer_self->{_in_quote};
21455     tokenize_this_line($line_of_tokens);
21456
21457     # Now finish defining the return structure and return it
21458     $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
21459
21460     # handle severe error (binary data in script)
21461     if ( $tokenizer_self->{_in_error} ) {
21462         $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
21463         warning("Giving up after error\n");
21464         $line_of_tokens->{_line_type} = 'ERROR';
21465         reset_indentation_level(0);          # avoid error messages
21466         return $line_of_tokens;
21467     }
21468
21469     # handle start of pod documentation
21470     if ( $tokenizer_self->{_in_pod} ) {
21471
21472         # This gets tricky..above a __DATA__ or __END__ section, perl
21473         # accepts '=cut' as the start of pod section. But afterwards,
21474         # only pod utilities see it and they may ignore an =cut without
21475         # leading =head.  In any case, this isn't good.
21476         if ( $input_line =~ /^=cut\b/ ) {
21477             if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
21478                 complain("=cut while not in pod ignored\n");
21479                 $tokenizer_self->{_in_pod}    = 0;
21480                 $line_of_tokens->{_line_type} = 'POD_END';
21481             }
21482             else {
21483                 $line_of_tokens->{_line_type} = 'POD_START';
21484                 complain(
21485 "=cut starts a pod section .. this can fool pod utilities.\n"
21486                 );
21487                 write_logfile_entry("Entering POD section\n");
21488             }
21489         }
21490
21491         else {
21492             $line_of_tokens->{_line_type} = 'POD_START';
21493             write_logfile_entry("Entering POD section\n");
21494         }
21495
21496         return $line_of_tokens;
21497     }
21498
21499     # update indentation levels for log messages
21500     if ( $input_line !~ /^\s*$/ ) {
21501         my $rlevels                      = $line_of_tokens->{_rlevels};
21502         my $structural_indentation_level = $$rlevels[0];
21503         my ( $python_indentation_level, $msg ) =
21504           find_indentation_level( $input_line, $structural_indentation_level );
21505         if ($msg) { write_logfile_entry("$msg") }
21506         if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
21507             $line_of_tokens->{_python_indentation_level} =
21508               $python_indentation_level;
21509         }
21510     }
21511
21512     # see if this line contains here doc targets
21513     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21514     if (@$rhere_target_list) {
21515
21516         my ( $here_doc_target, $here_quote_character ) =
21517           @{ shift @$rhere_target_list };
21518         $tokenizer_self->{_in_here_doc}          = 1;
21519         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
21520         $tokenizer_self->{_here_quote_character} = $here_quote_character;
21521         write_logfile_entry("Entering HERE document $here_doc_target\n");
21522         $tokenizer_self->{_started_looking_for_here_target_at} =
21523           $input_line_number;
21524     }
21525
21526     # NOTE: __END__ and __DATA__ statements are written unformatted
21527     # because they can theoretically contain additional characters
21528     # which are not tokenized (and cannot be read with <DATA> either!).
21529     if ( $tokenizer_self->{_in_data} ) {
21530         $line_of_tokens->{_line_type} = 'DATA_START';
21531         write_logfile_entry("Starting __DATA__ section\n");
21532         $tokenizer_self->{_saw_data} = 1;
21533
21534         # keep parsing after __DATA__ if use SelfLoader was seen
21535         if ( $tokenizer_self->{_saw_selfloader} ) {
21536             $tokenizer_self->{_in_data} = 0;
21537             write_logfile_entry(
21538                 "SelfLoader seen, continuing; -nlsl deactivates\n");
21539         }
21540
21541         return $line_of_tokens;
21542     }
21543
21544     elsif ( $tokenizer_self->{_in_end} ) {
21545         $line_of_tokens->{_line_type} = 'END_START';
21546         write_logfile_entry("Starting __END__ section\n");
21547         $tokenizer_self->{_saw_end} = 1;
21548
21549         # keep parsing after __END__ if use AutoLoader was seen
21550         if ( $tokenizer_self->{_saw_autoloader} ) {
21551             $tokenizer_self->{_in_end} = 0;
21552             write_logfile_entry(
21553                 "AutoLoader seen, continuing; -nlal deactivates\n");
21554         }
21555         return $line_of_tokens;
21556     }
21557
21558     # now, finally, we know that this line is type 'CODE'
21559     $line_of_tokens->{_line_type} = 'CODE';
21560
21561     # remember if we have seen any real code
21562     if (  !$tokenizer_self->{_started_tokenizing}
21563         && $input_line !~ /^\s*$/
21564         && $input_line !~ /^\s*#/ )
21565     {
21566         $tokenizer_self->{_started_tokenizing} = 1;
21567     }
21568
21569     if ( $tokenizer_self->{_debugger_object} ) {
21570         $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
21571     }
21572
21573     # Note: if keyword 'format' occurs in this line code, it is still CODE
21574     # (keyword 'format' need not start a line)
21575     if ( $tokenizer_self->{_in_format} ) {
21576         write_logfile_entry("Entering format section\n");
21577     }
21578
21579     if ( $tokenizer_self->{_in_quote}
21580         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
21581     {
21582
21583         #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
21584         if (
21585             ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
21586         {
21587             $tokenizer_self->{_line_start_quote} = $input_line_number;
21588             write_logfile_entry(
21589                 "Start multi-line quote or pattern ending in $quote_target\n");
21590         }
21591     }
21592     elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
21593         and !$tokenizer_self->{_in_quote} )
21594     {
21595         $tokenizer_self->{_line_start_quote} = -1;
21596         write_logfile_entry("End of multi-line quote or pattern\n");
21597     }
21598
21599     # we are returning a line of CODE
21600     return $line_of_tokens;
21601 }
21602
21603 sub find_starting_indentation_level {
21604
21605     # USES GLOBAL VARIABLES: $tokenizer_self
21606     my $starting_level    = 0;
21607     my $know_input_tabstr = -1;    # flag for find_indentation_level
21608
21609     # use value if given as parameter
21610     if ( $tokenizer_self->{_know_starting_level} ) {
21611         $starting_level = $tokenizer_self->{_starting_level};
21612     }
21613
21614     # if we know there is a hash_bang line, the level must be zero
21615     elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
21616         $tokenizer_self->{_know_starting_level} = 1;
21617     }
21618
21619     # otherwise figure it out from the input file
21620     else {
21621         my $line;
21622         my $i                            = 0;
21623         my $structural_indentation_level = -1; # flag for find_indentation_level
21624
21625         my $msg = "";
21626         while ( $line =
21627             $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
21628         {
21629
21630             # if first line is #! then assume starting level is zero
21631             if ( $i == 1 && $line =~ /^\#\!/ ) {
21632                 $starting_level = 0;
21633                 last;
21634             }
21635             next if ( $line =~ /^\s*#/ );      # must not be comment
21636             next if ( $line =~ /^\s*$/ );      # must not be blank
21637             ( $starting_level, $msg ) =
21638               find_indentation_level( $line, $structural_indentation_level );
21639             if ($msg) { write_logfile_entry("$msg") }
21640             last;
21641         }
21642         $msg = "Line $i implies starting-indentation-level = $starting_level\n";
21643
21644         if ( $starting_level > 0 ) {
21645
21646             my $input_tabstr = $tokenizer_self->{_input_tabstr};
21647             if ( $input_tabstr eq "\t" ) {
21648                 $msg .= "by guessing input tabbing uses 1 tab per level\n";
21649             }
21650             else {
21651                 my $cols = length($input_tabstr);
21652                 $msg .=
21653                   "by guessing input tabbing uses $cols blanks per level\n";
21654             }
21655         }
21656         write_logfile_entry("$msg");
21657     }
21658     $tokenizer_self->{_starting_level} = $starting_level;
21659     reset_indentation_level($starting_level);
21660 }
21661
21662 # Find indentation level given a input line.  At the same time, try to
21663 # figure out the input tabbing scheme.
21664 #
21665 # There are two types of calls:
21666 #
21667 # Type 1: $structural_indentation_level < 0
21668 #  In this case we have to guess $input_tabstr to figure out the level.
21669 #
21670 # Type 2: $structural_indentation_level >= 0
21671 #  In this case the level of this line is known, and this routine can
21672 #  update the tabbing string, if still unknown, to make the level correct.
21673
21674 sub find_indentation_level {
21675     my ( $line, $structural_indentation_level ) = @_;
21676
21677     # USES GLOBAL VARIABLES: $tokenizer_self
21678     my $level = 0;
21679     my $msg   = "";
21680
21681     my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
21682     my $input_tabstr      = $tokenizer_self->{_input_tabstr};
21683
21684     # find leading whitespace
21685     my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
21686
21687     # make first guess at input tabbing scheme if necessary
21688     if ( $know_input_tabstr < 0 ) {
21689
21690         $know_input_tabstr = 0;
21691
21692         if ( $tokenizer_self->{_tabs} ) {
21693             $input_tabstr = "\t";
21694             if ( length($leading_whitespace) > 0 ) {
21695                 if ( $leading_whitespace !~ /\t/ ) {
21696
21697                     my $cols = $tokenizer_self->{_indent_columns};
21698
21699                     if ( length($leading_whitespace) < $cols ) {
21700                         $cols = length($leading_whitespace);
21701                     }
21702                     $input_tabstr = " " x $cols;
21703                 }
21704             }
21705         }
21706         else {
21707             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
21708
21709             if ( length($leading_whitespace) > 0 ) {
21710                 if ( $leading_whitespace =~ /^\t/ ) {
21711                     $input_tabstr = "\t";
21712                 }
21713             }
21714         }
21715         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21716         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
21717     }
21718
21719     # determine the input tabbing scheme if possible
21720     if (   ( $know_input_tabstr == 0 )
21721         && ( length($leading_whitespace) > 0 )
21722         && ( $structural_indentation_level > 0 ) )
21723     {
21724         my $saved_input_tabstr = $input_tabstr;
21725
21726         # check for common case of one tab per indentation level
21727         if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21728             if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21729                 $input_tabstr = "\t";
21730                 $msg          = "Guessing old indentation was tab character\n";
21731             }
21732         }
21733
21734         else {
21735
21736             # detab any tabs based on 8 blanks per tab
21737             my $entabbed = "";
21738             if ( $leading_whitespace =~ s/^\t+/        /g ) {
21739                 $entabbed = "entabbed";
21740             }
21741
21742             # now compute tabbing from number of spaces
21743             my $columns =
21744               length($leading_whitespace) / $structural_indentation_level;
21745             if ( $columns == int $columns ) {
21746                 $msg =
21747                   "Guessing old indentation was $columns $entabbed spaces\n";
21748             }
21749             else {
21750                 $columns = int $columns;
21751                 $msg =
21752 "old indentation is unclear, using $columns $entabbed spaces\n";
21753             }
21754             $input_tabstr = " " x $columns;
21755         }
21756         $know_input_tabstr                    = 1;
21757         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21758         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
21759
21760         # see if mistakes were made
21761         if ( ( $tokenizer_self->{_starting_level} > 0 )
21762             && !$tokenizer_self->{_know_starting_level} )
21763         {
21764
21765             if ( $input_tabstr ne $saved_input_tabstr ) {
21766                 complain(
21767 "I made a bad starting level guess; rerun with a value for -sil \n"
21768                 );
21769             }
21770         }
21771     }
21772
21773     # use current guess at input tabbing to get input indentation level
21774     #
21775     # Patch to handle a common case of entabbed leading whitespace
21776     # If the leading whitespace equals 4 spaces and we also have
21777     # tabs, detab the input whitespace assuming 8 spaces per tab.
21778     if ( length($input_tabstr) == 4 ) {
21779         $leading_whitespace =~ s/^\t+/        /g;
21780     }
21781
21782     if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
21783         my $pos = 0;
21784
21785         while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
21786         {
21787             $pos += $len_tab;
21788             $level++;
21789         }
21790     }
21791     return ( $level, $msg );
21792 }
21793
21794 # This is a currently unused debug routine
21795 sub dump_functions {
21796
21797     my $fh = *STDOUT;
21798     my ( $pkg, $sub );
21799     foreach $pkg ( keys %is_user_function ) {
21800         print $fh "\nnon-constant subs in package $pkg\n";
21801
21802         foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
21803             my $msg = "";
21804             if ( $is_block_list_function{$pkg}{$sub} ) {
21805                 $msg = 'block_list';
21806             }
21807
21808             if ( $is_block_function{$pkg}{$sub} ) {
21809                 $msg = 'block';
21810             }
21811             print $fh "$sub $msg\n";
21812         }
21813     }
21814
21815     foreach $pkg ( keys %is_constant ) {
21816         print $fh "\nconstants and constant subs in package $pkg\n";
21817
21818         foreach $sub ( keys %{ $is_constant{$pkg} } ) {
21819             print $fh "$sub\n";
21820         }
21821     }
21822 }
21823
21824 sub ones_count {
21825
21826     # count number of 1's in a string of 1's and 0's
21827     # example: ones_count("010101010101") gives 6
21828     return ( my $cis = $_[0] ) =~ tr/1/0/;
21829 }
21830
21831 sub prepare_for_a_new_file {
21832
21833     # previous tokens needed to determine what to expect next
21834     $last_nonblank_token      = ';';    # the only possible starting state which
21835     $last_nonblank_type       = ';';    # will make a leading brace a code block
21836     $last_nonblank_block_type = '';
21837
21838     # scalars for remembering statement types across multiple lines
21839     $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
21840     $in_attribute_list = 0;
21841
21842     # scalars for remembering where we are in the file
21843     $current_package = "main";
21844     $context         = UNKNOWN_CONTEXT;
21845
21846     # hashes used to remember function information
21847     %is_constant             = ();      # user-defined constants
21848     %is_user_function        = ();      # user-defined functions
21849     %user_function_prototype = ();      # their prototypes
21850     %is_block_function       = ();
21851     %is_block_list_function  = ();
21852     %saw_function_definition = ();
21853
21854     # variables used to track depths of various containers
21855     # and report nesting errors
21856     $paren_depth          = 0;
21857     $brace_depth          = 0;
21858     $square_bracket_depth = 0;
21859     @current_depth[ 0 .. $#closing_brace_names ] =
21860       (0) x scalar @closing_brace_names;
21861     $total_depth = 0;
21862     @total_depth = ();
21863     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
21864       ( 0 .. $#closing_brace_names );
21865     @current_sequence_number             = ();
21866     $paren_type[$paren_depth]            = '';
21867     $paren_semicolon_count[$paren_depth] = 0;
21868     $paren_structural_type[$brace_depth] = '';
21869     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
21870     $brace_structural_type[$brace_depth]                   = '';
21871     $brace_statement_type[$brace_depth]                    = "";
21872     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
21873     $brace_package[$paren_depth]                           = $current_package;
21874     $square_bracket_type[$square_bracket_depth]            = '';
21875     $square_bracket_structural_type[$square_bracket_depth] = '';
21876
21877     initialize_tokenizer_state();
21878 }
21879
21880 {                                       # begin tokenize_this_line
21881
21882     use constant BRACE          => 0;
21883     use constant SQUARE_BRACKET => 1;
21884     use constant PAREN          => 2;
21885     use constant QUESTION_COLON => 3;
21886
21887     # TV1: scalars for processing one LINE.
21888     # Re-initialized on each entry to sub tokenize_this_line.
21889     my (
21890         $block_type,        $container_type,    $expecting,
21891         $i,                 $i_tok,             $input_line,
21892         $input_line_number, $last_nonblank_i,   $max_token_index,
21893         $next_tok,          $next_type,         $peeked_ahead,
21894         $prototype,         $rhere_target_list, $rtoken_map,
21895         $rtoken_type,       $rtokens,           $tok,
21896         $type,              $type_sequence,     $indent_flag,
21897     );
21898
21899     # TV2: refs to ARRAYS for processing one LINE
21900     # Re-initialized on each call.
21901     my $routput_token_list     = [];    # stack of output token indexes
21902     my $routput_token_type     = [];    # token types
21903     my $routput_block_type     = [];    # types of code block
21904     my $routput_container_type = [];    # paren types, such as if, elsif, ..
21905     my $routput_type_sequence  = [];    # nesting sequential number
21906     my $routput_indent_flag    = [];    #
21907
21908     # TV3: SCALARS for quote variables.  These are initialized with a
21909     # subroutine call and continually updated as lines are processed.
21910     my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
21911         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
21912
21913     # TV4: SCALARS for multi-line identifiers and
21914     # statements. These are initialized with a subroutine call
21915     # and continually updated as lines are processed.
21916     my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
21917
21918     # TV5: SCALARS for tracking indentation level.
21919     # Initialized once and continually updated as lines are
21920     # processed.
21921     my (
21922         $nesting_token_string,      $nesting_type_string,
21923         $nesting_block_string,      $nesting_block_flag,
21924         $nesting_list_string,       $nesting_list_flag,
21925         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
21926         $in_statement_continuation, $level_in_tokenizer,
21927         $slevel_in_tokenizer,       $rslevel_stack,
21928     );
21929
21930     # TV6: SCALARS for remembering several previous
21931     # tokens. Initialized once and continually updated as
21932     # lines are processed.
21933     my (
21934         $last_nonblank_container_type,     $last_nonblank_type_sequence,
21935         $last_last_nonblank_token,         $last_last_nonblank_type,
21936         $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
21937         $last_last_nonblank_type_sequence, $last_nonblank_prototype,
21938     );
21939
21940     # ----------------------------------------------------------------
21941     # beginning of tokenizer variable access and manipulation routines
21942     # ----------------------------------------------------------------
21943
21944     sub initialize_tokenizer_state {
21945
21946         # TV1: initialized on each call
21947         # TV2: initialized on each call
21948         # TV3:
21949         $in_quote                = 0;
21950         $quote_type              = 'Q';
21951         $quote_character         = "";
21952         $quote_pos               = 0;
21953         $quote_depth             = 0;
21954         $quoted_string_1         = "";
21955         $quoted_string_2         = "";
21956         $allowed_quote_modifiers = "";
21957
21958         # TV4:
21959         $id_scan_state     = '';
21960         $identifier        = '';
21961         $want_paren        = "";
21962         $indented_if_level = 0;
21963
21964         # TV5:
21965         $nesting_token_string             = "";
21966         $nesting_type_string              = "";
21967         $nesting_block_string             = '1';    # initially in a block
21968         $nesting_block_flag               = 1;
21969         $nesting_list_string              = '0';    # initially not in a list
21970         $nesting_list_flag                = 0;      # initially not in a list
21971         $ci_string_in_tokenizer           = "";
21972         $continuation_string_in_tokenizer = "0";
21973         $in_statement_continuation        = 0;
21974         $level_in_tokenizer               = 0;
21975         $slevel_in_tokenizer              = 0;
21976         $rslevel_stack                    = [];
21977
21978         # TV6:
21979         $last_nonblank_container_type      = '';
21980         $last_nonblank_type_sequence       = '';
21981         $last_last_nonblank_token          = ';';
21982         $last_last_nonblank_type           = ';';
21983         $last_last_nonblank_block_type     = '';
21984         $last_last_nonblank_container_type = '';
21985         $last_last_nonblank_type_sequence  = '';
21986         $last_nonblank_prototype           = "";
21987     }
21988
21989     sub save_tokenizer_state {
21990
21991         my $rTV1 = [
21992             $block_type,        $container_type,    $expecting,
21993             $i,                 $i_tok,             $input_line,
21994             $input_line_number, $last_nonblank_i,   $max_token_index,
21995             $next_tok,          $next_type,         $peeked_ahead,
21996             $prototype,         $rhere_target_list, $rtoken_map,
21997             $rtoken_type,       $rtokens,           $tok,
21998             $type,              $type_sequence,     $indent_flag,
21999         ];
22000
22001         my $rTV2 = [
22002             $routput_token_list,    $routput_token_type,
22003             $routput_block_type,    $routput_container_type,
22004             $routput_type_sequence, $routput_indent_flag,
22005         ];
22006
22007         my $rTV3 = [
22008             $in_quote,        $quote_type,
22009             $quote_character, $quote_pos,
22010             $quote_depth,     $quoted_string_1,
22011             $quoted_string_2, $allowed_quote_modifiers,
22012         ];
22013
22014         my $rTV4 =
22015           [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
22016
22017         my $rTV5 = [
22018             $nesting_token_string,      $nesting_type_string,
22019             $nesting_block_string,      $nesting_block_flag,
22020             $nesting_list_string,       $nesting_list_flag,
22021             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22022             $in_statement_continuation, $level_in_tokenizer,
22023             $slevel_in_tokenizer,       $rslevel_stack,
22024         ];
22025
22026         my $rTV6 = [
22027             $last_nonblank_container_type,
22028             $last_nonblank_type_sequence,
22029             $last_last_nonblank_token,
22030             $last_last_nonblank_type,
22031             $last_last_nonblank_block_type,
22032             $last_last_nonblank_container_type,
22033             $last_last_nonblank_type_sequence,
22034             $last_nonblank_prototype,
22035         ];
22036         return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
22037     }
22038
22039     sub restore_tokenizer_state {
22040         my ($rstate) = @_;
22041         my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
22042         (
22043             $block_type,        $container_type,    $expecting,
22044             $i,                 $i_tok,             $input_line,
22045             $input_line_number, $last_nonblank_i,   $max_token_index,
22046             $next_tok,          $next_type,         $peeked_ahead,
22047             $prototype,         $rhere_target_list, $rtoken_map,
22048             $rtoken_type,       $rtokens,           $tok,
22049             $type,              $type_sequence,     $indent_flag,
22050         ) = @{$rTV1};
22051
22052         (
22053             $routput_token_list,    $routput_token_type,
22054             $routput_block_type,    $routput_container_type,
22055             $routput_type_sequence, $routput_type_sequence,
22056         ) = @{$rTV2};
22057
22058         (
22059             $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
22060             $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
22061         ) = @{$rTV3};
22062
22063         ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
22064           @{$rTV4};
22065
22066         (
22067             $nesting_token_string,      $nesting_type_string,
22068             $nesting_block_string,      $nesting_block_flag,
22069             $nesting_list_string,       $nesting_list_flag,
22070             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
22071             $in_statement_continuation, $level_in_tokenizer,
22072             $slevel_in_tokenizer,       $rslevel_stack,
22073         ) = @{$rTV5};
22074
22075         (
22076             $last_nonblank_container_type,
22077             $last_nonblank_type_sequence,
22078             $last_last_nonblank_token,
22079             $last_last_nonblank_type,
22080             $last_last_nonblank_block_type,
22081             $last_last_nonblank_container_type,
22082             $last_last_nonblank_type_sequence,
22083             $last_nonblank_prototype,
22084         ) = @{$rTV6};
22085     }
22086
22087     sub get_indentation_level {
22088
22089         # patch to avoid reporting error if indented if is not terminated
22090         if ($indented_if_level) { return $level_in_tokenizer - 1 }
22091         return $level_in_tokenizer;
22092     }
22093
22094     sub reset_indentation_level {
22095         $level_in_tokenizer  = $_[0];
22096         $slevel_in_tokenizer = $_[0];
22097         push @{$rslevel_stack}, $slevel_in_tokenizer;
22098     }
22099
22100     sub peeked_ahead {
22101         $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
22102     }
22103
22104     # ------------------------------------------------------------
22105     # end of tokenizer variable access and manipulation routines
22106     # ------------------------------------------------------------
22107
22108     # ------------------------------------------------------------
22109     # beginning of various scanner interface routines
22110     # ------------------------------------------------------------
22111     sub scan_replacement_text {
22112
22113         # check for here-docs in replacement text invoked by
22114         # a substitution operator with executable modifier 'e'.
22115         #
22116         # given:
22117         #  $replacement_text
22118         # return:
22119         #  $rht = reference to any here-doc targets
22120         my ($replacement_text) = @_;
22121
22122         # quick check
22123         return undef unless ( $replacement_text =~ /<</ );
22124
22125         write_logfile_entry("scanning replacement text for here-doc targets\n");
22126
22127         # save the logger object for error messages
22128         my $logger_object = $tokenizer_self->{_logger_object};
22129
22130         # localize all package variables
22131         local (
22132             $tokenizer_self,          $last_nonblank_token,
22133             $last_nonblank_type,      $last_nonblank_block_type,
22134             $statement_type,          $in_attribute_list,
22135             $current_package,         $context,
22136             %is_constant,             %is_user_function,
22137             %user_function_prototype, %is_block_function,
22138             %is_block_list_function,  %saw_function_definition,
22139             $brace_depth,             $paren_depth,
22140             $square_bracket_depth,    @current_depth,
22141             @total_depth,             $total_depth,
22142             @nesting_sequence_number, @current_sequence_number,
22143             @paren_type,              @paren_semicolon_count,
22144             @paren_structural_type,   @brace_type,
22145             @brace_structural_type,   @brace_statement_type,
22146             @brace_context,           @brace_package,
22147             @square_bracket_type,     @square_bracket_structural_type,
22148             @depth_array,             @starting_line_of_current_depth,
22149             @nested_ternary_flag,
22150         );
22151
22152         # save all lexical variables
22153         my $rstate = save_tokenizer_state();
22154         _decrement_count();    # avoid error check for multiple tokenizers
22155
22156         # make a new tokenizer
22157         my $rOpts = {};
22158         my $rpending_logfile_message;
22159         my $source_object =
22160           Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
22161             $rpending_logfile_message );
22162         my $tokenizer = Perl::Tidy::Tokenizer->new(
22163             source_object        => $source_object,
22164             logger_object        => $logger_object,
22165             starting_line_number => $input_line_number,
22166         );
22167
22168         # scan the replacement text
22169         1 while ( $tokenizer->get_line() );
22170
22171         # remove any here doc targets
22172         my $rht = undef;
22173         if ( $tokenizer_self->{_in_here_doc} ) {
22174             $rht = [];
22175             push @{$rht},
22176               [
22177                 $tokenizer_self->{_here_doc_target},
22178                 $tokenizer_self->{_here_quote_character}
22179               ];
22180             if ( $tokenizer_self->{_rhere_target_list} ) {
22181                 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
22182                 $tokenizer_self->{_rhere_target_list} = undef;
22183             }
22184             $tokenizer_self->{_in_here_doc} = undef;
22185         }
22186
22187         # now its safe to report errors
22188         $tokenizer->report_tokenization_errors();
22189
22190         # restore all tokenizer lexical variables
22191         restore_tokenizer_state($rstate);
22192
22193         # return the here doc targets
22194         return $rht;
22195     }
22196
22197     sub scan_bare_identifier {
22198         ( $i, $tok, $type, $prototype ) =
22199           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
22200             $rtoken_map, $max_token_index );
22201     }
22202
22203     sub scan_identifier {
22204         ( $i, $tok, $type, $id_scan_state, $identifier ) =
22205           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
22206             $max_token_index, $expecting );
22207     }
22208
22209     sub scan_id {
22210         ( $i, $tok, $type, $id_scan_state ) =
22211           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
22212             $id_scan_state, $max_token_index );
22213     }
22214
22215     sub scan_number {
22216         my $number;
22217         ( $i, $type, $number ) =
22218           scan_number_do( $input_line, $i, $rtoken_map, $type,
22219             $max_token_index );
22220         return $number;
22221     }
22222
22223     # a sub to warn if token found where term expected
22224     sub error_if_expecting_TERM {
22225         if ( $expecting == TERM ) {
22226             if ( $really_want_term{$last_nonblank_type} ) {
22227                 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
22228                     $rtoken_type, $input_line );
22229                 1;
22230             }
22231         }
22232     }
22233
22234     # a sub to warn if token found where operator expected
22235     sub error_if_expecting_OPERATOR {
22236         if ( $expecting == OPERATOR ) {
22237             my $thing = defined $_[0] ? $_[0] : $tok;
22238             unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
22239                 $rtoken_map, $rtoken_type, $input_line );
22240             if ( $i_tok == 0 ) {
22241                 interrupt_logfile();
22242                 warning("Missing ';' above?\n");
22243                 resume_logfile();
22244             }
22245             1;
22246         }
22247     }
22248
22249     # ------------------------------------------------------------
22250     # end scanner interfaces
22251     # ------------------------------------------------------------
22252
22253     my %is_for_foreach;
22254     @_ = qw(for foreach);
22255     @is_for_foreach{@_} = (1) x scalar(@_);
22256
22257     my %is_my_our;
22258     @_ = qw(my our);
22259     @is_my_our{@_} = (1) x scalar(@_);
22260
22261     # These keywords may introduce blocks after parenthesized expressions,
22262     # in the form:
22263     # keyword ( .... ) { BLOCK }
22264     # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
22265     my %is_blocktype_with_paren;
22266     @_ = qw(if elsif unless while until for foreach switch case given when);
22267     @is_blocktype_with_paren{@_} = (1) x scalar(@_);
22268
22269     # ------------------------------------------------------------
22270     # begin hash of code for handling most token types
22271     # ------------------------------------------------------------
22272     my $tokenization_code = {
22273
22274         # no special code for these types yet, but syntax checks
22275         # could be added
22276
22277 ##      '!'   => undef,
22278 ##      '!='  => undef,
22279 ##      '!~'  => undef,
22280 ##      '%='  => undef,
22281 ##      '&&=' => undef,
22282 ##      '&='  => undef,
22283 ##      '+='  => undef,
22284 ##      '-='  => undef,
22285 ##      '..'  => undef,
22286 ##      '..'  => undef,
22287 ##      '...' => undef,
22288 ##      '.='  => undef,
22289 ##      '<<=' => undef,
22290 ##      '<='  => undef,
22291 ##      '<=>' => undef,
22292 ##      '<>'  => undef,
22293 ##      '='   => undef,
22294 ##      '=='  => undef,
22295 ##      '=~'  => undef,
22296 ##      '>='  => undef,
22297 ##      '>>'  => undef,
22298 ##      '>>=' => undef,
22299 ##      '\\'  => undef,
22300 ##      '^='  => undef,
22301 ##      '|='  => undef,
22302 ##      '||=' => undef,
22303 ##      '//=' => undef,
22304 ##      '~'   => undef,
22305 ##      '~~'  => undef,
22306 ##      '!~~'  => undef,
22307
22308         '>' => sub {
22309             error_if_expecting_TERM()
22310               if ( $expecting == TERM );
22311         },
22312         '|' => sub {
22313             error_if_expecting_TERM()
22314               if ( $expecting == TERM );
22315         },
22316         '$' => sub {
22317
22318             # start looking for a scalar
22319             error_if_expecting_OPERATOR("Scalar")
22320               if ( $expecting == OPERATOR );
22321             scan_identifier();
22322
22323             if ( $identifier eq '$^W' ) {
22324                 $tokenizer_self->{_saw_perl_dash_w} = 1;
22325             }
22326
22327             # Check for indentifier in indirect object slot
22328             # (vorboard.pl, sort.t).  Something like:
22329             #   /^(print|printf|sort|exec|system)$/
22330             if (
22331                 $is_indirect_object_taker{$last_nonblank_token}
22332
22333                 || ( ( $last_nonblank_token eq '(' )
22334                     && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
22335                 || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
22336               )
22337             {
22338                 $type = 'Z';
22339             }
22340         },
22341         '(' => sub {
22342
22343             ++$paren_depth;
22344             $paren_semicolon_count[$paren_depth] = 0;
22345             if ($want_paren) {
22346                 $container_type = $want_paren;
22347                 $want_paren     = "";
22348             }
22349             else {
22350                 $container_type = $last_nonblank_token;
22351
22352                 # We can check for a syntax error here of unexpected '(',
22353                 # but this is going to get messy...
22354                 if (
22355                     $expecting == OPERATOR
22356
22357                     # be sure this is not a method call of the form
22358                     # &method(...), $method->(..), &{method}(...),
22359                     # $ref[2](list) is ok & short for $ref[2]->(list)
22360                     # NOTE: at present, braces in something like &{ xxx }
22361                     # are not marked as a block, we might have a method call
22362                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
22363
22364                   )
22365                 {
22366
22367                     # ref: camel 3 p 703.
22368                     if ( $last_last_nonblank_token eq 'do' ) {
22369                         complain(
22370 "do SUBROUTINE is deprecated; consider & or -> notation\n"
22371                         );
22372                     }
22373                     else {
22374
22375                         # if this is an empty list, (), then it is not an
22376                         # error; for example, we might have a constant pi and
22377                         # invoke it with pi() or just pi;
22378                         my ( $next_nonblank_token, $i_next ) =
22379                           find_next_nonblank_token( $i, $rtokens,
22380                             $max_token_index );
22381                         if ( $next_nonblank_token ne ')' ) {
22382                             my $hint;
22383                             error_if_expecting_OPERATOR('(');
22384
22385                             if ( $last_nonblank_type eq 'C' ) {
22386                                 $hint =
22387                                   "$last_nonblank_token has a void prototype\n";
22388                             }
22389                             elsif ( $last_nonblank_type eq 'i' ) {
22390                                 if (   $i_tok > 0
22391                                     && $last_nonblank_token =~ /^\$/ )
22392                                 {
22393                                     $hint =
22394 "Do you mean '$last_nonblank_token->(' ?\n";
22395                                 }
22396                             }
22397                             if ($hint) {
22398                                 interrupt_logfile();
22399                                 warning($hint);
22400                                 resume_logfile();
22401                             }
22402                         } ## end if ( $next_nonblank_token...
22403                     } ## end else [ if ( $last_last_nonblank_token...
22404                 } ## end if ( $expecting == OPERATOR...
22405             }
22406             $paren_type[$paren_depth] = $container_type;
22407             ( $type_sequence, $indent_flag ) =
22408               increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22409
22410             # propagate types down through nested parens
22411             # for example: the second paren in 'if ((' would be structural
22412             # since the first is.
22413
22414             if ( $last_nonblank_token eq '(' ) {
22415                 $type = $last_nonblank_type;
22416             }
22417
22418             #     We exclude parens as structural after a ',' because it
22419             #     causes subtle problems with continuation indentation for
22420             #     something like this, where the first 'or' will not get
22421             #     indented.
22422             #
22423             #         assert(
22424             #             __LINE__,
22425             #             ( not defined $check )
22426             #               or ref $check
22427             #               or $check eq "new"
22428             #               or $check eq "old",
22429             #         );
22430             #
22431             #     Likewise, we exclude parens where a statement can start
22432             #     because of problems with continuation indentation, like
22433             #     these:
22434             #
22435             #         ($firstline =~ /^#\!.*perl/)
22436             #         and (print $File::Find::name, "\n")
22437             #           and (return 1);
22438             #
22439             #         (ref($usage_fref) =~ /CODE/)
22440             #         ? &$usage_fref
22441             #           : (&blast_usage, &blast_params, &blast_general_params);
22442
22443             else {
22444                 $type = '{';
22445             }
22446
22447             if ( $last_nonblank_type eq ')' ) {
22448                 warning(
22449                     "Syntax error? found token '$last_nonblank_type' then '('\n"
22450                 );
22451             }
22452             $paren_structural_type[$paren_depth] = $type;
22453
22454         },
22455         ')' => sub {
22456             ( $type_sequence, $indent_flag ) =
22457               decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22458
22459             if ( $paren_structural_type[$paren_depth] eq '{' ) {
22460                 $type = '}';
22461             }
22462
22463             $container_type = $paren_type[$paren_depth];
22464
22465             #    /^(for|foreach)$/
22466             if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
22467                 my $num_sc = $paren_semicolon_count[$paren_depth];
22468                 if ( $num_sc > 0 && $num_sc != 2 ) {
22469                     warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
22470                 }
22471             }
22472
22473             if ( $paren_depth > 0 ) { $paren_depth-- }
22474         },
22475         ',' => sub {
22476             if ( $last_nonblank_type eq ',' ) {
22477                 complain("Repeated ','s \n");
22478             }
22479
22480             # patch for operator_expected: note if we are in the list (use.t)
22481             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
22482 ##                FIXME: need to move this elsewhere, perhaps check after a '('
22483 ##                elsif ($last_nonblank_token eq '(') {
22484 ##                    warning("Leading ','s illegal in some versions of perl\n");
22485 ##                }
22486         },
22487         ';' => sub {
22488             $context        = UNKNOWN_CONTEXT;
22489             $statement_type = '';
22490
22491             #    /^(for|foreach)$/
22492             if ( $is_for_foreach{ $paren_type[$paren_depth] } )
22493             {    # mark ; in for loop
22494
22495                 # Be careful: we do not want a semicolon such as the
22496                 # following to be included:
22497                 #
22498                 #    for (sort {strcoll($a,$b);} keys %investments) {
22499
22500                 if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
22501                     && $square_bracket_depth ==
22502                     $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
22503                 {
22504
22505                     $type = 'f';
22506                     $paren_semicolon_count[$paren_depth]++;
22507                 }
22508             }
22509
22510         },
22511         '"' => sub {
22512             error_if_expecting_OPERATOR("String")
22513               if ( $expecting == OPERATOR );
22514             $in_quote                = 1;
22515             $type                    = 'Q';
22516             $allowed_quote_modifiers = "";
22517         },
22518         "'" => sub {
22519             error_if_expecting_OPERATOR("String")
22520               if ( $expecting == OPERATOR );
22521             $in_quote                = 1;
22522             $type                    = 'Q';
22523             $allowed_quote_modifiers = "";
22524         },
22525         '`' => sub {
22526             error_if_expecting_OPERATOR("String")
22527               if ( $expecting == OPERATOR );
22528             $in_quote                = 1;
22529             $type                    = 'Q';
22530             $allowed_quote_modifiers = "";
22531         },
22532         '/' => sub {
22533             my $is_pattern;
22534
22535             if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
22536                 my $msg;
22537                 ( $is_pattern, $msg ) =
22538                   guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
22539                     $max_token_index );
22540
22541                 if ($msg) {
22542                     write_diagnostics("DIVIDE:$msg\n");
22543                     write_logfile_entry($msg);
22544                 }
22545             }
22546             else { $is_pattern = ( $expecting == TERM ) }
22547
22548             if ($is_pattern) {
22549                 $in_quote                = 1;
22550                 $type                    = 'Q';
22551                 $allowed_quote_modifiers = '[cgimosxp]';
22552             }
22553             else {    # not a pattern; check for a /= token
22554
22555                 if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
22556                     $i++;
22557                     $tok  = '/=';
22558                     $type = $tok;
22559                 }
22560
22561               #DEBUG - collecting info on what tokens follow a divide
22562               # for development of guessing algorithm
22563               #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
22564               #    #write_diagnostics( "DIVIDE? $input_line\n" );
22565               #}
22566             }
22567         },
22568         '{' => sub {
22569
22570             # if we just saw a ')', we will label this block with
22571             # its type.  We need to do this to allow sub
22572             # code_block_type to determine if this brace starts a
22573             # code block or anonymous hash.  (The type of a paren
22574             # pair is the preceding token, such as 'if', 'else',
22575             # etc).
22576             $container_type = "";
22577
22578             # ATTRS: for a '{' following an attribute list, reset
22579             # things to look like we just saw the sub name
22580             if ( $statement_type =~ /^sub/ ) {
22581                 $last_nonblank_token = $statement_type;
22582                 $last_nonblank_type  = 'i';
22583                 $statement_type      = "";
22584             }
22585
22586             # patch for SWITCH/CASE: hide these keywords from an immediately
22587             # following opening brace
22588             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
22589                 && $statement_type eq $last_nonblank_token )
22590             {
22591                 $last_nonblank_token = ";";
22592             }
22593
22594             elsif ( $last_nonblank_token eq ')' ) {
22595                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
22596
22597                 # defensive move in case of a nesting error (pbug.t)
22598                 # in which this ')' had no previous '('
22599                 # this nesting error will have been caught
22600                 if ( !defined($last_nonblank_token) ) {
22601                     $last_nonblank_token = 'if';
22602                 }
22603
22604                 # check for syntax error here;
22605                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
22606                     my $list = join( ' ', sort keys %is_blocktype_with_paren );
22607                     warning(
22608                         "syntax error at ') {', didn't see one of: $list\n");
22609                 }
22610             }
22611
22612             # patch for paren-less for/foreach glitch, part 2.
22613             # see note below under 'qw'
22614             elsif ($last_nonblank_token eq 'qw'
22615                 && $is_for_foreach{$want_paren} )
22616             {
22617                 $last_nonblank_token = $want_paren;
22618                 if ( $last_last_nonblank_token eq $want_paren ) {
22619                     warning(
22620 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
22621                     );
22622
22623                 }
22624                 $want_paren = "";
22625             }
22626
22627             # now identify which of the three possible types of
22628             # curly braces we have: hash index container, anonymous
22629             # hash reference, or code block.
22630
22631             # non-structural (hash index) curly brace pair
22632             # get marked 'L' and 'R'
22633             if ( is_non_structural_brace() ) {
22634                 $type = 'L';
22635
22636                 # patch for SWITCH/CASE:
22637                 # allow paren-less identifier after 'when'
22638                 # if the brace is preceded by a space
22639                 if (   $statement_type eq 'when'
22640                     && $last_nonblank_type      eq 'i'
22641                     && $last_last_nonblank_type eq 'k'
22642                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
22643                 {
22644                     $type       = '{';
22645                     $block_type = $statement_type;
22646                 }
22647             }
22648
22649             # code and anonymous hash have the same type, '{', but are
22650             # distinguished by 'block_type',
22651             # which will be blank for an anonymous hash
22652             else {
22653
22654                 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
22655                     $max_token_index );
22656
22657                 # patch to promote bareword type to function taking block
22658                 if (   $block_type
22659                     && $last_nonblank_type eq 'w'
22660                     && $last_nonblank_i >= 0 )
22661                 {
22662                     if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
22663                         $routput_token_type->[$last_nonblank_i] = 'G';
22664                     }
22665                 }
22666
22667                 # patch for SWITCH/CASE: if we find a stray opening block brace
22668                 # where we might accept a 'case' or 'when' block, then take it
22669                 if (   $statement_type eq 'case'
22670                     || $statement_type eq 'when' )
22671                 {
22672                     if ( !$block_type || $block_type eq '}' ) {
22673                         $block_type = $statement_type;
22674                     }
22675                 }
22676             }
22677             $brace_type[ ++$brace_depth ] = $block_type;
22678             $brace_package[$brace_depth] = $current_package;
22679             ( $type_sequence, $indent_flag ) =
22680               increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22681             $brace_structural_type[$brace_depth] = $type;
22682             $brace_context[$brace_depth]         = $context;
22683             $brace_statement_type[$brace_depth]  = $statement_type;
22684         },
22685         '}' => sub {
22686             $block_type = $brace_type[$brace_depth];
22687             if ($block_type) { $statement_type = '' }
22688             if ( defined( $brace_package[$brace_depth] ) ) {
22689                 $current_package = $brace_package[$brace_depth];
22690             }
22691
22692             # can happen on brace error (caught elsewhere)
22693             else {
22694             }
22695             ( $type_sequence, $indent_flag ) =
22696               decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22697
22698             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
22699                 $type = 'R';
22700             }
22701
22702             # propagate type information for 'do' and 'eval' blocks.
22703             # This is necessary to enable us to know if an operator
22704             # or term is expected next
22705             if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
22706                 $tok = $brace_type[$brace_depth];
22707             }
22708
22709             $context        = $brace_context[$brace_depth];
22710             $statement_type = $brace_statement_type[$brace_depth];
22711             if ( $brace_depth > 0 ) { $brace_depth--; }
22712         },
22713         '&' => sub {    # maybe sub call? start looking
22714
22715             # We have to check for sub call unless we are sure we
22716             # are expecting an operator.  This example from s2p
22717             # got mistaken as a q operator in an early version:
22718             #   print BODY &q(<<'EOT');
22719             if ( $expecting != OPERATOR ) {
22720                 scan_identifier();
22721             }
22722             else {
22723             }
22724         },
22725         '<' => sub {    # angle operator or less than?
22726
22727             if ( $expecting != OPERATOR ) {
22728                 ( $i, $type ) =
22729                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
22730                     $expecting, $max_token_index );
22731
22732                 if ( $type eq '<' && $expecting == TERM ) {
22733                     error_if_expecting_TERM();
22734                     interrupt_logfile();
22735                     warning("Unterminated <> operator?\n");
22736                     resume_logfile();
22737                 }
22738             }
22739             else {
22740             }
22741         },
22742         '?' => sub {    # ?: conditional or starting pattern?
22743
22744             my $is_pattern;
22745
22746             if ( $expecting == UNKNOWN ) {
22747
22748                 my $msg;
22749                 ( $is_pattern, $msg ) =
22750                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
22751                     $max_token_index );
22752
22753                 if ($msg) { write_logfile_entry($msg) }
22754             }
22755             else { $is_pattern = ( $expecting == TERM ) }
22756
22757             if ($is_pattern) {
22758                 $in_quote                = 1;
22759                 $type                    = 'Q';
22760                 $allowed_quote_modifiers = '[cgimosxp]';
22761             }
22762             else {
22763                 ( $type_sequence, $indent_flag ) =
22764                   increase_nesting_depth( QUESTION_COLON,
22765                     $$rtoken_map[$i_tok] );
22766             }
22767         },
22768         '*' => sub {    # typeglob, or multiply?
22769
22770             if ( $expecting == TERM ) {
22771                 scan_identifier();
22772             }
22773             else {
22774
22775                 if ( $$rtokens[ $i + 1 ] eq '=' ) {
22776                     $tok  = '*=';
22777                     $type = $tok;
22778                     $i++;
22779                 }
22780                 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
22781                     $tok  = '**';
22782                     $type = $tok;
22783                     $i++;
22784                     if ( $$rtokens[ $i + 1 ] eq '=' ) {
22785                         $tok  = '**=';
22786                         $type = $tok;
22787                         $i++;
22788                     }
22789                 }
22790             }
22791         },
22792         '.' => sub {    # what kind of . ?
22793
22794             if ( $expecting != OPERATOR ) {
22795                 scan_number();
22796                 if ( $type eq '.' ) {
22797                     error_if_expecting_TERM()
22798                       if ( $expecting == TERM );
22799                 }
22800             }
22801             else {
22802             }
22803         },
22804         ':' => sub {
22805
22806             # if this is the first nonblank character, call it a label
22807             # since perl seems to just swallow it
22808             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
22809                 $type = 'J';
22810             }
22811
22812             # ATTRS: check for a ':' which introduces an attribute list
22813             # (this might eventually get its own token type)
22814             elsif ( $statement_type =~ /^sub/ ) {
22815                 $type              = 'A';
22816                 $in_attribute_list = 1;
22817             }
22818
22819             # check for scalar attribute, such as
22820             # my $foo : shared = 1;
22821             elsif ($is_my_our{$statement_type}
22822                 && $current_depth[QUESTION_COLON] == 0 )
22823             {
22824                 $type              = 'A';
22825                 $in_attribute_list = 1;
22826             }
22827
22828             # otherwise, it should be part of a ?/: operator
22829             else {
22830                 ( $type_sequence, $indent_flag ) =
22831                   decrease_nesting_depth( QUESTION_COLON,
22832                     $$rtoken_map[$i_tok] );
22833                 if ( $last_nonblank_token eq '?' ) {
22834                     warning("Syntax error near ? :\n");
22835                 }
22836             }
22837         },
22838         '+' => sub {    # what kind of plus?
22839
22840             if ( $expecting == TERM ) {
22841                 my $number = scan_number();
22842
22843                 # unary plus is safest assumption if not a number
22844                 if ( !defined($number) ) { $type = 'p'; }
22845             }
22846             elsif ( $expecting == OPERATOR ) {
22847             }
22848             else {
22849                 if ( $next_type eq 'w' ) { $type = 'p' }
22850             }
22851         },
22852         '@' => sub {
22853
22854             error_if_expecting_OPERATOR("Array")
22855               if ( $expecting == OPERATOR );
22856             scan_identifier();
22857         },
22858         '%' => sub {    # hash or modulo?
22859
22860             # first guess is hash if no following blank
22861             if ( $expecting == UNKNOWN ) {
22862                 if ( $next_type ne 'b' ) { $expecting = TERM }
22863             }
22864             if ( $expecting == TERM ) {
22865                 scan_identifier();
22866             }
22867         },
22868         '[' => sub {
22869             $square_bracket_type[ ++$square_bracket_depth ] =
22870               $last_nonblank_token;
22871             ( $type_sequence, $indent_flag ) =
22872               increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
22873
22874             # It may seem odd, but structural square brackets have
22875             # type '{' and '}'.  This simplifies the indentation logic.
22876             if ( !is_non_structural_brace() ) {
22877                 $type = '{';
22878             }
22879             $square_bracket_structural_type[$square_bracket_depth] = $type;
22880         },
22881         ']' => sub {
22882             ( $type_sequence, $indent_flag ) =
22883               decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
22884
22885             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
22886             {
22887                 $type = '}';
22888             }
22889             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
22890         },
22891         '-' => sub {    # what kind of minus?
22892
22893             if ( ( $expecting != OPERATOR )
22894                 && $is_file_test_operator{$next_tok} )
22895             {
22896                 my ( $next_nonblank_token, $i_next ) =
22897                   find_next_nonblank_token( $i + 1, $rtokens,
22898                     $max_token_index );
22899
22900                 # check for a quoted word like "-w=>xx";
22901                 # it is sufficient to just check for a following '='
22902                 if ( $next_nonblank_token eq '=' ) {
22903                     $type = 'm';
22904                 }
22905                 else {
22906                     $i++;
22907                     $tok .= $next_tok;
22908                     $type = 'F';
22909                 }
22910             }
22911             elsif ( $expecting == TERM ) {
22912                 my $number = scan_number();
22913
22914                 # maybe part of bareword token? unary is safest
22915                 if ( !defined($number) ) { $type = 'm'; }
22916
22917             }
22918             elsif ( $expecting == OPERATOR ) {
22919             }
22920             else {
22921
22922                 if ( $next_type eq 'w' ) {
22923                     $type = 'm';
22924                 }
22925             }
22926         },
22927
22928         '^' => sub {
22929
22930             # check for special variables like ${^WARNING_BITS}
22931             if ( $expecting == TERM ) {
22932
22933                 # FIXME: this should work but will not catch errors
22934                 # because we also have to be sure that previous token is
22935                 # a type character ($,@,%).
22936                 if ( $last_nonblank_token eq '{'
22937                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
22938                 {
22939
22940                     if ( $next_tok eq 'W' ) {
22941                         $tokenizer_self->{_saw_perl_dash_w} = 1;
22942                     }
22943                     $tok  = $tok . $next_tok;
22944                     $i    = $i + 1;
22945                     $type = 'w';
22946                 }
22947
22948                 else {
22949                     unless ( error_if_expecting_TERM() ) {
22950
22951                         # Something like this is valid but strange:
22952                         # undef ^I;
22953                         complain("The '^' seems unusual here\n");
22954                     }
22955                 }
22956             }
22957         },
22958
22959         '::' => sub {    # probably a sub call
22960             scan_bare_identifier();
22961         },
22962         '<<' => sub {    # maybe a here-doc?
22963             return
22964               unless ( $i < $max_token_index )
22965               ;          # here-doc not possible if end of line
22966
22967             if ( $expecting != OPERATOR ) {
22968                 my ( $found_target, $here_doc_target, $here_quote_character,
22969                     $saw_error );
22970                 (
22971                     $found_target, $here_doc_target, $here_quote_character, $i,
22972                     $saw_error
22973                   )
22974                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
22975                     $max_token_index );
22976
22977                 if ($found_target) {
22978                     push @{$rhere_target_list},
22979                       [ $here_doc_target, $here_quote_character ];
22980                     $type = 'h';
22981                     if ( length($here_doc_target) > 80 ) {
22982                         my $truncated = substr( $here_doc_target, 0, 80 );
22983                         complain("Long here-target: '$truncated' ...\n");
22984                     }
22985                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
22986                         complain(
22987                             "Unconventional here-target: '$here_doc_target'\n"
22988                         );
22989                     }
22990                 }
22991                 elsif ( $expecting == TERM ) {
22992                     unless ($saw_error) {
22993
22994                         # shouldn't happen..
22995                         warning("Program bug; didn't find here doc target\n");
22996                         report_definite_bug();
22997                     }
22998                 }
22999             }
23000             else {
23001             }
23002         },
23003         '->' => sub {
23004
23005             # if -> points to a bare word, we must scan for an identifier,
23006             # otherwise something like ->y would look like the y operator
23007             scan_identifier();
23008         },
23009
23010         # type = 'pp' for pre-increment, '++' for post-increment
23011         '++' => sub {
23012             if ( $expecting == TERM ) { $type = 'pp' }
23013             elsif ( $expecting == UNKNOWN ) {
23014                 my ( $next_nonblank_token, $i_next ) =
23015                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23016                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
23017             }
23018         },
23019
23020         '=>' => sub {
23021             if ( $last_nonblank_type eq $tok ) {
23022                 complain("Repeated '=>'s \n");
23023             }
23024
23025             # patch for operator_expected: note if we are in the list (use.t)
23026             # TODO: make version numbers a new token type
23027             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
23028         },
23029
23030         # type = 'mm' for pre-decrement, '--' for post-decrement
23031         '--' => sub {
23032
23033             if ( $expecting == TERM ) { $type = 'mm' }
23034             elsif ( $expecting == UNKNOWN ) {
23035                 my ( $next_nonblank_token, $i_next ) =
23036                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23037                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
23038             }
23039         },
23040
23041         '&&' => sub {
23042             error_if_expecting_TERM()
23043               if ( $expecting == TERM );
23044         },
23045
23046         '||' => sub {
23047             error_if_expecting_TERM()
23048               if ( $expecting == TERM );
23049         },
23050
23051         '//' => sub {
23052             error_if_expecting_TERM()
23053               if ( $expecting == TERM );
23054         },
23055     };
23056
23057     # ------------------------------------------------------------
23058     # end hash of code for handling individual token types
23059     # ------------------------------------------------------------
23060
23061     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
23062
23063     # These block types terminate statements and do not need a trailing
23064     # semicolon
23065     # patched for SWITCH/CASE/
23066     my %is_zero_continuation_block_type;
23067     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
23068       if elsif else unless while until for foreach switch case given when);
23069     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
23070
23071     my %is_not_zero_continuation_block_type;
23072     @_ = qw(sort grep map do eval);
23073     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
23074
23075     my %is_logical_container;
23076     @_ = qw(if elsif unless while and or err not && !  || for foreach);
23077     @is_logical_container{@_} = (1) x scalar(@_);
23078
23079     my %is_binary_type;
23080     @_ = qw(|| &&);
23081     @is_binary_type{@_} = (1) x scalar(@_);
23082
23083     my %is_binary_keyword;
23084     @_ = qw(and or err eq ne cmp);
23085     @is_binary_keyword{@_} = (1) x scalar(@_);
23086
23087     # 'L' is token for opening { at hash key
23088     my %is_opening_type;
23089     @_ = qw" L { ( [ ";
23090     @is_opening_type{@_} = (1) x scalar(@_);
23091
23092     # 'R' is token for closing } at hash key
23093     my %is_closing_type;
23094     @_ = qw" R } ) ] ";
23095     @is_closing_type{@_} = (1) x scalar(@_);
23096
23097     my %is_redo_last_next_goto;
23098     @_ = qw(redo last next goto);
23099     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
23100
23101     my %is_use_require;
23102     @_ = qw(use require);
23103     @is_use_require{@_} = (1) x scalar(@_);
23104
23105     my %is_sub_package;
23106     @_ = qw(sub package);
23107     @is_sub_package{@_} = (1) x scalar(@_);
23108
23109     # This hash holds the hash key in $tokenizer_self for these keywords:
23110     my %is_format_END_DATA = (
23111         'format'   => '_in_format',
23112         '__END__'  => '_in_end',
23113         '__DATA__' => '_in_data',
23114     );
23115
23116     # ref: camel 3 p 147,
23117     # but perl may accept undocumented flags
23118     # perl 5.10 adds 'p' (preserve)
23119     my %quote_modifiers = (
23120         's'  => '[cegimosxp]',
23121         'y'  => '[cds]',
23122         'tr' => '[cds]',
23123         'm'  => '[cgimosxp]',
23124         'qr' => '[imosxp]',
23125         'q'  => "",
23126         'qq' => "",
23127         'qw' => "",
23128         'qx' => "",
23129     );
23130
23131     # table showing how many quoted things to look for after quote operator..
23132     # s, y, tr have 2 (pattern and replacement)
23133     # others have 1 (pattern only)
23134     my %quote_items = (
23135         's'  => 2,
23136         'y'  => 2,
23137         'tr' => 2,
23138         'm'  => 1,
23139         'qr' => 1,
23140         'q'  => 1,
23141         'qq' => 1,
23142         'qw' => 1,
23143         'qx' => 1,
23144     );
23145
23146     sub tokenize_this_line {
23147
23148   # This routine breaks a line of perl code into tokens which are of use in
23149   # indentation and reformatting.  One of my goals has been to define tokens
23150   # such that a newline may be inserted between any pair of tokens without
23151   # changing or invalidating the program. This version comes close to this,
23152   # although there are necessarily a few exceptions which must be caught by
23153   # the formatter.  Many of these involve the treatment of bare words.
23154   #
23155   # The tokens and their types are returned in arrays.  See previous
23156   # routine for their names.
23157   #
23158   # See also the array "valid_token_types" in the BEGIN section for an
23159   # up-to-date list.
23160   #
23161   # To simplify things, token types are either a single character, or they
23162   # are identical to the tokens themselves.
23163   #
23164   # As a debugging aid, the -D flag creates a file containing a side-by-side
23165   # comparison of the input string and its tokenization for each line of a file.
23166   # This is an invaluable debugging aid.
23167   #
23168   # In addition to tokens, and some associated quantities, the tokenizer
23169   # also returns flags indication any special line types.  These include
23170   # quotes, here_docs, formats.
23171   #
23172   # -----------------------------------------------------------------------
23173   #
23174   # How to add NEW_TOKENS:
23175   #
23176   # New token types will undoubtedly be needed in the future both to keep up
23177   # with changes in perl and to help adapt the tokenizer to other applications.
23178   #
23179   # Here are some notes on the minimal steps.  I wrote these notes while
23180   # adding the 'v' token type for v-strings, which are things like version
23181   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
23182   # can use your editor to search for the string "NEW_TOKENS" to find the
23183   # appropriate sections to change):
23184   #
23185   # *. Try to talk somebody else into doing it!  If not, ..
23186   #
23187   # *. Make a backup of your current version in case things don't work out!
23188   #
23189   # *. Think of a new, unused character for the token type, and add to
23190   # the array @valid_token_types in the BEGIN section of this package.
23191   # For example, I used 'v' for v-strings.
23192   #
23193   # *. Implement coding to recognize the $type of the token in this routine.
23194   # This is the hardest part, and is best done by immitating or modifying
23195   # some of the existing coding.  For example, to recognize v-strings, I
23196   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
23197   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
23198   #
23199   # *. Update sub operator_expected.  This update is critically important but
23200   # the coding is trivial.  Look at the comments in that routine for help.
23201   # For v-strings, which should behave like numbers, I just added 'v' to the
23202   # regex used to handle numbers and strings (types 'n' and 'Q').
23203   #
23204   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
23205   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
23206   # skip this step and take the default at first, then adjust later to get
23207   # desired results.  For adding type 'v', I looked at sub bond_strength and
23208   # saw that number type 'n' was using default strengths, so I didn't do
23209   # anything.  I may tune it up someday if I don't like the way line
23210   # breaks with v-strings look.
23211   #
23212   # *. Implement a 'whitespace' rule in sub set_white_space_flag in
23213   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
23214   # and saw that type 'n' used spaces on both sides, so I just added 'v'
23215   # to the array @spaces_both_sides.
23216   #
23217   # *. Update HtmlWriter package so that users can colorize the token as
23218   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
23219   # that package.  For v-strings, I initially chose to use a default color
23220   # equal to the default for numbers, but it might be nice to change that
23221   # eventually.
23222   #
23223   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
23224   #
23225   # *. Run lots and lots of debug tests.  Start with special files designed
23226   # to test the new token type.  Run with the -D flag to create a .DEBUG
23227   # file which shows the tokenization.  When these work ok, test as many old
23228   # scripts as possible.  Start with all of the '.t' files in the 'test'
23229   # directory of the distribution file.  Compare .tdy output with previous
23230   # version and updated version to see the differences.  Then include as
23231   # many more files as possible. My own technique has been to collect a huge
23232   # number of perl scripts (thousands!) into one directory and run perltidy
23233   # *, then run diff between the output of the previous version and the
23234   # current version.
23235   #
23236   # *. For another example, search for the smartmatch operator '~~'
23237   # with your editor to see where updates were made for it.
23238   #
23239   # -----------------------------------------------------------------------
23240
23241         my $line_of_tokens = shift;
23242         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
23243
23244         # patch while coding change is underway
23245         # make callers private data to allow access
23246         # $tokenizer_self = $caller_tokenizer_self;
23247
23248         # extract line number for use in error messages
23249         $input_line_number = $line_of_tokens->{_line_number};
23250
23251         # reinitialize for multi-line quote
23252         $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
23253
23254         # check for pod documentation
23255         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
23256
23257             # must not be in multi-line quote
23258             # and must not be in an eqn
23259             if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
23260             {
23261                 $tokenizer_self->{_in_pod} = 1;
23262                 return;
23263             }
23264         }
23265
23266         $input_line = $untrimmed_input_line;
23267
23268         chomp $input_line;
23269
23270         # trim start of this line unless we are continuing a quoted line
23271         # do not trim end because we might end in a quote (test: deken4.pl)
23272         # Perl::Tidy::Formatter will delete needless trailing blanks
23273         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
23274             $input_line =~ s/^\s*//;    # trim left end
23275         }
23276
23277         # update the copy of the line for use in error messages
23278         # This must be exactly what we give the pre_tokenizer
23279         $tokenizer_self->{_line_text} = $input_line;
23280
23281         # re-initialize for the main loop
23282         $routput_token_list     = [];    # stack of output token indexes
23283         $routput_token_type     = [];    # token types
23284         $routput_block_type     = [];    # types of code block
23285         $routput_container_type = [];    # paren types, such as if, elsif, ..
23286         $routput_type_sequence  = [];    # nesting sequential number
23287
23288         $rhere_target_list = [];
23289
23290         $tok             = $last_nonblank_token;
23291         $type            = $last_nonblank_type;
23292         $prototype       = $last_nonblank_prototype;
23293         $last_nonblank_i = -1;
23294         $block_type      = $last_nonblank_block_type;
23295         $container_type  = $last_nonblank_container_type;
23296         $type_sequence   = $last_nonblank_type_sequence;
23297         $indent_flag     = 0;
23298         $peeked_ahead    = 0;
23299
23300         # tokenization is done in two stages..
23301         # stage 1 is a very simple pre-tokenization
23302         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
23303
23304         # a little optimization for a full-line comment
23305         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
23306             $max_tokens_wanted = 1    # no use tokenizing a comment
23307         }
23308
23309         # start by breaking the line into pre-tokens
23310         ( $rtokens, $rtoken_map, $rtoken_type ) =
23311           pre_tokenize( $input_line, $max_tokens_wanted );
23312
23313         $max_token_index = scalar(@$rtokens) - 1;
23314         push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
23315         push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
23316         push( @$rtoken_type, 'b', 'b', 'b' );
23317
23318         # initialize for main loop
23319         for $i ( 0 .. $max_token_index + 3 ) {
23320             $routput_token_type->[$i]     = "";
23321             $routput_block_type->[$i]     = "";
23322             $routput_container_type->[$i] = "";
23323             $routput_type_sequence->[$i]  = "";
23324             $routput_indent_flag->[$i]    = 0;
23325         }
23326         $i     = -1;
23327         $i_tok = -1;
23328
23329         # ------------------------------------------------------------
23330         # begin main tokenization loop
23331         # ------------------------------------------------------------
23332
23333         # we are looking at each pre-token of one line and combining them
23334         # into tokens
23335         while ( ++$i <= $max_token_index ) {
23336
23337             if ($in_quote) {    # continue looking for end of a quote
23338                 $type = $quote_type;
23339
23340                 unless ( @{$routput_token_list} )
23341                 {               # initialize if continuation line
23342                     push( @{$routput_token_list}, $i );
23343                     $routput_token_type->[$i] = $type;
23344
23345                 }
23346                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
23347
23348                 # scan for the end of the quote or pattern
23349                 (
23350                     $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23351                     $quoted_string_1, $quoted_string_2
23352                   )
23353                   = do_quote(
23354                     $i,               $in_quote,    $quote_character,
23355                     $quote_pos,       $quote_depth, $quoted_string_1,
23356                     $quoted_string_2, $rtokens,     $rtoken_map,
23357                     $max_token_index
23358                   );
23359
23360                 # all done if we didn't find it
23361                 last if ($in_quote);
23362
23363                 # save pattern and replacement text for rescanning
23364                 my $qs1 = $quoted_string_1;
23365                 my $qs2 = $quoted_string_2;
23366
23367                 # re-initialize for next search
23368                 $quote_character = '';
23369                 $quote_pos       = 0;
23370                 $quote_type      = 'Q';
23371                 $quoted_string_1 = "";
23372                 $quoted_string_2 = "";
23373                 last if ( ++$i > $max_token_index );
23374
23375                 # look for any modifiers
23376                 if ($allowed_quote_modifiers) {
23377
23378                     # check for exact quote modifiers
23379                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
23380                         my $str = $$rtokens[$i];
23381                         my $saw_modifier_e;
23382                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
23383                             my $pos = pos($str);
23384                             my $char = substr( $str, $pos - 1, 1 );
23385                             $saw_modifier_e ||= ( $char eq 'e' );
23386                         }
23387
23388                         # For an 'e' quote modifier we must scan the replacement
23389                         # text for here-doc targets.
23390                         if ($saw_modifier_e) {
23391
23392                             my $rht = scan_replacement_text($qs1);
23393
23394                             # Change type from 'Q' to 'h' for quotes with
23395                             # here-doc targets so that the formatter (see sub
23396                             # print_line_of_tokens) will not make any line
23397                             # breaks after this point.
23398                             if ($rht) {
23399                                 push @{$rhere_target_list}, @{$rht};
23400                                 $type = 'h';
23401                                 if ( $i_tok < 0 ) {
23402                                     my $ilast = $routput_token_list->[-1];
23403                                     $routput_token_type->[$ilast] = $type;
23404                                 }
23405                             }
23406                         }
23407
23408                         if ( defined( pos($str) ) ) {
23409
23410                             # matched
23411                             if ( pos($str) == length($str) ) {
23412                                 last if ( ++$i > $max_token_index );
23413                             }
23414
23415                             # Looks like a joined quote modifier
23416                             # and keyword, maybe something like
23417                             # s/xxx/yyy/gefor @k=...
23418                             # Example is "galgen.pl".  Would have to split
23419                             # the word and insert a new token in the
23420                             # pre-token list.  This is so rare that I haven't
23421                             # done it.  Will just issue a warning citation.
23422
23423                             # This error might also be triggered if my quote
23424                             # modifier characters are incomplete
23425                             else {
23426                                 warning(<<EOM);
23427
23428 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
23429 Please put a space between quote modifiers and trailing keywords.
23430 EOM
23431
23432                            # print "token $$rtokens[$i]\n";
23433                            # my $num = length($str) - pos($str);
23434                            # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
23435                            # print "continuing with new token $$rtokens[$i]\n";
23436
23437                                 # skipping past this token does least damage
23438                                 last if ( ++$i > $max_token_index );
23439                             }
23440                         }
23441                         else {
23442
23443                             # example file: rokicki4.pl
23444                             # This error might also be triggered if my quote
23445                             # modifier characters are incomplete
23446                             write_logfile_entry(
23447 "Note: found word $str at quote modifier location\n"
23448                             );
23449                         }
23450                     }
23451
23452                     # re-initialize
23453                     $allowed_quote_modifiers = "";
23454                 }
23455             }
23456
23457             unless ( $tok =~ /^\s*$/ ) {
23458
23459                 # try to catch some common errors
23460                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
23461
23462                     if ( $last_nonblank_token eq 'eq' ) {
23463                         complain("Should 'eq' be '==' here ?\n");
23464                     }
23465                     elsif ( $last_nonblank_token eq 'ne' ) {
23466                         complain("Should 'ne' be '!=' here ?\n");
23467                     }
23468                 }
23469
23470                 $last_last_nonblank_token      = $last_nonblank_token;
23471                 $last_last_nonblank_type       = $last_nonblank_type;
23472                 $last_last_nonblank_block_type = $last_nonblank_block_type;
23473                 $last_last_nonblank_container_type =
23474                   $last_nonblank_container_type;
23475                 $last_last_nonblank_type_sequence =
23476                   $last_nonblank_type_sequence;
23477                 $last_nonblank_token          = $tok;
23478                 $last_nonblank_type           = $type;
23479                 $last_nonblank_prototype      = $prototype;
23480                 $last_nonblank_block_type     = $block_type;
23481                 $last_nonblank_container_type = $container_type;
23482                 $last_nonblank_type_sequence  = $type_sequence;
23483                 $last_nonblank_i              = $i_tok;
23484             }
23485
23486             # store previous token type
23487             if ( $i_tok >= 0 ) {
23488                 $routput_token_type->[$i_tok]     = $type;
23489                 $routput_block_type->[$i_tok]     = $block_type;
23490                 $routput_container_type->[$i_tok] = $container_type;
23491                 $routput_type_sequence->[$i_tok]  = $type_sequence;
23492                 $routput_indent_flag->[$i_tok]    = $indent_flag;
23493             }
23494             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
23495             my $pre_type = $$rtoken_type[$i];    # and type
23496             $tok  = $pre_tok;
23497             $type = $pre_type;                   # to be modified as necessary
23498             $block_type = "";    # blank for all tokens except code block braces
23499             $container_type = "";    # blank for all tokens except some parens
23500             $type_sequence  = "";    # blank for all tokens except ?/:
23501             $indent_flag    = 0;
23502             $prototype = "";    # blank for all tokens except user defined subs
23503             $i_tok     = $i;
23504
23505             # this pre-token will start an output token
23506             push( @{$routput_token_list}, $i_tok );
23507
23508             # continue gathering identifier if necessary
23509             # but do not start on blanks and comments
23510             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
23511
23512                 if ( $id_scan_state =~ /^(sub|package)/ ) {
23513                     scan_id();
23514                 }
23515                 else {
23516                     scan_identifier();
23517                 }
23518
23519                 last if ($id_scan_state);
23520                 next if ( ( $i > 0 ) || $type );
23521
23522                 # didn't find any token; start over
23523                 $type = $pre_type;
23524                 $tok  = $pre_tok;
23525             }
23526
23527             # handle whitespace tokens..
23528             next if ( $type eq 'b' );
23529             my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
23530             my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
23531
23532             # Build larger tokens where possible, since we are not in a quote.
23533             #
23534             # First try to assemble digraphs.  The following tokens are
23535             # excluded and handled specially:
23536             # '/=' is excluded because the / might start a pattern.
23537             # 'x=' is excluded since it might be $x=, with $ on previous line
23538             # '**' and *= might be typeglobs of punctuation variables
23539             # I have allowed tokens starting with <, such as <=,
23540             # because I don't think these could be valid angle operators.
23541             # test file: storrs4.pl
23542             my $test_tok   = $tok . $$rtokens[ $i + 1 ];
23543             my $combine_ok = $is_digraph{$test_tok};
23544
23545             # check for special cases which cannot be combined
23546             if ($combine_ok) {
23547
23548                 # '//' must be defined_or operator if an operator is expected.
23549                 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
23550                 # could be migrated here for clarity
23551                 if ( $test_tok eq '//' ) {
23552                     my $next_type = $$rtokens[ $i + 1 ];
23553                     my $expecting =
23554                       operator_expected( $prev_type, $tok, $next_type );
23555                     $combine_ok = 0 unless ( $expecting == OPERATOR );
23556                 }
23557             }
23558
23559             if (
23560                 $combine_ok
23561                 && ( $test_tok ne '/=' )    # might be pattern
23562                 && ( $test_tok ne 'x=' )    # might be $x
23563                 && ( $test_tok ne '**' )    # typeglob?
23564                 && ( $test_tok ne '*=' )    # typeglob?
23565               )
23566             {
23567                 $tok = $test_tok;
23568                 $i++;
23569
23570                 # Now try to assemble trigraphs.  Note that all possible
23571                 # perl trigraphs can be constructed by appending a character
23572                 # to a digraph.
23573                 $test_tok = $tok . $$rtokens[ $i + 1 ];
23574
23575                 if ( $is_trigraph{$test_tok} ) {
23576                     $tok = $test_tok;
23577                     $i++;
23578                 }
23579             }
23580
23581             $type      = $tok;
23582             $next_tok  = $$rtokens[ $i + 1 ];
23583             $next_type = $$rtoken_type[ $i + 1 ];
23584
23585             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
23586                 local $" = ')(';
23587                 my @debug_list = (
23588                     $last_nonblank_token,      $tok,
23589                     $next_tok,                 $brace_depth,
23590                     $brace_type[$brace_depth], $paren_depth,
23591                     $paren_type[$paren_depth]
23592                 );
23593                 print "TOKENIZE:(@debug_list)\n";
23594             };
23595
23596             # turn off attribute list on first non-blank, non-bareword
23597             if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
23598
23599             ###############################################################
23600             # We have the next token, $tok.
23601             # Now we have to examine this token and decide what it is
23602             # and define its $type
23603             #
23604             # section 1: bare words
23605             ###############################################################
23606
23607             if ( $pre_type eq 'w' ) {
23608                 $expecting = operator_expected( $prev_type, $tok, $next_type );
23609                 my ( $next_nonblank_token, $i_next ) =
23610                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
23611
23612                 # ATTRS: handle sub and variable attributes
23613                 if ($in_attribute_list) {
23614
23615                     # treat bare word followed by open paren like qw(
23616                     if ( $next_nonblank_token eq '(' ) {
23617                         $in_quote                = $quote_items{'q'};
23618                         $allowed_quote_modifiers = $quote_modifiers{'q'};
23619                         $type                    = 'q';
23620                         $quote_type              = 'q';
23621                         next;
23622                     }
23623
23624                     # handle bareword not followed by open paren
23625                     else {
23626                         $type = 'w';
23627                         next;
23628                     }
23629                 }
23630
23631                 # quote a word followed by => operator
23632                 if ( $next_nonblank_token eq '=' ) {
23633
23634                     if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
23635                         if ( $is_constant{$current_package}{$tok} ) {
23636                             $type = 'C';
23637                         }
23638                         elsif ( $is_user_function{$current_package}{$tok} ) {
23639                             $type = 'U';
23640                             $prototype =
23641                               $user_function_prototype{$current_package}{$tok};
23642                         }
23643                         elsif ( $tok =~ /^v\d+$/ ) {
23644                             $type = 'v';
23645                             report_v_string($tok);
23646                         }
23647                         else { $type = 'w' }
23648
23649                         next;
23650                     }
23651                 }
23652
23653      # quote a bare word within braces..like xxx->{s}; note that we
23654      # must be sure this is not a structural brace, to avoid
23655      # mistaking {s} in the following for a quoted bare word:
23656      #     for(@[){s}bla}BLA}
23657      # Also treat q in something like var{-q} as a bare word, not qoute operator
23658                 ##if (   ( $last_nonblank_type eq 'L' )
23659                 ##    && ( $next_nonblank_token eq '}' ) )
23660                 if (
23661                     $next_nonblank_token eq '}'
23662                     && (
23663                         $last_nonblank_type eq 'L'
23664                         || (   $last_nonblank_type eq 'm'
23665                             && $last_last_nonblank_type eq 'L' )
23666                     )
23667                   )
23668                 {
23669                     $type = 'w';
23670                     next;
23671                 }
23672
23673                 # a bare word immediately followed by :: is not a keyword;
23674                 # use $tok_kw when testing for keywords to avoid a mistake
23675                 my $tok_kw = $tok;
23676                 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
23677                 {
23678                     $tok_kw .= '::';
23679                 }
23680
23681                 # handle operator x (now we know it isn't $x=)
23682                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
23683                     if ( $tok eq 'x' ) {
23684
23685                         if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
23686                             $tok  = 'x=';
23687                             $type = $tok;
23688                             $i++;
23689                         }
23690                         else {
23691                             $type = 'x';
23692                         }
23693                     }
23694
23695                     # FIXME: Patch: mark something like x4 as an integer for now
23696                     # It gets fixed downstream.  This is easier than
23697                     # splitting the pretoken.
23698                     else {
23699                         $type = 'n';
23700                     }
23701                 }
23702
23703                 elsif ( ( $tok eq 'strict' )
23704                     and ( $last_nonblank_token eq 'use' ) )
23705                 {
23706                     $tokenizer_self->{_saw_use_strict} = 1;
23707                     scan_bare_identifier();
23708                 }
23709
23710                 elsif ( ( $tok eq 'warnings' )
23711                     and ( $last_nonblank_token eq 'use' ) )
23712                 {
23713                     $tokenizer_self->{_saw_perl_dash_w} = 1;
23714
23715                     # scan as identifier, so that we pick up something like:
23716                     # use warnings::register
23717                     scan_bare_identifier();
23718                 }
23719
23720                 elsif (
23721                        $tok eq 'AutoLoader'
23722                     && $tokenizer_self->{_look_for_autoloader}
23723                     && (
23724                         $last_nonblank_token eq 'use'
23725
23726                         # these regexes are from AutoSplit.pm, which we want
23727                         # to mimic
23728                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
23729                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
23730                     )
23731                   )
23732                 {
23733                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
23734                     $tokenizer_self->{_saw_autoloader}      = 1;
23735                     $tokenizer_self->{_look_for_autoloader} = 0;
23736                     scan_bare_identifier();
23737                 }
23738
23739                 elsif (
23740                        $tok eq 'SelfLoader'
23741                     && $tokenizer_self->{_look_for_selfloader}
23742                     && (   $last_nonblank_token eq 'use'
23743                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
23744                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
23745                   )
23746                 {
23747                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
23748                     $tokenizer_self->{_saw_selfloader}      = 1;
23749                     $tokenizer_self->{_look_for_selfloader} = 0;
23750                     scan_bare_identifier();
23751                 }
23752
23753                 elsif ( ( $tok eq 'constant' )
23754                     and ( $last_nonblank_token eq 'use' ) )
23755                 {
23756                     scan_bare_identifier();
23757                     my ( $next_nonblank_token, $i_next ) =
23758                       find_next_nonblank_token( $i, $rtokens,
23759                         $max_token_index );
23760
23761                     if ($next_nonblank_token) {
23762
23763                         if ( $is_keyword{$next_nonblank_token} ) {
23764                             warning(
23765 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
23766                             );
23767                         }
23768
23769                         # FIXME: could check for error in which next token is
23770                         # not a word (number, punctuation, ..)
23771                         else {
23772                             $is_constant{$current_package}
23773                               {$next_nonblank_token} = 1;
23774                         }
23775                     }
23776                 }
23777
23778                 # various quote operators
23779                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
23780                     if ( $expecting == OPERATOR ) {
23781
23782                         # patch for paren-less for/foreach glitch, part 1
23783                         # perl will accept this construct as valid:
23784                         #
23785                         #    foreach my $key qw\Uno Due Tres Quadro\ {
23786                         #        print "Set $key\n";
23787                         #    }
23788                         unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
23789                         {
23790                             error_if_expecting_OPERATOR();
23791                         }
23792                     }
23793                     $in_quote                = $quote_items{$tok};
23794                     $allowed_quote_modifiers = $quote_modifiers{$tok};
23795
23796                    # All quote types are 'Q' except possibly qw quotes.
23797                    # qw quotes are special in that they may generally be trimmed
23798                    # of leading and trailing whitespace.  So they are given a
23799                    # separate type, 'q', unless requested otherwise.
23800                     $type =
23801                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
23802                       ? 'q'
23803                       : 'Q';
23804                     $quote_type = $type;
23805                 }
23806
23807                 # check for a statement label
23808                 elsif (
23809                        ( $next_nonblank_token eq ':' )
23810                     && ( $$rtokens[ $i_next + 1 ] ne ':' )
23811                     && ( $i_next <= $max_token_index )    # colon on same line
23812                     && label_ok()
23813                   )
23814                 {
23815                     if ( $tok !~ /[A-Z]/ ) {
23816                         push @{ $tokenizer_self->{_rlower_case_labels_at} },
23817                           $input_line_number;
23818                     }
23819                     $type = 'J';
23820                     $tok .= ':';
23821                     $i = $i_next;
23822                     next;
23823                 }
23824
23825                 #      'sub' || 'package'
23826                 elsif ( $is_sub_package{$tok_kw} ) {
23827                     error_if_expecting_OPERATOR()
23828                       if ( $expecting == OPERATOR );
23829                     scan_id();
23830                 }
23831
23832                 # Note on token types for format, __DATA__, __END__:
23833                 # It simplifies things to give these type ';', so that when we
23834                 # start rescanning we will be expecting a token of type TERM.
23835                 # We will switch to type 'k' before outputting the tokens.
23836                 elsif ( $is_format_END_DATA{$tok_kw} ) {
23837                     $type = ';';    # make tokenizer look for TERM next
23838                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
23839                     last;
23840                 }
23841
23842                 elsif ( $is_keyword{$tok_kw} ) {
23843                     $type = 'k';
23844
23845                     # Since for and foreach may not be followed immediately
23846                     # by an opening paren, we have to remember which keyword
23847                     # is associated with the next '('
23848                     if ( $is_for_foreach{$tok} ) {
23849                         if ( new_statement_ok() ) {
23850                             $want_paren = $tok;
23851                         }
23852                     }
23853
23854                     # recognize 'use' statements, which are special
23855                     elsif ( $is_use_require{$tok} ) {
23856                         $statement_type = $tok;
23857                         error_if_expecting_OPERATOR()
23858                           if ( $expecting == OPERATOR );
23859                     }
23860
23861                     # remember my and our to check for trailing ": shared"
23862                     elsif ( $is_my_our{$tok} ) {
23863                         $statement_type = $tok;
23864                     }
23865
23866                     # Check for misplaced 'elsif' and 'else', but allow isolated
23867                     # else or elsif blocks to be formatted.  This is indicated
23868                     # by a last noblank token of ';'
23869                     elsif ( $tok eq 'elsif' ) {
23870                         if (   $last_nonblank_token ne ';'
23871                             && $last_nonblank_block_type !~
23872                             /^(if|elsif|unless)$/ )
23873                         {
23874                             warning(
23875 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
23876                             );
23877                         }
23878                     }
23879                     elsif ( $tok eq 'else' ) {
23880
23881                         # patched for SWITCH/CASE
23882                         if (   $last_nonblank_token ne ';'
23883                             && $last_nonblank_block_type !~
23884                             /^(if|elsif|unless|case|when)$/ )
23885                         {
23886                             warning(
23887 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
23888                             );
23889                         }
23890                     }
23891                     elsif ( $tok eq 'continue' ) {
23892                         if (   $last_nonblank_token ne ';'
23893                             && $last_nonblank_block_type !~
23894                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
23895                         {
23896
23897                             # note: ';' '{' and '}' in list above
23898                             # because continues can follow bare blocks;
23899                             # ':' is labeled block
23900                             #
23901                             ############################################
23902                             # NOTE: This check has been deactivated because
23903                             # continue has an alternative usage for given/when
23904                             # blocks in perl 5.10
23905                             ## warning("'$tok' should follow a block\n");
23906                             ############################################
23907                         }
23908                     }
23909
23910                     # patch for SWITCH/CASE if 'case' and 'when are
23911                     # treated as keywords.
23912                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
23913                         $statement_type = $tok;    # next '{' is block
23914                     }
23915
23916                     # indent trailing if/unless/while/until
23917                     # outdenting will be handled by later indentation loop
23918                     if (   $tok =~ /^(if|unless|while|until)$/
23919                         && $next_nonblank_token ne '(' )
23920                     {
23921                         $indent_flag = 1;
23922                     }
23923                 }
23924
23925                 # check for inline label following
23926                 #         /^(redo|last|next|goto)$/
23927                 elsif (( $last_nonblank_type eq 'k' )
23928                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
23929                 {
23930                     $type = 'j';
23931                     next;
23932                 }
23933
23934                 # something else --
23935                 else {
23936
23937                     scan_bare_identifier();
23938                     if ( $type eq 'w' ) {
23939
23940                         if ( $expecting == OPERATOR ) {
23941
23942                             # don't complain about possible indirect object
23943                             # notation.
23944                             # For example:
23945                             #   package main;
23946                             #   sub new($) { ... }
23947                             #   $b = new A::;  # calls A::new
23948                             #   $c = new A;    # same thing but suspicious
23949                             # This will call A::new but we have a 'new' in
23950                             # main:: which looks like a constant.
23951                             #
23952                             if ( $last_nonblank_type eq 'C' ) {
23953                                 if ( $tok !~ /::$/ ) {
23954                                     complain(<<EOM);
23955 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
23956        Maybe indirectet object notation?
23957 EOM
23958                                 }
23959                             }
23960                             else {
23961                                 error_if_expecting_OPERATOR("bareword");
23962                             }
23963                         }
23964
23965                         # mark bare words immediately followed by a paren as
23966                         # functions
23967                         $next_tok = $$rtokens[ $i + 1 ];
23968                         if ( $next_tok eq '(' ) {
23969                             $type = 'U';
23970                         }
23971
23972                         # underscore after file test operator is file handle
23973                         if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
23974                             $type = 'Z';
23975                         }
23976
23977                         # patch for SWITCH/CASE if 'case' and 'when are
23978                         # not treated as keywords:
23979                         if (
23980                             (
23981                                    $tok eq 'case'
23982                                 && $brace_type[$brace_depth] eq 'switch'
23983                             )
23984                             || (   $tok eq 'when'
23985                                 && $brace_type[$brace_depth] eq 'given' )
23986                           )
23987                         {
23988                             $statement_type = $tok;    # next '{' is block
23989                             $type = 'k';    # for keyword syntax coloring
23990                         }
23991
23992                         # patch for SWITCH/CASE if switch and given not keywords
23993                         # Switch is not a perl 5 keyword, but we will gamble
23994                         # and mark switch followed by paren as a keyword.  This
23995                         # is only necessary to get html syntax coloring nice,
23996                         # and does not commit this as being a switch/case.
23997                         if ( $next_nonblank_token eq '('
23998                             && ( $tok eq 'switch' || $tok eq 'given' ) )
23999                         {
24000                             $type = 'k';    # for keyword syntax coloring
24001                         }
24002                     }
24003                 }
24004             }
24005
24006             ###############################################################
24007             # section 2: strings of digits
24008             ###############################################################
24009             elsif ( $pre_type eq 'd' ) {
24010                 $expecting = operator_expected( $prev_type, $tok, $next_type );
24011                 error_if_expecting_OPERATOR("Number")
24012                   if ( $expecting == OPERATOR );
24013                 my $number = scan_number();
24014                 if ( !defined($number) ) {
24015
24016                     # shouldn't happen - we should always get a number
24017                     warning("non-number beginning with digit--program bug\n");
24018                     report_definite_bug();
24019                 }
24020             }
24021
24022             ###############################################################
24023             # section 3: all other tokens
24024             ###############################################################
24025
24026             else {
24027                 last if ( $tok eq '#' );
24028                 my $code = $tokenization_code->{$tok};
24029                 if ($code) {
24030                     $expecting =
24031                       operator_expected( $prev_type, $tok, $next_type );
24032                     $code->();
24033                     redo if $in_quote;
24034                 }
24035             }
24036         }
24037
24038         # -----------------------------
24039         # end of main tokenization loop
24040         # -----------------------------
24041
24042         if ( $i_tok >= 0 ) {
24043             $routput_token_type->[$i_tok]     = $type;
24044             $routput_block_type->[$i_tok]     = $block_type;
24045             $routput_container_type->[$i_tok] = $container_type;
24046             $routput_type_sequence->[$i_tok]  = $type_sequence;
24047             $routput_indent_flag->[$i_tok]    = $indent_flag;
24048         }
24049
24050         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
24051             $last_last_nonblank_token          = $last_nonblank_token;
24052             $last_last_nonblank_type           = $last_nonblank_type;
24053             $last_last_nonblank_block_type     = $last_nonblank_block_type;
24054             $last_last_nonblank_container_type = $last_nonblank_container_type;
24055             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
24056             $last_nonblank_token               = $tok;
24057             $last_nonblank_type                = $type;
24058             $last_nonblank_block_type          = $block_type;
24059             $last_nonblank_container_type      = $container_type;
24060             $last_nonblank_type_sequence       = $type_sequence;
24061             $last_nonblank_prototype           = $prototype;
24062         }
24063
24064         # reset indentation level if necessary at a sub or package
24065         # in an attempt to recover from a nesting error
24066         if ( $level_in_tokenizer < 0 ) {
24067             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
24068                 reset_indentation_level(0);
24069                 brace_warning("resetting level to 0 at $1 $2\n");
24070             }
24071         }
24072
24073         # all done tokenizing this line ...
24074         # now prepare the final list of tokens and types
24075
24076         my @token_type     = ();   # stack of output token types
24077         my @block_type     = ();   # stack of output code block types
24078         my @container_type = ();   # stack of output code container types
24079         my @type_sequence  = ();   # stack of output type sequence numbers
24080         my @tokens         = ();   # output tokens
24081         my @levels         = ();   # structural brace levels of output tokens
24082         my @slevels        = ();   # secondary nesting levels of output tokens
24083         my @nesting_tokens = ();   # string of tokens leading to this depth
24084         my @nesting_types  = ();   # string of token types leading to this depth
24085         my @nesting_blocks = ();   # string of block types leading to this depth
24086         my @nesting_lists  = ();   # string of list types leading to this depth
24087         my @ci_string = ();  # string needed to compute continuation indentation
24088         my @container_environment = ();    # BLOCK or LIST
24089         my $container_environment = '';
24090         my $im                    = -1;    # previous $i value
24091         my $num;
24092         my $ci_string_sum = ones_count($ci_string_in_tokenizer);
24093
24094 # Computing Token Indentation
24095 #
24096 #     The final section of the tokenizer forms tokens and also computes
24097 #     parameters needed to find indentation.  It is much easier to do it
24098 #     in the tokenizer than elsewhere.  Here is a brief description of how
24099 #     indentation is computed.  Perl::Tidy computes indentation as the sum
24100 #     of 2 terms:
24101 #
24102 #     (1) structural indentation, such as if/else/elsif blocks
24103 #     (2) continuation indentation, such as long parameter call lists.
24104 #
24105 #     These are occasionally called primary and secondary indentation.
24106 #
24107 #     Structural indentation is introduced by tokens of type '{', although
24108 #     the actual tokens might be '{', '(', or '['.  Structural indentation
24109 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
24110 #     is 4 characters if the standard indentation scheme is used.
24111 #
24112 #     Continuation indentation is introduced whenever a line at BLOCK level
24113 #     is broken before its termination.  Default continuation indentation
24114 #     is 2 characters in the standard indentation scheme.
24115 #
24116 #     Both types of indentation may be nested arbitrarily deep and
24117 #     interlaced.  The distinction between the two is somewhat arbitrary.
24118 #
24119 #     For each token, we will define two variables which would apply if
24120 #     the current statement were broken just before that token, so that
24121 #     that token started a new line:
24122 #
24123 #     $level = the structural indentation level,
24124 #     $ci_level = the continuation indentation level
24125 #
24126 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
24127 #     assuming defaults.  However, in some special cases it is customary
24128 #     to modify $ci_level from this strict value.
24129 #
24130 #     The total structural indentation is easy to compute by adding and
24131 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
24132 #     running value of this variable is $level_in_tokenizer.
24133 #
24134 #     The total continuation is much more difficult to compute, and requires
24135 #     several variables.  These veriables are:
24136 #
24137 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
24138 #       each indentation level, if there are intervening open secondary
24139 #       structures just prior to that level.
24140 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
24141 #       if the last token at that level is "continued", meaning that it
24142 #       is not the first token of an expression.
24143 #     $nesting_block_string = a string of 1's and 0's indicating, for each
24144 #       indentation level, if the level is of type BLOCK or not.
24145 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
24146 #     $nesting_list_string = a string of 1's and 0's indicating, for each
24147 #       indentation level, if it is is appropriate for list formatting.
24148 #       If so, continuation indentation is used to indent long list items.
24149 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
24150 #     @{$rslevel_stack} = a stack of total nesting depths at each
24151 #       structural indentation level, where "total nesting depth" means
24152 #       the nesting depth that would occur if every nesting token -- '{', '[',
24153 #       and '(' -- , regardless of context, is used to compute a nesting
24154 #       depth.
24155
24156         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
24157         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
24158
24159         my ( $ci_string_i, $level_i, $nesting_block_string_i,
24160             $nesting_list_string_i, $nesting_token_string_i,
24161             $nesting_type_string_i, );
24162
24163         foreach $i ( @{$routput_token_list} )
24164         {    # scan the list of pre-tokens indexes
24165
24166             # self-checking for valid token types
24167             my $type                    = $routput_token_type->[$i];
24168             my $forced_indentation_flag = $routput_indent_flag->[$i];
24169
24170             # See if we should undo the $forced_indentation_flag.
24171             # Forced indentation after 'if', 'unless', 'while' and 'until'
24172             # expressions without trailing parens is optional and doesn't
24173             # always look good.  It is usually okay for a trailing logical
24174             # expression, but if the expression is a function call, code block,
24175             # or some kind of list it puts in an unwanted extra indentation
24176             # level which is hard to remove.
24177             #
24178             # Example where extra indentation looks ok:
24179             # return 1
24180             #   if $det_a < 0 and $det_b > 0
24181             #       or $det_a > 0 and $det_b < 0;
24182             #
24183             # Example where extra indentation is not needed because
24184             # the eval brace also provides indentation:
24185             # print "not " if defined eval {
24186             #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
24187             # };
24188             #
24189             # The following rule works fairly well:
24190             #   Undo the flag if the end of this line, or start of the next
24191             #   line, is an opening container token or a comma.
24192             # This almost always works, but if not after another pass it will
24193             # be stable.
24194             if ( $forced_indentation_flag && $type eq 'k' ) {
24195                 my $ixlast  = -1;
24196                 my $ilast   = $routput_token_list->[$ixlast];
24197                 my $toklast = $routput_token_type->[$ilast];
24198                 if ( $toklast eq '#' ) {
24199                     $ixlast--;
24200                     $ilast   = $routput_token_list->[$ixlast];
24201                     $toklast = $routput_token_type->[$ilast];
24202                 }
24203                 if ( $toklast eq 'b' ) {
24204                     $ixlast--;
24205                     $ilast   = $routput_token_list->[$ixlast];
24206                     $toklast = $routput_token_type->[$ilast];
24207                 }
24208                 if ( $toklast =~ /^[\{,]$/ ) {
24209                     $forced_indentation_flag = 0;
24210                 }
24211                 else {
24212                     ( $toklast, my $i_next ) =
24213                       find_next_nonblank_token( $max_token_index, $rtokens,
24214                         $max_token_index );
24215                     if ( $toklast =~ /^[\{,]$/ ) {
24216                         $forced_indentation_flag = 0;
24217                     }
24218                 }
24219             }
24220
24221             # if we are already in an indented if, see if we should outdent
24222             if ($indented_if_level) {
24223
24224                 # don't try to nest trailing if's - shouldn't happen
24225                 if ( $type eq 'k' ) {
24226                     $forced_indentation_flag = 0;
24227                 }
24228
24229                 # check for the normal case - outdenting at next ';'
24230                 elsif ( $type eq ';' ) {
24231                     if ( $level_in_tokenizer == $indented_if_level ) {
24232                         $forced_indentation_flag = -1;
24233                         $indented_if_level       = 0;
24234                     }
24235                 }
24236
24237                 # handle case of missing semicolon
24238                 elsif ( $type eq '}' ) {
24239                     if ( $level_in_tokenizer == $indented_if_level ) {
24240                         $indented_if_level = 0;
24241
24242                         # TBD: This could be a subroutine call
24243                         $level_in_tokenizer--;
24244                         if ( @{$rslevel_stack} > 1 ) {
24245                             pop( @{$rslevel_stack} );
24246                         }
24247                         if ( length($nesting_block_string) > 1 )
24248                         {    # true for valid script
24249                             chop $nesting_block_string;
24250                             chop $nesting_list_string;
24251                         }
24252
24253                     }
24254                 }
24255             }
24256
24257             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
24258             $level_i = $level_in_tokenizer;
24259
24260             # This can happen by running perltidy on non-scripts
24261             # although it could also be bug introduced by programming change.
24262             # Perl silently accepts a 032 (^Z) and takes it as the end
24263             if ( !$is_valid_token_type{$type} ) {
24264                 my $val = ord($type);
24265                 warning(
24266                     "unexpected character decimal $val ($type) in script\n");
24267                 $tokenizer_self->{_in_error} = 1;
24268             }
24269
24270             # ----------------------------------------------------------------
24271             # TOKEN TYPE PATCHES
24272             #  output __END__, __DATA__, and format as type 'k' instead of ';'
24273             # to make html colors correct, etc.
24274             my $fix_type = $type;
24275             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
24276
24277             # output anonymous 'sub' as keyword
24278             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
24279
24280             # -----------------------------------------------------------------
24281
24282             $nesting_token_string_i = $nesting_token_string;
24283             $nesting_type_string_i  = $nesting_type_string;
24284             $nesting_block_string_i = $nesting_block_string;
24285             $nesting_list_string_i  = $nesting_list_string;
24286
24287             # set primary indentation levels based on structural braces
24288             # Note: these are set so that the leading braces have a HIGHER
24289             # level than their CONTENTS, which is convenient for indentation
24290             # Also, define continuation indentation for each token.
24291             if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
24292             {
24293
24294                 # use environment before updating
24295                 $container_environment =
24296                     $nesting_block_flag ? 'BLOCK'
24297                   : $nesting_list_flag  ? 'LIST'
24298                   :                       "";
24299
24300                 # if the difference between total nesting levels is not 1,
24301                 # there are intervening non-structural nesting types between
24302                 # this '{' and the previous unclosed '{'
24303                 my $intervening_secondary_structure = 0;
24304                 if ( @{$rslevel_stack} ) {
24305                     $intervening_secondary_structure =
24306                       $slevel_in_tokenizer - $rslevel_stack->[-1];
24307                 }
24308
24309      # Continuation Indentation
24310      #
24311      # Having tried setting continuation indentation both in the formatter and
24312      # in the tokenizer, I can say that setting it in the tokenizer is much,
24313      # much easier.  The formatter already has too much to do, and can't
24314      # make decisions on line breaks without knowing what 'ci' will be at
24315      # arbitrary locations.
24316      #
24317      # But a problem with setting the continuation indentation (ci) here
24318      # in the tokenizer is that we do not know where line breaks will actually
24319      # be.  As a result, we don't know if we should propagate continuation
24320      # indentation to higher levels of structure.
24321      #
24322      # For nesting of only structural indentation, we never need to do this.
24323      # For example, in a long if statement, like this
24324      #
24325      #   if ( !$output_block_type[$i]
24326      #     && ($in_statement_continuation) )
24327      #   {           <--outdented
24328      #       do_something();
24329      #   }
24330      #
24331      # the second line has ci but we do normally give the lines within the BLOCK
24332      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
24333      #
24334      # But consider something like this, where we have created a break after
24335      # an opening paren on line 1, and the paren is not (currently) a
24336      # structural indentation token:
24337      #
24338      # my $file = $menubar->Menubutton(
24339      #   qw/-text File -underline 0 -menuitems/ => [
24340      #       [
24341      #           Cascade    => '~View',
24342      #           -menuitems => [
24343      #           ...
24344      #
24345      # The second line has ci, so it would seem reasonable to propagate it
24346      # down, giving the third line 1 ci + 1 indentation.  This suggests the
24347      # following rule, which is currently used to propagating ci down: if there
24348      # are any non-structural opening parens (or brackets, or braces), before
24349      # an opening structural brace, then ci is propagated down, and otherwise
24350      # not.  The variable $intervening_secondary_structure contains this
24351      # information for the current token, and the string
24352      # "$ci_string_in_tokenizer" is a stack of previous values of this
24353      # variable.
24354
24355                 # save the current states
24356                 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
24357                 $level_in_tokenizer++;
24358
24359                 if ($forced_indentation_flag) {
24360
24361                     # break BEFORE '?' when there is forced indentation
24362                     if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
24363                     if ( $type eq 'k' ) {
24364                         $indented_if_level = $level_in_tokenizer;
24365                     }
24366                 }
24367
24368                 if ( $routput_block_type->[$i] ) {
24369                     $nesting_block_flag = 1;
24370                     $nesting_block_string .= '1';
24371                 }
24372                 else {
24373                     $nesting_block_flag = 0;
24374                     $nesting_block_string .= '0';
24375                 }
24376
24377                 # we will use continuation indentation within containers
24378                 # which are not blocks and not logical expressions
24379                 my $bit = 0;
24380                 if ( !$routput_block_type->[$i] ) {
24381
24382                     # propagate flag down at nested open parens
24383                     if ( $routput_container_type->[$i] eq '(' ) {
24384                         $bit = 1 if $nesting_list_flag;
24385                     }
24386
24387                   # use list continuation if not a logical grouping
24388                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
24389                     else {
24390                         $bit = 1
24391                           unless
24392                             $is_logical_container{ $routput_container_type->[$i]
24393                               };
24394                     }
24395                 }
24396                 $nesting_list_string .= $bit;
24397                 $nesting_list_flag = $bit;
24398
24399                 $ci_string_in_tokenizer .=
24400                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
24401                 $ci_string_sum = ones_count($ci_string_in_tokenizer);
24402                 $continuation_string_in_tokenizer .=
24403                   ( $in_statement_continuation > 0 ) ? '1' : '0';
24404
24405    #  Sometimes we want to give an opening brace continuation indentation,
24406    #  and sometimes not.  For code blocks, we don't do it, so that the leading
24407    #  '{' gets outdented, like this:
24408    #
24409    #   if ( !$output_block_type[$i]
24410    #     && ($in_statement_continuation) )
24411    #   {           <--outdented
24412    #
24413    #  For other types, we will give them continuation indentation.  For example,
24414    #  here is how a list looks with the opening paren indented:
24415    #
24416    #     @LoL =
24417    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
24418    #         [ "homer", "marge", "bart" ], );
24419    #
24420    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
24421
24422                 my $total_ci = $ci_string_sum;
24423                 if (
24424                     !$routput_block_type->[$i]    # patch: skip for BLOCK
24425                     && ($in_statement_continuation)
24426                     && !( $forced_indentation_flag && $type eq ':' )
24427                   )
24428                 {
24429                     $total_ci += $in_statement_continuation
24430                       unless ( $ci_string_in_tokenizer =~ /1$/ );
24431                 }
24432
24433                 $ci_string_i               = $total_ci;
24434                 $in_statement_continuation = 0;
24435             }
24436
24437             elsif ($type eq '}'
24438                 || $type eq 'R'
24439                 || $forced_indentation_flag < 0 )
24440             {
24441
24442                 # only a nesting error in the script would prevent popping here
24443                 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
24444
24445                 $level_i = --$level_in_tokenizer;
24446
24447                 # restore previous level values
24448                 if ( length($nesting_block_string) > 1 )
24449                 {    # true for valid script
24450                     chop $nesting_block_string;
24451                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
24452                     chop $nesting_list_string;
24453                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
24454
24455                     chop $ci_string_in_tokenizer;
24456                     $ci_string_sum = ones_count($ci_string_in_tokenizer);
24457
24458                     $in_statement_continuation =
24459                       chop $continuation_string_in_tokenizer;
24460
24461                     # zero continuation flag at terminal BLOCK '}' which
24462                     # ends a statement.
24463                     if ( $routput_block_type->[$i] ) {
24464
24465                         # ...These include non-anonymous subs
24466                         # note: could be sub ::abc { or sub 'abc
24467                         if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
24468
24469                          # note: older versions of perl require the /gc modifier
24470                          # here or else the \G does not work.
24471                             if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
24472                             {
24473                                 $in_statement_continuation = 0;
24474                             }
24475                         }
24476
24477 # ...and include all block types except user subs with
24478 # block prototypes and these: (sort|grep|map|do|eval)
24479 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
24480                         elsif (
24481                             $is_zero_continuation_block_type{
24482                                 $routput_block_type->[$i] } )
24483                         {
24484                             $in_statement_continuation = 0;
24485                         }
24486
24487                         # ..but these are not terminal types:
24488                         #     /^(sort|grep|map|do|eval)$/ )
24489                         elsif (
24490                             $is_not_zero_continuation_block_type{
24491                                 $routput_block_type->[$i] } )
24492                         {
24493                         }
24494
24495                         # ..and a block introduced by a label
24496                         # /^\w+\s*:$/gc ) {
24497                         elsif ( $routput_block_type->[$i] =~ /:$/ ) {
24498                             $in_statement_continuation = 0;
24499                         }
24500
24501                         # user function with block prototype
24502                         else {
24503                             $in_statement_continuation = 0;
24504                         }
24505                     }
24506
24507                     # If we are in a list, then
24508                     # we must set continuatoin indentation at the closing
24509                     # paren of something like this (paren after $check):
24510                     #     assert(
24511                     #         __LINE__,
24512                     #         ( not defined $check )
24513                     #           or ref $check
24514                     #           or $check eq "new"
24515                     #           or $check eq "old",
24516                     #     );
24517                     elsif ( $tok eq ')' ) {
24518                         $in_statement_continuation = 1
24519                           if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
24520                     }
24521
24522                     elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
24523                 }
24524
24525                 # use environment after updating
24526                 $container_environment =
24527                     $nesting_block_flag ? 'BLOCK'
24528                   : $nesting_list_flag  ? 'LIST'
24529                   :                       "";
24530                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
24531                 $nesting_block_string_i = $nesting_block_string;
24532                 $nesting_list_string_i  = $nesting_list_string;
24533             }
24534
24535             # not a structural indentation type..
24536             else {
24537
24538                 $container_environment =
24539                     $nesting_block_flag ? 'BLOCK'
24540                   : $nesting_list_flag  ? 'LIST'
24541                   :                       "";
24542
24543                 # zero the continuation indentation at certain tokens so
24544                 # that they will be at the same level as its container.  For
24545                 # commas, this simplifies the -lp indentation logic, which
24546                 # counts commas.  For ?: it makes them stand out.
24547                 if ($nesting_list_flag) {
24548                     if ( $type =~ /^[,\?\:]$/ ) {
24549                         $in_statement_continuation = 0;
24550                     }
24551                 }
24552
24553                 # be sure binary operators get continuation indentation
24554                 if (
24555                     $container_environment
24556                     && (   $type eq 'k' && $is_binary_keyword{$tok}
24557                         || $is_binary_type{$type} )
24558                   )
24559                 {
24560                     $in_statement_continuation = 1;
24561                 }
24562
24563                 # continuation indentation is sum of any open ci from previous
24564                 # levels plus the current level
24565                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
24566
24567                 # update continuation flag ...
24568                 # if this isn't a blank or comment..
24569                 if ( $type ne 'b' && $type ne '#' ) {
24570
24571                     # and we are in a BLOCK
24572                     if ($nesting_block_flag) {
24573
24574                         # the next token after a ';' and label starts a new stmt
24575                         if ( $type eq ';' || $type eq 'J' ) {
24576                             $in_statement_continuation = 0;
24577                         }
24578
24579                         # otherwise, we are continuing the current statement
24580                         else {
24581                             $in_statement_continuation = 1;
24582                         }
24583                     }
24584
24585                     # if we are not in a BLOCK..
24586                     else {
24587
24588                         # do not use continuation indentation if not list
24589                         # environment (could be within if/elsif clause)
24590                         if ( !$nesting_list_flag ) {
24591                             $in_statement_continuation = 0;
24592                         }
24593
24594                        # otherwise, the next token after a ',' starts a new term
24595                         elsif ( $type eq ',' ) {
24596                             $in_statement_continuation = 0;
24597                         }
24598
24599                         # otherwise, we are continuing the current term
24600                         else {
24601                             $in_statement_continuation = 1;
24602                         }
24603                     }
24604                 }
24605             }
24606
24607             if ( $level_in_tokenizer < 0 ) {
24608                 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
24609                     $tokenizer_self->{_saw_negative_indentation} = 1;
24610                     warning("Starting negative indentation\n");
24611                 }
24612             }
24613
24614             # set secondary nesting levels based on all continment token types
24615             # Note: these are set so that the nesting depth is the depth
24616             # of the PREVIOUS TOKEN, which is convenient for setting
24617             # the stength of token bonds
24618             my $slevel_i = $slevel_in_tokenizer;
24619
24620             #    /^[L\{\(\[]$/
24621             if ( $is_opening_type{$type} ) {
24622                 $slevel_in_tokenizer++;
24623                 $nesting_token_string .= $tok;
24624                 $nesting_type_string  .= $type;
24625             }
24626
24627             #       /^[R\}\)\]]$/
24628             elsif ( $is_closing_type{$type} ) {
24629                 $slevel_in_tokenizer--;
24630                 my $char = chop $nesting_token_string;
24631
24632                 if ( $char ne $matching_start_token{$tok} ) {
24633                     $nesting_token_string .= $char . $tok;
24634                     $nesting_type_string  .= $type;
24635                 }
24636                 else {
24637                     chop $nesting_type_string;
24638                 }
24639             }
24640
24641             push( @block_type,            $routput_block_type->[$i] );
24642             push( @ci_string,             $ci_string_i );
24643             push( @container_environment, $container_environment );
24644             push( @container_type,        $routput_container_type->[$i] );
24645             push( @levels,                $level_i );
24646             push( @nesting_tokens,        $nesting_token_string_i );
24647             push( @nesting_types,         $nesting_type_string_i );
24648             push( @slevels,               $slevel_i );
24649             push( @token_type,            $fix_type );
24650             push( @type_sequence,         $routput_type_sequence->[$i] );
24651             push( @nesting_blocks,        $nesting_block_string );
24652             push( @nesting_lists,         $nesting_list_string );
24653
24654             # now form the previous token
24655             if ( $im >= 0 ) {
24656                 $num =
24657                   $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
24658
24659                 if ( $num > 0 ) {
24660                     push( @tokens,
24661                         substr( $input_line, $$rtoken_map[$im], $num ) );
24662                 }
24663             }
24664             $im = $i;
24665         }
24666
24667         $num = length($input_line) - $$rtoken_map[$im];    # make the last token
24668         if ( $num > 0 ) {
24669             push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
24670         }
24671
24672         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
24673         $tokenizer_self->{_in_quote}          = $in_quote;
24674         $tokenizer_self->{_quote_target} =
24675           $in_quote ? matching_end_token($quote_character) : "";
24676         $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
24677
24678         $line_of_tokens->{_rtoken_type}            = \@token_type;
24679         $line_of_tokens->{_rtokens}                = \@tokens;
24680         $line_of_tokens->{_rblock_type}            = \@block_type;
24681         $line_of_tokens->{_rcontainer_type}        = \@container_type;
24682         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
24683         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
24684         $line_of_tokens->{_rlevels}                = \@levels;
24685         $line_of_tokens->{_rslevels}               = \@slevels;
24686         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
24687         $line_of_tokens->{_rci_levels}             = \@ci_string;
24688         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
24689
24690         return;
24691     }
24692 }    # end tokenize_this_line
24693
24694 #########i#############################################################
24695 # Tokenizer routines which assist in identifying token types
24696 #######################################################################
24697
24698 sub operator_expected {
24699
24700     # Many perl symbols have two or more meanings.  For example, '<<'
24701     # can be a shift operator or a here-doc operator.  The
24702     # interpretation of these symbols depends on the current state of
24703     # the tokenizer, which may either be expecting a term or an
24704     # operator.  For this example, a << would be a shift if an operator
24705     # is expected, and a here-doc if a term is expected.  This routine
24706     # is called to make this decision for any current token.  It returns
24707     # one of three possible values:
24708     #
24709     #     OPERATOR - operator expected (or at least, not a term)
24710     #     UNKNOWN  - can't tell
24711     #     TERM     - a term is expected (or at least, not an operator)
24712     #
24713     # The decision is based on what has been seen so far.  This
24714     # information is stored in the "$last_nonblank_type" and
24715     # "$last_nonblank_token" variables.  For example, if the
24716     # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
24717     # if $last_nonblank_type is 'n' (numeric), we are expecting an
24718     # OPERATOR.
24719     #
24720     # If a UNKNOWN is returned, the calling routine must guess. A major
24721     # goal of this tokenizer is to minimize the possiblity of returning
24722     # UNKNOWN, because a wrong guess can spoil the formatting of a
24723     # script.
24724     #
24725     # adding NEW_TOKENS: it is critically important that this routine be
24726     # updated to allow it to determine if an operator or term is to be
24727     # expected after the new token.  Doing this simply involves adding
24728     # the new token character to one of the regexes in this routine or
24729     # to one of the hash lists
24730     # that it uses, which are initialized in the BEGIN section.
24731     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
24732     # $statement_type
24733
24734     my ( $prev_type, $tok, $next_type ) = @_;
24735
24736     my $op_expected = UNKNOWN;
24737
24738 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
24739
24740 # Note: function prototype is available for token type 'U' for future
24741 # program development.  It contains the leading and trailing parens,
24742 # and no blanks.  It might be used to eliminate token type 'C', for
24743 # example (prototype = '()'). Thus:
24744 # if ($last_nonblank_type eq 'U') {
24745 #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
24746 # }
24747
24748     # A possible filehandle (or object) requires some care...
24749     if ( $last_nonblank_type eq 'Z' ) {
24750
24751         # angle.t
24752         if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
24753             $op_expected = UNKNOWN;
24754         }
24755
24756         # For possible file handle like "$a", Perl uses weird parsing rules.
24757         # For example:
24758         # print $a/2,"/hi";   - division
24759         # print $a / 2,"/hi"; - division
24760         # print $a/ 2,"/hi";  - division
24761         # print $a /2,"/hi";  - pattern (and error)!
24762         elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
24763             $op_expected = TERM;
24764         }
24765
24766         # Note when an operation is being done where a
24767         # filehandle might be expected, since a change in whitespace
24768         # could change the interpretation of the statement.
24769         else {
24770             if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
24771                 complain("operator in print statement not recommended\n");
24772                 $op_expected = OPERATOR;
24773             }
24774         }
24775     }
24776
24777     # handle something after 'do' and 'eval'
24778     elsif ( $is_block_operator{$last_nonblank_token} ) {
24779
24780         # something like $a = eval "expression";
24781         #                          ^
24782         if ( $last_nonblank_type eq 'k' ) {
24783             $op_expected = TERM;    # expression or list mode following keyword
24784         }
24785
24786         # something like $a = do { BLOCK } / 2;
24787         #                                  ^
24788         else {
24789             $op_expected = OPERATOR;    # block mode following }
24790         }
24791     }
24792
24793     # handle bare word..
24794     elsif ( $last_nonblank_type eq 'w' ) {
24795
24796         # unfortunately, we can't tell what type of token to expect next
24797         # after most bare words
24798         $op_expected = UNKNOWN;
24799     }
24800
24801     # operator, but not term possible after these types
24802     # Note: moved ')' from type to token because parens in list context
24803     # get marked as '{' '}' now.  This is a minor glitch in the following:
24804     #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
24805     #
24806     elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
24807         || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
24808     {
24809         $op_expected = OPERATOR;
24810
24811         # in a 'use' statement, numbers and v-strings are not true
24812         # numbers, so to avoid incorrect error messages, we will
24813         # mark them as unknown for now (use.t)
24814         # TODO: it would be much nicer to create a new token V for VERSION
24815         # number in a use statement.  Then this could be a check on type V
24816         # and related patches which change $statement_type for '=>'
24817         # and ',' could be removed.  Further, it would clean things up to
24818         # scan the 'use' statement with a separate subroutine.
24819         if (   ( $statement_type eq 'use' )
24820             && ( $last_nonblank_type =~ /^[nv]$/ ) )
24821         {
24822             $op_expected = UNKNOWN;
24823         }
24824     }
24825
24826     # no operator after many keywords, such as "die", "warn", etc
24827     elsif ( $expecting_term_token{$last_nonblank_token} ) {
24828
24829         # patch for dor.t (defined or).
24830         # perl functions which may be unary operators
24831         # TODO: This list is incomplete, and these should be put
24832         # into a hash.
24833         if (   $tok eq '/'
24834             && $next_type          eq '/'
24835             && $last_nonblank_type eq 'k'
24836             && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
24837         {
24838             $op_expected = OPERATOR;
24839         }
24840         else {
24841             $op_expected = TERM;
24842         }
24843     }
24844
24845     # no operator after things like + - **  (i.e., other operators)
24846     elsif ( $expecting_term_types{$last_nonblank_type} ) {
24847         $op_expected = TERM;
24848     }
24849
24850     # a few operators, like "time", have an empty prototype () and so
24851     # take no parameters but produce a value to operate on
24852     elsif ( $expecting_operator_token{$last_nonblank_token} ) {
24853         $op_expected = OPERATOR;
24854     }
24855
24856     # post-increment and decrement produce values to be operated on
24857     elsif ( $expecting_operator_types{$last_nonblank_type} ) {
24858         $op_expected = OPERATOR;
24859     }
24860
24861     # no value to operate on after sub block
24862     elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
24863
24864     # a right brace here indicates the end of a simple block.
24865     # all non-structural right braces have type 'R'
24866     # all braces associated with block operator keywords have been given those
24867     # keywords as "last_nonblank_token" and caught above.
24868     # (This statement is order dependent, and must come after checking
24869     # $last_nonblank_token).
24870     elsif ( $last_nonblank_type eq '}' ) {
24871
24872         # patch for dor.t (defined or).
24873         if (   $tok eq '/'
24874             && $next_type eq '/'
24875             && $last_nonblank_token eq ']' )
24876         {
24877             $op_expected = OPERATOR;
24878         }
24879         else {
24880             $op_expected = TERM;
24881         }
24882     }
24883
24884     # something else..what did I forget?
24885     else {
24886
24887         # collecting diagnostics on unknown operator types..see what was missed
24888         $op_expected = UNKNOWN;
24889         write_diagnostics(
24890 "OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
24891         );
24892     }
24893
24894     TOKENIZER_DEBUG_FLAG_EXPECT && do {
24895         print
24896 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
24897     };
24898     return $op_expected;
24899 }
24900
24901 sub new_statement_ok {
24902
24903     # return true if the current token can start a new statement
24904     # USES GLOBAL VARIABLES: $last_nonblank_type
24905
24906     return label_ok()    # a label would be ok here
24907
24908       || $last_nonblank_type eq 'J';    # or we follow a label
24909
24910 }
24911
24912 sub label_ok {
24913
24914     # Decide if a bare word followed by a colon here is a label
24915     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
24916     # $brace_depth, @brace_type
24917
24918     # if it follows an opening or closing code block curly brace..
24919     if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
24920         && $last_nonblank_type eq $last_nonblank_token )
24921     {
24922
24923         # it is a label if and only if the curly encloses a code block
24924         return $brace_type[$brace_depth];
24925     }
24926
24927     # otherwise, it is a label if and only if it follows a ';'
24928     # (real or fake)
24929     else {
24930         return ( $last_nonblank_type eq ';' );
24931     }
24932 }
24933
24934 sub code_block_type {
24935
24936     # Decide if this is a block of code, and its type.
24937     # Must be called only when $type = $token = '{'
24938     # The problem is to distinguish between the start of a block of code
24939     # and the start of an anonymous hash reference
24940     # Returns "" if not code block, otherwise returns 'last_nonblank_token'
24941     # to indicate the type of code block.  (For example, 'last_nonblank_token'
24942     # might be 'if' for an if block, 'else' for an else block, etc).
24943     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
24944     # $last_nonblank_block_type, $brace_depth, @brace_type
24945
24946     # handle case of multiple '{'s
24947
24948 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
24949
24950     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
24951     if (   $last_nonblank_token eq '{'
24952         && $last_nonblank_type eq $last_nonblank_token )
24953     {
24954
24955         # opening brace where a statement may appear is probably
24956         # a code block but might be and anonymous hash reference
24957         if ( $brace_type[$brace_depth] ) {
24958             return decide_if_code_block( $i, $rtokens, $rtoken_type,
24959                 $max_token_index );
24960         }
24961
24962         # cannot start a code block within an anonymous hash
24963         else {
24964             return "";
24965         }
24966     }
24967
24968     elsif ( $last_nonblank_token eq ';' ) {
24969
24970         # an opening brace where a statement may appear is probably
24971         # a code block but might be and anonymous hash reference
24972         return decide_if_code_block( $i, $rtokens, $rtoken_type,
24973             $max_token_index );
24974     }
24975
24976     # handle case of '}{'
24977     elsif ($last_nonblank_token eq '}'
24978         && $last_nonblank_type eq $last_nonblank_token )
24979     {
24980
24981         # a } { situation ...
24982         # could be hash reference after code block..(blktype1.t)
24983         if ($last_nonblank_block_type) {
24984             return decide_if_code_block( $i, $rtokens, $rtoken_type,
24985                 $max_token_index );
24986         }
24987
24988         # must be a block if it follows a closing hash reference
24989         else {
24990             return $last_nonblank_token;
24991         }
24992     }
24993
24994     # NOTE: braces after type characters start code blocks, but for
24995     # simplicity these are not identified as such.  See also
24996     # sub is_non_structural_brace.
24997     # elsif ( $last_nonblank_type eq 't' ) {
24998     #    return $last_nonblank_token;
24999     # }
25000
25001     # brace after label:
25002     elsif ( $last_nonblank_type eq 'J' ) {
25003         return $last_nonblank_token;
25004     }
25005
25006 # otherwise, look at previous token.  This must be a code block if
25007 # it follows any of these:
25008 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
25009     elsif ( $is_code_block_token{$last_nonblank_token} ) {
25010
25011         # Bug Patch: Note that the opening brace after the 'if' in the following
25012         # snippet is an anonymous hash ref and not a code block!
25013         #   print 'hi' if { x => 1, }->{x};
25014         # We can identify this situation because the last nonblank type
25015         # will be a keyword (instead of a closing peren)
25016         if (   $last_nonblank_token =~ /^(if|unless)$/
25017             && $last_nonblank_type eq 'k' )
25018         {
25019             return "";
25020         }
25021         else {
25022             return $last_nonblank_token;
25023         }
25024     }
25025
25026     # or a sub definition
25027     elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
25028         && $last_nonblank_token =~ /^sub\b/ )
25029     {
25030         return $last_nonblank_token;
25031     }
25032
25033     # user-defined subs with block parameters (like grep/map/eval)
25034     elsif ( $last_nonblank_type eq 'G' ) {
25035         return $last_nonblank_token;
25036     }
25037
25038     # check bareword
25039     elsif ( $last_nonblank_type eq 'w' ) {
25040         return decide_if_code_block( $i, $rtokens, $rtoken_type,
25041             $max_token_index );
25042     }
25043
25044     # anything else must be anonymous hash reference
25045     else {
25046         return "";
25047     }
25048 }
25049
25050 sub decide_if_code_block {
25051
25052     # USES GLOBAL VARIABLES: $last_nonblank_token
25053     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
25054     my ( $next_nonblank_token, $i_next ) =
25055       find_next_nonblank_token( $i, $rtokens, $max_token_index );
25056
25057     # we are at a '{' where a statement may appear.
25058     # We must decide if this brace starts an anonymous hash or a code
25059     # block.
25060     # return "" if anonymous hash, and $last_nonblank_token otherwise
25061
25062     # initialize to be code BLOCK
25063     my $code_block_type = $last_nonblank_token;
25064
25065     # Check for the common case of an empty anonymous hash reference:
25066     # Maybe something like sub { { } }
25067     if ( $next_nonblank_token eq '}' ) {
25068         $code_block_type = "";
25069     }
25070
25071     else {
25072
25073         # To guess if this '{' is an anonymous hash reference, look ahead
25074         # and test as follows:
25075         #
25076         # it is a hash reference if next come:
25077         #   - a string or digit followed by a comma or =>
25078         #   - bareword followed by =>
25079         # otherwise it is a code block
25080         #
25081         # Examples of anonymous hash ref:
25082         # {'aa',};
25083         # {1,2}
25084         #
25085         # Examples of code blocks:
25086         # {1; print "hello\n", 1;}
25087         # {$a,1};
25088
25089         # We are only going to look ahead one more (nonblank/comment) line.
25090         # Strange formatting could cause a bad guess, but that's unlikely.
25091         my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
25092         my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
25093         my ( $rpre_tokens, $rpre_types ) =
25094           peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
25095                                                        # generous, and prevents
25096                                                        # wasting lots of
25097                                                        # time in mangled files
25098         if ( defined($rpre_types) && @$rpre_types ) {
25099             push @pre_types,  @$rpre_types;
25100             push @pre_tokens, @$rpre_tokens;
25101         }
25102
25103         # put a sentinal token to simplify stopping the search
25104         push @pre_types, '}';
25105
25106         my $jbeg = 0;
25107         $jbeg = 1 if $pre_types[0] eq 'b';
25108
25109         # first look for one of these
25110         #  - bareword
25111         #  - bareword with leading -
25112         #  - digit
25113         #  - quoted string
25114         my $j = $jbeg;
25115         if ( $pre_types[$j] =~ /^[\'\"]/ ) {
25116
25117             # find the closing quote; don't worry about escapes
25118             my $quote_mark = $pre_types[$j];
25119             for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
25120                 if ( $pre_types[$k] eq $quote_mark ) {
25121                     $j = $k + 1;
25122                     my $next = $pre_types[$j];
25123                     last;
25124                 }
25125             }
25126         }
25127         elsif ( $pre_types[$j] eq 'd' ) {
25128             $j++;
25129         }
25130         elsif ( $pre_types[$j] eq 'w' ) {
25131             unless ( $is_keyword{ $pre_tokens[$j] } ) {
25132                 $j++;
25133             }
25134         }
25135         elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
25136             $j++;
25137         }
25138         if ( $j > $jbeg ) {
25139
25140             $j++ if $pre_types[$j] eq 'b';
25141
25142             # it's a hash ref if a comma or => follow next
25143             if ( $pre_types[$j] eq ','
25144                 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
25145             {
25146                 $code_block_type = "";
25147             }
25148         }
25149     }
25150
25151     return $code_block_type;
25152 }
25153
25154 sub unexpected {
25155
25156     # report unexpected token type and show where it is
25157     # USES GLOBAL VARIABLES: $tokenizer_self
25158     my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
25159         $rpretoken_type, $input_line )
25160       = @_;
25161
25162     if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
25163         my $msg = "found $found where $expecting expected";
25164         my $pos = $$rpretoken_map[$i_tok];
25165         interrupt_logfile();
25166         my $input_line_number = $tokenizer_self->{_last_line_number};
25167         my ( $offset, $numbered_line, $underline ) =
25168           make_numbered_line( $input_line_number, $input_line, $pos );
25169         $underline = write_on_underline( $underline, $pos - $offset, '^' );
25170
25171         my $trailer = "";
25172         if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
25173             my $pos_prev = $$rpretoken_map[$last_nonblank_i];
25174             my $num;
25175             if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
25176                 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
25177             }
25178             else {
25179                 $num = $pos - $pos_prev;
25180             }
25181             if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
25182
25183             $underline =
25184               write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
25185             $trailer = " (previous token underlined)";
25186         }
25187         warning( $numbered_line . "\n" );
25188         warning( $underline . "\n" );
25189         warning( $msg . $trailer . "\n" );
25190         resume_logfile();
25191     }
25192 }
25193
25194 sub is_non_structural_brace {
25195
25196     # Decide if a brace or bracket is structural or non-structural
25197     # by looking at the previous token and type
25198     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
25199
25200     # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
25201     # Tentatively deactivated because it caused the wrong operator expectation
25202     # for this code:
25203     #      $user = @vars[1] / 100;
25204     # Must update sub operator_expected before re-implementing.
25205     # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
25206     #    return 0;
25207     # }
25208
25209     # NOTE: braces after type characters start code blocks, but for
25210     # simplicity these are not identified as such.  See also
25211     # sub code_block_type
25212     # if ($last_nonblank_type eq 't') {return 0}
25213
25214     # otherwise, it is non-structural if it is decorated
25215     # by type information.
25216     # For example, the '{' here is non-structural:   ${xxx}
25217     (
25218         $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
25219
25220           # or if we follow a hash or array closing curly brace or bracket
25221           # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
25222           # because the first '}' would have been given type 'R'
25223           || $last_nonblank_type =~ /^([R\]])$/
25224     );
25225 }
25226
25227 #########i#############################################################
25228 # Tokenizer routines for tracking container nesting depths
25229 #######################################################################
25230
25231 # The following routines keep track of nesting depths of the nesting
25232 # types, ( [ { and ?.  This is necessary for determining the indentation
25233 # level, and also for debugging programs.  Not only do they keep track of
25234 # nesting depths of the individual brace types, but they check that each
25235 # of the other brace types is balanced within matching pairs.  For
25236 # example, if the program sees this sequence:
25237 #
25238 #         {  ( ( ) }
25239 #
25240 # then it can determine that there is an extra left paren somewhere
25241 # between the { and the }.  And so on with every other possible
25242 # combination of outer and inner brace types.  For another
25243 # example:
25244 #
25245 #         ( [ ..... ]  ] )
25246 #
25247 # which has an extra ] within the parens.
25248 #
25249 # The brace types have indexes 0 .. 3 which are indexes into
25250 # the matrices.
25251 #
25252 # The pair ? : are treated as just another nesting type, with ? acting
25253 # as the opening brace and : acting as the closing brace.
25254 #
25255 # The matrix
25256 #
25257 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
25258 #
25259 # saves the nesting depth of brace type $b (where $b is either of the other
25260 # nesting types) when brace type $a enters a new depth.  When this depth
25261 # decreases, a check is made that the current depth of brace types $b is
25262 # unchanged, or otherwise there must have been an error.  This can
25263 # be very useful for localizing errors, particularly when perl runs to
25264 # the end of a large file (such as this one) and announces that there
25265 # is a problem somewhere.
25266 #
25267 # A numerical sequence number is maintained for every nesting type,
25268 # so that each matching pair can be uniquely identified in a simple
25269 # way.
25270
25271 sub increase_nesting_depth {
25272     my ( $aa, $pos ) = @_;
25273
25274     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
25275     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
25276     my $bb;
25277     $current_depth[$aa]++;
25278     $total_depth++;
25279     $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
25280     my $input_line_number = $tokenizer_self->{_last_line_number};
25281     my $input_line        = $tokenizer_self->{_line_text};
25282
25283     # Sequence numbers increment by number of items.  This keeps
25284     # a unique set of numbers but still allows the relative location
25285     # of any type to be determined.
25286     $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
25287     my $seqno = $nesting_sequence_number[$aa];
25288     $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
25289
25290     $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
25291       [ $input_line_number, $input_line, $pos ];
25292
25293     for $bb ( 0 .. $#closing_brace_names ) {
25294         next if ( $bb == $aa );
25295         $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
25296     }
25297
25298     # set a flag for indenting a nested ternary statement
25299     my $indent = 0;
25300     if ( $aa == QUESTION_COLON ) {
25301         $nested_ternary_flag[ $current_depth[$aa] ] = 0;
25302         if ( $current_depth[$aa] > 1 ) {
25303             if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
25304                 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
25305                 if ( $pdepth == $total_depth - 1 ) {
25306                     $indent = 1;
25307                     $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
25308                 }
25309             }
25310         }
25311     }
25312     return ( $seqno, $indent );
25313 }
25314
25315 sub decrease_nesting_depth {
25316
25317     my ( $aa, $pos ) = @_;
25318
25319     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
25320     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
25321     my $bb;
25322     my $seqno             = 0;
25323     my $input_line_number = $tokenizer_self->{_last_line_number};
25324     my $input_line        = $tokenizer_self->{_line_text};
25325
25326     my $outdent = 0;
25327     $total_depth--;
25328     if ( $current_depth[$aa] > 0 ) {
25329
25330         # set a flag for un-indenting after seeing a nested ternary statement
25331         $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
25332         if ( $aa == QUESTION_COLON ) {
25333             $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
25334         }
25335
25336         # check that any brace types $bb contained within are balanced
25337         for $bb ( 0 .. $#closing_brace_names ) {
25338             next if ( $bb == $aa );
25339
25340             unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
25341                 $current_depth[$bb] )
25342             {
25343                 my $diff =
25344                   $current_depth[$bb] -
25345                   $depth_array[$aa][$bb][ $current_depth[$aa] ];
25346
25347                 # don't whine too many times
25348                 my $saw_brace_error = get_saw_brace_error();
25349                 if (
25350                     $saw_brace_error <= MAX_NAG_MESSAGES
25351
25352                     # if too many closing types have occured, we probably
25353                     # already caught this error
25354                     && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
25355                   )
25356                 {
25357                     interrupt_logfile();
25358                     my $rsl =
25359                       $starting_line_of_current_depth[$aa]
25360                       [ $current_depth[$aa] ];
25361                     my $sl  = $$rsl[0];
25362                     my $rel = [ $input_line_number, $input_line, $pos ];
25363                     my $el  = $$rel[0];
25364                     my ($ess);
25365
25366                     if ( $diff == 1 || $diff == -1 ) {
25367                         $ess = '';
25368                     }
25369                     else {
25370                         $ess = 's';
25371                     }
25372                     my $bname =
25373                       ( $diff > 0 )
25374                       ? $opening_brace_names[$bb]
25375                       : $closing_brace_names[$bb];
25376                     write_error_indicator_pair( @$rsl, '^' );
25377                     my $msg = <<"EOM";
25378 Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
25379 EOM
25380
25381                     if ( $diff > 0 ) {
25382                         my $rml =
25383                           $starting_line_of_current_depth[$bb]
25384                           [ $current_depth[$bb] ];
25385                         my $ml = $$rml[0];
25386                         $msg .=
25387 "    The most recent un-matched $bname is on line $ml\n";
25388                         write_error_indicator_pair( @$rml, '^' );
25389                     }
25390                     write_error_indicator_pair( @$rel, '^' );
25391                     warning($msg);
25392                     resume_logfile();
25393                 }
25394                 increment_brace_error();
25395             }
25396         }
25397         $current_depth[$aa]--;
25398     }
25399     else {
25400
25401         my $saw_brace_error = get_saw_brace_error();
25402         if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
25403             my $msg = <<"EOM";
25404 There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
25405 EOM
25406             indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
25407         }
25408         increment_brace_error();
25409     }
25410     return ( $seqno, $outdent );
25411 }
25412
25413 sub check_final_nesting_depths {
25414     my ($aa);
25415
25416     # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
25417
25418     for $aa ( 0 .. $#closing_brace_names ) {
25419
25420         if ( $current_depth[$aa] ) {
25421             my $rsl =
25422               $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
25423             my $sl  = $$rsl[0];
25424             my $msg = <<"EOM";
25425 Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
25426 The most recent un-matched $opening_brace_names[$aa] is on line $sl
25427 EOM
25428             indicate_error( $msg, @$rsl, '^' );
25429             increment_brace_error();
25430         }
25431     }
25432 }
25433
25434 #########i#############################################################
25435 # Tokenizer routines for looking ahead in input stream
25436 #######################################################################
25437
25438 sub peek_ahead_for_n_nonblank_pre_tokens {
25439
25440     # returns next n pretokens if they exist
25441     # returns undef's if hits eof without seeing any pretokens
25442     # USES GLOBAL VARIABLES: $tokenizer_self
25443     my $max_pretokens = shift;
25444     my $line;
25445     my $i = 0;
25446     my ( $rpre_tokens, $rmap, $rpre_types );
25447
25448     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
25449     {
25450         $line =~ s/^\s*//;    # trim leading blanks
25451         next if ( length($line) <= 0 );    # skip blank
25452         next if ( $line =~ /^#/ );         # skip comment
25453         ( $rpre_tokens, $rmap, $rpre_types ) =
25454           pre_tokenize( $line, $max_pretokens );
25455         last;
25456     }
25457     return ( $rpre_tokens, $rpre_types );
25458 }
25459
25460 # look ahead for next non-blank, non-comment line of code
25461 sub peek_ahead_for_nonblank_token {
25462
25463     # USES GLOBAL VARIABLES: $tokenizer_self
25464     my ( $rtokens, $max_token_index ) = @_;
25465     my $line;
25466     my $i = 0;
25467
25468     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
25469     {
25470         $line =~ s/^\s*//;    # trim leading blanks
25471         next if ( length($line) <= 0 );    # skip blank
25472         next if ( $line =~ /^#/ );         # skip comment
25473         my ( $rtok, $rmap, $rtype ) =
25474           pre_tokenize( $line, 2 );        # only need 2 pre-tokens
25475         my $j = $max_token_index + 1;
25476         my $tok;
25477
25478         foreach $tok (@$rtok) {
25479             last if ( $tok =~ "\n" );
25480             $$rtokens[ ++$j ] = $tok;
25481         }
25482         last;
25483     }
25484     return $rtokens;
25485 }
25486
25487 #########i#############################################################
25488 # Tokenizer guessing routines for ambiguous situations
25489 #######################################################################
25490
25491 sub guess_if_pattern_or_conditional {
25492
25493     # this routine is called when we have encountered a ? following an
25494     # unknown bareword, and we must decide if it starts a pattern or not
25495     # input parameters:
25496     #   $i - token index of the ? starting possible pattern
25497     # output parameters:
25498     #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
25499     #   msg = a warning or diagnostic message
25500     # USES GLOBAL VARIABLES: $last_nonblank_token
25501     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25502     my $is_pattern = 0;
25503     my $msg        = "guessing that ? after $last_nonblank_token starts a ";
25504
25505     if ( $i >= $max_token_index ) {
25506         $msg .= "conditional (no end to pattern found on the line)\n";
25507     }
25508     else {
25509         my $ibeg = $i;
25510         $i = $ibeg + 1;
25511         my $next_token = $$rtokens[$i];    # first token after ?
25512
25513         # look for a possible ending ? on this line..
25514         my $in_quote        = 1;
25515         my $quote_depth     = 0;
25516         my $quote_character = '';
25517         my $quote_pos       = 0;
25518         my $quoted_string;
25519         (
25520             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25521             $quoted_string
25522           )
25523           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25524             $quote_pos, $quote_depth, $max_token_index );
25525
25526         if ($in_quote) {
25527
25528             # we didn't find an ending ? on this line,
25529             # so we bias towards conditional
25530             $is_pattern = 0;
25531             $msg .= "conditional (no ending ? on this line)\n";
25532
25533             # we found an ending ?, so we bias towards a pattern
25534         }
25535         else {
25536
25537             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
25538                 $is_pattern = 1;
25539                 $msg .= "pattern (found ending ? and pattern expected)\n";
25540             }
25541             else {
25542                 $msg .= "pattern (uncertain, but found ending ?)\n";
25543             }
25544         }
25545     }
25546     return ( $is_pattern, $msg );
25547 }
25548
25549 sub guess_if_pattern_or_division {
25550
25551     # this routine is called when we have encountered a / following an
25552     # unknown bareword, and we must decide if it starts a pattern or is a
25553     # division
25554     # input parameters:
25555     #   $i - token index of the / starting possible pattern
25556     # output parameters:
25557     #   $is_pattern = 0 if probably division,  =1 if probably a pattern
25558     #   msg = a warning or diagnostic message
25559     # USES GLOBAL VARIABLES: $last_nonblank_token
25560     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25561     my $is_pattern = 0;
25562     my $msg        = "guessing that / after $last_nonblank_token starts a ";
25563
25564     if ( $i >= $max_token_index ) {
25565         "division (no end to pattern found on the line)\n";
25566     }
25567     else {
25568         my $ibeg = $i;
25569         my $divide_expected =
25570           numerator_expected( $i, $rtokens, $max_token_index );
25571         $i = $ibeg + 1;
25572         my $next_token = $$rtokens[$i];    # first token after slash
25573
25574         # look for a possible ending / on this line..
25575         my $in_quote        = 1;
25576         my $quote_depth     = 0;
25577         my $quote_character = '';
25578         my $quote_pos       = 0;
25579         my $quoted_string;
25580         (
25581             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25582             $quoted_string
25583           )
25584           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25585             $quote_pos, $quote_depth, $max_token_index );
25586
25587         if ($in_quote) {
25588
25589             # we didn't find an ending / on this line,
25590             # so we bias towards division
25591             if ( $divide_expected >= 0 ) {
25592                 $is_pattern = 0;
25593                 $msg .= "division (no ending / on this line)\n";
25594             }
25595             else {
25596                 $msg        = "multi-line pattern (division not possible)\n";
25597                 $is_pattern = 1;
25598             }
25599
25600         }
25601
25602         # we found an ending /, so we bias towards a pattern
25603         else {
25604
25605             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
25606
25607                 if ( $divide_expected >= 0 ) {
25608
25609                     if ( $i - $ibeg > 60 ) {
25610                         $msg .= "division (matching / too distant)\n";
25611                         $is_pattern = 0;
25612                     }
25613                     else {
25614                         $msg .= "pattern (but division possible too)\n";
25615                         $is_pattern = 1;
25616                     }
25617                 }
25618                 else {
25619                     $is_pattern = 1;
25620                     $msg .= "pattern (division not possible)\n";
25621                 }
25622             }
25623             else {
25624
25625                 if ( $divide_expected >= 0 ) {
25626                     $is_pattern = 0;
25627                     $msg .= "division (pattern not possible)\n";
25628                 }
25629                 else {
25630                     $is_pattern = 1;
25631                     $msg .=
25632                       "pattern (uncertain, but division would not work here)\n";
25633                 }
25634             }
25635         }
25636     }
25637     return ( $is_pattern, $msg );
25638 }
25639
25640 # try to resolve here-doc vs. shift by looking ahead for
25641 # non-code or the end token (currently only looks for end token)
25642 # returns 1 if it is probably a here doc, 0 if not
25643 sub guess_if_here_doc {
25644
25645     # This is how many lines we will search for a target as part of the
25646     # guessing strategy.  It is a constant because there is probably
25647     # little reason to change it.
25648     # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
25649     # %is_constant,
25650     use constant HERE_DOC_WINDOW => 40;
25651
25652     my $next_token        = shift;
25653     my $here_doc_expected = 0;
25654     my $line;
25655     my $k   = 0;
25656     my $msg = "checking <<";
25657
25658     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
25659     {
25660         chomp $line;
25661
25662         if ( $line =~ /^$next_token$/ ) {
25663             $msg .= " -- found target $next_token ahead $k lines\n";
25664             $here_doc_expected = 1;    # got it
25665             last;
25666         }
25667         last if ( $k >= HERE_DOC_WINDOW );
25668     }
25669
25670     unless ($here_doc_expected) {
25671
25672         if ( !defined($line) ) {
25673             $here_doc_expected = -1;    # hit eof without seeing target
25674             $msg .= " -- must be shift; target $next_token not in file\n";
25675
25676         }
25677         else {                          # still unsure..taking a wild guess
25678
25679             if ( !$is_constant{$current_package}{$next_token} ) {
25680                 $here_doc_expected = 1;
25681                 $msg .=
25682                   " -- guessing it's a here-doc ($next_token not a constant)\n";
25683             }
25684             else {
25685                 $msg .=
25686                   " -- guessing it's a shift ($next_token is a constant)\n";
25687             }
25688         }
25689     }
25690     write_logfile_entry($msg);
25691     return $here_doc_expected;
25692 }
25693
25694 #########i#############################################################
25695 # Tokenizer Routines for scanning identifiers and related items
25696 #######################################################################
25697
25698 sub scan_bare_identifier_do {
25699
25700     # this routine is called to scan a token starting with an alphanumeric
25701     # variable or package separator, :: or '.
25702     # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
25703     # $last_nonblank_type,@paren_type, $paren_depth
25704
25705     my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
25706         $max_token_index )
25707       = @_;
25708     my $i_begin = $i;
25709     my $package = undef;
25710
25711     my $i_beg = $i;
25712
25713     # we have to back up one pretoken at a :: since each : is one pretoken
25714     if ( $tok eq '::' ) { $i_beg-- }
25715     if ( $tok eq '->' ) { $i_beg-- }
25716     my $pos_beg = $$rtoken_map[$i_beg];
25717     pos($input_line) = $pos_beg;
25718
25719     #  Examples:
25720     #   A::B::C
25721     #   A::
25722     #   ::A
25723     #   A'B
25724     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
25725
25726         my $pos  = pos($input_line);
25727         my $numc = $pos - $pos_beg;
25728         $tok = substr( $input_line, $pos_beg, $numc );
25729
25730         # type 'w' includes anything without leading type info
25731         # ($,%,@,*) including something like abc::def::ghi
25732         $type = 'w';
25733
25734         my $sub_name = "";
25735         if ( defined($2) ) { $sub_name = $2; }
25736         if ( defined($1) ) {
25737             $package = $1;
25738
25739             # patch: don't allow isolated package name which just ends
25740             # in the old style package separator (single quote).  Example:
25741             #   use CGI':all';
25742             if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
25743                 $pos--;
25744             }
25745
25746             $package =~ s/\'/::/g;
25747             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
25748             $package =~ s/::$//;
25749         }
25750         else {
25751             $package = $current_package;
25752
25753             if ( $is_keyword{$tok} ) {
25754                 $type = 'k';
25755             }
25756         }
25757
25758         # if it is a bareword..
25759         if ( $type eq 'w' ) {
25760
25761             # check for v-string with leading 'v' type character
25762             # (This seems to have presidence over filehandle, type 'Y')
25763             if ( $tok =~ /^v\d[_\d]*$/ ) {
25764
25765                 # we only have the first part - something like 'v101' -
25766                 # look for more
25767                 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
25768                     $pos  = pos($input_line);
25769                     $numc = $pos - $pos_beg;
25770                     $tok  = substr( $input_line, $pos_beg, $numc );
25771                 }
25772                 $type = 'v';
25773
25774                 # warn if this version can't handle v-strings
25775                 report_v_string($tok);
25776             }
25777
25778             elsif ( $is_constant{$package}{$sub_name} ) {
25779                 $type = 'C';
25780             }
25781
25782             # bareword after sort has implied empty prototype; for example:
25783             # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
25784             # This has priority over whatever the user has specified.
25785             elsif ($last_nonblank_token eq 'sort'
25786                 && $last_nonblank_type eq 'k' )
25787             {
25788                 $type = 'Z';
25789             }
25790
25791             # Note: strangely, perl does not seem to really let you create
25792             # functions which act like eval and do, in the sense that eval
25793             # and do may have operators following the final }, but any operators
25794             # that you create with prototype (&) apparently do not allow
25795             # trailing operators, only terms.  This seems strange.
25796             # If this ever changes, here is the update
25797             # to make perltidy behave accordingly:
25798
25799             # elsif ( $is_block_function{$package}{$tok} ) {
25800             #    $tok='eval'; # patch to do braces like eval  - doesn't work
25801             #    $type = 'k';
25802             #}
25803             # FIXME: This could become a separate type to allow for different
25804             # future behavior:
25805             elsif ( $is_block_function{$package}{$sub_name} ) {
25806                 $type = 'G';
25807             }
25808
25809             elsif ( $is_block_list_function{$package}{$sub_name} ) {
25810                 $type = 'G';
25811             }
25812             elsif ( $is_user_function{$package}{$sub_name} ) {
25813                 $type      = 'U';
25814                 $prototype = $user_function_prototype{$package}{$sub_name};
25815             }
25816
25817             # check for indirect object
25818             elsif (
25819
25820                 # added 2001-03-27: must not be followed immediately by '('
25821                 # see fhandle.t
25822                 ( $input_line !~ m/\G\(/gc )
25823
25824                 # and
25825                 && (
25826
25827                     # preceded by keyword like 'print', 'printf' and friends
25828                     $is_indirect_object_taker{$last_nonblank_token}
25829
25830                     # or preceded by something like 'print(' or 'printf('
25831                     || (
25832                         ( $last_nonblank_token eq '(' )
25833                         && $is_indirect_object_taker{ $paren_type[$paren_depth]
25834                         }
25835
25836                     )
25837                 )
25838               )
25839             {
25840
25841                 # may not be indirect object unless followed by a space
25842                 if ( $input_line =~ m/\G\s+/gc ) {
25843                     $type = 'Y';
25844
25845                     # Abandon Hope ...
25846                     # Perl's indirect object notation is a very bad
25847                     # thing and can cause subtle bugs, especially for
25848                     # beginning programmers.  And I haven't even been
25849                     # able to figure out a sane warning scheme which
25850                     # doesn't get in the way of good scripts.
25851
25852                     # Complain if a filehandle has any lower case
25853                     # letters.  This is suggested good practice.
25854                     # Use 'sub_name' because something like
25855                     # main::MYHANDLE is ok for filehandle
25856                     if ( $sub_name =~ /[a-z]/ ) {
25857
25858                         # could be bug caused by older perltidy if
25859                         # followed by '('
25860                         if ( $input_line =~ m/\G\s*\(/gc ) {
25861                             complain(
25862 "Caution: unknown word '$tok' in indirect object slot\n"
25863                             );
25864                         }
25865                     }
25866                 }
25867
25868                 # bareword not followed by a space -- may not be filehandle
25869                 # (may be function call defined in a 'use' statement)
25870                 else {
25871                     $type = 'Z';
25872                 }
25873             }
25874         }
25875
25876         # Now we must convert back from character position
25877         # to pre_token index.
25878         # I don't think an error flag can occur here ..but who knows
25879         my $error;
25880         ( $i, $error ) =
25881           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25882         if ($error) {
25883             warning("scan_bare_identifier: Possibly invalid tokenization\n");
25884         }
25885     }
25886
25887     # no match but line not blank - could be syntax error
25888     # perl will take '::' alone without complaint
25889     else {
25890         $type = 'w';
25891
25892         # change this warning to log message if it becomes annoying
25893         warning("didn't find identifier after leading ::\n");
25894     }
25895     return ( $i, $tok, $type, $prototype );
25896 }
25897
25898 sub scan_id_do {
25899
25900 # This is the new scanner and will eventually replace scan_identifier.
25901 # Only type 'sub' and 'package' are implemented.
25902 # Token types $ * % @ & -> are not yet implemented.
25903 #
25904 # Scan identifier following a type token.
25905 # The type of call depends on $id_scan_state: $id_scan_state = ''
25906 # for starting call, in which case $tok must be the token defining
25907 # the type.
25908 #
25909 # If the type token is the last nonblank token on the line, a value
25910 # of $id_scan_state = $tok is returned, indicating that further
25911 # calls must be made to get the identifier.  If the type token is
25912 # not the last nonblank token on the line, the identifier is
25913 # scanned and handled and a value of '' is returned.
25914 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
25915 # $statement_type, $tokenizer_self
25916
25917     my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
25918         $max_token_index )
25919       = @_;
25920     my $type = '';
25921     my ( $i_beg, $pos_beg );
25922
25923     #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
25924     #my ($a,$b,$c) = caller;
25925     #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
25926
25927     # on re-entry, start scanning at first token on the line
25928     if ($id_scan_state) {
25929         $i_beg = $i;
25930         $type  = '';
25931     }
25932
25933     # on initial entry, start scanning just after type token
25934     else {
25935         $i_beg         = $i + 1;
25936         $id_scan_state = $tok;
25937         $type          = 't';
25938     }
25939
25940     # find $i_beg = index of next nonblank token,
25941     # and handle empty lines
25942     my $blank_line          = 0;
25943     my $next_nonblank_token = $$rtokens[$i_beg];
25944     if ( $i_beg > $max_token_index ) {
25945         $blank_line = 1;
25946     }
25947     else {
25948
25949         # only a '#' immediately after a '$' is not a comment
25950         if ( $next_nonblank_token eq '#' ) {
25951             unless ( $tok eq '$' ) {
25952                 $blank_line = 1;
25953             }
25954         }
25955
25956         if ( $next_nonblank_token =~ /^\s/ ) {
25957             ( $next_nonblank_token, $i_beg ) =
25958               find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
25959                 $max_token_index );
25960             if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
25961                 $blank_line = 1;
25962             }
25963         }
25964     }
25965
25966     # handle non-blank line; identifier, if any, must follow
25967     unless ($blank_line) {
25968
25969         if ( $id_scan_state eq 'sub' ) {
25970             ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
25971                 $input_line, $i,             $i_beg,
25972                 $tok,        $type,          $rtokens,
25973                 $rtoken_map, $id_scan_state, $max_token_index
25974             );
25975         }
25976
25977         elsif ( $id_scan_state eq 'package' ) {
25978             ( $i, $tok, $type ) =
25979               do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
25980                 $rtoken_map, $max_token_index );
25981             $id_scan_state = '';
25982         }
25983
25984         else {
25985             warning("invalid token in scan_id: $tok\n");
25986             $id_scan_state = '';
25987         }
25988     }
25989
25990     if ( $id_scan_state && ( !defined($type) || !$type ) ) {
25991
25992         # shouldn't happen:
25993         warning(
25994 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
25995         );
25996         report_definite_bug();
25997     }
25998
25999     TOKENIZER_DEBUG_FLAG_NSCAN && do {
26000         print
26001           "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
26002     };
26003     return ( $i, $tok, $type, $id_scan_state );
26004 }
26005
26006 sub check_prototype {
26007     my ( $proto, $package, $subname ) = @_;
26008     return unless ( defined($package) && defined($subname) );
26009     if ( defined($proto) ) {
26010         $proto =~ s/^\s*\(\s*//;
26011         $proto =~ s/\s*\)$//;
26012         if ($proto) {
26013             $is_user_function{$package}{$subname}        = 1;
26014             $user_function_prototype{$package}{$subname} = "($proto)";
26015
26016             # prototypes containing '&' must be treated specially..
26017             if ( $proto =~ /\&/ ) {
26018
26019                 # right curly braces of prototypes ending in
26020                 # '&' may be followed by an operator
26021                 if ( $proto =~ /\&$/ ) {
26022                     $is_block_function{$package}{$subname} = 1;
26023                 }
26024
26025                 # right curly braces of prototypes NOT ending in
26026                 # '&' may NOT be followed by an operator
26027                 elsif ( $proto !~ /\&$/ ) {
26028                     $is_block_list_function{$package}{$subname} = 1;
26029                 }
26030             }
26031         }
26032         else {
26033             $is_constant{$package}{$subname} = 1;
26034         }
26035     }
26036     else {
26037         $is_user_function{$package}{$subname} = 1;
26038     }
26039 }
26040
26041 sub do_scan_package {
26042
26043     # do_scan_package parses a package name
26044     # it is called with $i_beg equal to the index of the first nonblank
26045     # token following a 'package' token.
26046     # USES GLOBAL VARIABLES: $current_package,
26047
26048     my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
26049         $max_token_index )
26050       = @_;
26051     my $package = undef;
26052     my $pos_beg = $$rtoken_map[$i_beg];
26053     pos($input_line) = $pos_beg;
26054
26055     # handle non-blank line; package name, if any, must follow
26056     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
26057         $package = $1;
26058         $package = ( defined($1) && $1 ) ? $1 : 'main';
26059         $package =~ s/\'/::/g;
26060         if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26061         $package =~ s/::$//;
26062         my $pos  = pos($input_line);
26063         my $numc = $pos - $pos_beg;
26064         $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
26065         $type = 'i';
26066
26067         # Now we must convert back from character position
26068         # to pre_token index.
26069         # I don't think an error flag can occur here ..but ?
26070         my $error;
26071         ( $i, $error ) =
26072           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26073         if ($error) { warning("Possibly invalid package\n") }
26074         $current_package = $package;
26075
26076         # check for error
26077         my ( $next_nonblank_token, $i_next ) =
26078           find_next_nonblank_token( $i, $rtokens, $max_token_index );
26079         if ( $next_nonblank_token !~ /^[;\}]$/ ) {
26080             warning(
26081                 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
26082             );
26083         }
26084     }
26085
26086     # no match but line not blank --
26087     # could be a label with name package, like package:  , for example.
26088     else {
26089         $type = 'k';
26090     }
26091
26092     return ( $i, $tok, $type );
26093 }
26094
26095 sub scan_identifier_do {
26096
26097     # This routine assembles tokens into identifiers.  It maintains a
26098     # scan state, id_scan_state.  It updates id_scan_state based upon
26099     # current id_scan_state and token, and returns an updated
26100     # id_scan_state and the next index after the identifier.
26101     # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
26102     # $last_nonblank_type
26103
26104     my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
26105         $expecting )
26106       = @_;
26107     my $i_begin   = $i;
26108     my $type      = '';
26109     my $tok_begin = $$rtokens[$i_begin];
26110     if ( $tok_begin eq ':' ) { $tok_begin = '::' }
26111     my $id_scan_state_begin = $id_scan_state;
26112     my $identifier_begin    = $identifier;
26113     my $tok                 = $tok_begin;
26114     my $message             = "";
26115
26116     # these flags will be used to help figure out the type:
26117     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
26118     my $saw_type;
26119
26120     # allow old package separator (') except in 'use' statement
26121     my $allow_tick = ( $last_nonblank_token ne 'use' );
26122
26123     # get started by defining a type and a state if necessary
26124     unless ($id_scan_state) {
26125         $context = UNKNOWN_CONTEXT;
26126
26127         # fixup for digraph
26128         if ( $tok eq '>' ) {
26129             $tok       = '->';
26130             $tok_begin = $tok;
26131         }
26132         $identifier = $tok;
26133
26134         if ( $tok eq '$' || $tok eq '*' ) {
26135             $id_scan_state = '$';
26136             $context       = SCALAR_CONTEXT;
26137         }
26138         elsif ( $tok eq '%' || $tok eq '@' ) {
26139             $id_scan_state = '$';
26140             $context       = LIST_CONTEXT;
26141         }
26142         elsif ( $tok eq '&' ) {
26143             $id_scan_state = '&';
26144         }
26145         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
26146             $saw_alpha     = 0;     # 'sub' is considered type info here
26147             $id_scan_state = '$';
26148             $identifier .= ' ';     # need a space to separate sub from sub name
26149         }
26150         elsif ( $tok eq '::' ) {
26151             $id_scan_state = 'A';
26152         }
26153         elsif ( $tok =~ /^[A-Za-z_]/ ) {
26154             $id_scan_state = ':';
26155         }
26156         elsif ( $tok eq '->' ) {
26157             $id_scan_state = '$';
26158         }
26159         else {
26160
26161             # shouldn't happen
26162             my ( $a, $b, $c ) = caller;
26163             warning("Program Bug: scan_identifier given bad token = $tok \n");
26164             warning("   called from sub $a  line: $c\n");
26165             report_definite_bug();
26166         }
26167         $saw_type = !$saw_alpha;
26168     }
26169     else {
26170         $i--;
26171         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
26172     }
26173
26174     # now loop to gather the identifier
26175     my $i_save = $i;
26176
26177     while ( $i < $max_token_index ) {
26178         $i_save = $i unless ( $tok =~ /^\s*$/ );
26179         $tok = $$rtokens[ ++$i ];
26180
26181         if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
26182             $tok = '::';
26183             $i++;
26184         }
26185
26186         if ( $id_scan_state eq '$' ) {    # starting variable name
26187
26188             if ( $tok eq '$' ) {
26189
26190                 $identifier .= $tok;
26191
26192                 # we've got a punctuation variable if end of line (punct.t)
26193                 if ( $i == $max_token_index ) {
26194                     $type          = 'i';
26195                     $id_scan_state = '';
26196                     last;
26197                 }
26198             }
26199             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
26200                 $saw_alpha     = 1;
26201                 $id_scan_state = ':';           # now need ::
26202                 $identifier .= $tok;
26203             }
26204             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
26205                 $saw_alpha     = 1;
26206                 $id_scan_state = ':';                 # now need ::
26207                 $identifier .= $tok;
26208
26209                 # Perl will accept leading digits in identifiers,
26210                 # although they may not always produce useful results.
26211                 # Something like $main::0 is ok.  But this also works:
26212                 #
26213                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
26214                 #  howdy::123::bubba();
26215                 #
26216             }
26217             elsif ( $tok =~ /^[0-9]/ ) {              # numeric
26218                 $saw_alpha     = 1;
26219                 $id_scan_state = ':';                 # now need ::
26220                 $identifier .= $tok;
26221             }
26222             elsif ( $tok eq '::' ) {
26223                 $id_scan_state = 'A';
26224                 $identifier .= $tok;
26225             }
26226             elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
26227                 $identifier .= $tok;    # keep same state, a $ could follow
26228             }
26229             elsif ( $tok eq '{' ) {
26230
26231                 # check for something like ${#} or ${©}
26232                 if (   $identifier eq '$'
26233                     && $i + 2 <= $max_token_index
26234                     && $$rtokens[ $i + 2 ] eq '}'
26235                     && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
26236                 {
26237                     my $next2 = $$rtokens[ $i + 2 ];
26238                     my $next1 = $$rtokens[ $i + 1 ];
26239                     $identifier .= $tok . $next1 . $next2;
26240                     $i += 2;
26241                     $id_scan_state = '';
26242                     last;
26243                 }
26244
26245                 # skip something like ${xxx} or ->{
26246                 $id_scan_state = '';
26247
26248                 # if this is the first token of a line, any tokens for this
26249                 # identifier have already been accumulated
26250                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
26251                 $i = $i_save;
26252                 last;
26253             }
26254
26255             # space ok after leading $ % * & @
26256             elsif ( $tok =~ /^\s*$/ ) {
26257
26258                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
26259
26260                     if ( length($identifier) > 1 ) {
26261                         $id_scan_state = '';
26262                         $i             = $i_save;
26263                         $type          = 'i';    # probably punctuation variable
26264                         last;
26265                     }
26266                     else {
26267
26268                         # spaces after $'s are common, and space after @
26269                         # is harmless, so only complain about space
26270                         # after other type characters. Space after $ and
26271                         # @ will be removed in formatting.  Report space
26272                         # after % and * because they might indicate a
26273                         # parsing error.  In other words '% ' might be a
26274                         # modulo operator.  Delete this warning if it
26275                         # gets annoying.
26276                         if ( $identifier !~ /^[\@\$]$/ ) {
26277                             $message =
26278                               "Space in identifier, following $identifier\n";
26279                         }
26280                     }
26281                 }
26282
26283                 # else:
26284                 # space after '->' is ok
26285             }
26286             elsif ( $tok eq '^' ) {
26287
26288                 # check for some special variables like $^W
26289                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26290                     $identifier .= $tok;
26291                     $id_scan_state = 'A';
26292
26293                     # Perl accepts '$^]' or '@^]', but
26294                     # there must not be a space before the ']'.
26295                     my $next1 = $$rtokens[ $i + 1 ];
26296                     if ( $next1 eq ']' ) {
26297                         $i++;
26298                         $identifier .= $next1;
26299                         $id_scan_state = "";
26300                         last;
26301                     }
26302                 }
26303                 else {
26304                     $id_scan_state = '';
26305                 }
26306             }
26307             else {    # something else
26308
26309                 # check for various punctuation variables
26310                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26311                     $identifier .= $tok;
26312                 }
26313
26314                 elsif ( $identifier eq '$#' ) {
26315
26316                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
26317
26318                     # perl seems to allow just these: $#: $#- $#+
26319                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
26320                         $type = 'i';
26321                         $identifier .= $tok;
26322                     }
26323                     else {
26324                         $i = $i_save;
26325                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
26326                     }
26327                 }
26328                 elsif ( $identifier eq '$$' ) {
26329
26330                     # perl does not allow references to punctuation
26331                     # variables without braces.  For example, this
26332                     # won't work:
26333                     #  $:=\4;
26334                     #  $a = $$:;
26335                     # You would have to use
26336                     #  $a = ${$:};
26337
26338                     $i = $i_save;
26339                     if   ( $tok eq '{' ) { $type = 't' }
26340                     else                 { $type = 'i' }
26341                 }
26342                 elsif ( $identifier eq '->' ) {
26343                     $i = $i_save;
26344                 }
26345                 else {
26346                     $i = $i_save;
26347                     if ( length($identifier) == 1 ) { $identifier = ''; }
26348                 }
26349                 $id_scan_state = '';
26350                 last;
26351             }
26352         }
26353         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
26354
26355             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
26356                 $id_scan_state = ':';          # now need ::
26357                 $saw_alpha     = 1;
26358                 $identifier .= $tok;
26359             }
26360             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
26361                 $id_scan_state = ':';                 # now need ::
26362                 $saw_alpha     = 1;
26363                 $identifier .= $tok;
26364             }
26365             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
26366                 $id_scan_state = ':';       # now need ::
26367                 $saw_alpha     = 1;
26368                 $identifier .= $tok;
26369             }
26370             elsif ( $tok =~ /^\s*$/ ) {     # allow space
26371             }
26372             elsif ( $tok eq '::' ) {        # leading ::
26373                 $id_scan_state = 'A';       # accept alpha next
26374                 $identifier .= $tok;
26375             }
26376             elsif ( $tok eq '{' ) {
26377                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
26378                 $i             = $i_save;
26379                 $id_scan_state = '';
26380                 last;
26381             }
26382             else {
26383
26384                 # punctuation variable?
26385                 # testfile: cunningham4.pl
26386                 #
26387                 # We have to be careful here.  If we are in an unknown state,
26388                 # we will reject the punctuation variable.  In the following
26389                 # example the '&' is a binary opeator but we are in an unknown
26390                 # state because there is no sigil on 'Prima', so we don't
26391                 # know what it is.  But it is a bad guess that
26392                 # '&~' is a punction variable.
26393                 # $self->{text}->{colorMap}->[
26394                 #   Prima::PodView::COLOR_CODE_FOREGROUND
26395                 #   & ~tb::COLOR_INDEX ] =
26396                 #   $sec->{ColorCode}
26397                 if ( $identifier eq '&' && $expecting ) {
26398                     $identifier .= $tok;
26399                 }
26400                 else {
26401                     $identifier = '';
26402                     $i          = $i_save;
26403                     $type       = '&';
26404                 }
26405                 $id_scan_state = '';
26406                 last;
26407             }
26408         }
26409         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
26410
26411             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
26412                 $identifier .= $tok;
26413                 $id_scan_state = ':';        # now need ::
26414                 $saw_alpha     = 1;
26415             }
26416             elsif ( $tok eq "'" && $allow_tick ) {
26417                 $identifier .= $tok;
26418                 $id_scan_state = ':';        # now need ::
26419                 $saw_alpha     = 1;
26420             }
26421             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
26422                 $identifier .= $tok;
26423                 $id_scan_state = ':';        # now need ::
26424                 $saw_alpha     = 1;
26425             }
26426             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26427                 $id_scan_state = '(';
26428                 $identifier .= $tok;
26429             }
26430             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26431                 $id_scan_state = ')';
26432                 $identifier .= $tok;
26433             }
26434             else {
26435                 $id_scan_state = '';
26436                 $i             = $i_save;
26437                 last;
26438             }
26439         }
26440         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
26441
26442             if ( $tok eq '::' ) {            # got it
26443                 $identifier .= $tok;
26444                 $id_scan_state = 'A';        # now require alpha
26445             }
26446             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
26447                 $identifier .= $tok;
26448                 $id_scan_state = ':';           # now need ::
26449                 $saw_alpha     = 1;
26450             }
26451             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
26452                 $identifier .= $tok;
26453                 $id_scan_state = ':';           # now need ::
26454                 $saw_alpha     = 1;
26455             }
26456             elsif ( $tok eq "'" && $allow_tick ) {    # tick
26457
26458                 if ( $is_keyword{$identifier} ) {
26459                     $id_scan_state = '';              # that's all
26460                     $i             = $i_save;
26461                 }
26462                 else {
26463                     $identifier .= $tok;
26464                 }
26465             }
26466             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26467                 $id_scan_state = '(';
26468                 $identifier .= $tok;
26469             }
26470             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26471                 $id_scan_state = ')';
26472                 $identifier .= $tok;
26473             }
26474             else {
26475                 $id_scan_state = '';        # that's all
26476                 $i             = $i_save;
26477                 last;
26478             }
26479         }
26480         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
26481
26482             if ( $tok eq '(' ) {             # got it
26483                 $identifier .= $tok;
26484                 $id_scan_state = ')';        # now find the end of it
26485             }
26486             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
26487                 $identifier .= $tok;
26488             }
26489             else {
26490                 $id_scan_state = '';         # that's all - no prototype
26491                 $i             = $i_save;
26492                 last;
26493             }
26494         }
26495         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
26496
26497             if ( $tok eq ')' ) {             # got it
26498                 $identifier .= $tok;
26499                 $id_scan_state = '';         # all done
26500                 last;
26501             }
26502             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
26503                 $identifier .= $tok;
26504             }
26505             else {    # probable error in script, but keep going
26506                 warning("Unexpected '$tok' while seeking end of prototype\n");
26507                 $identifier .= $tok;
26508             }
26509         }
26510         else {        # can get here due to error in initialization
26511             $id_scan_state = '';
26512             $i             = $i_save;
26513             last;
26514         }
26515     }
26516
26517     if ( $id_scan_state eq ')' ) {
26518         warning("Hit end of line while seeking ) to end prototype\n");
26519     }
26520
26521     # once we enter the actual identifier, it may not extend beyond
26522     # the end of the current line
26523     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
26524         $id_scan_state = '';
26525     }
26526     if ( $i < 0 ) { $i = 0 }
26527
26528     unless ($type) {
26529
26530         if ($saw_type) {
26531
26532             if ($saw_alpha) {
26533                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
26534                     $type = 'w';
26535                 }
26536                 else { $type = 'i' }
26537             }
26538             elsif ( $identifier eq '->' ) {
26539                 $type = '->';
26540             }
26541             elsif (
26542                 ( length($identifier) > 1 )
26543
26544                 # In something like '@$=' we have an identifier '@$'
26545                 # In something like '$${' we have type '$$' (and only
26546                 # part of an identifier)
26547                 && !( $identifier =~ /\$$/ && $tok eq '{' )
26548                 && ( $identifier !~ /^(sub |package )$/ )
26549               )
26550             {
26551                 $type = 'i';
26552             }
26553             else { $type = 't' }
26554         }
26555         elsif ($saw_alpha) {
26556
26557             # type 'w' includes anything without leading type info
26558             # ($,%,@,*) including something like abc::def::ghi
26559             $type = 'w';
26560         }
26561         else {
26562             $type = '';
26563         }    # this can happen on a restart
26564     }
26565
26566     if ($identifier) {
26567         $tok = $identifier;
26568         if ($message) { write_logfile_entry($message) }
26569     }
26570     else {
26571         $tok = $tok_begin;
26572         $i   = $i_begin;
26573     }
26574
26575     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
26576         my ( $a, $b, $c ) = caller;
26577         print
26578 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
26579         print
26580 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
26581     };
26582     return ( $i, $tok, $type, $id_scan_state, $identifier );
26583 }
26584
26585 {
26586
26587     # saved package and subnames in case prototype is on separate line
26588     my ( $package_saved, $subname_saved );
26589
26590     sub do_scan_sub {
26591
26592         # do_scan_sub parses a sub name and prototype
26593         # it is called with $i_beg equal to the index of the first nonblank
26594         # token following a 'sub' token.
26595
26596         # TODO: add future error checks to be sure we have a valid
26597         # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
26598         # a name is given if and only if a non-anonymous sub is
26599         # appropriate.
26600         # USES GLOBAL VARS: $current_package, $last_nonblank_token,
26601         # $in_attribute_list, %saw_function_definition,
26602         # $statement_type
26603
26604         my (
26605             $input_line, $i,             $i_beg,
26606             $tok,        $type,          $rtokens,
26607             $rtoken_map, $id_scan_state, $max_token_index
26608         ) = @_;
26609         $id_scan_state = "";    # normally we get everything in one call
26610         my $subname = undef;
26611         my $package = undef;
26612         my $proto   = undef;
26613         my $attrs   = undef;
26614         my $match;
26615
26616         my $pos_beg = $$rtoken_map[$i_beg];
26617         pos($input_line) = $pos_beg;
26618
26619         # sub NAME PROTO ATTRS
26620         if (
26621             $input_line =~ m/\G\s*
26622         ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
26623         (\w+)               # NAME    - required
26624         (\s*\([^){]*\))?    # PROTO   - something in parens
26625         (\s*:)?             # ATTRS   - leading : of attribute list
26626         /gcx
26627           )
26628         {
26629             $match   = 1;
26630             $subname = $2;
26631             $proto   = $3;
26632             $attrs   = $4;
26633
26634             $package = ( defined($1) && $1 ) ? $1 : $current_package;
26635             $package =~ s/\'/::/g;
26636             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26637             $package =~ s/::$//;
26638             my $pos  = pos($input_line);
26639             my $numc = $pos - $pos_beg;
26640             $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
26641             $type = 'i';
26642         }
26643
26644         # Look for prototype/attributes not preceded on this line by subname;
26645         # This might be an anonymous sub with attributes,
26646         # or a prototype on a separate line from its sub name
26647         elsif (
26648             $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
26649             (\s*:)?                              # ATTRS leading ':'
26650             /gcx
26651             && ( $1 || $2 )
26652           )
26653         {
26654             $match = 1;
26655             $proto = $1;
26656             $attrs = $2;
26657
26658             # Handle prototype on separate line from subname
26659             if ($subname_saved) {
26660                 $package = $package_saved;
26661                 $subname = $subname_saved;
26662                 $tok     = $last_nonblank_token;
26663             }
26664             $type = 'i';
26665         }
26666
26667         if ($match) {
26668
26669             # ATTRS: if there are attributes, back up and let the ':' be
26670             # found later by the scanner.
26671             my $pos = pos($input_line);
26672             if ($attrs) {
26673                 $pos -= length($attrs);
26674             }
26675
26676             my $next_nonblank_token = $tok;
26677
26678             # catch case of line with leading ATTR ':' after anonymous sub
26679             if ( $pos == $pos_beg && $tok eq ':' ) {
26680                 $type              = 'A';
26681                 $in_attribute_list = 1;
26682             }
26683
26684             # We must convert back from character position
26685             # to pre_token index.
26686             else {
26687
26688                 # I don't think an error flag can occur here ..but ?
26689                 my $error;
26690                 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
26691                     $max_token_index );
26692                 if ($error) { warning("Possibly invalid sub\n") }
26693
26694                 # check for multiple definitions of a sub
26695                 ( $next_nonblank_token, my $i_next ) =
26696                   find_next_nonblank_token_on_this_line( $i, $rtokens,
26697                     $max_token_index );
26698             }
26699
26700             if ( $next_nonblank_token =~ /^(\s*|#)$/ )
26701             {    # skip blank or side comment
26702                 my ( $rpre_tokens, $rpre_types ) =
26703                   peek_ahead_for_n_nonblank_pre_tokens(1);
26704                 if ( defined($rpre_tokens) && @$rpre_tokens ) {
26705                     $next_nonblank_token = $rpre_tokens->[0];
26706                 }
26707                 else {
26708                     $next_nonblank_token = '}';
26709                 }
26710             }
26711             $package_saved = "";
26712             $subname_saved = "";
26713             if ( $next_nonblank_token eq '{' ) {
26714                 if ($subname) {
26715
26716                     # Check for multiple definitions of a sub, but
26717                     # it is ok to have multiple sub BEGIN, etc,
26718                     # so we do not complain if name is all caps
26719                     if (   $saw_function_definition{$package}{$subname}
26720                         && $subname !~ /^[A-Z]+$/ )
26721                     {
26722                         my $lno = $saw_function_definition{$package}{$subname};
26723                         warning(
26724 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
26725                         );
26726                     }
26727                     $saw_function_definition{$package}{$subname} =
26728                       $tokenizer_self->{_last_line_number};
26729                 }
26730             }
26731             elsif ( $next_nonblank_token eq ';' ) {
26732             }
26733             elsif ( $next_nonblank_token eq '}' ) {
26734             }
26735
26736             # ATTRS - if an attribute list follows, remember the name
26737             # of the sub so the next opening brace can be labeled.
26738             # Setting 'statement_type' causes any ':'s to introduce
26739             # attributes.
26740             elsif ( $next_nonblank_token eq ':' ) {
26741                 $statement_type = $tok;
26742             }
26743
26744             # see if PROTO follows on another line:
26745             elsif ( $next_nonblank_token eq '(' ) {
26746                 if ( $attrs || $proto ) {
26747                     warning(
26748 "unexpected '(' after definition or declaration of sub '$subname'\n"
26749                     );
26750                 }
26751                 else {
26752                     $id_scan_state  = 'sub';    # we must come back to get proto
26753                     $statement_type = $tok;
26754                     $package_saved  = $package;
26755                     $subname_saved  = $subname;
26756                 }
26757             }
26758             elsif ($next_nonblank_token) {      # EOF technically ok
26759                 warning(
26760 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
26761                 );
26762             }
26763             check_prototype( $proto, $package, $subname );
26764         }
26765
26766         # no match but line not blank
26767         else {
26768         }
26769         return ( $i, $tok, $type, $id_scan_state );
26770     }
26771 }
26772
26773 #########i###############################################################
26774 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
26775 #########################################################################
26776
26777 sub find_next_nonblank_token {
26778     my ( $i, $rtokens, $max_token_index ) = @_;
26779
26780     if ( $i >= $max_token_index ) {
26781         if ( !peeked_ahead() ) {
26782             peeked_ahead(1);
26783             $rtokens =
26784               peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
26785         }
26786     }
26787     my $next_nonblank_token = $$rtokens[ ++$i ];
26788
26789     if ( $next_nonblank_token =~ /^\s*$/ ) {
26790         $next_nonblank_token = $$rtokens[ ++$i ];
26791     }
26792     return ( $next_nonblank_token, $i );
26793 }
26794
26795 sub numerator_expected {
26796
26797     # this is a filter for a possible numerator, in support of guessing
26798     # for the / pattern delimiter token.
26799     # returns -
26800     #   1 - yes
26801     #   0 - can't tell
26802     #  -1 - no
26803     # Note: I am using the convention that variables ending in
26804     # _expected have these 3 possible values.
26805     my ( $i, $rtokens, $max_token_index ) = @_;
26806     my $next_token = $$rtokens[ $i + 1 ];
26807     if ( $next_token eq '=' ) { $i++; }    # handle /=
26808     my ( $next_nonblank_token, $i_next ) =
26809       find_next_nonblank_token( $i, $rtokens, $max_token_index );
26810
26811     if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
26812         1;
26813     }
26814     else {
26815
26816         if ( $next_nonblank_token =~ /^\s*$/ ) {
26817             0;
26818         }
26819         else {
26820             -1;
26821         }
26822     }
26823 }
26824
26825 sub pattern_expected {
26826
26827     # This is the start of a filter for a possible pattern.
26828     # It looks at the token after a possbible pattern and tries to
26829     # determine if that token could end a pattern.
26830     # returns -
26831     #   1 - yes
26832     #   0 - can't tell
26833     #  -1 - no
26834     my ( $i, $rtokens, $max_token_index ) = @_;
26835     my $next_token = $$rtokens[ $i + 1 ];
26836     if ( $next_token =~ /^[cgimosxp]/ ) { $i++; }    # skip possible modifier
26837     my ( $next_nonblank_token, $i_next ) =
26838       find_next_nonblank_token( $i, $rtokens, $max_token_index );
26839
26840     # list of tokens which may follow a pattern
26841     # (can probably be expanded)
26842     if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
26843     {
26844         1;
26845     }
26846     else {
26847
26848         if ( $next_nonblank_token =~ /^\s*$/ ) {
26849             0;
26850         }
26851         else {
26852             -1;
26853         }
26854     }
26855 }
26856
26857 sub find_next_nonblank_token_on_this_line {
26858     my ( $i, $rtokens, $max_token_index ) = @_;
26859     my $next_nonblank_token;
26860
26861     if ( $i < $max_token_index ) {
26862         $next_nonblank_token = $$rtokens[ ++$i ];
26863
26864         if ( $next_nonblank_token =~ /^\s*$/ ) {
26865
26866             if ( $i < $max_token_index ) {
26867                 $next_nonblank_token = $$rtokens[ ++$i ];
26868             }
26869         }
26870     }
26871     else {
26872         $next_nonblank_token = "";
26873     }
26874     return ( $next_nonblank_token, $i );
26875 }
26876
26877 sub find_angle_operator_termination {
26878
26879     # We are looking at a '<' and want to know if it is an angle operator.
26880     # We are to return:
26881     #   $i = pretoken index of ending '>' if found, current $i otherwise
26882     #   $type = 'Q' if found, '>' otherwise
26883     my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
26884     my $i    = $i_beg;
26885     my $type = '<';
26886     pos($input_line) = 1 + $$rtoken_map[$i];
26887
26888     my $filter;
26889
26890     # we just have to find the next '>' if a term is expected
26891     if ( $expecting == TERM ) { $filter = '[\>]' }
26892
26893     # we have to guess if we don't know what is expected
26894     elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
26895
26896     # shouldn't happen - we shouldn't be here if operator is expected
26897     else { warning("Program Bug in find_angle_operator_termination\n") }
26898
26899     # To illustrate what we might be looking at, in case we are
26900     # guessing, here are some examples of valid angle operators
26901     # (or file globs):
26902     #  <tmp_imp/*>
26903     #  <FH>
26904     #  <$fh>
26905     #  <*.c *.h>
26906     #  <_>
26907     #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
26908     #  <${PREFIX}*img*.$IMAGE_TYPE>
26909     #  <img*.$IMAGE_TYPE>
26910     #  <Timg*.$IMAGE_TYPE>
26911     #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
26912     #
26913     # Here are some examples of lines which do not have angle operators:
26914     #  return undef unless $self->[2]++ < $#{$self->[1]};
26915     #  < 2  || @$t >
26916     #
26917     # the following line from dlister.pl caused trouble:
26918     #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
26919     #
26920     # If the '<' starts an angle operator, it must end on this line and
26921     # it must not have certain characters like ';' and '=' in it.  I use
26922     # this to limit the testing.  This filter should be improved if
26923     # possible.
26924
26925     if ( $input_line =~ /($filter)/g ) {
26926
26927         if ( $1 eq '>' ) {
26928
26929             # We MAY have found an angle operator termination if we get
26930             # here, but we need to do more to be sure we haven't been
26931             # fooled.
26932             my $pos = pos($input_line);
26933
26934             my $pos_beg = $$rtoken_map[$i];
26935             my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
26936
26937             # Reject if the closing '>' follows a '-' as in:
26938             # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
26939             if ( $expecting eq UNKNOWN ) {
26940                 my $check = substr( $input_line, $pos - 2, 1 );
26941                 if ( $check eq '-' ) {
26942                     return ( $i, $type );
26943                 }
26944             }
26945
26946             ######################################debug#####
26947             #write_diagnostics( "ANGLE? :$str\n");
26948             #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
26949             ######################################debug#####
26950             $type = 'Q';
26951             my $error;
26952             ( $i, $error ) =
26953               inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26954
26955             # It may be possible that a quote ends midway in a pretoken.
26956             # If this happens, it may be necessary to split the pretoken.
26957             if ($error) {
26958                 warning(
26959                     "Possible tokinization error..please check this line\n");
26960                 report_possible_bug();
26961             }
26962
26963             # Now let's see where we stand....
26964             # OK if math op not possible
26965             if ( $expecting == TERM ) {
26966             }
26967
26968             # OK if there are no more than 2 pre-tokens inside
26969             # (not possible to write 2 token math between < and >)
26970             # This catches most common cases
26971             elsif ( $i <= $i_beg + 3 ) {
26972                 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
26973             }
26974
26975             # Not sure..
26976             else {
26977
26978                 # Let's try a Brace Test: any braces inside must balance
26979                 my $br = 0;
26980                 while ( $str =~ /\{/g ) { $br++ }
26981                 while ( $str =~ /\}/g ) { $br-- }
26982                 my $sb = 0;
26983                 while ( $str =~ /\[/g ) { $sb++ }
26984                 while ( $str =~ /\]/g ) { $sb-- }
26985                 my $pr = 0;
26986                 while ( $str =~ /\(/g ) { $pr++ }
26987                 while ( $str =~ /\)/g ) { $pr-- }
26988
26989                 # if braces do not balance - not angle operator
26990                 if ( $br || $sb || $pr ) {
26991                     $i    = $i_beg;
26992                     $type = '<';
26993                     write_diagnostics(
26994                         "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
26995                 }
26996
26997                 # we should keep doing more checks here...to be continued
26998                 # Tentatively accepting this as a valid angle operator.
26999                 # There are lots more things that can be checked.
27000                 else {
27001                     write_diagnostics(
27002                         "ANGLE-Guessing yes: $str expecting=$expecting\n");
27003                     write_logfile_entry("Guessing angle operator here: $str\n");
27004                 }
27005             }
27006         }
27007
27008         # didn't find ending >
27009         else {
27010             if ( $expecting == TERM ) {
27011                 warning("No ending > for angle operator\n");
27012             }
27013         }
27014     }
27015     return ( $i, $type );
27016 }
27017
27018 sub scan_number_do {
27019
27020     #  scan a number in any of the formats that Perl accepts
27021     #  Underbars (_) are allowed in decimal numbers.
27022     #  input parameters -
27023     #      $input_line  - the string to scan
27024     #      $i           - pre_token index to start scanning
27025     #    $rtoken_map    - reference to the pre_token map giving starting
27026     #                    character position in $input_line of token $i
27027     #  output parameters -
27028     #    $i            - last pre_token index of the number just scanned
27029     #    number        - the number (characters); or undef if not a number
27030
27031     my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
27032     my $pos_beg = $$rtoken_map[$i];
27033     my $pos;
27034     my $i_begin = $i;
27035     my $number  = undef;
27036     my $type    = $input_type;
27037
27038     my $first_char = substr( $input_line, $pos_beg, 1 );
27039
27040     # Look for bad starting characters; Shouldn't happen..
27041     if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
27042         warning("Program bug - scan_number given character $first_char\n");
27043         report_definite_bug();
27044         return ( $i, $type, $number );
27045     }
27046
27047     # handle v-string without leading 'v' character ('Two Dot' rule)
27048     # (vstring.t)
27049     # TODO: v-strings may contain underscores
27050     pos($input_line) = $pos_beg;
27051     if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
27052         $pos = pos($input_line);
27053         my $numc = $pos - $pos_beg;
27054         $number = substr( $input_line, $pos_beg, $numc );
27055         $type = 'v';
27056         report_v_string($number);
27057     }
27058
27059     # handle octal, hex, binary
27060     if ( !defined($number) ) {
27061         pos($input_line) = $pos_beg;
27062         if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
27063         {
27064             $pos = pos($input_line);
27065             my $numc = $pos - $pos_beg;
27066             $number = substr( $input_line, $pos_beg, $numc );
27067             $type = 'n';
27068         }
27069     }
27070
27071     # handle decimal
27072     if ( !defined($number) ) {
27073         pos($input_line) = $pos_beg;
27074
27075         if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
27076             $pos = pos($input_line);
27077
27078             # watch out for things like 0..40 which would give 0. by this;
27079             if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
27080                 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
27081             {
27082                 $pos--;
27083             }
27084             my $numc = $pos - $pos_beg;
27085             $number = substr( $input_line, $pos_beg, $numc );
27086             $type = 'n';
27087         }
27088     }
27089
27090     # filter out non-numbers like e + - . e2  .e3 +e6
27091     # the rule: at least one digit, and any 'e' must be preceded by a digit
27092     if (
27093         $number !~ /\d/    # no digits
27094         || (   $number =~ /^(.*)[eE]/
27095             && $1 !~ /\d/ )    # or no digits before the 'e'
27096       )
27097     {
27098         $number = undef;
27099         $type   = $input_type;
27100         return ( $i, $type, $number );
27101     }
27102
27103     # Found a number; now we must convert back from character position
27104     # to pre_token index. An error here implies user syntax error.
27105     # An example would be an invalid octal number like '009'.
27106     my $error;
27107     ( $i, $error ) =
27108       inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27109     if ($error) { warning("Possibly invalid number\n") }
27110
27111     return ( $i, $type, $number );
27112 }
27113
27114 sub inverse_pretoken_map {
27115
27116     # Starting with the current pre_token index $i, scan forward until
27117     # finding the index of the next pre_token whose position is $pos.
27118     my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
27119     my $error = 0;
27120
27121     while ( ++$i <= $max_token_index ) {
27122
27123         if ( $pos <= $$rtoken_map[$i] ) {
27124
27125             # Let the calling routine handle errors in which we do not
27126             # land on a pre-token boundary.  It can happen by running
27127             # perltidy on some non-perl scripts, for example.
27128             if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
27129             $i--;
27130             last;
27131         }
27132     }
27133     return ( $i, $error );
27134 }
27135
27136 sub find_here_doc {
27137
27138     # find the target of a here document, if any
27139     # input parameters:
27140     #   $i - token index of the second < of <<
27141     #   ($i must be less than the last token index if this is called)
27142     # output parameters:
27143     #   $found_target = 0 didn't find target; =1 found target
27144     #   HERE_TARGET - the target string (may be empty string)
27145     #   $i - unchanged if not here doc,
27146     #    or index of the last token of the here target
27147     #   $saw_error - flag noting unbalanced quote on here target
27148     my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27149     my $ibeg                 = $i;
27150     my $found_target         = 0;
27151     my $here_doc_target      = '';
27152     my $here_quote_character = '';
27153     my $saw_error            = 0;
27154     my ( $next_nonblank_token, $i_next_nonblank, $next_token );
27155     $next_token = $$rtokens[ $i + 1 ];
27156
27157     # perl allows a backslash before the target string (heredoc.t)
27158     my $backslash = 0;
27159     if ( $next_token eq '\\' ) {
27160         $backslash  = 1;
27161         $next_token = $$rtokens[ $i + 2 ];
27162     }
27163
27164     ( $next_nonblank_token, $i_next_nonblank ) =
27165       find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
27166
27167     if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
27168
27169         my $in_quote    = 1;
27170         my $quote_depth = 0;
27171         my $quote_pos   = 0;
27172         my $quoted_string;
27173
27174         (
27175             $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
27176             $quoted_string
27177           )
27178           = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
27179             $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
27180
27181         if ($in_quote) {    # didn't find end of quote, so no target found
27182             $i = $ibeg;
27183             if ( $expecting == TERM ) {
27184                 warning(
27185 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
27186                 );
27187                 $saw_error = 1;
27188             }
27189         }
27190         else {              # found ending quote
27191             my $j;
27192             $found_target = 1;
27193
27194             my $tokj;
27195             for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
27196                 $tokj = $$rtokens[$j];
27197
27198                 # we have to remove any backslash before the quote character
27199                 # so that the here-doc-target exactly matches this string
27200                 next
27201                   if ( $tokj eq "\\"
27202                     && $j < $i - 1
27203                     && $$rtokens[ $j + 1 ] eq $here_quote_character );
27204                 $here_doc_target .= $tokj;
27205             }
27206         }
27207     }
27208
27209     elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
27210         $found_target = 1;
27211         write_logfile_entry(
27212             "found blank here-target after <<; suggest using \"\"\n");
27213         $i = $ibeg;
27214     }
27215     elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
27216
27217         my $here_doc_expected;
27218         if ( $expecting == UNKNOWN ) {
27219             $here_doc_expected = guess_if_here_doc($next_token);
27220         }
27221         else {
27222             $here_doc_expected = 1;
27223         }
27224
27225         if ($here_doc_expected) {
27226             $found_target    = 1;
27227             $here_doc_target = $next_token;
27228             $i               = $ibeg + 1;
27229         }
27230
27231     }
27232     else {
27233
27234         if ( $expecting == TERM ) {
27235             $found_target = 1;
27236             write_logfile_entry("Note: bare here-doc operator <<\n");
27237         }
27238         else {
27239             $i = $ibeg;
27240         }
27241     }
27242
27243     # patch to neglect any prepended backslash
27244     if ( $found_target && $backslash ) { $i++ }
27245
27246     return ( $found_target, $here_doc_target, $here_quote_character, $i,
27247         $saw_error );
27248 }
27249
27250 sub do_quote {
27251
27252     # follow (or continue following) quoted string(s)
27253     # $in_quote return code:
27254     #   0 - ok, found end
27255     #   1 - still must find end of quote whose target is $quote_character
27256     #   2 - still looking for end of first of two quotes
27257     #
27258     # Returns updated strings:
27259     #  $quoted_string_1 = quoted string seen while in_quote=1
27260     #  $quoted_string_2 = quoted string seen while in_quote=2
27261     my (
27262         $i,               $in_quote,    $quote_character,
27263         $quote_pos,       $quote_depth, $quoted_string_1,
27264         $quoted_string_2, $rtokens,     $rtoken_map,
27265         $max_token_index
27266     ) = @_;
27267
27268     my $in_quote_starting = $in_quote;
27269
27270     my $quoted_string;
27271     if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
27272         my $ibeg = $i;
27273         (
27274             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27275             $quoted_string
27276           )
27277           = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
27278             $quote_pos, $quote_depth, $max_token_index );
27279         $quoted_string_2 .= $quoted_string;
27280         if ( $in_quote == 1 ) {
27281             if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
27282             $quote_character = '';
27283         }
27284         else {
27285             $quoted_string_2 .= "\n";
27286         }
27287     }
27288
27289     if ( $in_quote == 1 ) {    # one (more) quote to follow
27290         my $ibeg = $i;
27291         (
27292             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27293             $quoted_string
27294           )
27295           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27296             $quote_pos, $quote_depth, $max_token_index );
27297         $quoted_string_1 .= $quoted_string;
27298         if ( $in_quote == 1 ) {
27299             $quoted_string_1 .= "\n";
27300         }
27301     }
27302     return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27303         $quoted_string_1, $quoted_string_2 );
27304 }
27305
27306 sub follow_quoted_string {
27307
27308     # scan for a specific token, skipping escaped characters
27309     # if the quote character is blank, use the first non-blank character
27310     # input parameters:
27311     #   $rtokens = reference to the array of tokens
27312     #   $i = the token index of the first character to search
27313     #   $in_quote = number of quoted strings being followed
27314     #   $beginning_tok = the starting quote character
27315     #   $quote_pos = index to check next for alphanumeric delimiter
27316     # output parameters:
27317     #   $i = the token index of the ending quote character
27318     #   $in_quote = decremented if found end, unchanged if not
27319     #   $beginning_tok = the starting quote character
27320     #   $quote_pos = index to check next for alphanumeric delimiter
27321     #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
27322     #   $quoted_string = the text of the quote (without quotation tokens)
27323     my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
27324         $max_token_index )
27325       = @_;
27326     my ( $tok, $end_tok );
27327     my $i             = $i_beg - 1;
27328     my $quoted_string = "";
27329
27330     TOKENIZER_DEBUG_FLAG_QUOTE && do {
27331         print
27332 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
27333     };
27334
27335     # get the corresponding end token
27336     if ( $beginning_tok !~ /^\s*$/ ) {
27337         $end_tok = matching_end_token($beginning_tok);
27338     }
27339
27340     # a blank token means we must find and use the first non-blank one
27341     else {
27342         my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
27343
27344         while ( $i < $max_token_index ) {
27345             $tok = $$rtokens[ ++$i ];
27346
27347             if ( $tok !~ /^\s*$/ ) {
27348
27349                 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
27350                     $i = $max_token_index;
27351                 }
27352                 else {
27353
27354                     if ( length($tok) > 1 ) {
27355                         if ( $quote_pos <= 0 ) { $quote_pos = 1 }
27356                         $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
27357                     }
27358                     else {
27359                         $beginning_tok = $tok;
27360                         $quote_pos     = 0;
27361                     }
27362                     $end_tok     = matching_end_token($beginning_tok);
27363                     $quote_depth = 1;
27364                     last;
27365                 }
27366             }
27367             else {
27368                 $allow_quote_comments = 1;
27369             }
27370         }
27371     }
27372
27373     # There are two different loops which search for the ending quote
27374     # character.  In the rare case of an alphanumeric quote delimiter, we
27375     # have to look through alphanumeric tokens character-by-character, since
27376     # the pre-tokenization process combines multiple alphanumeric
27377     # characters, whereas for a non-alphanumeric delimiter, only tokens of
27378     # length 1 can match.
27379
27380     ###################################################################
27381     # Case 1 (rare): loop for case of alphanumeric quote delimiter..
27382     # "quote_pos" is the position the current word to begin searching
27383     ###################################################################
27384     if ( $beginning_tok =~ /\w/ ) {
27385
27386         # Note this because it is not recommended practice except
27387         # for obfuscated perl contests
27388         if ( $in_quote == 1 ) {
27389             write_logfile_entry(
27390                 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
27391         }
27392
27393         while ( $i < $max_token_index ) {
27394
27395             if ( $quote_pos == 0 || ( $i < 0 ) ) {
27396                 $tok = $$rtokens[ ++$i ];
27397
27398                 if ( $tok eq '\\' ) {
27399
27400                     # retain backslash unless it hides the end token
27401                     $quoted_string .= $tok
27402                       unless $$rtokens[ $i + 1 ] eq $end_tok;
27403                     $quote_pos++;
27404                     last if ( $i >= $max_token_index );
27405                     $tok = $$rtokens[ ++$i ];
27406                 }
27407             }
27408             my $old_pos = $quote_pos;
27409
27410             unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
27411             {
27412
27413             }
27414             $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
27415
27416             if ( $quote_pos > 0 ) {
27417
27418                 $quoted_string .=
27419                   substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
27420
27421                 $quote_depth--;
27422
27423                 if ( $quote_depth == 0 ) {
27424                     $in_quote--;
27425                     last;
27426                 }
27427             }
27428             else {
27429                 $quoted_string .= substr( $tok, $old_pos );
27430             }
27431         }
27432     }
27433
27434     ########################################################################
27435     # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
27436     ########################################################################
27437     else {
27438
27439         while ( $i < $max_token_index ) {
27440             $tok = $$rtokens[ ++$i ];
27441
27442             if ( $tok eq $end_tok ) {
27443                 $quote_depth--;
27444
27445                 if ( $quote_depth == 0 ) {
27446                     $in_quote--;
27447                     last;
27448                 }
27449             }
27450             elsif ( $tok eq $beginning_tok ) {
27451                 $quote_depth++;
27452             }
27453             elsif ( $tok eq '\\' ) {
27454
27455                 # retain backslash unless it hides the beginning or end token
27456                 $tok = $$rtokens[ ++$i ];
27457                 $quoted_string .= '\\'
27458                   unless ( $tok eq $end_tok || $tok eq $beginning_tok );
27459             }
27460             $quoted_string .= $tok;
27461         }
27462     }
27463     if ( $i > $max_token_index ) { $i = $max_token_index }
27464     return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
27465         $quoted_string );
27466 }
27467
27468 sub indicate_error {
27469     my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
27470     interrupt_logfile();
27471     warning($msg);
27472     write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
27473     resume_logfile();
27474 }
27475
27476 sub write_error_indicator_pair {
27477     my ( $line_number, $input_line, $pos, $carrat ) = @_;
27478     my ( $offset, $numbered_line, $underline ) =
27479       make_numbered_line( $line_number, $input_line, $pos );
27480     $underline = write_on_underline( $underline, $pos - $offset, $carrat );
27481     warning( $numbered_line . "\n" );
27482     $underline =~ s/\s*$//;
27483     warning( $underline . "\n" );
27484 }
27485
27486 sub make_numbered_line {
27487
27488     #  Given an input line, its line number, and a character position of
27489     #  interest, create a string not longer than 80 characters of the form
27490     #     $lineno: sub_string
27491     #  such that the sub_string of $str contains the position of interest
27492     #
27493     #  Here is an example of what we want, in this case we add trailing
27494     #  '...' because the line is long.
27495     #
27496     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27497     #
27498     #  Here is another example, this time in which we used leading '...'
27499     #  because of excessive length:
27500     #
27501     # 2: ... er of the World Wide Web Consortium's
27502     #
27503     #  input parameters are:
27504     #   $lineno = line number
27505     #   $str = the text of the line
27506     #   $pos = position of interest (the error) : 0 = first character
27507     #
27508     #   We return :
27509     #     - $offset = an offset which corrects the position in case we only
27510     #       display part of a line, such that $pos-$offset is the effective
27511     #       position from the start of the displayed line.
27512     #     - $numbered_line = the numbered line as above,
27513     #     - $underline = a blank 'underline' which is all spaces with the same
27514     #       number of characters as the numbered line.
27515
27516     my ( $lineno, $str, $pos ) = @_;
27517     my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
27518     my $excess = length($str) - $offset - 68;
27519     my $numc   = ( $excess > 0 ) ? 68 : undef;
27520
27521     if ( defined($numc) ) {
27522         if ( $offset == 0 ) {
27523             $str = substr( $str, $offset, $numc - 4 ) . " ...";
27524         }
27525         else {
27526             $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
27527         }
27528     }
27529     else {
27530
27531         if ( $offset == 0 ) {
27532         }
27533         else {
27534             $str = "... " . substr( $str, $offset + 4 );
27535         }
27536     }
27537
27538     my $numbered_line = sprintf( "%d: ", $lineno );
27539     $offset -= length($numbered_line);
27540     $numbered_line .= $str;
27541     my $underline = " " x length($numbered_line);
27542     return ( $offset, $numbered_line, $underline );
27543 }
27544
27545 sub write_on_underline {
27546
27547     # The "underline" is a string that shows where an error is; it starts
27548     # out as a string of blanks with the same length as the numbered line of
27549     # code above it, and we have to add marking to show where an error is.
27550     # In the example below, we want to write the string '--^' just below
27551     # the line of bad code:
27552     #
27553     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27554     #                 ---^
27555     # We are given the current underline string, plus a position and a
27556     # string to write on it.
27557     #
27558     # In the above example, there will be 2 calls to do this:
27559     # First call:  $pos=19, pos_chr=^
27560     # Second call: $pos=16, pos_chr=---
27561     #
27562     # This is a trivial thing to do with substr, but there is some
27563     # checking to do.
27564
27565     my ( $underline, $pos, $pos_chr ) = @_;
27566
27567     # check for error..shouldn't happen
27568     unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
27569         return $underline;
27570     }
27571     my $excess = length($pos_chr) + $pos - length($underline);
27572     if ( $excess > 0 ) {
27573         $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
27574     }
27575     substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
27576     return ($underline);
27577 }
27578
27579 sub pre_tokenize {
27580
27581     # Break a string, $str, into a sequence of preliminary tokens.  We
27582     # are interested in these types of tokens:
27583     #   words       (type='w'),            example: 'max_tokens_wanted'
27584     #   digits      (type = 'd'),          example: '0755'
27585     #   whitespace  (type = 'b'),          example: '   '
27586     #   any other single character (i.e. punct; type = the character itself).
27587     # We cannot do better than this yet because we might be in a quoted
27588     # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
27589     # tokens.
27590     my ( $str, $max_tokens_wanted ) = @_;
27591
27592     # we return references to these 3 arrays:
27593     my @tokens    = ();     # array of the tokens themselves
27594     my @token_map = (0);    # string position of start of each token
27595     my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
27596
27597     do {
27598
27599         # whitespace
27600         if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
27601
27602         # numbers
27603         # note that this must come before words!
27604         elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
27605
27606         # words
27607         elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
27608
27609         # single-character punctuation
27610         elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
27611
27612         # that's all..
27613         else {
27614             return ( \@tokens, \@token_map, \@type );
27615         }
27616
27617         push @tokens,    $1;
27618         push @token_map, pos($str);
27619
27620     } while ( --$max_tokens_wanted != 0 );
27621
27622     return ( \@tokens, \@token_map, \@type );
27623 }
27624
27625 sub show_tokens {
27626
27627     # this is an old debug routine
27628     my ( $rtokens, $rtoken_map ) = @_;
27629     my $num = scalar(@$rtokens);
27630     my $i;
27631
27632     for ( $i = 0 ; $i < $num ; $i++ ) {
27633         my $len = length( $$rtokens[$i] );
27634         print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
27635     }
27636 }
27637
27638 sub matching_end_token {
27639
27640     # find closing character for a pattern
27641     my $beginning_token = shift;
27642
27643     if ( $beginning_token eq '{' ) {
27644         '}';
27645     }
27646     elsif ( $beginning_token eq '[' ) {
27647         ']';
27648     }
27649     elsif ( $beginning_token eq '<' ) {
27650         '>';
27651     }
27652     elsif ( $beginning_token eq '(' ) {
27653         ')';
27654     }
27655     else {
27656         $beginning_token;
27657     }
27658 }
27659
27660 sub dump_token_types {
27661     my $class = shift;
27662     my $fh    = shift;
27663
27664     # This should be the latest list of token types in use
27665     # adding NEW_TOKENS: add a comment here
27666     print $fh <<'END_OF_LIST';
27667
27668 Here is a list of the token types currently used for lines of type 'CODE'.
27669 For the following tokens, the "type" of a token is just the token itself.
27670
27671 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27672 ( ) <= >= == =~ !~ != ++ -- /= x=
27673 ... **= <<= >>= &&= ||= //= <=>
27674 , + - / * | % ! x ~ = \ ? : . < > ^ &
27675
27676 The following additional token types are defined:
27677
27678  type    meaning
27679     b    blank (white space)
27680     {    indent: opening structural curly brace or square bracket or paren
27681          (code block, anonymous hash reference, or anonymous array reference)
27682     }    outdent: right structural curly brace or square bracket or paren
27683     [    left non-structural square bracket (enclosing an array index)
27684     ]    right non-structural square bracket
27685     (    left non-structural paren (all but a list right of an =)
27686     )    right non-structural parena
27687     L    left non-structural curly brace (enclosing a key)
27688     R    right non-structural curly brace
27689     ;    terminal semicolon
27690     f    indicates a semicolon in a "for" statement
27691     h    here_doc operator <<
27692     #    a comment
27693     Q    indicates a quote or pattern
27694     q    indicates a qw quote block
27695     k    a perl keyword
27696     C    user-defined constant or constant function (with void prototype = ())
27697     U    user-defined function taking parameters
27698     G    user-defined function taking block parameter (like grep/map/eval)
27699     M    (unused, but reserved for subroutine definition name)
27700     P    (unused, but -html uses it to label pod text)
27701     t    type indicater such as %,$,@,*,&,sub
27702     w    bare word (perhaps a subroutine call)
27703     i    identifier of some type (with leading %, $, @, *, &, sub, -> )
27704     n    a number
27705     v    a v-string
27706     F    a file test operator (like -e)
27707     Y    File handle
27708     Z    identifier in indirect object slot: may be file handle, object
27709     J    LABEL:  code block label
27710     j    LABEL after next, last, redo, goto
27711     p    unary +
27712     m    unary -
27713     pp   pre-increment operator ++
27714     mm   pre-decrement operator --
27715     A    : used as attribute separator
27716
27717     Here are the '_line_type' codes used internally:
27718     SYSTEM         - system-specific code before hash-bang line
27719     CODE           - line of perl code (including comments)
27720     POD_START      - line starting pod, such as '=head'
27721     POD            - pod documentation text
27722     POD_END        - last line of pod section, '=cut'
27723     HERE           - text of here-document
27724     HERE_END       - last line of here-doc (target word)
27725     FORMAT         - format section
27726     FORMAT_END     - last line of format section, '.'
27727     DATA_START     - __DATA__ line
27728     DATA           - unidentified text following __DATA__
27729     END_START      - __END__ line
27730     END            - unidentified text following __END__
27731     ERROR          - we are in big trouble, probably not a perl script
27732 END_OF_LIST
27733 }
27734
27735 BEGIN {
27736
27737     # These names are used in error messages
27738     @opening_brace_names = qw# '{' '[' '(' '?' #;
27739     @closing_brace_names = qw# '}' ']' ')' ':' #;
27740
27741     my @digraphs = qw(
27742       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27743       <= >= == =~ !~ != ++ -- /= x= ~~
27744     );
27745     @is_digraph{@digraphs} = (1) x scalar(@digraphs);
27746
27747     my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
27748     @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
27749
27750     # make a hash of all valid token types for self-checking the tokenizer
27751     # (adding NEW_TOKENS : select a new character and add to this list)
27752     my @valid_token_types = qw#
27753       A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
27754       { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
27755       #;
27756     push( @valid_token_types, @digraphs );
27757     push( @valid_token_types, @trigraphs );
27758     push( @valid_token_types, '#' );
27759     push( @valid_token_types, ',' );
27760     @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
27761
27762     # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
27763     my @file_test_operators =
27764       qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
27765     @is_file_test_operator{@file_test_operators} =
27766       (1) x scalar(@file_test_operators);
27767
27768     # these functions have prototypes of the form (&), so when they are
27769     # followed by a block, that block MAY BE followed by an operator.
27770     @_ = qw( do eval );
27771     @is_block_operator{@_} = (1) x scalar(@_);
27772
27773     # these functions allow an identifier in the indirect object slot
27774     @_ = qw( print printf sort exec system say);
27775     @is_indirect_object_taker{@_} = (1) x scalar(@_);
27776
27777     # These tokens may precede a code block
27778     # patched for SWITCH/CASE
27779     @_ =
27780       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
27781       unless do while until eval for foreach map grep sort
27782       switch case given when);
27783     @is_code_block_token{@_} = (1) x scalar(@_);
27784
27785     # I'll build the list of keywords incrementally
27786     my @Keywords = ();
27787
27788     # keywords and tokens after which a value or pattern is expected,
27789     # but not an operator.  In other words, these should consume terms
27790     # to their right, or at least they are not expected to be followed
27791     # immediately by operators.
27792     my @value_requestor = qw(
27793       AUTOLOAD
27794       BEGIN
27795       CHECK
27796       DESTROY
27797       END
27798       EQ
27799       GE
27800       GT
27801       INIT
27802       LE
27803       LT
27804       NE
27805       UNITCHECK
27806       abs
27807       accept
27808       alarm
27809       and
27810       atan2
27811       bind
27812       binmode
27813       bless
27814       break
27815       caller
27816       chdir
27817       chmod
27818       chomp
27819       chop
27820       chown
27821       chr
27822       chroot
27823       close
27824       closedir
27825       cmp
27826       connect
27827       continue
27828       cos
27829       crypt
27830       dbmclose
27831       dbmopen
27832       defined
27833       delete
27834       die
27835       dump
27836       each
27837       else
27838       elsif
27839       eof
27840       eq
27841       exec
27842       exists
27843       exit
27844       exp
27845       fcntl
27846       fileno
27847       flock
27848       for
27849       foreach
27850       formline
27851       ge
27852       getc
27853       getgrgid
27854       getgrnam
27855       gethostbyaddr
27856       gethostbyname
27857       getnetbyaddr
27858       getnetbyname
27859       getpeername
27860       getpgrp
27861       getpriority
27862       getprotobyname
27863       getprotobynumber
27864       getpwnam
27865       getpwuid
27866       getservbyname
27867       getservbyport
27868       getsockname
27869       getsockopt
27870       glob
27871       gmtime
27872       goto
27873       grep
27874       gt
27875       hex
27876       if
27877       index
27878       int
27879       ioctl
27880       join
27881       keys
27882       kill
27883       last
27884       lc
27885       lcfirst
27886       le
27887       length
27888       link
27889       listen
27890       local
27891       localtime
27892       lock
27893       log
27894       lstat
27895       lt
27896       map
27897       mkdir
27898       msgctl
27899       msgget
27900       msgrcv
27901       msgsnd
27902       my
27903       ne
27904       next
27905       no
27906       not
27907       oct
27908       open
27909       opendir
27910       or
27911       ord
27912       our
27913       pack
27914       pipe
27915       pop
27916       pos
27917       print
27918       printf
27919       prototype
27920       push
27921       quotemeta
27922       rand
27923       read
27924       readdir
27925       readlink
27926       readline
27927       readpipe
27928       recv
27929       redo
27930       ref
27931       rename
27932       require
27933       reset
27934       return
27935       reverse
27936       rewinddir
27937       rindex
27938       rmdir
27939       scalar
27940       seek
27941       seekdir
27942       select
27943       semctl
27944       semget
27945       semop
27946       send
27947       sethostent
27948       setnetent
27949       setpgrp
27950       setpriority
27951       setprotoent
27952       setservent
27953       setsockopt
27954       shift
27955       shmctl
27956       shmget
27957       shmread
27958       shmwrite
27959       shutdown
27960       sin
27961       sleep
27962       socket
27963       socketpair
27964       sort
27965       splice
27966       split
27967       sprintf
27968       sqrt
27969       srand
27970       stat
27971       study
27972       substr
27973       symlink
27974       syscall
27975       sysopen
27976       sysread
27977       sysseek
27978       system
27979       syswrite
27980       tell
27981       telldir
27982       tie
27983       tied
27984       truncate
27985       uc
27986       ucfirst
27987       umask
27988       undef
27989       unless
27990       unlink
27991       unpack
27992       unshift
27993       untie
27994       until
27995       use
27996       utime
27997       values
27998       vec
27999       waitpid
28000       warn
28001       while
28002       write
28003       xor
28004
28005       switch
28006       case
28007       given
28008       when
28009       err
28010       say
28011     );
28012
28013     # patched above for SWITCH/CASE given/when err say
28014     # 'err' is a fairly safe addition.
28015     # TODO: 'default' still needed if appropriate
28016     # 'use feature' seen, but perltidy works ok without it.
28017     # Concerned that 'default' could break code.
28018     push( @Keywords, @value_requestor );
28019
28020     # These are treated the same but are not keywords:
28021     my @extra_vr = qw(
28022       constant
28023       vars
28024     );
28025     push( @value_requestor, @extra_vr );
28026
28027     @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
28028
28029     # this list contains keywords which do not look for arguments,
28030     # so that they might be followed by an operator, or at least
28031     # not a term.
28032     my @operator_requestor = qw(
28033       endgrent
28034       endhostent
28035       endnetent
28036       endprotoent
28037       endpwent
28038       endservent
28039       fork
28040       getgrent
28041       gethostent
28042       getlogin
28043       getnetent
28044       getppid
28045       getprotoent
28046       getpwent
28047       getservent
28048       setgrent
28049       setpwent
28050       time
28051       times
28052       wait
28053       wantarray
28054     );
28055
28056     push( @Keywords, @operator_requestor );
28057
28058     # These are treated the same but are not considered keywords:
28059     my @extra_or = qw(
28060       STDERR
28061       STDIN
28062       STDOUT
28063     );
28064
28065     push( @operator_requestor, @extra_or );
28066
28067     @expecting_operator_token{@operator_requestor} =
28068       (1) x scalar(@operator_requestor);
28069
28070     # these token TYPES expect trailing operator but not a term
28071     # note: ++ and -- are post-increment and decrement, 'C' = constant
28072     my @operator_requestor_types = qw( ++ -- C <> q );
28073     @expecting_operator_types{@operator_requestor_types} =
28074       (1) x scalar(@operator_requestor_types);
28075
28076     # these token TYPES consume values (terms)
28077     # note: pp and mm are pre-increment and decrement
28078     # f=semicolon in for,  F=file test operator
28079     my @value_requestor_type = qw#
28080       L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
28081       **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
28082       <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
28083       f F pp mm Y p m U J G j >> << ^ t
28084       #;
28085     push( @value_requestor_type, ',' )
28086       ;    # (perl doesn't like a ',' in a qw block)
28087     @expecting_term_types{@value_requestor_type} =
28088       (1) x scalar(@value_requestor_type);
28089
28090     # Note: the following valid token types are not assigned here to
28091     # hashes requesting to be followed by values or terms, but are
28092     # instead currently hard-coded into sub operator_expected:
28093     # ) -> :: Q R Z ] b h i k n v w } #
28094
28095     # For simple syntax checking, it is nice to have a list of operators which
28096     # will really be unhappy if not followed by a term.  This includes most
28097     # of the above...
28098     %really_want_term = %expecting_term_types;
28099
28100     # with these exceptions...
28101     delete $really_want_term{'U'}; # user sub, depends on prototype
28102     delete $really_want_term{'F'}; # file test works on $_ if no following term
28103     delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
28104                                    # let perl do it
28105
28106     @_ = qw(q qq qw qx qr s y tr m);
28107     @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
28108
28109     # These keywords are handled specially in the tokenizer code:
28110     my @special_keywords = qw(
28111       do
28112       eval
28113       format
28114       m
28115       package
28116       q
28117       qq
28118       qr
28119       qw
28120       qx
28121       s
28122       sub
28123       tr
28124       y
28125     );
28126     push( @Keywords, @special_keywords );
28127
28128     # Keywords after which list formatting may be used
28129     # WARNING: do not include |map|grep|eval or perl may die on
28130     # syntax errors (map1.t).
28131     my @keyword_taking_list = qw(
28132       and
28133       chmod
28134       chomp
28135       chop
28136       chown
28137       dbmopen
28138       die
28139       elsif
28140       exec
28141       fcntl
28142       for
28143       foreach
28144       formline
28145       getsockopt
28146       if
28147       index
28148       ioctl
28149       join
28150       kill
28151       local
28152       msgctl
28153       msgrcv
28154       msgsnd
28155       my
28156       open
28157       or
28158       our
28159       pack
28160       print
28161       printf
28162       push
28163       read
28164       readpipe
28165       recv
28166       return
28167       reverse
28168       rindex
28169       seek
28170       select
28171       semctl
28172       semget
28173       send
28174       setpriority
28175       setsockopt
28176       shmctl
28177       shmget
28178       shmread
28179       shmwrite
28180       socket
28181       socketpair
28182       sort
28183       splice
28184       split
28185       sprintf
28186       substr
28187       syscall
28188       sysopen
28189       sysread
28190       sysseek
28191       system
28192       syswrite
28193       tie
28194       unless
28195       unlink
28196       unpack
28197       unshift
28198       until
28199       vec
28200       warn
28201       while
28202     );
28203     @is_keyword_taking_list{@keyword_taking_list} =
28204       (1) x scalar(@keyword_taking_list);
28205
28206     # These are not used in any way yet
28207     #    my @unused_keywords = qw(
28208     #      CORE
28209     #     __FILE__
28210     #     __LINE__
28211     #     __PACKAGE__
28212     #     );
28213
28214     #  The list of keywords was extracted from function 'keyword' in
28215     #  perl file toke.c version 5.005.03, using this utility, plus a
28216     #  little editing: (file getkwd.pl):
28217     #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
28218     #  Add 'get' prefix where necessary, then split into the above lists.
28219     #  This list should be updated as necessary.
28220     #  The list should not contain these special variables:
28221     #  ARGV DATA ENV SIG STDERR STDIN STDOUT
28222     #  __DATA__ __END__
28223
28224     @is_keyword{@Keywords} = (1) x scalar(@Keywords);
28225 }
28226 1;
28227 __END__
28228
28229 =head1 NAME
28230
28231 Perl::Tidy - Parses and beautifies perl source
28232
28233 =head1 SYNOPSIS
28234
28235     use Perl::Tidy;
28236
28237     Perl::Tidy::perltidy(
28238         source            => $source,
28239         destination       => $destination,
28240         stderr            => $stderr,
28241         argv              => $argv,
28242         perltidyrc        => $perltidyrc,
28243         logfile           => $logfile,
28244         errorfile         => $errorfile,
28245         formatter         => $formatter,           # callback object (see below)
28246         dump_options      => $dump_options,
28247         dump_options_type => $dump_options_type,
28248     );
28249
28250 =head1 DESCRIPTION
28251
28252 This module makes the functionality of the perltidy utility available to perl
28253 scripts.  Any or all of the input parameters may be omitted, in which case the
28254 @ARGV array will be used to provide input parameters as described
28255 in the perltidy(1) man page.
28256
28257 For example, the perltidy script is basically just this:
28258
28259     use Perl::Tidy;
28260     Perl::Tidy::perltidy();
28261
28262 The module accepts input and output streams by a variety of methods.
28263 The following list of parameters may be any of a the following: a
28264 filename, an ARRAY reference, a SCALAR reference, or an object with
28265 either a B<getline> or B<print> method, as appropriate.
28266
28267         source            - the source of the script to be formatted
28268         destination       - the destination of the formatted output
28269         stderr            - standard error output
28270         perltidyrc        - the .perltidyrc file
28271         logfile           - the .LOG file stream, if any
28272         errorfile         - the .ERR file stream, if any
28273         dump_options      - ref to a hash to receive parameters (see below),
28274         dump_options_type - controls contents of dump_options
28275         dump_getopt_flags - ref to a hash to receive Getopt flags
28276         dump_options_category - ref to a hash giving category of options
28277         dump_abbreviations    - ref to a hash giving all abbreviations
28278
28279 The following chart illustrates the logic used to decide how to
28280 treat a parameter.
28281
28282    ref($param)  $param is assumed to be:
28283    -----------  ---------------------
28284    undef        a filename
28285    SCALAR       ref to string
28286    ARRAY        ref to array
28287    (other)      object with getline (if source) or print method
28288
28289 If the parameter is an object, and the object has a B<close> method, that
28290 close method will be called at the end of the stream.
28291
28292 =over 4
28293
28294 =item source
28295
28296 If the B<source> parameter is given, it defines the source of the
28297 input stream.
28298
28299 =item destination
28300
28301 If the B<destination> parameter is given, it will be used to define the
28302 file or memory location to receive output of perltidy.
28303
28304 =item stderr
28305
28306 The B<stderr> parameter allows the calling program to capture the output
28307 to what would otherwise go to the standard error output device.
28308
28309 =item perltidyrc
28310
28311 If the B<perltidyrc> file is given, it will be used instead of any
28312 F<.perltidyrc> configuration file that would otherwise be used.
28313
28314 =item argv
28315
28316 If the B<argv> parameter is given, it will be used instead of the
28317 B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
28318 string, or a reference to an array.  If it is a string or reference to a
28319 string, it will be parsed into an array of items just as if it were a
28320 command line string.
28321
28322 =item dump_options
28323
28324 If the B<dump_options> parameter is given, it must be the reference to a hash.
28325 In this case, the parameters contained in any perltidyrc configuration file
28326 will be placed in this hash and perltidy will return immediately.  This is
28327 equivalent to running perltidy with --dump-options, except that the perameters
28328 are returned in a hash rather than dumped to standard output.  Also, by default
28329 only the parameters in the perltidyrc file are returned, but this can be
28330 changed (see the next parameter).  This parameter provides a convenient method
28331 for external programs to read a perltidyrc file.  An example program using
28332 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
28333
28334 Any combination of the B<dump_> parameters may be used together.
28335
28336 =item dump_options_type
28337
28338 This parameter is a string which can be used to control the parameters placed
28339 in the hash reference supplied by B<dump_options>.  The possible values are
28340 'perltidyrc' (default) and 'full'.  The 'full' parameter causes both the
28341 default options plus any options found in a perltidyrc file to be returned.
28342
28343 =item dump_getopt_flags
28344
28345 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
28346 hash.  This hash will receive all of the parameters that perltidy understands
28347 and flags that are passed to Getopt::Long.  This parameter may be
28348 used alone or with the B<dump_options> flag.  Perltidy will
28349 exit immediately after filling this hash.  See the demo program
28350 F<perltidyrc_dump.pl> for example usage.
28351
28352 =item dump_options_category
28353
28354 If the B<dump_options_category> parameter is given, it must be the reference to a
28355 hash.  This hash will receive a hash with keys equal to all long parameter names
28356 and values equal to the title of the corresponding section of the perltidy manual.
28357 See the demo program F<perltidyrc_dump.pl> for example usage.
28358
28359 =item dump_abbreviations
28360
28361 If the B<dump_abbreviations> parameter is given, it must be the reference to a
28362 hash.  This hash will receive all abbreviations used by Perl::Tidy.  See the
28363 demo program F<perltidyrc_dump.pl> for example usage.
28364
28365 =back
28366
28367 =head1 EXAMPLE
28368
28369 The following example passes perltidy a snippet as a reference
28370 to a string and receives the result back in a reference to
28371 an array.
28372
28373  use Perl::Tidy;
28374
28375  # some messy source code to format
28376  my $source = <<'EOM';
28377  use strict;
28378  my @editors=('Emacs', 'Vi   '); my $rand = rand();
28379  print "A poll of 10 random programmers gave these results:\n";
28380  foreach(0..10) {
28381  my $i=int ($rand+rand());
28382  print " $editors[$i] users are from Venus" . ", " .
28383  "$editors[1-$i] users are from Mars" .
28384  "\n";
28385  }
28386  EOM
28387
28388  # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
28389  my @dest;
28390  perltidy( source => \$source, destination => \@dest );
28391  foreach (@dest) {print}
28392
28393 =head1 Using the B<formatter> Callback Object
28394
28395 The B<formatter> parameter is an optional callback object which allows
28396 the calling program to receive tokenized lines directly from perltidy for
28397 further specialized processing.  When this parameter is used, the two
28398 formatting options which are built into perltidy (beautification or
28399 html) are ignored.  The following diagram illustrates the logical flow:
28400
28401                     |-- (normal route)   -> code beautification
28402   caller->perltidy->|-- (-html flag )    -> create html
28403                     |-- (formatter given)-> callback to write_line
28404
28405 This can be useful for processing perl scripts in some way.  The
28406 parameter C<$formatter> in the perltidy call,
28407
28408         formatter   => $formatter,
28409
28410 is an object created by the caller with a C<write_line> method which
28411 will accept and process tokenized lines, one line per call.  Here is
28412 a simple example of a C<write_line> which merely prints the line number,
28413 the line type (as determined by perltidy), and the text of the line:
28414
28415  sub write_line {
28416
28417      # This is called from perltidy line-by-line
28418      my $self              = shift;
28419      my $line_of_tokens    = shift;
28420      my $line_type         = $line_of_tokens->{_line_type};
28421      my $input_line_number = $line_of_tokens->{_line_number};
28422      my $input_line        = $line_of_tokens->{_line_text};
28423      print "$input_line_number:$line_type:$input_line";
28424  }
28425
28426 The complete program, B<perllinetype>, is contained in the examples section of
28427 the source distribution.  As this example shows, the callback method
28428 receives a parameter B<$line_of_tokens>, which is a reference to a hash
28429 of other useful information.  This example uses these hash entries:
28430
28431  $line_of_tokens->{_line_number} - the line number (1,2,...)
28432  $line_of_tokens->{_line_text}   - the text of the line
28433  $line_of_tokens->{_line_type}   - the type of the line, one of:
28434
28435     SYSTEM         - system-specific code before hash-bang line
28436     CODE           - line of perl code (including comments)
28437     POD_START      - line starting pod, such as '=head'
28438     POD            - pod documentation text
28439     POD_END        - last line of pod section, '=cut'
28440     HERE           - text of here-document
28441     HERE_END       - last line of here-doc (target word)
28442     FORMAT         - format section
28443     FORMAT_END     - last line of format section, '.'
28444     DATA_START     - __DATA__ line
28445     DATA           - unidentified text following __DATA__
28446     END_START      - __END__ line
28447     END            - unidentified text following __END__
28448     ERROR          - we are in big trouble, probably not a perl script
28449
28450 Most applications will be only interested in lines of type B<CODE>.  For
28451 another example, let's write a program which checks for one of the
28452 so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
28453 can slow down processing.  Here is a B<write_line>, from the example
28454 program B<find_naughty.pl>, which does that:
28455
28456  sub write_line {
28457
28458      # This is called back from perltidy line-by-line
28459      # We're looking for $`, $&, and $'
28460      my ( $self, $line_of_tokens ) = @_;
28461
28462      # pull out some stuff we might need
28463      my $line_type         = $line_of_tokens->{_line_type};
28464      my $input_line_number = $line_of_tokens->{_line_number};
28465      my $input_line        = $line_of_tokens->{_line_text};
28466      my $rtoken_type       = $line_of_tokens->{_rtoken_type};
28467      my $rtokens           = $line_of_tokens->{_rtokens};
28468      chomp $input_line;
28469
28470      # skip comments, pod, etc
28471      return if ( $line_type ne 'CODE' );
28472
28473      # loop over tokens looking for $`, $&, and $'
28474      for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
28475
28476          # we only want to examine token types 'i' (identifier)
28477          next unless $$rtoken_type[$j] eq 'i';
28478
28479          # pull out the actual token text
28480          my $token = $$rtokens[$j];
28481
28482          # and check it
28483          if ( $token =~ /^\$[\`\&\']$/ ) {
28484              print STDERR
28485                "$input_line_number: $token\n";
28486          }
28487      }
28488  }
28489
28490 This example pulls out these tokenization variables from the $line_of_tokens
28491 hash reference:
28492
28493      $rtoken_type = $line_of_tokens->{_rtoken_type};
28494      $rtokens     = $line_of_tokens->{_rtokens};
28495
28496 The variable C<$rtoken_type> is a reference to an array of token type codes,
28497 and C<$rtokens> is a reference to a corresponding array of token text.
28498 These are obviously only defined for lines of type B<CODE>.
28499 Perltidy classifies tokens into types, and has a brief code for each type.
28500 You can get a complete list at any time by running perltidy from the
28501 command line with
28502
28503      perltidy --dump-token-types
28504
28505 In the present example, we are only looking for tokens of type B<i>
28506 (identifiers), so the for loop skips past all other types.  When an
28507 identifier is found, its actual text is checked to see if it is one
28508 being sought.  If so, the above write_line prints the token and its
28509 line number.
28510
28511 The B<formatter> feature is relatively new in perltidy, and further
28512 documentation needs to be written to complete its description.  However,
28513 several example programs have been written and can be found in the
28514 B<examples> section of the source distribution.  Probably the best way
28515 to get started is to find one of the examples which most closely matches
28516 your application and start modifying it.
28517
28518 For help with perltidy's pecular way of breaking lines into tokens, you
28519 might run, from the command line,
28520
28521  perltidy -D filename
28522
28523 where F<filename> is a short script of interest.  This will produce
28524 F<filename.DEBUG> with interleaved lines of text and their token types.
28525 The B<-D> flag has been in perltidy from the beginning for this purpose.
28526 If you want to see the code which creates this file, it is
28527 C<write_debug_entry> in Tidy.pm.
28528
28529 =head1 EXPORT
28530
28531   &perltidy
28532
28533 =head1 CREDITS
28534
28535 Thanks to Hugh Myers who developed the initial modular interface
28536 to perltidy.
28537
28538 =head1 VERSION
28539
28540 This man page documents Perl::Tidy version 20090616.
28541
28542 =head1 AUTHOR
28543
28544  Steve Hancock
28545  perltidy at users.sourceforge.net
28546
28547 =head1 SEE ALSO
28548
28549 The perltidy(1) man page describes all of the features of perltidy.  It
28550 can be found at http://perltidy.sourceforge.net.
28551
28552 =cut